@ray.remote
def run_driver():
    output = run_string_as_driver(driver_script)
    assert "success" in output

iteration = 0
running_ids = [
    run_driver._remote(
        args=[], kwargs={}, num_cpus=0, resources={str(i): 0.01})
    for i in range(num_nodes)
]
start_time = time.time()
previous_time = start_time
while True:
    # Wait for a driver to finish and start a new driver.
    [ready_id], running_ids = ray.wait(running_ids, num_returns=1)
    ray.get(ready_id)
    running_ids.append(
        run_driver._remote(
            args=[],
            kwargs={},
            num_cpus=0,
            resources={str(iteration % num_nodes): 0.01}))
    new_time = time.time()
    print("Iteration {}:\n"
          "  - Iteration time: {}.\n"
          "  - Absolute time: {}.\n"
          "  - Total elapsed time: {}.".format(
              iteration, new_time - previous_time, new_time,
              new_time - start_time))
    previous_time = new_time
    iteration += 1
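# A minimal, standalone sketch (toy task and made-up names, not from the
# snippet above) of the same pattern: block in ray.wait for any one task to
# finish, then immediately submit a replacement so a fixed number stay in
# flight.
import time
import ray

ray.init()

@ray.remote
def toy_driver(i):
    time.sleep(0.1)
    return i

in_flight = [toy_driver.remote(i) for i in range(4)]
next_index = 4
for _ in range(20):
    [done_ref], in_flight = ray.wait(in_flight, num_returns=1)
    print("finished:", ray.get(done_ref))
    in_flight.append(toy_driver.remote(next_index))
    next_index += 1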
# Inside the sampler's collection loop: briefly wait on workers whose
# parameters are still being updated; any that finish become idle again.
updated, updating_workers = ray.wait(updating_workers,
                                     num_returns=1,
                                     timeout=0.1)
upd = [ray.get(up) for up in updated]
self._idle_worker_ids.extend(upd)

# if there are idle workers, use them to collect trajectories
# mark the newly busy workers as active
while self._idle_worker_ids:
    idle_worker_id = self._idle_worker_ids.pop()
    self._active_worker_ids.append(idle_worker_id)
    worker = self._all_workers[idle_worker_id]
    _active_workers.append(worker.rollout.remote())

# check which workers are done/not done collecting a sample
# if any are done, send them to process the collected trajectory
# if they are not, keep checking if they are done
ready, not_ready = ray.wait(_active_workers,
                            num_returns=1,
                            timeout=0.001)
_active_workers = not_ready
for result in ready:
    trajectory, num_returned_samples = self._process_trajectory(result)
    completed_samples += num_returned_samples
    pbar.inc(num_returned_samples)
    traj.append(trajectory)

pbar.stop()
return traj
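# A minimal, standalone sketch (toy rollout task, not the sampler above) of
# polling with a short timeout: ray.wait returns whatever is already done,
# possibly nothing, so the caller can keep doing other bookkeeping in between.
import time
import ray

ray.init()

@ray.remote
def rollout(i):
    time.sleep(0.05 * i)
    return i

active = [rollout.remote(i) for i in range(5)]
results = []
while active:
    ready, active = ray.wait(active, num_returns=1, timeout=0.001)
    for ref in ready:
        results.append(ray.get(ref))
    # ...other work could be interleaved here while tasks are in flight...
print(results)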
file_left = args.left
file_size_left = os.path.getsize(file_left)
file_right = args.right
file_size_right = os.path.getsize(file_right)

if not os.path.exists(os.path.split(args.logfile)[0]):
    os.makedirs(os.path.split(args.logfile)[0])
logging.basicConfig(filename=args.logfile, level=logging.INFO)

df_left = pd.read_csv(file_left)
df_right = pd.read_csv(file_right)

# Force the block partitions of both dataframes to materialize before timing.
blocks = df_left._block_partitions.flatten().tolist()
ray.wait(blocks, len(blocks))
blocks = df_right._block_partitions.flatten().tolist()
ray.wait(blocks, len(blocks))

with time_logger(
        "Inner Join: {} & {}; Left Size: {} bytes; Right Size: {} "
        "bytes".format(file_left, file_right, file_size_left, file_size_right)
):
    result = df_left.join(df_right, how="inner", lsuffix="left_")
    ray.wait(result._block_partitions.flatten().tolist())

with time_logger(
        "Outer Join: {} & {}; Left Size: {} bytes; Right Size: {} "
        "bytes".format(file_left, file_right, file_size_left, file_size_right)
):
    result = df_left.join(df_right, how="outer", lsuffix="left_")
    ray.wait(result._block_partitions.flatten().tolist())
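# A minimal, standalone sketch (toy task, not the benchmark above) of the
# "block until everything is materialized" idiom used around the timed
# regions: asking ray.wait for num_returns equal to the list length only
# returns once every object ref is ready.
import ray

ray.init()

@ray.remote
def build_block(i):
    return list(range(i))

blocks = [build_block.remote(i) for i in range(8)]
ready, remaining = ray.wait(blocks, num_returns=len(blocks))
assert not remaining  # every block has been computed at this point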
import ray

ray.init()
sync_todo_id = sync.remote(hyperparams['agent'])
print('launched sync')

for i in range(n_worker):
    if i == n_worker - 1:
        detach = ''
    else:
        detach = '&'
    cmd = "python bench_worker_cmd.py {} {} {} {} {}".format(
        args.experiment, gpu_id, start_idx[i], end_idx[i], detach)
    print(cmd)
    os.system(cmd)

if 'master_datadir' in hyperparams['agent']:
    ray.wait([sync_todo_id])

if 'benchmarks' in hyperparams_file:
    if 'RESULT_DIR' in os.environ:
        result_dir = os.environ['RESULT_DIR']
    else:
        result_dir = hyperparams['current_dir']
    combine_scores(hyperparams, result_dir)
    sys.exit()

traindir = hyperparams['agent']["data_save_dir"]
testdir = '/'.join(traindir.split('/')[:-1] + ['/test'])
if not os.path.exists(testdir):
    os.makedirs(testdir)

import shutil
files = glob.glob(traindir + '/*')
files = sorted_alphanumeric(files)
grad_buffer = {k: np.zeros_like(v) for k, v in model.items()}
# Update the rmsprop memory.
rmsprop_cache = {k: np.zeros_like(v) for k, v in model.items()}
actors = [PongEnv.remote() for _ in range(batch_size)]

iteration = 0
while iteration != args.iterations:
    iteration += 1
    model_id = ray.put(model)
    actions = []
    # Launch tasks to compute gradients from multiple rollouts in parallel.
    start_time = time.time()
    for i in range(batch_size):
        action_id = actors[i].compute_gradient.remote(model_id)
        actions.append(action_id)
    for i in range(batch_size):
        # Fetch gradients in whatever order the rollouts finish.
        action_id, actions = ray.wait(actions)
        grad, reward_sum = ray.get(action_id[0])
        # Accumulate the gradient over batch.
        for k in model:
            grad_buffer[k] += grad[k]
        running_reward = (reward_sum if running_reward is None else
                          running_reward * 0.99 + reward_sum * 0.01)
    end_time = time.time()
    print("Batch {} computed {} rollouts in {} seconds, "
          "running mean is {}".format(iteration, batch_size,
                                      end_time - start_time,
                                      running_reward))
    # Apply the accumulated gradients with RMSProp and reset the buffer.
    for k, v in model.items():
        g = grad_buffer[k]
        rmsprop_cache[k] = (
            decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2)
        model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
        grad_buffer[k] = np.zeros_like(v)
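# A toy, self-contained illustration (made-up gradient and hyperparameter
# values) of the RMSProp step above: keep an exponential moving average of
# squared gradients and scale each update by its square root.
import numpy as np

decay_rate = 0.99
learning_rate = 1e-4
param = np.zeros(3)
cache = np.zeros(3)
grad = np.array([0.5, -1.0, 2.0])

cache = decay_rate * cache + (1 - decay_rate) * grad**2
param += learning_rate * grad / (np.sqrt(cache) + 1e-5)
print(param)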
def train(self):
    print("Running main training loop...")
    ray.wait(
        [actor.run.remote() for actor in self.all_actors]
    )
def completed(self, blocking_wait=False):
    pending = list(self._tasks)
    if pending:
        ready, _ = ray.wait(pending, num_returns=len(pending), timeout=0)
        if not ready and blocking_wait:
            ready, _ = ray.wait(pending, num_returns=1, timeout=10.0)
        for obj_id in ready:
            yield (self._tasks.pop(obj_id), self._objects.pop(obj_id))
def loop(self):
    """Main loop for the router. It does the following things:

    1. Check which running actors finished.
    2. Iterate over free actors and request queues, dispatching request
       batches to free actors.
    3. Tail-recursively schedule itself.
    """
    # 1. Check which running actors finished.
    ready_oids, _ = ray.wait(
        object_ids=list(self.running_queries.keys()),
        num_returns=len(self.running_queries),
        timeout=0,
    )
    for ready_oid in ready_oids:
        self.running_queries.pop(ready_oid)
    busy_actors: Set[ray.actor.ActorHandle] = set(
        self.running_queries.values())

    # 2. Iterate over free actors and request queues, dispatching request
    # batches to free actors.
    for actor_name, queue in self.query_queues.items():
        # try to drain the queue
        for actor_handle in self.actor_handles[actor_name]:
            if len(queue) == 0:
                break
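# A minimal, standalone sketch (hypothetical Worker actor, not the router
# class above) of the loop the docstring describes: poll finished queries
# with a zero timeout, then hand queued requests to actors that are not busy.
import ray

ray.init()

@ray.remote
class Worker:
    def handle(self, request):
        return "handled {}".format(request)

workers = [Worker.remote() for _ in range(2)]
queue = ["req-{}".format(i) for i in range(5)]
running = {}  # object ref -> the actor handle computing it

while queue or running:
    if running:
        # 1. Non-blocking check for finished queries.
        ready, _ = ray.wait(list(running.keys()),
                            num_returns=len(running),
                            timeout=0)
        for ref in ready:
            print(ray.get(ref))
            running.pop(ref)
    # 2. Dispatch queued requests to actors that are currently free.
    busy = set(running.values())
    for worker in workers:
        if queue and worker not in busy:
            running[worker.handle.remote(queue.pop(0))] = worker
    # (A real router would reschedule itself instead of spinning like this.)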
if 'data_save_dir' in hyperparams['agent']:
    modconf['record_saver'] = record_queue
    modconf['counter'] = counter
conflist.append(modconf)

if parallel:
    p = Pool(n_worker)
    p.map(use_worker, conflist)
else:
    use_worker(conflist[0], args.iex, args.ngpu)

if 'data_save_dir' in hyperparams['agent'] and not hyperparams.get('save_raw_images', False):
    record_queue.put(None)  # send flag to background thread that it can end saving after it's done
    record_saver_proc.join()  # joins thread and continues execution

if 'master_datadir' in hyperparams['agent']:
    ray.wait([sync_todo_id])

if args.do_benchmark:
    pdb.set_trace()
    combine_scores(hyperparams, result_dir)
    sys.exit()