import gym
import gfootball.env as football_env
# Class wrapper and imports inferred from the RLlib context of this snippet.
from ray.rllib.env.multi_agent_env import MultiAgentEnv


class RllibGFootball(MultiAgentEnv):
  """Wraps GFootball so several controlled players look like RLlib agents."""

  def __init__(self, num_agents):
    self.env = football_env.create_environment(
        env_name='test_example_multiagent', stacked=False,
        logdir='/tmp/rllib_test',
        write_goal_dumps=False, write_full_episode_dumps=False, render=True,
        dump_frequency=0,
        number_of_left_players_agent_controls=num_agents,
        channel_dimensions=(42, 42))
    # With several controlled players the raw action space is MultiDiscrete;
    # each agent sees one Discrete slice of it.
    self.action_space = gym.spaces.Discrete(self.env.action_space.nvec[1])
    self.observation_space = gym.spaces.Box(
        low=self.env.observation_space.low[0],
        high=self.env.observation_space.high[0],
        dtype=self.env.observation_space.dtype)
    self.num_agents = num_agents
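  # A sketch of the rest of the multi-agent wrapper, assuming per-player
  # observations and rewards come back stacked along axis 0; the 'agent_i'
  # key scheme is illustrative, not from the original snippet.
  def reset(self):
    original_obs = self.env.reset()
    return {'agent_%d' % i: original_obs[i] for i in range(self.num_agents)}

  def step(self, action_dict):
    actions = [action_dict['agent_%d' % i] for i in range(self.num_agents)]
    o, r, d, _ = self.env.step(actions)
    obs = {'agent_%d' % i: o[i] for i in range(self.num_agents)}
    rewards = {'agent_%d' % i: r[i] for i in range(self.num_agents)}
    dones = {'agent_%d' % i: d for i in range(self.num_agents)}
    dones['__all__'] = d  # RLlib's episode-termination convention
    return obs, rewards, dones, {}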
import gfootball.env as football_env

def create_single_football_env(args):
  """Creates gfootball environment."""
  env = football_env.create_environment(
      env_name=args.env_name, stacked=True, with_checkpoints=False)
  return env
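A quick smoke test of the helper, assuming an argparse-style args object with an env_name attribute; the scenario name is just an example:

import argparse

args = argparse.Namespace(env_name='academy_empty_goal_close')
env = create_single_football_env(args)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())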
def play_episode(env, policy):
  # Reconstructed rollout helper: only the trailing `break` and
  # `return total_reward` come from the source; the enclosing loop and the
  # `play_episode`/`policy` names are assumed.
  obs = env.reset()
  total_reward = 0.0
  while True:
    obs, reward, done, _ = env.step(policy(obs))
    total_reward += reward
    if done:
      break
  return total_reward
import numpy as np

def one_hot_encoding(probs):
  """One-hot encodes the row-wise argmax of a batch of probabilities."""
  one_hot = np.zeros_like(probs)
  # Index rows explicitly: `one_hot[:, np.argmax(probs, axis=1)] = 1` would
  # set the argmax columns in every row, not one entry per row.
  one_hot[np.arange(probs.shape[0]), np.argmax(probs, axis=1)] = 1
  return one_hot
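A quick sanity check of the row-indexed version (values are illustrative):

probs = np.array([[0.1, 0.7, 0.2],
                  [0.6, 0.3, 0.1]])
print(one_hot_encoding(probs))
# [[0. 1. 0.]
#  [1. 0. 0.]]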
# Toggle between raw-pixel and flat 115-float vector observations.
image_based = False
if image_based:
  env = football_env.create_environment(
      env_name='academy_empty_goal', representation='pixels', render=True)
else:
  env = football_env.create_environment(
      env_name='academy_empty_goal', representation='simple115')

state = env.reset()
state_dims = env.observation_space.shape
n_actions = env.action_space.n

# Zero placeholders, presumably fed to extra model inputs that a custom
# (PPO-style) Keras loss reads during training; at inference they are unused.
dummy_n = np.zeros((1, 1, n_actions))
dummy_1 = np.zeros((1, 1, 1))

tensor_board = TensorBoard(log_dir='./logs')

if image_based:
  model_actor = get_model_actor_image(input_dims=state_dims, output_dims=n_actions)
  model_critic = get_model_critic_image(input_dims=state_dims)
else:
  model_actor = get_model_actor_simple(input_dims=state_dims, output_dims=n_actions)
  model_critic = get_model_critic_simple(input_dims=state_dims)
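Assuming the actor model was built with the two placeholder tensors as extra inputs (so the custom loss can read them during training), sampling an action for a single state might look like the sketch below; the reshape and greedy argmax are illustrative, not from the original:

state_input = np.expand_dims(state, axis=0)  # add a batch dimension
action_probs = model_actor.predict([state_input, dummy_n, dummy_1])
action = np.argmax(action_probs[0])          # greedy action, for illustration
next_state, reward, done, _ = env.step(action)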
def create_single_football_env(iprocess):
  """Creates gfootball environment."""
  env = football_env.create_environment(
      env_name=FLAGS.level, stacked=('stacked' in FLAGS.state),
      rewards=FLAGS.reward_experiment,
      logdir=logger.get_dir(),
      write_goal_dumps=FLAGS.dump_scores and (iprocess == 0),
      write_full_episode_dumps=FLAGS.dump_full_episodes and (iprocess == 0),
      render=FLAGS.render and (iprocess == 0),
      dump_frequency=50 if FLAGS.render and iprocess == 0 else 0)
  # Only worker 0 renders and dumps traces; each worker gets its own monitor file.
  env = monitor.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(),
                                                               str(iprocess)))
  return env
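This helper is typically fanned out across worker processes; a sketch using OpenAI Baselines' SubprocVecEnv, with an assumed num_envs flag:

from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

# Bind i as a default argument so each lambda keeps its own worker index.
vec_env = SubprocVecEnv([
    (lambda _i=i: create_single_football_env(_i))
    for i in range(FLAGS.num_envs)
])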
# Assumes `import gfootball.env as env` plus chainer/chainerrl imports
# (misc, rmsprop_async) and an A3CFFMellowmax model definition.
def main():
  misc.set_random_seed(0)
  env1 = env.create_environment(
      env_name=global_enviornment_name, render=True, representation='simple115')
  env1 = chainerrl.wrappers.CastObservationToFloat32(env1)
  timestep_limit = 180
  obs_space = env1.observation_space
  print(obs_space)
  action_space = env1.action_space
  print(action_space)
  # simple115 observations are flat vectors, so their length is the input size.
  state_space_size = obs_space.low.size
  # 21 is the action-space size used for this environment.
  model = A3CFFMellowmax(state_space_size, 21)
  opt = rmsprop_async.RMSpropAsync(lr=7e-4, eps=1e-1, alpha=0.99)
  opt.setup(model)
  opt.add_hook(chainer.optimizer.GradientClipping(40))
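From here the usual ChainerRL wiring would continue; a minimal single-process sketch, assuming the standard chainerrl.agents.a3c.A3C constructor and illustrative t_max/gamma values:

from chainerrl.agents import a3c

agent = a3c.A3C(model, opt, t_max=5, gamma=0.99, beta=1e-2)
obs = env1.reset()
reward, done = 0.0, False
for _ in range(timestep_limit):
  action = agent.act_and_train(obs, reward)
  obs, reward, done, _ = env1.step(action)
  if done:
    break
agent.stop_episode_and_train(obs, reward, done)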
def main(unused_argv):
  model = get_inference_model(FLAGS.inference_model)
  env = football_env.create_remote_environment(
      FLAGS.username, FLAGS.token, FLAGS.model_name, track=FLAGS.track,
      representation='extracted', stacked=True,
      include_rendering=FLAGS.render)
  for _ in range(FLAGS.how_many):
    ob = env.reset()
    cnt = 1
    done = False
    while not done:
      try:
        action = model(ob)
        ob, rew, done, _ = env.step(action)
        logging.info('Playing the game, step %d, action %s, rew %s, done %d',
                     cnt, action, rew, done)
        cnt += 1
      except grpc.RpcError as e:
        print(e)
        # Bail out: after an RPC failure `done` can no longer be updated,
        # so retrying here would loop forever.
        break
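The FLAGS above come from absl; a sketch of the flag definitions this snippet appears to assume (names taken from the code, defaults and help strings illustrative):

from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_string('username', '', 'Username to log in with.')
flags.DEFINE_string('token', '', 'Access token for the remote environment.')
flags.DEFINE_string('model_name', '', 'Name the model is registered under.')
flags.DEFINE_string('track', '', 'Competition track to play.')
flags.DEFINE_string('inference_model', '', 'Path to the inference model.')
flags.DEFINE_integer('how_many', 1, 'Number of games to play.')
flags.DEFINE_bool('render', False, 'Whether to render the game.')

if __name__ == '__main__':
  app.run(main)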