How to use gym - 10 common examples

To help you get started, we’ve selected a few gym examples based on popular ways the library is used in public projects.
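Before the project-specific snippets, here is a minimal sketch of the core gym loop (make, reset, step) that most of the examples below build on. The environment id 'CartPole-v0' is only an illustrative choice; any registered environment works the same way with this classic API.

import gym

# Create an environment, reset it, and run one episode with random actions.
env = gym.make('CartPole-v0')
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    action = env.action_space.sample()          # sample a random action from the action space
    obs, reward, done, info = env.step(action)  # classic 4-tuple step API used in the snippets below
    total_reward += reward
env.close()
print('Episode reward:', total_reward)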

github fabiopardo/qmap - qmap/envs/custom_mario.py
        self.frame_skip = frame_skip
        n_frames = stack * (3 * use_color + 1 * (not use_color) + use_rc_frame)
        self.frames = deque([], maxlen=(self.frame_skip * (self.stack - 1) + 1))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, n_frames))
        # coordinates
        self.coords_ratio = coords_ratio
        assert coords_ratio % screen_ratio == 0, (coords_ratio, screen_ratio)
        self.coords_screen_ratio = coords_ratio // screen_ratio
        self.coords_height = self.original_height // coords_ratio
        self.coords_width = self.original_width // coords_ratio
        self.coords_shape = (self.coords_height, self.coords_width)
        # actions
        self.action_names = ['JUMP+LEFT', 'JUMP', 'JUMP+RIGHT', 'LEFT', 'NOOP', 'RIGHT']
        self.action_list = [actions[n] for n in self.action_names]
        n_actions = len(self.action_list)
        self.action_space = spaces.Discrete(n_actions)
        self.action_repeat = action_repeat
        # miscellaneous
        frame_name = 'RGB' if use_color else 'G'
        if use_rc_frame: frame_name += 'C'
        self.name = 'CustomSuperMarioAllStars_{}_obs{}x{}x{}x{}_qframes{}x{}x{}_skip{}_repeat{}-v0'.format(
            level, *self.screen_shape, frame_name, stack, *self.coords_shape, n_actions, frame_skip, action_repeat)

github fabiopardo/qmap - qmap/envs/custom_montezuma.py
    def __init__(self, screen_ratio=4, coords_ratio=4, use_color=True, use_rc_frame=True, stack=3, frame_skip=4, action_repeat=4):
        utils.EzPickle.__init__(self, 'montezuma_revenge', 'image')
        self.env = gym.make('MontezumaRevengeNoFrameskip-v4').unwrapped
        self.ale = self.env.ale
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), 0) # deterministic
        self.max_lives = self.ale.lives()
        # observations
        self.screen_ratio = screen_ratio
        self.original_height = 224
        self.original_width = 160
        self.screen_height = self.original_height // screen_ratio
        self.screen_width = self.original_width // screen_ratio
        self.screen_shape = (self.screen_height, self.screen_width)
        self.use_color = use_color
        self.use_rc_frame = use_rc_frame
        self.stack = stack
        self.frame_skip = frame_skip
        n_frames = stack * (3 * use_color + 1 * (not use_color) + use_rc_frame)

github showkeyjar/AutoMakeHuman - test/async-rl/async_dqn.py
def evaluation(session, graph_ops, saver):
    saver.restore(session, FLAGS.checkpoint_path)
    print("Restored model weights from", FLAGS.checkpoint_path)
    monitor_env = gym.make(FLAGS.game)
    monitor_env.monitor.start(FLAGS.eval_dir + "/" + FLAGS.experiment + "/eval")

    # Unpack graph ops
    s = graph_ops["s"]
    q_values = graph_ops["q_values"]

    # Wrap env with AtariEnvironment helper class
    env = AtariEnvironment(gym_env=monitor_env, resized_width=FLAGS.resized_width, resized_height=FLAGS.resized_height,
                           agent_history_length=FLAGS.agent_history_length)

    for i_episode in range(FLAGS.num_eval_episodes):
        s_t = env.get_initial_state()
        ep_reward = 0
        terminal = False
        while not terminal:
            monitor_env.render()
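This snippet targets an old gym release in which environments exposed a built-in monitor attribute; that API was later removed. Assuming a gym version that still ships gym.wrappers.Monitor, a roughly equivalent wrapper-based setup would look like the sketch below (the environment id and output directory are placeholders, not taken from the original code).

import gym
from gym import wrappers

# Record evaluation episodes with the Monitor wrapper instead of env.monitor.start(...).
monitor_env = gym.make('Breakout-v0')
monitor_env = wrappers.Monitor(monitor_env, '/tmp/eval', force=True)

obs = monitor_env.reset()
obs, reward, done, info = monitor_env.step(monitor_env.action_space.sample())
monitor_env.close()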

github showkeyjar/AutoMakeHuman - test/async-rl/a3c.py
def train(session, graph_ops, saver):
    # Set up game environments (one per thread)
    envs = [gym.make(GAME) for i in range(NUM_CONCURRENT)]
    
    summary_ops = setup_summaries()
    summary_op = summary_ops[-1]

    # Initialize variables
    session.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(SUMMARY_SAVE_PATH, session.graph)

    # Start NUM_CONCURRENT training threads
    actor_learner_threads = [threading.Thread(target=actor_learner_thread, args=(thread_id, envs[thread_id], session, graph_ops, summary_ops, saver)) for thread_id in range(NUM_CONCURRENT)]
    for t in actor_learner_threads:
        t.start()

    # Show the agents training and write summary statistics
    last_summary_time = 0
    while True:

github learnables/cherry - tests/dummy_env.py
    def __init__(self):
        low = np.array([-5, -5, -5, -5, -5])
        high = -low  # i.e. np.array([5, 5, 5, 5, 5])
        self.observation_space = gym.spaces.Box(low, high, dtype=np.float32)
        self.action_space = gym.spaces.Box(low, high, dtype=np.float32)
        self.rng = random.Random()
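The dummy environment above just needs well-formed spaces; a small illustrative follow-up (not part of the cherry source) shows the basic Box API it relies on.

import numpy as np
import gym

# Build the same symmetric 5-dimensional Box and exercise its basic API.
low = np.array([-5, -5, -5, -5, -5], dtype=np.float32)
space = gym.spaces.Box(low, -low, dtype=np.float32)
sample = space.sample()        # a random point inside the bounds
print(space.contains(sample))  # True
print(space.shape)             # (5,)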

github chainer/chainerrl - tests/wrappers_tests/test_atari_wrappers.py
                def dtyped_rand():
                    return np_random.rand(1, 84, 84).astype(self.dtype)
                low, high = -1.0, 3.14
            else:
                assert False
            env.reset.side_effect = [dtyped_rand() for _ in range(steps)]
            env.step.side_effect = [
                (
                    dtyped_rand(),
                    np_random.rand(),
                    bool(np_random.randint(2)),
                    {},
                )
                for _ in range(steps)]
            env.action_space = gym.spaces.Discrete(2)
            env.observation_space = gym.spaces.Box(
                low=low, high=high, shape=(1, 84, 84), dtype=self.dtype)
            return env

github Stable-Baselines-Team/stable-baselines - tests/test_utils.py
@pytest.mark.parametrize("wrapper_class", [None, gym.wrappers.TimeLimit])
def test_make_vec_env(env_id, n_envs, wrapper_class, use_subprocess):
    env = make_vec_env(env_id, n_envs, use_subprocess=use_subprocess,
                       wrapper_class=wrapper_class, monitor_dir=None, seed=0)

    assert env.num_envs == n_envs

    if not use_subprocess:
        assert isinstance(env, DummyVecEnv)
        if wrapper_class is not None:
            assert isinstance(env.envs[0], wrapper_class)
        else:
            assert isinstance(env.envs[0], Monitor)
    else:
        assert isinstance(env, SubprocVecEnv)
    # Kill subprocesses
    env.close()

github bowenliu16/rl_graph_generation - rl-baselines/baselines/ppo1/gcn_policy.py
if __name__ == "__main__":
    adj_np = np.ones((5,3,4,4))
    adj = tf.placeholder(shape=(5,3,4,4),dtype=tf.float32)
    node_feature_np = np.ones((5,1,4,3))
    node_feature = tf.placeholder(shape=(5,1,4,3),dtype=tf.float32)


    ob_space = {}
    atom_type = 5
    ob_space['adj'] = gym.Space(shape=[3,5,5])
    ob_space['node'] = gym.Space(shape=[1,5,atom_type])
    ac_space = gym.spaces.MultiDiscrete([10, 10, 3])
    policy = GCNPolicy(name='policy',ob_space=ob_space,ac_space=ac_space)

    stochastic = True
    env = gym.make('molecule-v0')  # in gym format
    env.init()
    ob = env.reset()

    # ob['adj'] = np.repeat(ob['adj'][None],2,axis=0)
    # ob['node'] = np.repeat(ob['node'][None],2,axis=0)

    print('adj',ob['adj'].shape)
    print('node',ob['node'].shape)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20):
            ob = env.reset()
            for j in range(0,20):
                ac,vpred,debug = policy.act(stochastic,ob)
                # if ac[0]==ac[1]:
                #     print('error')

github dragen1860/TensorFlow-2.x-Tutorials - 17-A2C/a2c.py
        # from_logits argument ensures transformation into normalized probabilities
        weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
        # policy loss is defined by policy gradients, weighted by advantages
        # note: we only calculate the loss on the actions we've actually taken
        actions = tf.cast(actions, tf.int32)
        policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)
        # entropy loss can be calculated via CE over itself
        entropy_loss = kls.categorical_crossentropy(logits, logits, from_logits=True)
        # here signs are flipped because optimizer minimizes
        return policy_loss - self.params['entropy']*entropy_loss


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)

    env = gym.make('CartPole-v0')
    model = Model(num_actions=env.action_space.n)
    agent = A2CAgent(model)
    
    rewards_history = agent.train(env)
    print("Finished training.")
    print("Total Episode Reward: %d out of 200" % agent.test(env, True))
    
    plt.style.use('seaborn')
    plt.plot(np.arange(0, len(rewards_history), 25), rewards_history[::25])
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.show()

github tensorflow/tensor2tensor - tensor2tensor/trax/rl/ppo_trainer.py
    self._max_timestep_eval = max_timestep_eval
    self._gamma = gamma
    self._lambda_ = lambda_
    self._c1 = c1
    self._c2 = c2
    self._eval_every_n = eval_every_n
    self._save_every_n = save_every_n
    self._done_frac_for_policy_save = done_frac_for_policy_save
    self._n_evals = n_evals
    self._len_history_for_policy = len_history_for_policy
    self._eval_temperatures = eval_temperatures
    self._separate_eval = separate_eval

    action_space = self.train_env.action_space
    assert isinstance(
        action_space, (gym.spaces.Discrete, gym.spaces.MultiDiscrete))
    if isinstance(action_space, gym.spaces.Discrete):
      n_actions = action_space.n
      n_controls = 1
    else:
      (n_controls,) = action_space.nvec.shape
      assert n_controls > 0
      assert onp.min(action_space.nvec) == onp.max(action_space.nvec), (
          "Every control must have the same number of actions.")
      n_actions = action_space.nvec[0]
    self._n_actions = n_actions
    self._n_controls = n_controls

    self._rng = trax.get_random_number_generator_and_set_seed(random_seed)
    self._rng, key1 = jax_random.split(self._rng, num=2)

    vocab_size = policy_and_value_vocab_size
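The branch above distinguishes Discrete from MultiDiscrete action spaces by reading n versus nvec. A small standalone illustration of that distinction (not taken from the trax source):

import gym

# A single control with 6 possible actions.
discrete = gym.spaces.Discrete(6)
print(discrete.n)          # 6

# Three controls, each with 10 actions (the code above requires them all to match).
multi = gym.spaces.MultiDiscrete([10, 10, 10])
print(multi.nvec.shape)    # (3,)
print(multi.nvec[0])       # 10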