How to use the parl.utils.logger module in parl

To help you get started, we’ve selected a few parl examples based on popular ways parl.utils.logger is used in public projects.
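
At its core, parl.utils.logger is a drop-in, preconfigured logger: you import it and call logger.info (or logger.warning, logger.error) directly, with no handler setup required. A minimal sketch of the basic call pattern, matching the str.format style used throughout the examples below (the message contents are illustrative):

from parl.utils import logger

# No configuration needed: import and log.
logger.info('Episode {}, Reward Sum {}.'.format(0, 21.0))
logger.warning('replay memory is not yet full')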

PaddlePaddle/PARL: examples/EagerMode/QuickStart/train.py
def main():
    env = gym.make('CartPole-v0')
    model = CartpoleModel(name_scope='noIdeaWhyNeedThis', act_dim=ACT_DIM)
    alg = PolicyGradient(model, LEARNING_RATE)
    agent = CartpoleAgent(alg, OBS_DIM, ACT_DIM)

    with fluid.dygraph.guard():
        for i in range(1000):  # 1000 episodes
            obs_list, action_list, reward_list = run_episode(env, agent)
            if i % 10 == 0:
                logger.info("Episode {}, Reward Sum {}.".format(
                    i, sum(reward_list)))

            batch_obs = np.array(obs_list)
            batch_action = np.array(action_list)
            batch_reward = calc_reward_to_go(reward_list)

            agent.learn(batch_obs, batch_action, batch_reward)
            if (i + 1) % 100 == 0:
                _, _, reward_list = run_episode(
                    env, agent, train_or_test='test')
                total_reward = np.sum(reward_list)
                logger.info('Test reward: {}'.format(total_reward))
PaddlePaddle/PARL: examples/NeurIPS2018-AI-for-Prosthetics-Challenge/final_submit/submit_model.py
def _load_params(self, dirname):
        logger.info('[{}]: Loading model from {}'.format(
            self.stage_name, dirname))
        fluid.io.load_params(
            executor=self.fluid_executor,
            dirname=dirname,
            main_program=self.ensemble_predict_program)
PaddlePaddle/PARL: examples/ES/train.py
def log_metrics(self, metrics):
        logger.info(metrics)
        for k, v in metrics.items():
            if v is not None:
                tensorboard.add_scalar(k, v, self.sample_total_steps)
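
The log_metrics pattern above pairs human-readable logging with scalar tracking: the whole metrics dict is echoed via logger.info, and each non-None value is also written as a TensorBoard scalar. A self-contained sketch of the same idea, assuming tensorboard is importable from parl.utils as in the ES example (the metric names and step value are made up):

from parl.utils import logger, tensorboard

def log_metrics(metrics, step):
    # Echo the full dict to the log, then track each scalar over time.
    logger.info(metrics)
    for k, v in metrics.items():
        if v is not None:
            tensorboard.add_scalar(k, v, step)

log_metrics({'episode_reward': 195.2, 'actor_loss': None}, step=1000)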
PaddlePaddle/PARL: examples/NeurIPS2019-Learn-to-Move-Challenge/evaluate.py
                    'env_reward': episode_env_reward,
                    'episode_length': mem[-1].info['frame_count'],
                    'falldown': not mem[-1].info['timeout'],
                })
                logger.info('{}, finish_cnt: {}'.format(
                    self.cur_model, len(self.evaluate_result)))
                logger.info('{}'.format(self.evaluate_result[-1]))
                if len(self.evaluate_result) >= args.evaluate_times:
                    mean_value = {}
                    for key in self.evaluate_result[0].keys():
                        mean_value[key] = np.mean(
                            [x[key] for x in self.evaluate_result])
                    logger.info('Model: {}, mean_value: {}'.format(
                        self.cur_model, mean_value))

                    eval_num = len(self.evaluate_result)
                    falldown_num = len(
                        [x for x in self.evaluate_result if x['falldown']])
                    falldown_rate = falldown_num / eval_num
                    logger.info('Falldown rate: {}'.format(falldown_rate))
                    for key in self.evaluate_result[0].keys():
                        mean_value[key] = np.mean([
                            x[key] for x in self.evaluate_result
                            if not x['falldown']
                        ])
                    logger.info(
                        'Model: {}, Exclude falldown, mean_value: {}'.format(
                            self.cur_model, mean_value))
                    if mean_value['shaping_reward'] > self.best_shaping_reward:
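
The evaluate.py snippet is truncated, but its aggregation step is simple: average every metric over all evaluation episodes, then recompute the averages over only the episodes that did not end in a fall. A self-contained sketch of that pattern (the result records here are hypothetical):

import numpy as np
from parl.utils import logger

evaluate_result = [
    {'shaping_reward': 9.8, 'env_reward': 40.1, 'falldown': False},
    {'shaping_reward': 4.3, 'env_reward': 12.7, 'falldown': True},
]

# Mean over all episodes (the falldown flag averages to the falldown rate).
mean_value = {k: np.mean([x[k] for x in evaluate_result])
              for k in evaluate_result[0]}
# Mean over only the episodes that stayed upright.
no_fall = [x for x in evaluate_result if not x['falldown']]
mean_no_fall = {k: np.mean([x[k] for x in no_fall]) for k in no_fall[0]}

logger.info('mean: {}, excluding falldown: {}'.format(mean_value, mean_no_fall))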
kosoraYintai/PARL-Sample: flappy_bird/Train_Bird_PriorityRPM.py
def restore(agent):
    learnDir = os.path.join(logger.get_dir(), 'learn')
    predictDir = os.path.join(logger.get_dir(), 'predict')
    print('restore model from {}'.format(learnDir))
    agent.load_params(learnDir, predictDir)
PaddlePaddle/PARL: examples/NeurIPS2019-Learn-to-Move-Challenge/train.py
def save_rpm(self):
        save_path = os.path.join(logger.get_dir(), "rpm.npz")
        self.rpm.save(save_path)
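
Both of the preceding snippets lean on logger.get_dir() as a per-run output directory, so checkpoints and replay memory land next to the log files. The directory can also be set explicitly up front; this sketch assumes logger.set_dir is available, as in other PARL examples (verify against your PARL version):

import os
from parl.utils import logger

logger.set_dir('./train_log/my_experiment')  # log files go here too
save_path = os.path.join(logger.get_dir(), 'rpm.npz')
logger.info('saving replay memory to {}'.format(save_path))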
PaddlePaddle/PARL: examples/QuickStart/train.py
def main():
    env = gym.make("CartPole-v0")
    model = CartpoleModel(act_dim=ACT_DIM)
    alg = parl.algorithms.PolicyGradient(model, lr=LEARNING_RATE)
    agent = CartpoleAgent(alg, obs_dim=OBS_DIM, act_dim=ACT_DIM)

    # if the file already exists, restore parameters from it
    if os.path.exists('./model.ckpt'):
        agent.restore('./model.ckpt')

    for i in range(1000):
        obs_list, action_list, reward_list = run_episode(env, agent)
        if i % 10 == 0:
            logger.info("Episode {}, Reward Sum {}.".format(
                i, sum(reward_list)))

        batch_obs = np.array(obs_list)
        batch_action = np.array(action_list)
        batch_reward = calc_reward_to_go(reward_list)

        agent.learn(batch_obs, batch_action, batch_reward)
        if (i + 1) % 100 == 0:
            _, _, reward_list = run_episode(env, agent, train_or_test='test')
            total_reward = np.sum(reward_list)
            logger.info('Test reward: {}'.format(total_reward))

    # save the parameters to ./model.ckpt
    agent.save('./model.ckpt')
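
One last knob worth knowing is verbosity. Loops like the one above emit a line every ten episodes; if your PARL version's logger exposes set_level with standard logging levels (an assumption here, not confirmed by these examples), the info-level chatter can be silenced:

import logging
from parl.utils import logger

# Assumption: logger.set_level accepts standard logging levels; check your
# PARL version before relying on this.
logger.set_level(logging.WARNING)
logger.info('suppressed at WARNING level')
logger.warning('still printed')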