from lagom import RandomAgent
from lagom.utils import pickle_dump
from lagom.utils import set_global_seeds
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import run_experiment
from lagom.envs import RecordEpisodeStatistics
from lagom.envs import TimeStepEnv
from baselines.ddpg_td3.ddpg_agent import Agent as DDPGAgent
from baselines.ddpg_td3.td3_agent import Agent as TD3Agent
from baselines.ddpg_td3.engine import Engine
from baselines.ddpg_td3.replay_buffer import ReplayBuffer
config = Config(
{'log.freq': 10,
'checkpoint.num': 3,
'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
'agent.gamma': 0.99,
'agent.polyak': 0.995,  # polyak averaging coefficient for target network updates
'agent.actor.lr': 1e-3,
'agent.actor.use_lr_scheduler': False,
'agent.critic.lr': 1e-3,
'agent.critic.use_lr_scheduler': False,
'agent.action_noise': 0.1,
'agent.max_grad_norm': 999999,  # grad clipping by norm (set very large to effectively disable clipping)
# TD3 hyperparams
'agent.use_td3': True,
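# (The config above is truncated at this point in the snippet.) The 'agent.polyak'
# entry controls the soft update of the target networks. As an illustration of that
# mechanism only (not lagom's actual implementation; names are illustrative), a
# polyak update in plain PyTorch looks roughly like this:
import torch

@torch.no_grad()
def polyak_update(target_net, main_net, polyak=0.995):
    # target <- polyak * target + (1 - polyak) * main, parameter by parameter
    for p_targ, p in zip(target_net.parameters(), main_net.parameters()):
        p_targ.mul_(polyak).add_((1.0 - polyak) * p)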
from lagom.experiment import Grid
from lagom.experiment import Sample
from lagom.experiment import run_experiment
from lagom.envs import make_vec_env
from lagom.envs.wrappers import TimeLimit
from lagom.envs.wrappers import ClipAction
from lagom.envs.wrappers import VecMonitor
from lagom.envs.wrappers import VecStandardizeObservation
from lagom.envs.wrappers import VecStandardizeReward
from lagom.runner import EpisodeRunner
from agent import Agent
from engine import Engine
config = Config(
{'cuda': True,
# GPU device ids to use for this run
'cuda_ids': [1, 2, 3, 5, 6, 7],
'log.dir': 'logs/default',
'log.freq': 10,
'checkpoint.freq': 50,
'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
'env.standardize_obs': True,
'env.standardize_reward': True,
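# (Snippet truncated here.) The 'env.standardize_obs' and 'env.standardize_reward'
# flags enable the running-normalization wrappers imported above. A minimal sketch
# of the idea behind observation standardization, assuming a Welford-style running
# estimate (illustrative only, not the VecStandardizeObservation code):
import numpy as np

class RunningNorm:
    def __init__(self, shape, eps=1e-8):
        self.mean = np.zeros(shape)
        self.var = np.ones(shape)
        self.count = eps

    def update(self, x):
        # combine the running estimate with a new batch of observations
        batch_mean, batch_var, n = x.mean(axis=0), x.var(axis=0), x.shape[0]
        delta = batch_mean - self.mean
        total = self.count + n
        self.mean = self.mean + delta * n / total
        self.var = (self.var * self.count + batch_var * n
                    + delta ** 2 * self.count * n / total) / total
        self.count = total

    def __call__(self, x):
        return (x - self.mean) / np.sqrt(self.var + 1e-8)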
def make_configs(self):
    config = Config()
    config.add_grid(name='cuda', val=[True])
    # Random seeds
    # Generated by:
    #   import numpy as np
    #   np.random.randint(0, np.iinfo(np.int32).max, 10)
    config.add_grid(name='seed', val=[1284204222, 1079618558, 310837894,
                                      1130644153, 2099771862, 1234806135,
                                      92464293, 146053987, 1140885110,
                                      988661500])
    config.add_item(name='lr', val=3e-2)
    config.add_item(name='gamma', val=0.99)
    config.add_item(name='standardize_pg', val=False)
    config.add_item(name='train_iter', val=700)
    config.add_item(name='N', val=1)
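# Each add_grid value is crossed with every other grid to produce one configuration
# per combination, while add_item values are shared by all of them. A rough sketch
# of that expansion (illustrative, not lagom's internals):
from itertools import product

def expand_grid(grid, items):
    # grid: name -> list of values to sweep; items: name -> fixed value
    names = list(grid)
    for combo in product(*(grid[n] for n in names)):
        yield {**dict(zip(names, combo)), **items}

# e.g. with the grids above this yields 1 (cuda) x 10 (seed) = 10 config dicts,
# each also carrying lr, gamma, standardize_pg, train_iter and N.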
from lagom import EpisodeRunner
from lagom.transform import describe
from lagom.utils import CloudpickleWrapper
from lagom.utils import pickle_dump
from lagom.utils import tensorify
from lagom.utils import set_global_seeds
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import run_experiment
from lagom.envs import TimeStepEnv
from baselines.openaies.openaies import OpenAIES
from baselines.openaies.agent import Agent
config = Config(
{'log.freq': 10,
'checkpoint.num': 3,
'env.id': Grid(['Acrobot-v1', 'BipedalWalker-v2', 'Pendulum-v0', 'LunarLanderContinuous-v2']),
'nn.sizes': [64, 64],
# only for continuous control
'env.clip_action': True,  # clip actions to the valid bounds before step()
'agent.std0': 0.6, # initial std
'train.generations': 500, # total number of ES generations
'train.popsize': 32,
'train.worker_chunksize': 4,  # candidates per worker chunk; popsize must be divisible by this
'train.mu0': 0.0,
'train.std0': 1.0,
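# (Truncated here as well.) 'train.mu0', 'train.std0' and 'train.popsize'
# parameterize the search distribution that OpenAI-ES samples from. A bare-bones
# sketch of one generation of that style of evolution strategy (an illustration of
# the idea, not the OpenAIES class; all names are illustrative):
import numpy as np

def es_generation(mu, sigma, popsize, evaluate, lr=1e-2):
    # sample a population of perturbations around the current mean parameters
    eps = np.random.randn(popsize, mu.size)
    returns = np.array([evaluate(mu + sigma * e) for e in eps])
    # rank-normalize the returns and take a gradient step on the mean
    ranks = returns.argsort().argsort() / (popsize - 1) - 0.5
    grad = (ranks[:, None] * eps).mean(axis=0) / sigma
    return mu + lr * grad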
from lagom.experiment import Grid
from lagom.experiment import Sample
from lagom.experiment import Condition
from lagom.experiment import run_experiment
from lagom.envs import make_vec_env
from lagom.envs.wrappers import TimeLimit
from lagom.envs.wrappers import NormalizeAction
from lagom.envs.wrappers import VecMonitor
from lagom.envs.wrappers import VecStepInfo
from baselines.sac.agent import Agent
from baselines.sac.engine import Engine
from baselines.sac.replay_buffer import ReplayBuffer
config = Config(
{'log.freq': 1000, # every n timesteps
'checkpoint.num': 3,
'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
'agent.gamma': 0.99,
'agent.polyak': 0.995,  # polyak averaging coefficient for target network updates
'agent.actor.lr': 3e-4,
'agent.actor.use_lr_scheduler': False,
'agent.critic.lr': 3e-4,
'agent.critic.use_lr_scheduler': False,
'agent.initial_temperature': 1.0,
'agent.max_grad_norm': 999999,  # grad clipping by norm (set very large to effectively disable clipping)
'replay.capacity': 1000000,
# number of time steps to take uniform actions initially
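# The snippet breaks off right after this comment. Of the entries that are visible,
# 'agent.initial_temperature' is the starting value of SAC's entropy coefficient
# alpha, which is typically tuned toward a target entropy during training. A rough
# sketch of that update (illustrative; the variable and function names are
# assumptions, not lagom's API):
import torch

log_alpha = torch.zeros(1, requires_grad=True)  # alpha = exp(log_alpha), so alpha starts at 1.0
alpha_optimizer = torch.optim.Adam([log_alpha], lr=3e-4)

def update_temperature(log_prob, target_entropy):
    # push alpha up when policy entropy falls below the target, down otherwise
    alpha_loss = -(log_alpha * (log_prob + target_entropy).detach()).mean()
    alpha_optimizer.zero_grad()
    alpha_loss.backward()
    alpha_optimizer.step()
    return log_alpha.exp().item()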
from lagom.experiment import Grid
from lagom.experiment import Sample
from lagom.experiment import Condition
from lagom.experiment import run_experiment
from lagom.envs import make_vec_env
from lagom.envs.wrappers import TimeLimit
from lagom.envs.wrappers import ClipAction
from lagom.envs.wrappers import VecMonitor
from lagom.envs.wrappers import VecStepInfo
from baselines.ddpg.agent import Agent
from baselines.ddpg.engine import Engine
from baselines.ddpg.replay_buffer import ReplayBuffer
config = Config(
{'log.freq': 1000, # every n timesteps
'checkpoint.num': 3,
'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
'agent.gamma': 0.99,
'agent.polyak': 0.995,  # polyak averaging coefficient for target network updates
'agent.actor.lr': 1e-3,
'agent.actor.use_lr_scheduler': False,
'agent.critic.lr': 1e-3,
'agent.critic.use_lr_scheduler': False,
'agent.action_noise': 0.1,
'agent.max_grad_norm': 999999,  # grad clipping by norm (set very large to effectively disable clipping)
'replay.capacity': 1000000,
# number of time steps to take uniform actions initially
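# Also truncated after this comment. Among the visible entries, 'agent.action_noise'
# is the std of the Gaussian exploration noise DDPG adds to its deterministic action
# before stepping the environment. A small sketch of that step, assuming actions
# bounded in [low, high] (illustrative names, not lagom's code):
import numpy as np

def noisy_action(action, noise_std=0.1, low=-1.0, high=1.0):
    # add zero-mean Gaussian noise, then clip back into the valid action range
    action = action + noise_std * np.random.randn(*np.shape(action))
    return np.clip(action, low, high)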
from lagom import EpisodeRunner
from lagom.transform import describe
from lagom.utils import CloudpickleWrapper
from lagom.utils import pickle_dump
from lagom.utils import tensorify
from lagom.utils import set_global_seeds
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import run_experiment
from lagom.envs import TimeStepEnv
from lagom import CEM
from baselines.cem.agent import Agent
config = Config(
{'log.freq': 10,
'checkpoint.num': 3,
'env.id': Grid(['Acrobot-v1', 'BipedalWalker-v2', 'Pendulum-v0', 'LunarLanderContinuous-v2']),
'nn.sizes': [64, 64],
# only for continuous control
'env.clip_action': True,  # clip actions to the valid bounds before step()
'agent.std0': 0.6, # initial std
'train.generations': 500, # total number of ES generations
'train.popsize': 32,
'train.worker_chunksize': 4,  # candidates per worker chunk; popsize must be divisible by this
'train.mu0': 0.0,
'train.std0': 1.0,
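# (Truncated again.) The CEM config above feeds the usual cross-entropy-method loop:
# sample a population, keep the top fraction, refit the Gaussian to the elites. A
# sketch of one generation (the algorithm in general, not the CEM class; names are
# illustrative):
import numpy as np

def cem_generation(mu, std, popsize, evaluate, elite_frac=0.2):
    # sample candidates, evaluate them, and refit mean/std to the best ones
    candidates = mu + std * np.random.randn(popsize, mu.size)
    returns = np.array([evaluate(c) for c in candidates])
    elite_idx = returns.argsort()[::-1][:max(1, int(popsize * elite_frac))]
    elites = candidates[elite_idx]
    return elites.mean(axis=0), elites.std(axis=0)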
from lagom.utils import pickle_dump
from lagom.utils import set_global_seeds
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import run_experiment
from lagom.envs import RecordEpisodeStatistics
from lagom.envs import NormalizeObservation
from lagom.envs import NormalizeReward
from lagom.envs import TimeStepEnv
from baselines.vpg.agent import Agent
from baselines.vpg.agent_lstm import Agent as LSTMAgent
from baselines.vpg.engine import Engine
config = Config(
{'log.freq': 10,
'checkpoint.num': 3,
'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3']),
'env.normalize_obs': True,
'env.normalize_reward': True,
'use_lstm': Grid([True, False]),
'rnn.size': 128,
'nn.sizes': [64, 64],
'agent.lr': 1e-3,
'agent.use_lr_scheduler': False,
'agent.gamma': 0.99,
'agent.gae_lambda': 0.97,
'agent.standardize_adv': True, # standardize advantage estimates
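# (Truncated here.) 'agent.gamma', 'agent.gae_lambda' and 'agent.standardize_adv'
# together describe generalized advantage estimation followed by per-batch
# standardization. A compact sketch of both, for a single trajectory without early
# termination (illustrative, not the Agent's code):
import numpy as np

def gae(rewards, values, last_value, gamma=0.99, lam=0.97, standardize=True):
    # values holds V(s_t) for each step; last_value bootstraps the final state
    values = np.append(values, last_value)
    adv = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * values[t + 1] - values[t]
        running = delta + gamma * lam * running
        adv[t] = running
    if standardize:
        adv = (adv - adv.mean()) / (adv.std() + 1e-8)
    return adv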
from lagom.experiment import Sample
from lagom.experiment import run_experiment
from lagom.envs import make_vec_env
from lagom.envs.wrappers import TimeLimit
from lagom.envs.wrappers import ClipAction
from lagom.envs.wrappers import VecMonitor
from lagom.envs.wrappers import VecStandardizeObservation
from lagom.envs.wrappers import VecStandardizeReward
from lagom.envs.wrappers import VecStepInfo
from lagom.runner import EpisodeRunner
from baselines.vpg.agent import Agent
from baselines.vpg.engine import Engine
config = Config(
{'cuda': False,  # CPU is a bit faster for this workload
'log.dir': 'logs/default',
'log.freq': 10,
'checkpoint.num': 3,
'env.id': Grid(['HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'Swimmer-v3']),
'env.standardize_obs': True,
'env.standardize_reward': True,
'nn.sizes': [64, 64],
'agent.lr': 1e-3,
'agent.use_lr_scheduler': False,
'agent.gamma': 0.99,
'agent.gae_lambda': 0.97,
'agent.standardize_adv': True, # standardize advantage estimates
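# (Truncated.) This older vectorized-env variant of the VPG experiment uses the same
# advantage settings. For completeness, the objective those standardized advantages
# feed into looks roughly like this (a sketch, not the Engine's training step; the
# value_coef weighting is an assumption):
import torch
import torch.nn.functional as F

def vpg_loss(log_probs, advantages, values, returns, value_coef=0.5):
    # maximize log pi(a|s) * A(s,a); regress the value head toward the returns
    policy_loss = -(log_probs * advantages.detach()).mean()
    value_loss = F.mse_loss(values, returns)
    return policy_loss + value_coef * value_loss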
from lagom import EpisodeRunner
from lagom.transform import describe
from lagom.utils import CloudpickleWrapper
from lagom.utils import pickle_dump
from lagom.utils import tensorify
from lagom.utils import set_global_seeds
from lagom.experiment import Config
from lagom.experiment import Grid
from lagom.experiment import run_experiment
from lagom.envs import TimeStepEnv
from lagom import CMAES
from baselines.cmaes.agent import Agent
config = Config(
{'log.freq': 10,
'checkpoint.num': 3,
'env.id': Grid(['Acrobot-v1', 'BipedalWalker-v2', 'Pendulum-v0', 'LunarLanderContinuous-v2']),
'nn.sizes': [64, 64],
# only for continuous control
'env.clip_action': True,  # clip actions to the valid bounds before step()
'agent.std0': 0.6, # initial std
'train.generations': 500, # total number of ES generations
'train.popsize': 32,
'train.worker_chunksize': 4,  # candidates per worker chunk; popsize must be divisible by this
'train.mu0': 0.0,
'train.std0': 1.0,
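# (Truncated like the others.) The mu0/std0/popsize entries map onto the usual
# CMA-ES ask/tell loop. A hedged sketch using the standalone `cma` package; lagom's
# CMAES class may be implemented differently, and run_cmaes/evaluate are
# illustrative names:
import numpy as np
import cma

def run_cmaes(evaluate, dim, mu0=0.0, std0=1.0, popsize=32, generations=500):
    es = cma.CMAEvolutionStrategy(mu0 * np.ones(dim), std0, {'popsize': popsize})
    for _ in range(generations):
        solutions = es.ask()                           # sample a population of parameter vectors
        fitnesses = [-evaluate(x) for x in solutions]  # cma minimizes, so negate episodic returns
        es.tell(solutions, fitnesses)
    return es.result.xbest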