config = ppo.DEFAULT_CONFIG.copy()
config['num_workers'] = N_CPUS
config['train_batch_size'] = HORIZON * N_ROLLOUTS
config['gamma'] = 0.999 # discount rate
config['model'].update({'fcnet_hiddens': [64, 64]})
config['clip_actions'] = True
config['horizon'] = HORIZON
config['vf_share_layers'] = True
# config['use_centralized_vf'] = False
# config['max_vf_agents'] = 140
# config['simple_optimizer'] = True
# config['vf_clip_param'] = 100
# Grid search things
config['lr'] = tune.grid_search([5e-5, 5e-4])
config['num_sgd_iter'] = tune.grid_search([10, 30])
# LSTM Things
# config['model']['use_lstm'] = tune.grid_search([True, False])
config['model']['lstm_use_prev_action_reward'] = True
# config['model']['max_seq_len'] = tune.grid_search([5, 10])
config['model']['lstm_cell_size'] = 64
# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run
create_env, env_name = make_create_env(params=flow_params, version=0)
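# A minimal sketch (not from the snippet above) of how a config like this is
# typically launched with Ray Tune: register the Flow environment, then call
# run_experiments so each tune.grid_search entry expands into its own trial.
# The CPU count, stopping criterion, checkpoint frequency, and the
# flow_params['exp_tag'] key are assumptions for illustration only.
import ray
from ray.tune import run_experiments
from ray.tune.registry import register_env

ray.init(num_cpus=N_CPUS + 1)
register_env(env_name, create_env)
run_experiments({
    flow_params['exp_tag']: {
        'run': alg_run,    # e.g. 'PPO'
        'env': env_name,
        'config': config,
        'checkpoint_freq': 20,
        'max_failures': 999,
        'stop': {'training_iteration': 200},
        'num_samples': 1,
    },
})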
from softlearning.samplers.utils import get_sampler_from_variant
from softlearning.misc.nn import feedforward_model
from softlearning.value_functions.utils import (
    create_Q_function_from_variant,
    create_V_function_from_variant)
from examples.utils import (
    parse_universe_domain_task,
    get_parser,
    launch_experiments_rllab)
COMMON_PARAMS = {
    'algorithm_params': {
        'type': 'DIAYN'
    },
    'seed': tune.grid_search([1]),
    'lr': 3e-4,
    'discount': 0.99,
    'tau': 0.01,
    'K': 4,
    'layer_size': 256,
    'batch_size': 128,
    'n_train_repeat': 1,
    'epoch_length': 1000,
    'snapshot_mode': 'gap',
    'snapshot_gap': 10,
    'sync_pkl': True,
    'num_skills': 50,
    'scale_entropy': 0.1,
    'include_actions': False,
    'learn_p_z': False,
    'add_p_z': True,
}
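# A minimal sketch (not from the snippet above) of how a shared dict like
# COMMON_PARAMS is usually merged with per-environment overrides before being
# handed to Tune; ENV_PARAMS, get_variant_spec, and the 'hopper' entry are
# hypothetical placeholders.
ENV_PARAMS = {
    'hopper': {
        'env_name': 'Hopper-v2',
        'epoch_length': 3000,
    },
}

def get_variant_spec(env):
    # Later keys override the shared defaults; tune.grid_search values such as
    # 'seed' still expand into separate trials after the merge.
    return {**COMMON_PARAMS, **ENV_PARAMS[env]}

variant_spec = get_variant_spec('hopper')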
"""Does using VecNormalize make a difference in performance?
(Answer: not much after we rescaled reward; before the reward clipping had big effect.)"""
train = dict(train)
_sparse_reward(train)
train["total_timesteps"] = int(5e6)
train["learning_rate"] = 2.5e-4
train["batch_size"] = 2048
train["rl_args"] = {"ent_coef": 0.00}
spec = {
    "config": {
        "env_name": tune.grid_search(
            ["multicomp/KickAndDefend-v0", "multicomp/SumoAnts-v0"],
        ),
        "seed": tune.grid_search(list(range(3))),
        "embed_path": tune.grid_search(["1", "2", "3"]),
        "normalize": tune.grid_search([True, False]),
    },
}
exp_name = "vec_normalize"
_ = locals() # quieten flake8 unused variable warning
del _
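# The nested grid_search entries above multiply out to
# 2 env_names x 3 seeds x 3 embed paths x 2 normalize settings = 36 trials.
# A minimal sketch (not from the snippet above) of launching such a spec with
# Tune; train_rl is a hypothetical trainable standing in for the real
# training entry point.
from ray import tune

def train_rl(config):
    # placeholder: read config["env_name"], config["seed"], etc. and train
    tune.report(score=0.0)

tune.run(train_rl, config=spec["config"], name=exp_name)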
def _generic_finetune_defense(train, dual_defense=False, envs=None, exp_suffix=""):
    """Finetune a victim policy against an adversary.

    This is the most generic helper method, used as a base for
    `_hyper_finetune_defense` and `_finetune_defense`.
    """
    _sparse_reward(train)
    train["num_env"] = 16  # TODO(adam): cleaner way of allowing finetuning LSTMs
    train["normalize_observations"] = False
    ray_config = {
        FINETUNE_PATHS_TYPES: tune.grid_search(
            _finetune_configs(envs=envs, dual_defense=dual_defense)
        ),
    }
    dual_name = "dual" if dual_defense else "single"
    exp_name = f"finetune_defense_{dual_name}_{exp_suffix}"
    return ray_config, exp_name
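# The docstring above mentions _finetune_defense building on this helper; a
# thin wrapper along these lines (its name, suffix, and defaults are
# assumptions, not the repository's actual code) would simply forward its
# arguments:
def _finetune_defense(train, envs=None):
    # single-opponent finetuning variant
    return _generic_finetune_defense(
        train, dual_defense=False, envs=envs, exp_suffix="basic"
    )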
str
    name of the training algorithm
str
    name of the gym environment to be trained
dict
    training configuration parameters
"""
alg_run = 'PPO'
agent_cls = get_agent_class(alg_run)
config = agent_cls._default_config.copy()
config['num_workers'] = N_CPUS
config['train_batch_size'] = HORIZON * N_ROLLOUTS
config['simple_optimizer'] = True
config['gamma'] = 0.999 # discount rate
config['model'].update({'fcnet_hiddens': [32, 32]})
config['lr'] = tune.grid_search([1e-5])
config['horizon'] = HORIZON
config['clip_actions'] = False
config['observation_filter'] = 'NoFilter'
# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run
create_env, env_name = make_create_env(params=flow_params, version=0)
# register as rllib env
register_env(env_name, create_env)
# multiagent configuration
config = ppo.DEFAULT_CONFIG.copy()
config['num_workers'] = N_CPUS
config['train_batch_size'] = HORIZON * N_ROLLOUTS
config['gamma'] = 0.999 # discount rate
config['model'].update({'fcnet_hiddens': [64, 64]})
config['clip_actions'] = True
config['horizon'] = HORIZON
config['vf_share_layers'] = True
# config['use_centralized_vf'] = False
# config['max_vf_agents'] = 140
# config['simple_optimizer'] = True
# config['vf_clip_param'] = 100
# Grid search things
config['lr'] = tune.grid_search([5e-5, 5e-4])
config['num_sgd_iter'] = tune.grid_search([10, 30])
# LSTM Things
# config['model']['use_lstm'] = tune.grid_search([True, False])
config['model']['lstm_use_prev_action_reward'] = True
# config['model']['max_seq_len'] = tune.grid_search([5, 10])
config['model']['lstm_cell_size'] = 64
# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run
create_env, env_name = make_create_env(params=flow_params, version=0)
# register as rllib env
register_env(env_name, create_env)
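# A minimal sketch (not from the snippet above) of the extra multiagent wiring
# RLlib needs on top of this config: a policy specification plus a mapping
# from agent ids to policies. A single shared policy named 'av' is assumed
# here; older Ray versions use the key 'policy_graphs' with a policy graph
# class in place of None.
test_env = create_env()
obs_space = test_env.observation_space
act_space = test_env.action_space

config['multiagent'] = {
    'policies': {'av': (None, obs_space, act_space, {})},
    'policy_mapping_fn': lambda agent_id: 'av',
}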
def _prepare_tune_config(self, space):
    tune_config = {}
    for k, v in space.items():
        if isinstance(v, RandomSample):
            # wrap callables so Tune samples a fresh value for each trial
            tune_config[k] = tune.sample_from(v.func)
        elif isinstance(v, GridSearch):
            # expand explicit value lists into a grid search
            tune_config[k] = tune.grid_search(v.values)
        else:
            # plain values pass through unchanged
            tune_config[k] = v
    return tune_config
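# Minimal stand-ins (not the real definitions, which are not shown here) for
# the RandomSample and GridSearch wrappers referenced above, plus an example
# of the space dict this method expects.
import random
from dataclasses import dataclass
from typing import Any, Callable, List

@dataclass
class RandomSample:
    func: Callable  # callable that Tune invokes to draw a value per trial

@dataclass
class GridSearch:
    values: List[Any]  # explicit values to sweep over

space = {
    "lr": RandomSample(lambda spec: random.uniform(1e-5, 1e-3)),
    "batch_size": GridSearch([128, 256]),
    "gamma": 0.99,
}
# Assuming `searcher` is an instance of the (unshown) class defining
# _prepare_tune_config:
# tune_config = searcher._prepare_tune_config(space)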