@classmethod
def setUpClass(cls):
    super().setUpClass()
    # Discrete-action GAIL: action_dim is the number of discrete actions
    cls.irl_discrete = GAIL(
        state_shape=cls.discrete_env.observation_space.shape,
        action_dim=cls.discrete_env.action_space.n,
        gpu=-1)
    # Continuous-action GAIL: action_dim is the size of the action vector
    cls.irl_continuous = GAIL(
        state_shape=cls.continuous_env.observation_space.shape,
        action_dim=cls.continuous_env.action_space.low.size,
        gpu=-1)
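As a usage aside, here is a minimal sketch of how a discriminator-based reward could be queried from a GAIL instance built like the ones above. It assumes a `disc` attribute exposing the `compute_reward` interface shown in the later snippets, and uses `Pendulum-v0` as a stand-in continuous environment; it is not taken from the tf2rl test suite.

import gym
import numpy as np
from tf2rl.algos.gail import GAIL

env = gym.make("Pendulum-v0")
irl = GAIL(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.low.size,
    gpu=-1)

# Hypothetical batch of transitions; shapes follow the constructor arguments above.
batch_size = 4
states = np.stack(
    [env.observation_space.sample() for _ in range(batch_size)]).astype(np.float32)
actions = np.stack(
    [env.action_space.sample() for _ in range(batch_size)]).astype(np.float32)

# The discriminator concatenates its inputs along axis=1 (see the snippets below),
# so a [states, actions] list is a compatible input for reward inference.
rewards = irl.disc.compute_reward([states, actions])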
import roboschool
import gym

from tf2rl.algos.ddpg import DDPG
from tf2rl.algos.gail import GAIL
from tf2rl.experiments.irl_trainer import IRLTrainer
from tf2rl.experiments.utils import restore_latest_n_traj


if __name__ == '__main__':
    parser = IRLTrainer.get_argument()
    parser = GAIL.get_argument(parser)
    parser.add_argument('--env-name', type=str, default="RoboschoolReacher-v1")
    args = parser.parse_args()

    if args.expert_path_dir is None:
        print("Please generate demonstrations first")
        print("python examples/run_sac.py --env-name=RoboschoolReacher-v1 --save-test-path --test-interval=50000")
        exit()

    units = [400, 300]
    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    # DDPG policy optimized against the learned GAIL reward
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        max_action=env.action_space.high[0],
        gpu=args.gpu,
        actor_units=units,
        critic_units=units,
        n_warmup=10000,
        batch_size=100)
    # GAIL discriminator that provides the imitation reward
    irl = GAIL(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        units=units,
        enable_sn=args.enable_sn,
        batch_size=32,
        gpu=args.gpu)
    expert_trajs = restore_latest_n_traj(
        args.expert_path_dir, n_path=20, max_steps=1000)
    trainer = IRLTrainer(policy, env, args, irl, expert_trajs["obses"],
                         expert_trajs["acts"], test_env)
    trainer()
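For reference, a minimal sketch of the expert-data layout the trainer consumes here, inferred only from how `expert_trajs` is indexed above; the array shapes and zero-filled values are placeholders, and `restore_latest_n_traj` may return additional keys.

import numpy as np

# A dict of aligned arrays is compatible with expert_trajs["obses"] / expert_trajs["acts"]
n_samples = 1000
dummy_expert_trajs = {
    "obses": np.zeros((n_samples,) + env.observation_space.shape, dtype=np.float32),
    "acts": np.zeros((n_samples, env.action_space.high.size), dtype=np.float32),
}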
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense

from tf2rl.algos.policy_base import IRLPolicy
from tf2rl.algos.gail import GAIL, Discriminator
from tf2rl.networks.spectral_norm_dense import SNDense
class DiscriminatorWGAIL(Discriminator):
    def compute_reward(self, inputs):
        # Negate the critic output so that expert-like inputs receive higher reward
        rewards = -self.call(inputs)
        # Normalize rewards so that positive examples should be zero
        return rewards
class WGAIL(GAIL):
    def __init__(
            self,
            state_shape,
            action_dim,
            units=[32, 32],
            lr=0.001,
            enable_sn=False,
            enable_gp=True,
            enable_gc=False,
            name="WGAIL",
            **kwargs):
        """
        :param enable_sn (bool): If true, add spectral normalization in Dense layer
        :param enable_gp (bool): If true, add gradient penalty to loss function
        :param enable_gc (bool): If true, apply gradient clipping while training
        """
class DiscriminatorVAIL(Discriminator):
    def call(self, inputs):
        # Feature extraction mirrors compute_reward() below; the class name and
        # the l_mean / l_logstd latent heads are assumed from context.
        features = tf.concat(inputs, axis=1)
        features = self.l1(features)
        features = self.l2(features)
        means = self.l_mean(features)
        logstds = self.l_logstd(features)
        logstds = tf.clip_by_value(
            logstds, self.LOG_SIG_CAP_MIN, self.LOG_SIG_CAP_MAX)
        # Reparameterized sample from the latent Gaussian
        latents = means + tf.random.normal(shape=means.shape) * tf.math.exp(logstds)
        # Binary classifier
        out = self.l3(latents)
        return out, means, logstds

    def compute_reward(self, inputs):
        features = tf.concat(inputs, axis=1)
        features = self.l1(features)
        features = self.l2(features)
        means = self.l_mean(features)
        return tf.math.log(self.l3(means) + 1e-8)
class VAIL(GAIL):
    def __init__(
            self,
            state_shape,
            action_dim,
            units=[32, 32],
            n_latent_unit=32,
            lr=5e-5,
            kl_target=0.5,
            reg_param=0.,
            enable_sn=False,
            enable_gp=False,
            name="VAIL",
            **kwargs):
        """
        :param enable_sn (bool): If true, add spectral normalization in Dense layer
        :param enable_gp (bool): If true, add gradient penalty to loss function
        """
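VAIL constrains the discriminator's latent code with an information bottleneck: the KL divergence between the latent Gaussian defined by (means, logstds) and a standard normal is pushed toward `kl_target` with weight `reg_param`. Below is a minimal sketch of that KL term, assuming the (means, logstds) outputs of `DiscriminatorVAIL.call` above; how it enters the loss is only indicated roughly, not tf2rl's exact update.

import tensorflow as tf

def latent_kl_sketch(means, logstds):
    """KL( N(means, exp(logstds)) || N(0, I) ), averaged over the batch."""
    kl_per_sample = 0.5 * tf.reduce_sum(
        tf.square(means) + tf.exp(2. * logstds) - 2. * logstds - 1., axis=1)
    return tf.reduce_mean(kl_per_sample)

# Roughly, the bottleneck then contributes
#   reg_param * (latent_kl_sketch(means, logstds) - kl_target)
# to the discriminator objective.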
class Discriminator(tf.keras.Model):
    def __init__(self, state_shape, units=[32, 32], enable_sn=False,
                 output_activation="sigmoid", name="Discriminator"):
        # This discriminator scores (state, next_state) transition pairs, as used
        # by GAIfO below; the class/def lines and defaults are inferred from the body.
        tf.keras.Model.__init__(self, name=name)
        DenseClass = SNDense if enable_sn else Dense
        self.l1 = DenseClass(units[0], name="L1", activation="relu")
        self.l2 = DenseClass(units[1], name="L2", activation="relu")
        self.l3 = DenseClass(1, name="L3", activation=output_activation)
        dummy_state = tf.constant(
            np.zeros(shape=(1,) + state_shape, dtype=np.float32))
        dummy_next_state = tf.constant(
            np.zeros(shape=(1,) + state_shape, dtype=np.float32))
        with tf.device("/cpu:0"):
            self([dummy_state, dummy_next_state])
class GAIfO(GAIL):
    def __init__(
            self,
            state_shape,
            units=[32, 32],
            lr=0.001,
            enable_sn=False,
            name="GAIfO",
            **kwargs):
        IRLPolicy.__init__(self, name=name, n_training=1, **kwargs)
        self.disc = Discriminator(
            state_shape=state_shape,
            units=units, enable_sn=enable_sn)
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=lr, beta_1=0.5)

    def train(self, agent_states, agent_next_states,
              expert_states, expert_next_states, **kwargs):
        # Discriminator update over agent vs. expert (state, next_state) pairs;
        # a general sketch of this update follows below.
        ...
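The `train` method above updates the discriminator on agent versus expert transition pairs; the expert-side parameter names in the completed signature are assumptions. A minimal sketch of the general form of such an update (a standard GAN discriminator loss over (state, next_state) inputs) follows; it is not tf2rl's exact code.

import tensorflow as tf

def train_discriminator_sketch(disc, optimizer,
                               agent_states, agent_next_states,
                               expert_states, expert_next_states):
    epsilon = 1e-8
    with tf.GradientTape() as tape:
        real = disc([expert_states, expert_next_states])  # expert transitions -> 1
        fake = disc([agent_states, agent_next_states])    # agent transitions  -> 0
        # Binary cross-entropy written out explicitly
        loss = -(tf.reduce_mean(tf.math.log(real + epsilon)) +
                 tf.reduce_mean(tf.math.log(1. - fake + epsilon)))
    grads = tape.gradient(loss, disc.trainable_variables)
    optimizer.apply_gradients(zip(grads, disc.trainable_variables))
    return loss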