mu, _, std = super(TanhGaussianDistParams, self).get_dist_params(x)
# sampling actions
dist = Normal(mu, std)
z = dist.rsample()
# normalize action and log_prob
# see appendix C of 'https://arxiv.org/pdf/1812.05905.pdf'
action = torch.tanh(z)
log_prob = dist.log_prob(z) - torch.log(1 - action.pow(2) + epsilon)
log_prob = log_prob.sum(-1, keepdim=True)
return action, log_prob, z, mu, std
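# For reference, the sampling logic above can be read as a standalone helper.
# This is only an illustrative sketch: the `squashed_sample` name and the
# `epsilon` default are assumptions, not part of the original snippet.
import torch
from torch.distributions import Normal

def squashed_sample(mu: torch.Tensor, std: torch.Tensor, epsilon: float = 1e-7):
    """Sample a tanh-squashed action and its corrected log-probability."""
    dist = Normal(mu, std)
    z = dist.rsample()  # reparameterized sample, keeps gradients w.r.t. mu and std
    action = torch.tanh(z)  # squash into (-1, 1)
    # change-of-variables correction (appendix C of arXiv:1812.05905);
    # epsilon keeps the log argument strictly positive
    log_prob = dist.log_prob(z) - torch.log(1 - action.pow(2) + epsilon)
    return action, log_prob.sum(-1, keepdim=True)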
class CategoricalDist(MLP):
"""Multilayer perceptron with categorial distribution output.
Attributes:
last_layer (nn.Linear): output layer for softmax
"""
def __init__(
self,
input_size: int,
output_size: int,
hidden_sizes: list,
hidden_activation: Callable = F.relu,
init_fn: Callable = init_layer_uniform,
):
"""Initialization."""
super(CategoricalDist, self).__init__(
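# The CategoricalDist snippet is cut off at the super() call above. For
# orientation only, a categorical policy head typically maps the last hidden
# layer to logits and samples from torch.distributions.Categorical. This is an
# illustrative sketch; `CategoricalHead` and its attributes are assumptions,
# not the repository's actual implementation.
import torch
import torch.nn as nn
from torch.distributions import Categorical

class CategoricalHead(nn.Module):
    """Minimal categorical output head for a discrete-action policy."""

    def __init__(self, in_size: int, n_actions: int):
        super().__init__()
        self.last_layer = nn.Linear(in_size, n_actions)

    def forward(self, hidden: torch.Tensor):
        logits = self.last_layer(hidden)
        dist = Categorical(logits=logits)  # softmax over action logits
        action = dist.sample()
        return action, dist.log_prob(action), dist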
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
"""Run training or test.
Args:
env (gym.Env): OpenAI Gym environment with continuous action space
args (argparse.Namespace): arguments including training settings
state_dim (int): dimension of states
action_dim (int): dimension of actions
"""
hidden_sizes_actor = [256, 256]
hidden_sizes_critic = [256, 256]
# create actor
actor = MLP(
input_size=state_dim,
output_size=action_dim,
hidden_sizes=hidden_sizes_actor,
output_activation=torch.tanh,
).to(device)
actor_target = MLP(
input_size=state_dim,
output_size=action_dim,
hidden_sizes=hidden_sizes_actor,
output_activation=torch.tanh,
).to(device)
actor_target.load_state_dict(actor.state_dict())
# create critic
critic = MLP(
input_size=state_dim + action_dim,
output_size=1,
hidden_sizes=hidden_sizes_critic,
).to(device)
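# Target copies like `actor_target` above are typically refreshed with a soft
# (Polyak) update after each learning step. A hedged sketch; `tau` is an
# assumed hyperparameter name, not necessarily the one this repository uses.
import torch.nn as nn

def soft_update(network: nn.Module, target_network: nn.Module, tau: float = 5e-3):
    """Blend online parameters into the target: target <- tau * online + (1 - tau) * target."""
    for param, target_param in zip(network.parameters(), target_network.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)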
import math
from typing import Callable, Optional, Tuple
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from algorithms.common.networks.cnn import CNN
from algorithms.common.networks.mlp import MLP, init_layer_uniform
from algorithms.dqn.linear import NoisyMLPHandler
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class DuelingMLP(MLP, NoisyMLPHandler):
"""Multilayer perceptron with dueling construction."""
def __init__(
self,
input_size: int,
output_size: int,
hidden_sizes: list,
hidden_activation: Callable = F.relu,
linear_layer: nn.Module = nn.Linear,
init_fn: Callable = init_layer_uniform,
):
"""Initialization."""
super(DuelingMLP, self).__init__(
input_size=input_size,
output_size=output_size,
hidden_sizes=hidden_sizes,
class C51CNN(CNN):
"""Convolution neural network for distributional RL."""
def forward_(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""Forward method implementation."""
x = self.get_cnn_features(x)
out = self.fc_layers.forward_(x)
return out
def reset_noise(self):
"""Re-sample noise for fc layers."""
self.fc_layers.reset_noise()
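# Hedged sketch of the "dueling construction" referenced by DuelingMLP above:
# a state-value stream and an advantage stream are combined into Q-values,
# with the advantage mean subtracted for identifiability. Tensor names here
# are illustrative, not the repository's attribute names.
import torch

def dueling_q(value: torch.Tensor, advantage: torch.Tensor) -> torch.Tensor:
    # value: (batch, 1), advantage: (batch, n_actions)
    return value + advantage - advantage.mean(dim=-1, keepdim=True)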
class C51DuelingMLP(MLP, NoisyMLPHandler):
"""Multilayered perceptron for C51 with dueling construction."""
def __init__(
self,
input_size: int,
action_size: int,
hidden_sizes: list,
atom_size: int = 51,
v_min: int = -10,
v_max: int = 10,
hidden_activation: Callable = F.relu,
linear_layer: nn.Module = nn.Linear,
init_fn: Callable = init_layer_uniform,
):
"""Initialization."""
super(C51DuelingMLP, self).__init__(
hidden_sizes_vf = [256, 256]
hidden_sizes_qf = [256, 256]
# target entropy
target_entropy = -np.prod((action_dim,)).item() # heuristic
# create actor
actor = TanhGaussianDistParams(
input_size=state_dim, output_size=action_dim, hidden_sizes=hidden_sizes_actor
).to(device)
# create v_critic
vf = MLP(input_size=state_dim, output_size=1, hidden_sizes=hidden_sizes_vf).to(
device
)
vf_target = MLP(
input_size=state_dim, output_size=1, hidden_sizes=hidden_sizes_vf
).to(device)
vf_target.load_state_dict(vf.state_dict())
# create q_critic
qf_1 = FlattenMLP(
input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_qf
).to(device)
qf_2 = FlattenMLP(
input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_qf
).to(device)
# create optimizers
actor_optim = optim.Adam(
actor.parameters(),
lr=hyper_params["LR_ACTOR"],
state_dim (int): dimension of states
action_dim (int): dimension of actions
"""
hidden_sizes_actor = [256, 256]
hidden_sizes_critic = [256, 256]
# create actor
actor = MLP(
input_size=state_dim,
output_size=action_dim,
hidden_sizes=hidden_sizes_actor,
output_activation=torch.tanh,
).to(device)
actor_target = MLP(
input_size=state_dim,
output_size=action_dim,
hidden_sizes=hidden_sizes_actor,
output_activation=torch.tanh,
).to(device)
actor_target.load_state_dict(actor.state_dict())
# create critic
critic = MLP(
input_size=state_dim + action_dim,
output_size=1,
hidden_sizes=hidden_sizes_critic,
).to(device)
critic_target = MLP(
input_size=state_dim + action_dim,
output_size=1,
hidden_sizes=hidden_sizes_critic,
).to(device)
critic_target.load_state_dict(critic.state_dict())
# create optimizer
actor_optim = optim.Adam(
actor.parameters(),
lr=hyper_params["LR_ACTOR"],
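# Hedged sketch of the deterministic policy-gradient actor loss that
# actor/critic pairs like the ones above are typically trained with; the
# function and `states` argument are assumptions, not this repository's API.
import torch
import torch.nn as nn

def ddpg_actor_loss(actor: nn.Module, critic: nn.Module, states: torch.Tensor) -> torch.Tensor:
    # maximize the critic's value of the actor's action, i.e. minimize its negative
    actions = actor(states)
    return -critic(torch.cat((states, actions), dim=-1)).mean()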
# create actor
actor = MLP(
input_size=state_dim,
output_size=action_dim,
hidden_sizes=hidden_sizes_actor,
output_activation=torch.tanh,
).to(device)
actor_target = MLP(
input_size=state_dim,
output_size=action_dim,
hidden_sizes=hidden_sizes_actor,
output_activation=torch.tanh,
).to(device)
actor_target.load_state_dict(actor.state_dict())
# create critic
critic_1 = MLP(
input_size=state_dim + action_dim,
output_size=1,
hidden_sizes=hidden_sizes_critic,
).to(device)
critic_2 = MLP(
input_size=state_dim + action_dim,
output_size=1,
hidden_sizes=hidden_sizes_critic,
).to(device)
critic_target1 = MLP(
input_size=state_dim + action_dim,
output_size=1,
hidden_sizes=hidden_sizes_critic,
).to(device)
critic_target2 = MLP(
input_size=state_dim + action_dim,
output_size=1,
hidden_sizes=hidden_sizes_critic,
).to(device)
critic_target1.load_state_dict(critic_1.state_dict())
critic_target2.load_state_dict(critic_2.state_dict())
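# The twin critics and twin targets above are characteristic of TD3-style
# clipped double-Q learning. A hedged sketch of the bootstrapped target they
# are used to compute; the function and argument names are assumptions.
import torch
import torch.nn as nn

def clipped_double_q_target(
    critic_target1: nn.Module,
    critic_target2: nn.Module,
    next_states: torch.Tensor,
    next_actions: torch.Tensor,
    rewards: torch.Tensor,
    masks: torch.Tensor,
    gamma: float = 0.99,
) -> torch.Tensor:
    with torch.no_grad():
        state_action = torch.cat((next_states, next_actions), dim=-1)
        # take the smaller of the two target estimates to curb overestimation
        next_values = torch.min(critic_target1(state_action), critic_target2(state_action))
        return rewards + gamma * next_values * masks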