    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optim: torch.optim.Adam,
    ):
        """Initialization.

        Args:
            env (gym.Env): OpenAI Gym environment
            args (argparse.Namespace): arguments including hyperparameters and training settings
            hyper_params (dict): hyper-parameters
            models (tuple): models including the main network and the target network
            optim (torch.optim.Adam): optimizer for the DQN

        """
        Agent.__init__(self, env, args)

        self.use_n_step = hyper_params["N_STEP"] > 1
        self.epsilon = hyper_params["MAX_EPSILON"]
        self.dqn, self.dqn_target = models
        self.hyper_params = hyper_params
        self.curr_state = np.zeros(1)
        self.dqn_optimizer = optim
        self.episode_step = 0
        self.total_step = 0
        self.i_episode = 0

        # load the optimizer and model parameters
        if args.load_from is not None and os.path.exists(args.load_from):
            self.load_params(args.load_from)

        self._initialize()
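
# --------------------------------------------------------------------------
# Illustration only (not from the source): a minimal sketch of how the
# arguments expected by the __init__ above might be assembled. The network
# architecture, the learning rate, the hyper-parameter values, and the agent
# class name are placeholders, not repository code.
# --------------------------------------------------------------------------
def build_dqn_agent_example():
    import argparse

    import gym
    import torch.nn as nn
    import torch.optim as optim

    env = gym.make("CartPole-v1")
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n

    # main and target networks share the same architecture
    dqn = nn.Sequential(nn.Linear(state_dim, 128), nn.ReLU(), nn.Linear(128, action_dim))
    dqn_target = nn.Sequential(nn.Linear(state_dim, 128), nn.ReLU(), nn.Linear(128, action_dim))
    dqn_target.load_state_dict(dqn.state_dict())

    dqn_optim = optim.Adam(dqn.parameters(), lr=1e-4)

    args = argparse.Namespace(load_from=None)
    hyper_params = {"N_STEP": 3, "MAX_EPSILON": 1.0}

    # the agent class name is hypothetical; substitute whichever agent defines
    # the __init__ shown above:
    # agent = DQNAgent(env, args, hyper_params, (dqn, dqn_target), dqn_optim)
    return env, args, hyper_params, (dqn, dqn_target), dqn_optim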
import gym
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
import wandb
from algorithms.common.abstract.agent import Agent
from algorithms.common.buffer.replay_buffer import ReplayBuffer
import algorithms.common.helper_functions as common_utils
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class SACAgent(Agent):
    """SAC agent interacting with environment.

    Attributes:
        memory (ReplayBuffer): replay memory
        actor (nn.Module): actor model to select actions
        actor_target (nn.Module): target actor model to select actions
        actor_optimizer (Optimizer): optimizer for training actor
        critic_1 (nn.Module): critic model to predict state values
        critic_2 (nn.Module): critic model to predict state values
        critic_target1 (nn.Module): target critic model to predict state values
        critic_target2 (nn.Module): target critic model to predict state values
        critic_optimizer1 (Optimizer): optimizer for training critic_1
        critic_optimizer2 (Optimizer): optimizer for training critic_2
        curr_state (np.ndarray): temporary storage of the current state
        target_entropy (int): desired entropy used for the inequality constraint
        beta (float): beta parameter for prioritized replay buffer

    """
    def save_params(self, n_episode: int):
        """Save model and optimizer parameters."""
        params = {
            "actor": self.actor.state_dict(),
            "qf_1": self.qf_1.state_dict(),
            "qf_2": self.qf_2.state_dict(),
            "vf": self.vf.state_dict(),
            "vf_target": self.vf_target.state_dict(),
            "actor_optim": self.actor_optimizer.state_dict(),
            "qf_1_optim": self.qf_1_optimizer.state_dict(),
            "qf_2_optim": self.qf_2_optimizer.state_dict(),
            "vf_optim": self.vf_optimizer.state_dict(),
        }

        if self.hyper_params["AUTO_ENTROPY_TUNING"]:
            params["alpha_optim"] = self.alpha_optimizer.state_dict()

        Agent.save_params(self, params, n_episode)
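
    # ----------------------------------------------------------------------
    # Illustration only (not from the source): a hedged sketch of a matching
    # load routine for the checkpoint dictionary built above. The method
    # name, the checkpoint-path handling, and the use of torch.load are
    # assumptions; only the dictionary keys mirror the save_params code.
    # ----------------------------------------------------------------------
    def load_params_example(self, path: str):
        """Hypothetical counterpart to save_params (sketch, not repository code)."""
        params = torch.load(path, map_location=device)

        self.actor.load_state_dict(params["actor"])
        self.qf_1.load_state_dict(params["qf_1"])
        self.qf_2.load_state_dict(params["qf_2"])
        self.vf.load_state_dict(params["vf"])
        self.vf_target.load_state_dict(params["vf_target"])

        self.actor_optimizer.load_state_dict(params["actor_optim"])
        self.qf_1_optimizer.load_state_dict(params["qf_1_optim"])
        self.qf_2_optimizer.load_state_dict(params["qf_2_optim"])
        self.vf_optimizer.load_state_dict(params["vf_optim"])

        if self.hyper_params["AUTO_ENTROPY_TUNING"] and "alpha_optim" in params:
            self.alpha_optimizer.load_state_dict(params["alpha_optim"])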
import argparse

import gym
import numpy as np
import torch
import torch.nn.functional as F
import wandb
from algorithms.common.abstract.agent import Agent
from algorithms.common.buffer.replay_buffer import ReplayBuffer
import algorithms.common.helper_functions as common_utils
from algorithms.common.noise import GaussianNoise
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class TD3Agent(Agent):
    """TD3 actor-critic agent interacting with environment.

    Attributes:
        memory (ReplayBuffer): replay memory
        exploration_noise (GaussianNoise): random noise for exploration
        target_policy_noise (GaussianNoise): random noise for target values
        actor (nn.Module): actor model to select actions
        critic1 (nn.Module): critic model to predict state values
        critic2 (nn.Module): critic model to predict state values
        critic_target1 (nn.Module): target critic model to predict state values
        critic_target2 (nn.Module): target critic model to predict state values
        actor_target (nn.Module): target actor model to select actions
        critic_optim (Optimizer): optimizer for training critic
        actor_optim (Optimizer): optimizer for training actor
        hyper_params (dict): hyper-parameters
        curr_state (np.ndarray): temporary storage of the current state

    """
    def save_params(self, n_episode: int):
        """Save model and optimizer parameters."""
        params = {
            "dqn_state_dict": self.dqn.state_dict(),
            "dqn_target_state_dict": self.dqn_target.state_dict(),
            "dqn_optim_state_dict": self.dqn_optimizer.state_dict(),
        }
        Agent.save_params(self, params, n_episode)
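
    # ----------------------------------------------------------------------
    # Illustration only (not from the source): every save_params variant in
    # this section delegates to Agent.save_params(self, params, n_episode).
    # A plausible sketch of that base-class step, assuming a checkpoint
    # directory and file-naming scheme that are NOT taken from the
    # repository, is simply to serialize the dict with torch.save:
    # ----------------------------------------------------------------------
    def save_params_sketch(self, params: dict, n_episode: int):
        """Hypothetical base-class behaviour (sketch, not repository code)."""
        ckpt_dir = "./checkpoint"  # placeholder location
        os.makedirs(ckpt_dir, exist_ok=True)
        path = os.path.join(ckpt_dir, "ep_%d.pt" % n_episode)
        torch.save(params, path)
        print("[INFO] saved model and optimizer parameters to", path)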
    def save_params(self, n_episode: int):
        """Save model and optimizer parameters."""
        params = {
            "actor_state_dict": self.actor.state_dict(),
            "actor_target_state_dict": self.actor_target.state_dict(),
            "critic_state_dict": self.critic.state_dict(),
            "critic_target_state_dict": self.critic_target.state_dict(),
            "actor_optim_state_dict": self.actor_optimizer.state_dict(),
            "critic_optim_state_dict": self.critic_optimizer.state_dict(),
        }
        Agent.save_params(self, params, n_episode)
import os
from typing import Tuple
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import wandb
from algorithms.common.abstract.agent import Agent
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class A2CAgent(Agent):
    """1-Step Advantage Actor-Critic interacting with environment.

    Attributes:
        actor (nn.Module): policy model to select actions
        critic (nn.Module): critic model to evaluate states
        hyper_params (dict): hyper-parameters
        actor_optimizer (Optimizer): optimizer for actor
        critic_optimizer (Optimizer): optimizer for critic
        optimizer (Optimizer): optimizer for training
        episode_step (int): step number of the current episode
        transition (list): recent transition information
        i_episode (int): current episode number

    """
    def save_params(self, n_episode: int):
        """Save model and optimizer parameters."""
        params = {
            "actor_state_dict": self.actor.state_dict(),
            "critic_state_dict": self.critic.state_dict(),
            "actor_optim_state_dict": self.actor_optimizer.state_dict(),
            "critic_optim_state_dict": self.critic_optimizer.state_dict(),
        }
        Agent.save_params(self, params, n_episode)
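
    # ----------------------------------------------------------------------
    # Illustration only (not from the source): a hedged sketch of how
    # save_params(n_episode) is typically driven from a training loop. The
    # method name, the episode budget, the SAVE_PERIOD key, and the random
    # stand-in policy are assumptions, not repository code.
    # ----------------------------------------------------------------------
    def train_sketch(self):
        """Hypothetical training loop showing periodic checkpointing (sketch only)."""
        for i_episode in range(1, 1001):  # episode budget is a placeholder
            self.env.reset()
            done = False
            while not done:
                # stand-in for the learned policy; a real agent selects and learns here
                action = self.env.action_space.sample()
                _, _, done, _ = self.env.step(action)

            # checkpoint every SAVE_PERIOD episodes (key name assumed, default 100)
            if i_episode % self.hyper_params.get("SAVE_PERIOD", 100) == 0:
                self.save_params(i_episode)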
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
        exploration_noise: GaussianNoise,
        target_policy_noise: GaussianNoise,
    ):
        """Initialization.

        Args:
            env (gym.Env): OpenAI Gym environment
            args (argparse.Namespace): arguments including hyperparameters and training settings
            hyper_params (dict): hyper-parameters
            models (tuple): models including actor and critic
            optims (tuple): optimizers for actor and critic
            exploration_noise (GaussianNoise): random noise for exploration
            target_policy_noise (GaussianNoise): random noise for target values

        """
        Agent.__init__(self, env, args)

        self.actor, self.actor_target = models[0:2]
        self.critic1, self.critic2 = models[2:4]
        self.critic_target1, self.critic_target2 = models[4:6]
        self.actor_optim = optims[0]
        self.critic_optim = optims[1]
        self.hyper_params = hyper_params
        self.curr_state = np.zeros((1,))
        self.exploration_noise = exploration_noise
        self.target_policy_noise = target_policy_noise
        self.total_step = 0
        self.episode_step = 0
        self.update_step = 0
        self.i_episode = 0

        # load the optimizer and model parameters