How to use the algorithms.common.abstract.agent.Agent class in algorithms

To help you get started, we’ve selected a few examples from the algorithms package, showing popular ways Agent is used in public projects.
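All of the snippets below share one pattern: a concrete agent subclasses Agent, calls Agent.__init__(self, env, args) from its own constructor, and hands a plain dict of state_dicts to Agent.save_params(self, params, n_episode) for checkpointing. A minimal sketch of that pattern is shown here; the network, optimizer, and hyper-parameter names are illustrative placeholders, and any abstract methods the base class may require (action selection, training loop, etc.) are omitted.

import argparse

import gym
import numpy as np
import torch.nn as nn
import torch.optim as optim

from algorithms.common.abstract.agent import Agent


class MyAgent(Agent):
    """Minimal agent built on the abstract Agent base class (sketch only)."""

    def __init__(self, env: gym.Env, args: argparse.Namespace, hyper_params: dict):
        # The base class stores the environment and command-line arguments.
        Agent.__init__(self, env, args)

        self.hyper_params = hyper_params
        self.curr_state = np.zeros(1)

        # Placeholder network and optimizer; the real agents pass these in.
        self.policy_net = nn.Linear(4, 2)
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=1e-3)

    def save_params(self, n_episode: int):
        """Save model and optimizer parameters via the base class."""
        params = {
            "policy_state_dict": self.policy_net.state_dict(),
            "optim_state_dict": self.optimizer.state_dict(),
        }
        Agent.save_params(self, params, n_episode)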

github medipixel / rl_algorithms / algorithms / dqn / agent.py
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optim: torch.optim.Adam,
    ):
        """Initialization.

        Args:
            env (gym.Env): OpenAI Gym environment
            args (argparse.Namespace): arguments including hyperparameters and training settings
            hyper_params (dict): hyper-parameters
            models (tuple): models including main network and target
            optim (torch.optim.Adam): optimizers for dqn

        """
        Agent.__init__(self, env, args)

        self.use_n_step = hyper_params["N_STEP"] > 1
        self.epsilon = hyper_params["MAX_EPSILON"]
        self.dqn, self.dqn_target = models
        self.hyper_params = hyper_params
        self.curr_state = np.zeros(1)
        self.dqn_optimizer = optim
        self.episode_step = 0
        self.total_step = 0
        self.i_episode = 0

        # load the optimizer and model parameters
        if args.load_from is not None and os.path.exists(args.load_from):
            self.load_params(args.load_from)

        self._initialize()
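Given that constructor, instantiating the DQN agent looks roughly like the sketch below. Only the argument order (env, args, hyper_params, models, optim) comes from the snippet above; the QNet class, hyper-parameter values, argparse flags, and the assumption that the class in algorithms/dqn/agent.py is importable as DQNAgent are illustrative guesses.

import argparse

import gym
import torch.nn as nn
import torch.optim as optim

from algorithms.dqn.agent import DQNAgent  # assumed class name


class QNet(nn.Module):
    """Hypothetical Q-network; the repo defines its own network classes."""

    def __init__(self, state_dim: int, action_dim: int):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(state_dim, 128), nn.ReLU(), nn.Linear(128, action_dim)
        )

    def forward(self, x):
        return self.layers(x)


env = gym.make("CartPole-v1")
args = argparse.Namespace(load_from=None)  # plus whatever flags Agent.__init__ expects

dqn = QNet(env.observation_space.shape[0], env.action_space.n)
dqn_target = QNet(env.observation_space.shape[0], env.action_space.n)
dqn_target.load_state_dict(dqn.state_dict())

hyper_params = {"N_STEP": 1, "MAX_EPSILON": 1.0}  # illustrative subset only
optimizer = optim.Adam(dqn.parameters(), lr=1e-3)

# Argument order matches the __init__ shown above.
agent = DQNAgent(env, args, hyper_params, (dqn, dqn_target), optimizer)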

github medipixel / rl_algorithms / algorithms / sac / agent.py
import gym
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
import wandb

from algorithms.common.abstract.agent import Agent
from algorithms.common.buffer.replay_buffer import ReplayBuffer
import algorithms.common.helper_functions as common_utils

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class SACAgent(Agent):
    """SAC agent interacting with environment.

    Attributes:
        memory (ReplayBuffer): replay memory
        actor (nn.Module): actor model to select actions
        actor_target (nn.Module): target actor model to select actions
        actor_optimizer (Optimizer): optimizer for training actor
        critic_1 (nn.Module): critic model to predict state values
        critic_2 (nn.Module): critic model to predict state values
        critic_target1 (nn.Module): target critic model to predict state values
        critic_target2 (nn.Module): target critic model to predict state values
        critic_optimizer1 (Optimizer): optimizer for training critic_1
        critic_optimizer2 (Optimizer): optimizer for training critic_2
        curr_state (np.ndarray): temporary storage of the current state
        target_entropy (int): desired entropy used for the inequality constraint
        beta (float): beta parameter for prioritized replay buffer

github medipixel / rl_algorithms / algorithms / sac / agent.py
    def save_params(self, n_episode: int):
        """Save model and optimizer parameters."""
        params = {
            "actor": self.actor.state_dict(),
            "qf_1": self.qf_1.state_dict(),
            "qf_2": self.qf_2.state_dict(),
            "vf": self.vf.state_dict(),
            "vf_target": self.vf_target.state_dict(),
            "actor_optim": self.actor_optimizer.state_dict(),
            "qf_1_optim": self.qf_1_optimizer.state_dict(),
            "qf_2_optim": self.qf_2_optimizer.state_dict(),
            "vf_optim": self.vf_optimizer.state_dict(),
        }

        if self.hyper_params["AUTO_ENTROPY_TUNING"]:
            params["alpha_optim"] = self.alpha_optimizer.state_dict()

        Agent.save_params(self, params, n_episode)
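Each agent builds a flat dict of state_dicts and hands it to the base class. The base-class implementation of save_params is not shown on this page; a plausible minimal version, assuming torch.save-based checkpointing with a per-episode file name (the directory layout and naming are guesses, not the repository's actual behaviour), would be:

import os

import torch


class Agent:  # sketch of the relevant part of the abstract base class
    def save_params(self, params: dict, n_episode: int):
        """Serialize a dict of state_dicts to disk (illustrative only)."""
        ckpt_dir = "./checkpoint"  # hypothetical location
        os.makedirs(ckpt_dir, exist_ok=True)
        path = os.path.join(ckpt_dir, "ep_%d.pt" % n_episode)
        torch.save(params, path)
        print("[INFO] saved parameters to", path)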

github medipixel / rl_algorithms / algorithms / td3 / agent.py
import gym
import numpy as np
import torch
import torch.nn.functional as F
import wandb

from algorithms.common.abstract.agent import Agent
from algorithms.common.buffer.replay_buffer import ReplayBuffer
import algorithms.common.helper_functions as common_utils
from algorithms.common.noise import GaussianNoise

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class TD3Agent(Agent):
    """ActorCritic interacting with environment.

    Attributes:
        memory (ReplayBuffer): replay memory
        exploration_noise (GaussianNoise): random noise for exploration
        target_policy_noise (GaussianNoise): random noise for target values
        actor (nn.Module): actor model to select actions
        critic1 (nn.Module): critic model to predict state values
        critic2 (nn.Module): critic model to predict state values
        critic_target1 (nn.Module): target critic model to predict state values
        critic_target2 (nn.Module): target critic model to predict state values
        actor_target (nn.Module): target actor model to select actions
        critic_optim (Optimizer): optimizer for training critic
        actor_optim (Optimizer): optimizer for training actor
        hyper_params (dict): hyper-parameters
        curr_state (np.ndarray): temporary storage of the current state

github medipixel / rl_algorithms / algorithms / dqn / agent.py
    def save_params(self, n_episode: int):
        """Save model and optimizer parameters."""
        params = {
            "dqn_state_dict": self.dqn.state_dict(),
            "dqn_target_state_dict": self.dqn_target.state_dict(),
            "dqn_optim_state_dict": self.dqn_optimizer.state_dict(),
        }

        Agent.save_params(self, params, n_episode)
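The DQN constructor shown earlier calls self.load_params(args.load_from) when a checkpoint path exists. A hedged sketch of that method, restoring exactly the keys written by save_params above (assuming the checkpoint is a torch.save'd dict; the repository's actual loading code may differ):

import torch


def load_params(self, path: str):
    """Restore DQN model and optimizer parameters saved by save_params (sketch)."""
    params = torch.load(path, map_location="cpu")
    self.dqn.load_state_dict(params["dqn_state_dict"])
    self.dqn_target.load_state_dict(params["dqn_target_state_dict"])
    self.dqn_optimizer.load_state_dict(params["dqn_optim_state_dict"])
    print("[INFO] loaded parameters from", path)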

github medipixel / rl_algorithms / algorithms / ddpg / agent.py
    def save_params(self, n_episode: int):
        """Save model and optimizer parameters."""
        params = {
            "actor_state_dict": self.actor.state_dict(),
            "actor_target_state_dict": self.actor_target.state_dict(),
            "critic_state_dict": self.critic.state_dict(),
            "critic_target_state_dict": self.critic_target.state_dict(),
            "actor_optim_state_dict": self.actor_optimizer.state_dict(),
            "critic_optim_state_dict": self.critic_optimizer.state_dict(),
        }

        Agent.save_params(self, params, n_episode)

github medipixel / rl_algorithms / algorithms / a2c / agent.py
    def save_params(self, n_episode: int):
        """Save model and optimizer parameters."""
        params = {
            "actor_state_dict": self.actor.state_dict(),
            "critic_state_dict": self.critic.state_dict(),
            "actor_optim_state_dict": self.actor_optimizer.state_dict(),
            "critic_optim_state_dict": self.critic_optimizer.state_dict(),
        }

        Agent.save_params(self, params, n_episode)

github medipixel / rl_algorithms / algorithms / a2c / agent.py
import os
from typing import Tuple

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import wandb

from algorithms.common.abstract.agent import Agent

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class A2CAgent(Agent):
    """1-Step Advantage Actor-Critic interacting with environment.

    Attributes:
        actor (nn.Module): policy model to select actions
        critic (nn.Module): critic model to evaluate states
        hyper_params (dict): hyper-parameters
        actor_optimizer (Optimizer): optimizer for actor
        critic_optimizer (Optimizer): optimizer for critic
        optimizer (Optimizer): optimizer for training
        episode_step (int): step number of the current episode
        transition (list): recent transition information
        i_episode (int): current episode number

    """

    def __init__(

github medipixel / rl_algorithms / algorithms / ppo / agent.py
    def save_params(self, n_episode: int):
        """Save model and optimizer parameters."""
        params = {
            "actor_state_dict": self.actor.state_dict(),
            "critic_state_dict": self.critic.state_dict(),
            "actor_optim_state_dict": self.actor_optimizer.state_dict(),
            "critic_optim_state_dict": self.critic_optimizer.state_dict(),
        }
        Agent.save_params(self, params, n_episode)

github medipixel / rl_algorithms / algorithms / td3 / agent.py
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
        exploration_noise: GaussianNoise,
        target_policy_noise: GaussianNoise,
    ):
        """Initialization.

        Args:
            env (gym.Env): OpenAI Gym environment
            args (argparse.Namespace): arguments including hyperparameters and training settings
            hyper_params (dict): hyper-parameters
            models (tuple): models including actor and critic
            optims (tuple): optimizers for actor and critic
            exploration_noise (GaussianNoise): random noise for exploration
            target_policy_noise (GaussianNoise): random noise for target values

        """
        Agent.__init__(self, env, args)

        self.actor, self.actor_target = models[0:2]
        self.critic1, self.critic2 = models[2:4]
        self.critic_target1, self.critic_target2 = models[4:6]
        self.actor_optim = optims[0]
        self.critic_optim = optims[1]
        self.hyper_params = hyper_params
        self.curr_state = np.zeros((1,))
        self.exploration_noise = exploration_noise
        self.target_policy_noise = target_policy_noise
        self.total_step = 0
        self.episode_step = 0
        self.update_step = 0
        self.i_episode = 0

        # load the optimizer and model parameters
        if args.load_from is not None and os.path.exists(args.load_from):
            self.load_params(args.load_from)

        self._initialize()
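The tuple unpacking above fixes the expected ordering of models and optims for the TD3 agent. Building those tuples would look roughly like this; the nn.Linear placeholders stand in for the repository's actual actor/critic networks, and only the ordering is taken from the snippet:

import copy

import torch.nn as nn
import torch.optim as optim

# Placeholder networks; the repository defines its own actor/critic classes.
actor = nn.Linear(8, 2)
critic1 = nn.Linear(10, 1)
critic2 = nn.Linear(10, 1)
actor_target = copy.deepcopy(actor)
critic_target1 = copy.deepcopy(critic1)
critic_target2 = copy.deepcopy(critic2)

# models[0:2] -> actor, actor_target
# models[2:4] -> critic1, critic2
# models[4:6] -> critic_target1, critic_target2
models = (actor, actor_target, critic1, critic2, critic_target1, critic_target2)

# optims[0] -> actor optimizer, optims[1] -> shared critic optimizer
optims = (
    optim.Adam(actor.parameters(), lr=1e-3),
    optim.Adam(list(critic1.parameters()) + list(critic2.parameters()), lr=1e-3),
)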