Module deepcomp.agent.dummy

Expand source code
import numpy as np

from deepcomp.agent.base import CentralAgent


class RandomAgent(CentralAgent):
    """Agent that always selects a random action. Following the stable_baselines API."""
    def __init__(self, action_space, num_vec_envs=None, seed=None):
        super().__init__()
        self.action_space = action_space
        self.action_space.seed(seed)
        # number of envs inside the VecEnv determines the number of actions to make in each step; or None if no VecEnv
        self.num_vec_envs = num_vec_envs

    def compute_action(self, observation):
        """Choose a random action independent of the observation and other args"""
        # num_vec_envs=None means we don't use a VecEnv --> return action directly (not in array)
        if self.num_vec_envs is None:
            return self.action_space.sample()
        else:
            return [self.action_space.sample() for _ in range(self.num_vec_envs)]


class FixedAgent(CentralAgent):
    """Agent that always selects a the same fixed action. Following the stable_baselines API."""
    def __init__(self, action, noop_interval=0, num_vec_envs=None):
        super().__init__()
        self.action = action
        # number of no op actions (action 0) between repeating actions
        self.noop_interval = noop_interval
        self.noop_counter = noop_interval
        # number of envs inside the VecEnv determines the number of actions to make in each step; or None if no VecEnv
        self.num_vec_envs = num_vec_envs

    def compute_action(self, observation):
        """
        Choose a same fixed action independent of the observation and other args.
        In between the same action, choose no operation (action 0) for the configured interval.
        """
        # no op during the interval
        if self.noop_counter < self.noop_interval:
            action = np.zeros(len(self.action))
            self.noop_counter += 1
        else:
            action = self.action
            self.noop_counter = 0

        # return action
        if self.num_vec_envs is None:
            return action
        else:
            return [action for _ in range(self.num_vec_envs)]

Classes

class FixedAgent (action, noop_interval=0, num_vec_envs=None)

Agent that always selects a the same fixed action. Following the stable_baselines API.

Expand source code
class FixedAgent(CentralAgent):
    """Agent that always selects a the same fixed action. Following the stable_baselines API."""
    def __init__(self, action, noop_interval=0, num_vec_envs=None):
        super().__init__()
        self.action = action
        # number of no op actions (action 0) between repeating actions
        self.noop_interval = noop_interval
        self.noop_counter = noop_interval
        # number of envs inside the VecEnv determines the number of actions to make in each step; or None if no VecEnv
        self.num_vec_envs = num_vec_envs

    def compute_action(self, observation):
        """
        Choose a same fixed action independent of the observation and other args.
        In between the same action, choose no operation (action 0) for the configured interval.
        """
        # no op during the interval
        if self.noop_counter < self.noop_interval:
            action = np.zeros(len(self.action))
            self.noop_counter += 1
        else:
            action = self.action
            self.noop_counter = 0

        # return action
        if self.num_vec_envs is None:
            return action
        else:
            return [action for _ in range(self.num_vec_envs)]

Ancestors

Methods

def compute_action(self, observation)

Choose a same fixed action independent of the observation and other args. In between the same action, choose no operation (action 0) for the configured interval.

Expand source code
def compute_action(self, observation):
    """
    Choose a same fixed action independent of the observation and other args.
    In between the same action, choose no operation (action 0) for the configured interval.
    """
    # no op during the interval
    if self.noop_counter < self.noop_interval:
        action = np.zeros(len(self.action))
        self.noop_counter += 1
    else:
        action = self.action
        self.noop_counter = 0

    # return action
    if self.num_vec_envs is None:
        return action
    else:
        return [action for _ in range(self.num_vec_envs)]
class RandomAgent (action_space, num_vec_envs=None, seed=None)

Agent that always selects a random action. Following the stable_baselines API.

Expand source code
class RandomAgent(CentralAgent):
    """Agent that always selects a random action. Following the stable_baselines API."""
    def __init__(self, action_space, num_vec_envs=None, seed=None):
        super().__init__()
        self.action_space = action_space
        self.action_space.seed(seed)
        # number of envs inside the VecEnv determines the number of actions to make in each step; or None if no VecEnv
        self.num_vec_envs = num_vec_envs

    def compute_action(self, observation):
        """Choose a random action independent of the observation and other args"""
        # num_vec_envs=None means we don't use a VecEnv --> return action directly (not in array)
        if self.num_vec_envs is None:
            return self.action_space.sample()
        else:
            return [self.action_space.sample() for _ in range(self.num_vec_envs)]

Ancestors

Methods

def compute_action(self, observation)

Choose a random action independent of the observation and other args

Expand source code
def compute_action(self, observation):
    """Choose a random action independent of the observation and other args"""
    # num_vec_envs=None means we don't use a VecEnv --> return action directly (not in array)
    if self.num_vec_envs is None:
        return self.action_space.sample()
    else:
        return [self.action_space.sample() for _ in range(self.num_vec_envs)]