Rl

This namespace provides various reinforcement learning utilities.

`ActClipWrapperModule (Module)` ¶

Source code in evotorch/neuroevolution/net/rl.py

class ActClipWrapperModule(nn.Module):
    """
    Wrap a module so that its output is clipped into the bounds of a `Box`.

    Note that, despite the argument name `obs_space`, the bounds are applied
    to the output produced by the wrapped module, not to its input.
    """

    def __init__(self, wrapped_module: nn.Module, obs_space: Box):
        """
        `__init__(...)`: Initialize the ActClipWrapperModule.

        Args:
            wrapped_module: The module whose output will be clipped.
            obs_space: The `Box` whose `low` and `high` arrays are used as
                the lower and the upper clipping bounds.
        Raises:
            TypeError: if `obs_space` is not an instance of `Box`.
        """
        super().__init__()

        # The bounds are moved to the device reported for the wrapped module.
        target_device = device_of_module(wrapped_module)

        if not isinstance(obs_space, Box):
            raise TypeError(f"Unrecognized observation space: {obs_space}")

        self.wrapped_module = wrapped_module

        # Registered as buffers (not parameters): `_low` and `_high`.
        for buffer_name, bound in (("_low", obs_space.low), ("_high", obs_space.high)):
            self.register_buffer(buffer_name, torch.from_numpy(bound).to(target_device))

    def forward(self, x: torch.Tensor, h: Any = None) -> Union[torch.Tensor, tuple]:
        """
        Run the wrapped module and clip its output.

        Args:
            x: The input forwarded to the wrapped module.
            h: Optional second argument for the wrapped module. When it is
                None, the wrapped module is called with `x` only.
        Returns:
            The clipped output. If the wrapped module returned a 2-element
            tuple, a tuple is returned as well, in which only the first
            element is clipped and the second one is passed through as-is.
        """
        output = self.wrapped_module(x) if h is None else self.wrapped_module(x, h)

        has_state = isinstance(output, tuple)
        if has_state:
            y, state = output
        else:
            y = output

        # Lower bound first, then upper bound.
        clipped = torch.min(torch.max(y, self._low), self._high)

        return (clipped, state) if has_state else clipped

`forward(self, x, h=None)` ¶

Defines the computation performed at every call.

Should be overridden by all subclasses.

Note: Although the recipe for the forward pass needs to be defined within this function, one should call the `Module` instance afterwards instead of calling this function directly, since the former takes care of running the registered hooks while the latter silently ignores them.

Source code in evotorch/neuroevolution/net/rl.py

def forward(self, x: torch.Tensor, h: Any = None) -> Union[torch.Tensor, tuple]:
    """
    Run the wrapped module and clip its output.

    Args:
        x: The input forwarded to the wrapped module.
        h: Optional second argument for the wrapped module. When it is
            None, the wrapped module is called with `x` only.
    Returns:
        The clipped output. If the wrapped module returned a 2-element
        tuple, a tuple is returned as well, in which only the first
        element is clipped and the second one is passed through as-is.
    """
    output = self.wrapped_module(x) if h is None else self.wrapped_module(x, h)

    has_state = isinstance(output, tuple)
    if has_state:
        y, state = output
    else:
        y = output

    # Lower bound first, then upper bound.
    clipped = torch.min(torch.max(y, self._low), self._high)

    return (clipped, state) if has_state else clipped

`AliveBonusScheduleWrapper (Wrapper)` ¶

A wrapper which awards the agent for being alive in a scheduled manner. This wrapper is meant to be used for non-vectorized environments.

Source code in evotorch/neuroevolution/net/rl.py

class AliveBonusScheduleWrapper(gym.Wrapper):
    """
    A wrapper which adds a scheduled alive bonus onto the rewards.

    The amount of the bonus depends on the number of steps taken since the
    last call to `reset()`.
    This wrapper is meant to be used for non-vectorized environments.
    """

    def __init__(self, env: gym.Env, alive_bonus_schedule: tuple, **kwargs):
        """
        `__init__(...)`: Initialize the AliveBonusScheduleWrapper.

        Args:
            env: Environment to wrap.
            alive_bonus_schedule: Either `(t, b)` or `(t0, t1, b)`.
                With `(t, b)`, the alive bonus `b` is added onto every
                reward starting from the timestep `t`.
                With `(t0, t1, b)`, a partial alive bonus (growing linearly
                towards `b`) is added onto the rewards from the timestep
                `t0` up to (but not including) the timestep `t1`, and the
                full alive bonus `b` is added starting from the timestep
                `t1`.
            kwargs: Additional keyword arguments, passed on to the
                initialization method of the superclass.
        Raises:
            ValueError: if `alive_bonus_schedule` has neither 2 nor 3
                elements.
        """
        super().__init__(env, **kwargs)

        # Timestep counter; it stays None until `reset()` is called.
        self.__t: Optional[int] = None

        schedule_length = len(alive_bonus_schedule)
        if schedule_length == 3:
            ramp_start = int(alive_bonus_schedule[0])
            ramp_end = int(alive_bonus_schedule[1])
            full_bonus = float(alive_bonus_schedule[2])
        elif schedule_length == 2:
            # No ramp: the start and the end of the schedule coincide.
            ramp_start = int(alive_bonus_schedule[0])
            ramp_end = int(alive_bonus_schedule[0])
            full_bonus = float(alive_bonus_schedule[1])
        else:
            raise ValueError(
                f"The argument `alive_bonus_schedule` was expected to have 2 or 3 elements."
                f" However, its value is {repr(alive_bonus_schedule)} (having {len(alive_bonus_schedule)} elements)."
            )

        self.__t0 = ramp_start
        self.__t1 = ramp_end
        self.__bonus = full_bonus

        # Length of the ramp; None means that there is no partial-bonus phase.
        self.__gap = (ramp_end - ramp_start) if ramp_end > ramp_start else None

    def reset(self, *args, **kwargs):
        """
        Restart the timestep counter and reset the wrapped environment.

        All arguments are forwarded to the `reset(...)` method of the wrapped
        environment, and its result is returned unchanged.
        """
        self.__t = 0
        initial = self.env.reset(*args, **kwargs)
        return initial

    def step(self, action) -> tuple:
        """
        Take a step in the wrapped environment and add the alive bonus.

        The timestep counter is incremented before the bonus is decided,
        which means that the first step after a reset counts as timestep 1.

        Args:
            action: The action forwarded to the wrapped environment.
        Returns:
            The step result of the wrapped environment, with its second
            element (the reward) possibly increased by the alive bonus.
        """
        outcome = self.env.step(action)
        self.__t += 1

        observation, reward, trailing = outcome[0], outcome[1], outcome[2:]
        elapsed = self.__t

        if elapsed >= self.__t1:
            # Full bonus phase.
            reward = reward + self.__bonus
        elif self.__gap is not None and elapsed >= self.__t0:
            # Ramp phase: fraction of the bonus, proportional to the progress.
            fraction = (elapsed - self.__t0) / self.__gap
            reward = reward + fraction * self.__bonus

        return (observation, reward) + trailing

`__init__(self, env, alive_bonus_schedule, **kwargs)` `special` ¶

`__init__(...)`: Initialize the AliveBonusScheduleWrapper.

Parameters:

Name	Type	Description	Default
`env`	`Env`	Environment to wrap.	required
`alive_bonus_schedule`	`tuple`	If given as a tuple `(t, b)`, an alive bonus `b` will be added onto all the rewards beyond the timestep `t`. If given as a tuple `(t0, t1, b)`, a partial (linearly increasing towards `b`) alive bonus will be added onto all the rewards between the timesteps `t0` and `t1`, and a full alive bonus (which equals to `b`) will be added onto all the rewards beyond the timestep `t1`.	required
`kwargs`		Expected in the form of additional keyword arguments, these will be passed to the initialization method of the superclass.	`{}`

Source code in evotorch/neuroevolution/net/rl.py

def __init__(self, env: gym.Env, alive_bonus_schedule: tuple, **kwargs):
    """
    `__init__(...)`: Initialize the AliveBonusScheduleWrapper.

    Args:
        env: Environment to wrap.
        alive_bonus_schedule: Either `(t, b)` or `(t0, t1, b)`.
            With `(t, b)`, the alive bonus `b` is added onto every
            reward starting from the timestep `t`.
            With `(t0, t1, b)`, a partial alive bonus (growing linearly
            towards `b`) is added onto the rewards from the timestep
            `t0` up to (but not including) the timestep `t1`, and the
            full alive bonus `b` is added starting from the timestep
            `t1`.
        kwargs: Additional keyword arguments, passed on to the
            initialization method of the superclass.
    Raises:
        ValueError: if `alive_bonus_schedule` has neither 2 nor 3
            elements.
    """
    super().__init__(env, **kwargs)

    # Timestep counter; it stays None until `reset()` is called.
    self.__t: Optional[int] = None

    schedule_length = len(alive_bonus_schedule)
    if schedule_length == 3:
        ramp_start = int(alive_bonus_schedule[0])
        ramp_end = int(alive_bonus_schedule[1])
        full_bonus = float(alive_bonus_schedule[2])
    elif schedule_length == 2:
        # No ramp: the start and the end of the schedule coincide.
        ramp_start = int(alive_bonus_schedule[0])
        ramp_end = int(alive_bonus_schedule[0])
        full_bonus = float(alive_bonus_schedule[1])
    else:
        raise ValueError(
            f"The argument `alive_bonus_schedule` was expected to have 2 or 3 elements."
            f" However, its value is {repr(alive_bonus_schedule)} (having {len(alive_bonus_schedule)} elements)."
        )

    self.__t0 = ramp_start
    self.__t1 = ramp_end
    self.__bonus = full_bonus

    # Length of the ramp; None means that there is no partial-bonus phase.
    self.__gap = (ramp_end - ramp_start) if ramp_end > ramp_start else None

`reset(self, *args, **kwargs)` ¶

Uses the `reset` method of the `env` attribute; it can be overridden to change the returned data.

Source code in evotorch/neuroevolution/net/rl.py

def reset(self, *args, **kwargs):
    """
    Restart the timestep counter and reset the wrapped environment.

    All arguments are forwarded to the `reset(...)` method of the wrapped
    environment, and its result is returned unchanged.
    """
    self.__t = 0
    initial = self.env.reset(*args, **kwargs)
    return initial

`step(self, action)` ¶

Uses the `step` method of the `env` attribute; it can be overridden to change the returned data.

Source code in evotorch/neuroevolution/net/rl.py

def step(self, action) -> tuple:
    """
    Take a step in the wrapped environment and add the alive bonus.

    The timestep counter is incremented before the bonus is decided,
    which means that the first step after a reset counts as timestep 1.

    Args:
        action: The action forwarded to the wrapped environment.
    Returns:
        The step result of the wrapped environment, with its second
        element (the reward) possibly increased by the alive bonus.
    """
    outcome = self.env.step(action)
    self.__t += 1

    observation, reward, trailing = outcome[0], outcome[1], outcome[2:]
    elapsed = self.__t

    if elapsed >= self.__t1:
        # Full bonus phase.
        reward = reward + self.__bonus
    elif self.__gap is not None and elapsed >= self.__t0:
        # Ramp phase: fraction of the bonus, proportional to the progress.
        fraction = (elapsed - self.__t0) / self.__gap
        reward = reward + fraction * self.__bonus

    return (observation, reward) + trailing

`ObsNormWrapperModule (Module)` ¶

Source code in evotorch/neuroevolution/net/rl.py

class ObsNormWrapperModule(nn.Module):
    """
    Wrap a module so that its input first passes through a normalizer layer.
    """

    def __init__(self, wrapped_module: nn.Module, rn: Union[RunningStat, RunningNorm]):
        """
        `__init__(...)`: Initialize the ObsNormWrapperModule.

        Args:
            wrapped_module: The module which will receive the normalized
                input.
            rn: The object whose `to_layer()` result is copied and used as
                the normalizer layer.
        """
        super().__init__()

        # The normalizer is moved to the device reported for the wrapped module.
        target_device = device_of_module(wrapped_module)
        self.wrapped_module = wrapped_module

        with torch.no_grad():
            # Work on a deep copy of the layer returned by `rn.to_layer()`.
            layer_copy = deepcopy(rn.to_layer())
            layer_copy = layer_copy.to(target_device)
        self.normalizer = layer_copy

    def forward(self, x: torch.Tensor, h: Any = None) -> Union[torch.Tensor, tuple]:
        """
        Normalize the input, then run the wrapped module on it.

        Args:
            x: The input, which is passed through the normalizer first.
            h: Optional second argument for the wrapped module. When it is
                None, the wrapped module is called with the normalized
                input only.
        Returns:
            The result of the wrapped module. A tuple result is expected to
            have exactly 2 elements and is returned as a 2-element tuple.
        """
        normalized = self.normalizer(x)

        if h is None:
            output = self.wrapped_module(normalized)
        else:
            output = self.wrapped_module(normalized, h)

        if isinstance(output, tuple):
            y, state = output
            return y, state

        return output

`forward(self, x, h=None)` ¶

Defines the computation performed at every call.

Should be overridden by all subclasses.

Note: Although the recipe for the forward pass needs to be defined within this function, one should call the `Module` instance afterwards instead of calling this function directly, since the former takes care of running the registered hooks while the latter silently ignores them.

Source code in evotorch/neuroevolution/net/rl.py

def forward(self, x: torch.Tensor, h: Any = None) -> Union[torch.Tensor, tuple]:
    """
    Normalize the input, then run the wrapped module on it.

    Args:
        x: The input, which is passed through the normalizer first.
        h: Optional second argument for the wrapped module. When it is
            None, the wrapped module is called with the normalized
            input only.
    Returns:
        The result of the wrapped module. A tuple result is expected to
        have exactly 2 elements and is returned as a 2-element tuple.
    """
    normalized = self.normalizer(x)

    if h is None:
        output = self.wrapped_module(normalized)
    else:
        output = self.wrapped_module(normalized, h)

    if isinstance(output, tuple):
        y, state = output
        return y, state

    return output

`reset_env(env)` ¶

Reset a gymnasium environment.

Even though the gymnasium library switched to a new API where the reset() method returns a tuple (observation, info), this function follows the conventions of the classical gym library and returns only the observation of the newly reset environment.

Parameters:

Name	Type	Description	Default
`env`	`Env`	The gymnasium environment which will be reset.	required

Returns:

Type	Description
`Iterable`	The initial observation

Source code in evotorch/neuroevolution/net/rl.py

def reset_env(env: gym.Env) -> Iterable:
    """
    Reset a gymnasium environment and return only the initial observation.

    The `gymnasium` library uses an API in which `reset()` returns a tuple
    `(observation, info)`. This function follows the conventions of the
    classical `gym` library instead, and drops the `info` part.

    Note that any 2-element tuple returned by `env.reset()` is treated as
    an `(observation, info)` pair; any other result is returned as it is.

    Args:
        env: The gymnasium environment which will be reset.
    Returns:
        The initial observation
    """
    outcome = env.reset()
    is_obs_info_pair = isinstance(outcome, tuple) and (len(outcome) == 2)
    return outcome[0] if is_obs_info_pair else outcome

`take_step_in_env(env, action)` ¶

Take a step in the gymnasium environment. Taking a step means performing the action provided via the arguments.

Even though the gymnasium library switched to a new API where the step() method returns a 5-element tuple of the form (observation, reward, terminated, truncated, info), this function follows the conventions of the classical gym library and returns a 4-element tuple (observation, reward, done, info).

Parameters:

Name	Type	Description	Default
`env`	`Env`	The gymnasium environment in which the action will be performed.	required
`action`	`Iterable`	The action to be performed.	required

Returns:

Type	Description
`tuple`	A tuple in the form `(observation, reward, done, info)` where `observation` is the observation received after performing the action, `reward` is the amount of reward gained, `done` is a boolean value indicating whether or not the episode has ended, and `info` is additional information (usually as a dictionary).

Source code in evotorch/neuroevolution/net/rl.py

def take_step_in_env(env: gym.Env, action: Iterable) -> tuple:
    """
    Perform the given action in the gymnasium environment.

    The `gymnasium` library uses an API in which `step()` returns a
    5-element tuple `(observation, reward, terminated, truncated, info)`.
    This function follows the conventions of the classical `gym` library
    instead, and always returns a 4-element tuple
    `(observation, reward, done, info)`. For a 5-element step result,
    `done` is computed as `terminated or truncated`.

    Args:
        env: The gymnasium environment in which the action will be performed.
        action: The action to be performed.
    Returns:
        A tuple in the form `(observation, reward, done, info)` where
        `observation` is the observation received after performing the action,
        `reward` is the amount of reward gained,
        `done` indicates whether or not the episode has ended, and
        `info` is additional information (usually as a dictionary).
    Raises:
        TypeError: if the step result is not a tuple.
        ValueError: if the step result is a tuple whose length is neither
            4 nor 5.
    """
    outcome = env.step(action)

    if not isinstance(outcome, tuple):
        raise TypeError(
            "The result of the `step(...)` method of the gym environment"
            " was expected as a tuple of length 4 or 5."
            f" However, the received result is {repr(outcome)}, which is"
            f" of type {type(outcome)}."
        )

    size = len(outcome)

    if size == 5:
        # New-style API: merge the two termination flags into a single one.
        observation, reward, terminated, truncated, info = outcome
        done = terminated or truncated
    elif size == 4:
        # Classical API: the result is already in the desired form.
        observation, reward, done, info = outcome
    else:
        raise ValueError(
            "The result of the `step(...)` method of the gym environment"
            " was expected as a tuple of length 4 or 5."
            f" However, the received result is {repr(outcome)}, which is"
            f" of length {size}."
        )

    return observation, reward, done, info

Rl

ActClipWrapperModule (Module) ¶

forward(self, x, h=None) ¶

AliveBonusScheduleWrapper (Wrapper) ¶

__init__(self, env, alive_bonus_schedule, **kwargs) special ¶

reset(self, *args, **kwargs) ¶

step(self, action) ¶

ObsNormWrapperModule (Module) ¶

forward(self, x, h=None) ¶

reset_env(env) ¶

take_step_in_env(env, action) ¶

`ActClipWrapperModule (Module)` ¶

`forward(self, x, h=None)` ¶

`AliveBonusScheduleWrapper (Wrapper)` ¶

`__init__(self, env, alive_bonus_schedule, **kwargs)` `special` ¶

`reset(self, *args, **kwargs)` ¶

`step(self, action)` ¶

`ObsNormWrapperModule (Module)` ¶

`forward(self, x, h=None)` ¶

`reset_env(env)` ¶

`take_step_in_env(env, action)` ¶