Rl

This namespace provides various reinforcement learning utilities.

AliveBonusScheduleWrapper

Bases: Wrapper

A Wrapper which rewards the agent for being alive, in a scheduled manner. This wrapper is meant to be used with non-vectorized environments.

Source code in evotorch/neuroevolution/net/rl.py
class AliveBonusScheduleWrapper(gym.Wrapper):
    """
    A Wrapper which awards the agent for being alive in a scheduled manner
    This wrapper is meant to be used for non-vectorized environments.
    """

    def __init__(self, env: gym.Env, alive_bonus_schedule: tuple, **kwargs):
        """
        `__init__(...)`: Initialize the AliveBonusScheduleWrapper.

        Args:
            env: Environment to wrap.
            alive_bonus_schedule: If given as a tuple `(t, b)`, an alive
                bonus `b` will be added onto all the rewards beyond the
                timestep `t`.
                If given as a tuple `(t0, t1, b)`, a partial (linearly
                increasing towards `b`) alive bonus will be added onto
                all the rewards between the timesteps `t0` and `t1`,
                and a full alive bonus (which equals to `b`) will be added
                onto all the rewards beyond the timestep `t1`.
            kwargs: Expected in the form of additional keyword arguments,
                these will be passed to the initialization method of the
                superclass.
        """
        super().__init__(env, **kwargs)
        self.__t: Optional[int] = None

        if len(alive_bonus_schedule) == 3:
            self.__t0, self.__t1, self.__bonus = (
                int(alive_bonus_schedule[0]),
                int(alive_bonus_schedule[1]),
                float(alive_bonus_schedule[2]),
            )
        elif len(alive_bonus_schedule) == 2:
            self.__t0, self.__t1, self.__bonus = (
                int(alive_bonus_schedule[0]),
                int(alive_bonus_schedule[0]),
                float(alive_bonus_schedule[1]),
            )
        else:
            raise ValueError(
                f"The argument `alive_bonus_schedule` was expected to have 2 or 3 elements."
                f" However, its value is {repr(alive_bonus_schedule)} (having {len(alive_bonus_schedule)} elements)."
            )

        if self.__t1 > self.__t0:
            self.__gap = self.__t1 - self.__t0
        else:
            self.__gap = None

    def reset(self, *args, **kwargs):
        self.__t = 0
        return self.env.reset(*args, **kwargs)

    def step(self, action) -> tuple:
        step_result = self.env.step(action)
        self.__t += 1

        observation = step_result[0]
        reward = step_result[1]
        rest = step_result[2:]

        if self.__t >= self.__t1:
            reward = reward + self.__bonus
        elif (self.__gap is not None) and (self.__t >= self.__t0):
            reward = reward + ((self.__t - self.__t0) / self.__gap) * self.__bonus

        return (observation, reward) + rest
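
Example: a minimal usage sketch. CartPole-v1 is used only as an illustrative environment, and the schedule values are hypothetical; any non-vectorized gymnasium environment can be wrapped the same way.

import gymnasium as gym

from evotorch.neuroevolution.net.rl import AliveBonusScheduleWrapper

# Hypothetical schedule: ramp the alive bonus up linearly between
# timesteps 100 and 200, then add the full bonus of 1.0 to every
# reward from timestep 200 onwards.
env = AliveBonusScheduleWrapper(gym.make("CartPole-v1"), (100, 200, 1.0))

observation, info = env.reset()
episode_return = 0.0
for _ in range(300):
    # A random policy stands in for an actual controller here.
    observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
    episode_return += reward
    if terminated or truncated:
        break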

__init__(env, alive_bonus_schedule, **kwargs)

__init__(...): Initialize the AliveBonusScheduleWrapper.

Parameters:

env (Env, required)
    Environment to wrap.

alive_bonus_schedule (tuple, required)
    If given as a tuple (t, b), an alive bonus b will be added onto all
    the rewards beyond the timestep t. If given as a tuple (t0, t1, b),
    a partial (linearly increasing towards b) alive bonus will be added
    onto all the rewards between the timesteps t0 and t1, and a full
    alive bonus (which equals b) will be added onto all the rewards
    beyond the timestep t1.

kwargs (default: {})
    Additional keyword arguments; these will be passed to the
    initialization method of the superclass.
Source code in evotorch/neuroevolution/net/rl.py
def __init__(self, env: gym.Env, alive_bonus_schedule: tuple, **kwargs):
    """
    `__init__(...)`: Initialize the AliveBonusScheduleWrapper.

    Args:
        env: Environment to wrap.
        alive_bonus_schedule: If given as a tuple `(t, b)`, an alive
            bonus `b` will be added onto all the rewards beyond the
            timestep `t`.
            If given as a tuple `(t0, t1, b)`, a partial (linearly
            increasing towards `b`) alive bonus will be added onto
            all the rewards between the timesteps `t0` and `t1`,
            and a full alive bonus (which equals to `b`) will be added
            onto all the rewards beyond the timestep `t1`.
        kwargs: Expected in the form of additional keyword arguments,
            these will be passed to the initialization method of the
            superclass.
    """
    super().__init__(env, **kwargs)
    self.__t: Optional[int] = None

    if len(alive_bonus_schedule) == 3:
        self.__t0, self.__t1, self.__bonus = (
            int(alive_bonus_schedule[0]),
            int(alive_bonus_schedule[1]),
            float(alive_bonus_schedule[2]),
        )
    elif len(alive_bonus_schedule) == 2:
        self.__t0, self.__t1, self.__bonus = (
            int(alive_bonus_schedule[0]),
            int(alive_bonus_schedule[0]),
            float(alive_bonus_schedule[1]),
        )
    else:
        raise ValueError(
            f"The argument `alive_bonus_schedule` was expected to have 2 or 3 elements."
            f" However, its value is {repr(alive_bonus_schedule)} (having {len(alive_bonus_schedule)} elements)."
        )

    if self.__t1 > self.__t0:
        self.__gap = self.__t1 - self.__t0
    else:
        self.__gap = None

reset_env(env)

Reset a gymnasium environment.

Even though the gymnasium library switched to a new API where the reset() method returns a tuple (observation, info), this function follows the conventions of the classical gym library and returns only the observation of the newly reset environment.

Parameters:

env (Env, required)
    The gymnasium environment which will be reset.
Source code in evotorch/neuroevolution/net/rl.py
def reset_env(env: gym.Env) -> Iterable:
    """
    Reset a gymnasium environment.

    Even though the `gymnasium` library switched to a new API where the
    `reset()` method returns a tuple `(observation, info)`, this function
    follows the conventions of the classical `gym` library and returns
    only the observation of the newly reset environment.

    Args:
        env: The gymnasium environment which will be reset.
    Returns:
        The initial observation
    """
    result = env.reset()
    if isinstance(result, tuple) and (len(result) == 2):
        result = result[0]
    return result
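
Example: a brief sketch of the API difference this helper smooths over. CartPole-v1 is used only as an illustrative environment.

import gymnasium as gym

from evotorch.neuroevolution.net.rl import reset_env

env = gym.make("CartPole-v1")

# gymnasium's native reset() returns an (observation, info) pair...
observation, info = env.reset()

# ...whereas reset_env() follows the classical gym convention and
# returns only the initial observation.
observation = reset_env(env)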

take_step_in_env(env, action)

Take a step in the gymnasium environment. Taking a step means performing the action provided via the arguments.

Even though the gymnasium library switched to a new API where the step() method returns a 5-element tuple of the form (observation, reward, terminated, truncated, info), this function follows the conventions of the classical gym library and returns a 4-element tuple (observation, reward, done, info).

Parameters:

env (Env, required)
    The gymnasium environment in which the action will be performed.

action (Iterable, required)
    The action to be performed.
Source code in evotorch/neuroevolution/net/rl.py
def take_step_in_env(env: gym.Env, action: Iterable) -> tuple:
    """
    Take a step in the gymnasium environment.
    Taking a step means performing the action provided via the arguments.

    Even though the `gymnasium` library switched to a new API where the
    `step()` method returns a 5-element tuple of the form
    `(observation, reward, terminated, truncated, info)`, this function
    follows the conventions of the classical `gym` library and returns
    a 4-element tuple `(observation, reward, done, info)`.

    Args:
        env: The gymnasium environment in which the action will be performed.
        action: The action to be performed.
    Returns:
        A tuple in the form `(observation, reward, done, info)` where
        `observation` is the observation received after performing the action,
        `reward` is the amount of reward gained,
        `done` is a boolean value indicating whether or not the episode has
        ended, and
        `info` is additional information (usually as a dictionary).
    """
    result = env.step(action)
    if isinstance(result, tuple):
        n = len(result)
        if n == 4:
            observation, reward, done, info = result
        elif n == 5:
            observation, reward, terminated, truncated, info = result
            done = terminated or truncated
        else:
            raise ValueError(
                f"The result of the `step(...)` method of the gym environment"
                f" was expected as a tuple of length 4 or 5."
                f" However, the received result is {repr(result)}, which is"
                f" of length {len(result)}."
            )
    else:
        raise TypeError(
            f"The result of the `step(...)` method of the gym environment"
            f" was expected as a tuple of length 4 or 5."
            f" However, the received result is {repr(result)}, which is"
            f" of type {type(result)}."
        )
    return observation, reward, done, info
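
Together, reset_env and take_step_in_env allow episode rollouts to be written against the classical 4-tuple gym interface even when the underlying environment follows the newer gymnasium API. A minimal rollout sketch (the environment name is illustrative):

import gymnasium as gym

from evotorch.neuroevolution.net.rl import reset_env, take_step_in_env

env = gym.make("CartPole-v1")

observation = reset_env(env)
episode_return = 0.0
done = False
while not done:
    # A random policy stands in for an actual controller here.
    action = env.action_space.sample()
    observation, reward, done, info = take_step_in_env(env, action)
    episode_return += reward

print("Episode return:", episode_return)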