Rl
This namespace provides various reinforcement learning utilities.
ActClipWrapperModule (Module)
¶
Source code in evotorch/neuroevolution/net/rl.py
class ActClipWrapperModule(nn.Module):
def __init__(self, wrapped_module: nn.Module, obs_space: Box):
super().__init__()
device = device_of_module(wrapped_module)
if not isinstance(obs_space, Box):
raise TypeError(f"Unrecognized observation space: {obs_space}")
self.wrapped_module = wrapped_module
self.register_buffer("_low", torch.from_numpy(obs_space.low).to(device))
self.register_buffer("_high", torch.from_numpy(obs_space.high).to(device))
def forward(self, x: torch.Tensor, h: Any = None) -> Union[torch.Tensor, tuple]:
if h is None:
result = self.wrapped_module(x)
else:
result = self.wrapped_module(x, h)
if isinstance(result, tuple):
x, h = result
got_h = True
else:
x = result
h = None
got_h = False
x = torch.max(x, self._low)
x = torch.min(x, self._high)
if got_h:
return x, h
else:
return x
forward(self, x, h=None)
¶
Defines the computation performed at every call.
Should be overridden by all subclasses.
.. note::
Although the recipe for forward pass needs to be defined within
this function, one should call the :class:Module
instance afterwards
instead of this since the former takes care of running the
registered hooks while the latter silently ignores them.
Source code in evotorch/neuroevolution/net/rl.py
def forward(self, x: torch.Tensor, h: Any = None) -> Union[torch.Tensor, tuple]:
if h is None:
result = self.wrapped_module(x)
else:
result = self.wrapped_module(x, h)
if isinstance(result, tuple):
x, h = result
got_h = True
else:
x = result
h = None
got_h = False
x = torch.max(x, self._low)
x = torch.min(x, self._high)
if got_h:
return x, h
else:
return x
AliveBonusScheduleWrapper (Wrapper)
¶
A Wrapper which awards the agent for being alive in a scheduled manner This wrapper is meant to be used for non-vectorized environments.
Source code in evotorch/neuroevolution/net/rl.py
class AliveBonusScheduleWrapper(gym.Wrapper):
"""
A Wrapper which awards the agent for being alive in a scheduled manner
This wrapper is meant to be used for non-vectorized environments.
"""
def __init__(self, env: gym.Env, alive_bonus_schedule: tuple, **kwargs):
"""
`__init__(...)`: Initialize the AliveBonusScheduleWrapper.
Args:
env: Environment to wrap.
alive_bonus_schedule: If given as a tuple `(t, b)`, an alive
bonus `b` will be added onto all the rewards beyond the
timestep `t`.
If given as a tuple `(t0, t1, b)`, a partial (linearly
increasing towards `b`) alive bonus will be added onto
all the rewards between the timesteps `t0` and `t1`,
and a full alive bonus (which equals to `b`) will be added
onto all the rewards beyond the timestep `t1`.
kwargs: Expected in the form of additional keyword arguments,
these will be passed to the initialization method of the
superclass.
"""
super().__init__(env, **kwargs)
self.__t: Optional[int] = None
if len(alive_bonus_schedule) == 3:
self.__t0, self.__t1, self.__bonus = (
int(alive_bonus_schedule[0]),
int(alive_bonus_schedule[1]),
float(alive_bonus_schedule[2]),
)
elif len(alive_bonus_schedule) == 2:
self.__t0, self.__t1, self.__bonus = (
int(alive_bonus_schedule[0]),
int(alive_bonus_schedule[0]),
float(alive_bonus_schedule[1]),
)
else:
raise ValueError(
f"The argument `alive_bonus_schedule` was expected to have 2 or 3 elements."
f" However, its value is {repr(alive_bonus_schedule)} (having {len(alive_bonus_schedule)} elements)."
)
if self.__t1 > self.__t0:
self.__gap = self.__t1 - self.__t0
else:
self.__gap = None
def reset(self, *args, **kwargs):
self.__t = 0
return self.env.reset(*args, **kwargs)
def step(self, action) -> tuple:
step_result = self.env.step(action)
self.__t += 1
observation = step_result[0]
reward = step_result[1]
rest = step_result[2:]
if self.__t >= self.__t1:
reward = reward + self.__bonus
elif (self.__gap is not None) and (self.__t >= self.__t0):
reward = reward + ((self.__t - self.__t0) / self.__gap) * self.__bonus
return (observation, reward) + rest
__init__(self, env, alive_bonus_schedule, **kwargs)
special
¶
__init__(...)
: Initialize the AliveBonusScheduleWrapper.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
env |
Env |
Environment to wrap. |
required |
alive_bonus_schedule |
tuple |
If given as a tuple |
required |
kwargs |
Expected in the form of additional keyword arguments, these will be passed to the initialization method of the superclass. |
{} |
Source code in evotorch/neuroevolution/net/rl.py
def __init__(self, env: gym.Env, alive_bonus_schedule: tuple, **kwargs):
"""
`__init__(...)`: Initialize the AliveBonusScheduleWrapper.
Args:
env: Environment to wrap.
alive_bonus_schedule: If given as a tuple `(t, b)`, an alive
bonus `b` will be added onto all the rewards beyond the
timestep `t`.
If given as a tuple `(t0, t1, b)`, a partial (linearly
increasing towards `b`) alive bonus will be added onto
all the rewards between the timesteps `t0` and `t1`,
and a full alive bonus (which equals to `b`) will be added
onto all the rewards beyond the timestep `t1`.
kwargs: Expected in the form of additional keyword arguments,
these will be passed to the initialization method of the
superclass.
"""
super().__init__(env, **kwargs)
self.__t: Optional[int] = None
if len(alive_bonus_schedule) == 3:
self.__t0, self.__t1, self.__bonus = (
int(alive_bonus_schedule[0]),
int(alive_bonus_schedule[1]),
float(alive_bonus_schedule[2]),
)
elif len(alive_bonus_schedule) == 2:
self.__t0, self.__t1, self.__bonus = (
int(alive_bonus_schedule[0]),
int(alive_bonus_schedule[0]),
float(alive_bonus_schedule[1]),
)
else:
raise ValueError(
f"The argument `alive_bonus_schedule` was expected to have 2 or 3 elements."
f" However, its value is {repr(alive_bonus_schedule)} (having {len(alive_bonus_schedule)} elements)."
)
if self.__t1 > self.__t0:
self.__gap = self.__t1 - self.__t0
else:
self.__gap = None
reset(self, *args, **kwargs)
¶
step(self, action)
¶
Uses the :meth:step
of the :attr:env
that can be overwritten to change the returned data.
Source code in evotorch/neuroevolution/net/rl.py
def step(self, action) -> tuple:
step_result = self.env.step(action)
self.__t += 1
observation = step_result[0]
reward = step_result[1]
rest = step_result[2:]
if self.__t >= self.__t1:
reward = reward + self.__bonus
elif (self.__gap is not None) and (self.__t >= self.__t0):
reward = reward + ((self.__t - self.__t0) / self.__gap) * self.__bonus
return (observation, reward) + rest
ObsNormWrapperModule (Module)
¶
Source code in evotorch/neuroevolution/net/rl.py
class ObsNormWrapperModule(nn.Module):
def __init__(self, wrapped_module: nn.Module, rn: Union[RunningStat, RunningNorm]):
super().__init__()
device = device_of_module(wrapped_module)
self.wrapped_module = wrapped_module
with torch.no_grad():
normalizer = deepcopy(rn.to_layer()).to(device)
self.normalizer = normalizer
def forward(self, x: torch.Tensor, h: Any = None) -> Union[torch.Tensor, tuple]:
x = self.normalizer(x)
if h is None:
result = self.wrapped_module(x)
else:
result = self.wrapped_module(x, h)
if isinstance(result, tuple):
x, h = result
got_h = True
else:
x = result
h = None
got_h = False
if got_h:
return x, h
else:
return x
forward(self, x, h=None)
¶
Defines the computation performed at every call.
Should be overridden by all subclasses.
.. note::
Although the recipe for forward pass needs to be defined within
this function, one should call the :class:Module
instance afterwards
instead of this since the former takes care of running the
registered hooks while the latter silently ignores them.
Source code in evotorch/neuroevolution/net/rl.py
def forward(self, x: torch.Tensor, h: Any = None) -> Union[torch.Tensor, tuple]:
x = self.normalizer(x)
if h is None:
result = self.wrapped_module(x)
else:
result = self.wrapped_module(x, h)
if isinstance(result, tuple):
x, h = result
got_h = True
else:
x = result
h = None
got_h = False
if got_h:
return x, h
else:
return x
reset_env(env)
¶
Reset a gymnasium environment.
Even though the gymnasium
library switched to a new API where the
reset()
method returns a tuple (observation, info)
, this function
follows the conventions of the classical gym
library and returns
only the observation of the newly reset environment.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
env |
Env |
The gymnasium environment which will be reset. |
required |
Returns:
Type | Description |
---|---|
Iterable |
The initial observation |
Source code in evotorch/neuroevolution/net/rl.py
def reset_env(env: gym.Env) -> Iterable:
"""
Reset a gymnasium environment.
Even though the `gymnasium` library switched to a new API where the
`reset()` method returns a tuple `(observation, info)`, this function
follows the conventions of the classical `gym` library and returns
only the observation of the newly reset environment.
Args:
env: The gymnasium environment which will be reset.
Returns:
The initial observation
"""
result = env.reset()
if isinstance(result, tuple) and (len(result) == 2):
result = result[0]
return result
take_step_in_env(env, action)
¶
Take a step in the gymnasium environment. Taking a step means performing the action provided via the arguments.
Even though the gymnasium
library switched to a new API where the
step()
method returns a 5-element tuple of the form
(observation, reward, terminated, truncated, info)
, this function
follows the conventions of the classical gym
library and returns
a 4-element tuple (observation, reward, done, info)
.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
env |
Env |
The gymnasium environment in which the action will be performed. |
required |
action |
Iterable |
The action to be performed. |
required |
Returns:
Type | Description |
---|---|
tuple |
A tuple in the form |
Source code in evotorch/neuroevolution/net/rl.py
def take_step_in_env(env: gym.Env, action: Iterable) -> tuple:
"""
Take a step in the gymnasium environment.
Taking a step means performing the action provided via the arguments.
Even though the `gymnasium` library switched to a new API where the
`step()` method returns a 5-element tuple of the form
`(observation, reward, terminated, truncated, info)`, this function
follows the conventions of the classical `gym` library and returns
a 4-element tuple `(observation, reward, done, info)`.
Args:
env: The gymnasium environment in which the action will be performed.
action: The action to be performed.
Returns:
A tuple in the form `(observation, reward, done, info)` where
`observation` is the observation received after performing the action,
`reward` is the amount of reward gained,
`done` is a boolean value indicating whether or not the episode has
ended, and
`info` is additional information (usually as a dictionary).
"""
result = env.step(action)
if isinstance(result, tuple):
n = len(result)
if n == 4:
observation, reward, done, info = result
elif n == 5:
observation, reward, terminated, truncated, info = result
done = terminated or truncated
else:
raise ValueError(
f"The result of the `step(...)` method of the gym environment"
f" was expected as a tuple of length 4 or 5."
f" However, the received result is {repr(result)}, which is"
f" of length {len(result)}."
)
else:
raise TypeError(
f"The result of the `step(...)` method of the gym environment"
f" was expected as a tuple of length 4 or 5."
f" However, the received result is {repr(result)}, which is"
f" of type {type(result)}."
)
return observation, reward, done, info