Module robotic_manipulator_rloa.naf_components.naf_neural_network
Expand source code
from typing import Optional, Tuple, Any
import torch
from torch import nn
from torch.distributions import MultivariateNormal
class NAF(nn.Module):
    def __init__(self, state_size: int, action_size: int, layer_size: int, seed: int, device: torch.device) -> None:
        """
        Model to be used in the NAF (Normalized Advantage Function) algorithm.

        Network architecture:

        - Common network
            - Linear + BatchNormalization (state_size, layer_size)
            - Linear + BatchNormalization (layer_size, layer_size)
        - Output for mu network (used for calculating A)
            - Linear (layer_size, action_size)
        - Output for V network (used for calculating Q = A + V)
            - Linear (layer_size, 1)
        - Output for L network (used for calculating P = L . Lt)
            - Linear (layer_size, action_size*(action_size+1)/2), i.e. one output
              per entry of a lower-triangular (action_size x action_size) matrix

        Args:
            state_size: Dimension of a state.
            action_size: Dimension of an action.
            layer_size: Size of the hidden layers of the neural network.
            seed: Random seed, passed to ``torch.manual_seed`` (this seeds torch's
                global RNG); the returned value is stored in ``self.seed``.
            device: Torch device on which the L matrix is allocated in ``forward``.
        """
        super().__init__()
        self.seed = torch.manual_seed(seed)
        self.state_size = state_size
        self.action_size = action_size
        self.device = device

        # Shared trunk: two Linear layers, each followed by BatchNorm.
        self.input_layer = nn.Linear(in_features=self.state_size, out_features=layer_size)
        self.bn1 = nn.BatchNorm1d(layer_size)
        self.hidden_layer = nn.Linear(in_features=layer_size, out_features=layer_size)
        self.bn2 = nn.BatchNorm1d(layer_size)

        # Head for mu (the action that maximizes Q).
        self.action_values = nn.Linear(in_features=layer_size, out_features=action_size)
        # Head for the state value V.
        self.value = nn.Linear(in_features=layer_size, out_features=1)
        # Head for the entries of the lower-triangular matrix L:
        # n * (n + 1) / 2 entries for an n x n lower triangle (always an integer).
        self.matrix_entries = nn.Linear(in_features=layer_size,
                                        out_features=self.action_size * (self.action_size + 1) // 2)

    def forward(self,
                input_: torch.Tensor,
                action: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, Optional[Any], Any]:
        """
        Forward propagation.

        Feeds the input through the shared trunk and reads the mu, V and L heads:

        - The L head fills a lower-triangular matrix L (diagonal exponentiated),
          from which P = L . Lt is built.
        - The mu head (after tanh) is the action that maximizes the Q-function.
        - If ``action`` is given, A = -0.5 * (action - mu)t . P . (action - mu)
          and Q = A + V.

        Args:
            input_: Input for the neural network's input layer; its first
                dimension is used as the number of samples.
            action: Current action, used for calculating the Q-Function estimate.
                When None, no Q value is computed.

        Returns:
            A tuple ``(action, Q, V)``:
            - action: a sample from a multivariate normal with mean mu and
              covariance P^-1, clamped to [-1, 1];
            - Q: the Q-Function estimate, or None if ``action`` was None;
            - V: the output of the value head.
        """
        # ============ FEED INPUT DATA TO THE NEURAL NETWORK =================================
        x = torch.relu(self.bn1(self.input_layer(input_)))
        x = torch.relu(self.bn2(self.hidden_layer(x)))
        # mu head, tanh-squashed and reshaped to a column vector per sample
        action_value = torch.tanh(self.action_values(x)).unsqueeze(-1)
        # L head, tanh-squashed
        matrix_entries = torch.tanh(self.matrix_entries(x))
        # V head (no activation)
        V = self.value(x)

        # ============ CREATE L MATRIX from the outputs of the L layer =======================
        # Lower-triangular matrix, size: (n_samples, action_size, action_size).
        # Allocated directly on the target device with the dtype of the L-head output
        # so that the assignment below never has to cast.
        L = torch.zeros((input_.shape[0], self.action_size, self.action_size),
                        dtype=matrix_entries.dtype, device=self.device)
        # Row 0 holds the row coordinates and row 1 the column coordinates of the
        # lower-triangular positions.
        lower_tri_indices = torch.tril_indices(row=self.action_size, col=self.action_size, offset=0,
                                               device=L.device)
        L[:, lower_tri_indices[0], lower_tri_indices[1]] = matrix_entries
        # Exponentiate the diagonal in place so that it is strictly positive.
        L.diagonal(dim1=1, dim2=2).exp_()
        # State-dependent, positive-definite square matrix P = L . Lt.
        # This must be a matrix product: an element-wise product would discard all
        # off-diagonal terms and leave P diagonal.
        P = torch.matmul(L, L.transpose(2, 1))

        # ============================ CALCULATE Q-VALUE ===================================== #
        Q = None
        if action is not None:
            # Advantage: A = -0.5 * (u - mu)t . P . (u - mu)
            diff = action.unsqueeze(-1) - action_value
            A = (-0.5 * torch.matmul(torch.matmul(diff.transpose(2, 1), P), diff)).squeeze(-1)
            Q = A + V

        # =========================== ADD NOISE TO ACTION ==================================== #
        # Exploration noise: sample around mu with covariance P^-1, then clamp to [-1, 1].
        dist = MultivariateNormal(action_value.squeeze(-1), torch.inverse(P))
        noisy_action = torch.clamp(dist.sample(), min=-1, max=1)
        return noisy_action, Q, V
Classes
class NAF (state_size: int, action_size: int, layer_size: int, seed: int, device: torch.device)
-
Model to be used in the NAF algorithm. Inherits from torch.nn.Module. Network Architecture:
-
Common network
-
Linear + BatchNormalization (input_shape, layer_size)
-
Linear + BatchNormalization (layer_size, layer_size)
-
-
Output for mu network (used for calculating A)
- Linear (layer_size, action_size)
-
Output for V network (used for calculating Q = A + V)
- Linear (layer_size, 1)
-
Output for L network (used for calculating P = L . Lt)
- Linear (layer_size, action_size*(action_size+1)/2)
Args
state_size
- Dimension of a state.
action_size
- Dimension of an action.
layer_size
- Size of the hidden layers of the neural network.
seed
- Random seed.
device
- Torch device (CPU or CUDA) on which the L matrix is allocated.
Expand source code
class NAF(nn.Module):
    def __init__(self, state_size: int, action_size: int, layer_size: int, seed: int, device: torch.device) -> None:
        """
        Model to be used in the NAF (Normalized Advantage Function) algorithm.

        Network architecture:

        - Common network
            - Linear + BatchNormalization (state_size, layer_size)
            - Linear + BatchNormalization (layer_size, layer_size)
        - Output for mu network (used for calculating A)
            - Linear (layer_size, action_size)
        - Output for V network (used for calculating Q = A + V)
            - Linear (layer_size, 1)
        - Output for L network (used for calculating P = L . Lt)
            - Linear (layer_size, action_size*(action_size+1)/2), i.e. one output
              per entry of a lower-triangular (action_size x action_size) matrix

        Args:
            state_size: Dimension of a state.
            action_size: Dimension of an action.
            layer_size: Size of the hidden layers of the neural network.
            seed: Random seed, passed to ``torch.manual_seed`` (this seeds torch's
                global RNG); the returned value is stored in ``self.seed``.
            device: Torch device on which the L matrix is allocated in ``forward``.
        """
        super().__init__()
        self.seed = torch.manual_seed(seed)
        self.state_size = state_size
        self.action_size = action_size
        self.device = device

        # Shared trunk: two Linear layers, each followed by BatchNorm.
        self.input_layer = nn.Linear(in_features=self.state_size, out_features=layer_size)
        self.bn1 = nn.BatchNorm1d(layer_size)
        self.hidden_layer = nn.Linear(in_features=layer_size, out_features=layer_size)
        self.bn2 = nn.BatchNorm1d(layer_size)

        # Head for mu (the action that maximizes Q).
        self.action_values = nn.Linear(in_features=layer_size, out_features=action_size)
        # Head for the state value V.
        self.value = nn.Linear(in_features=layer_size, out_features=1)
        # Head for the entries of the lower-triangular matrix L:
        # n * (n + 1) / 2 entries for an n x n lower triangle (always an integer).
        self.matrix_entries = nn.Linear(in_features=layer_size,
                                        out_features=self.action_size * (self.action_size + 1) // 2)

    def forward(self,
                input_: torch.Tensor,
                action: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, Optional[Any], Any]:
        """
        Forward propagation.

        Feeds the input through the shared trunk and reads the mu, V and L heads:

        - The L head fills a lower-triangular matrix L (diagonal exponentiated),
          from which P = L . Lt is built.
        - The mu head (after tanh) is the action that maximizes the Q-function.
        - If ``action`` is given, A = -0.5 * (action - mu)t . P . (action - mu)
          and Q = A + V.

        Args:
            input_: Input for the neural network's input layer; its first
                dimension is used as the number of samples.
            action: Current action, used for calculating the Q-Function estimate.
                When None, no Q value is computed.

        Returns:
            A tuple ``(action, Q, V)``:
            - action: a sample from a multivariate normal with mean mu and
              covariance P^-1, clamped to [-1, 1];
            - Q: the Q-Function estimate, or None if ``action`` was None;
            - V: the output of the value head.
        """
        # ============ FEED INPUT DATA TO THE NEURAL NETWORK =================================
        x = torch.relu(self.bn1(self.input_layer(input_)))
        x = torch.relu(self.bn2(self.hidden_layer(x)))
        # mu head, tanh-squashed and reshaped to a column vector per sample
        action_value = torch.tanh(self.action_values(x)).unsqueeze(-1)
        # L head, tanh-squashed
        matrix_entries = torch.tanh(self.matrix_entries(x))
        # V head (no activation)
        V = self.value(x)

        # ============ CREATE L MATRIX from the outputs of the L layer =======================
        # Lower-triangular matrix, size: (n_samples, action_size, action_size).
        # Allocated directly on the target device with the dtype of the L-head output
        # so that the assignment below never has to cast.
        L = torch.zeros((input_.shape[0], self.action_size, self.action_size),
                        dtype=matrix_entries.dtype, device=self.device)
        # Row 0 holds the row coordinates and row 1 the column coordinates of the
        # lower-triangular positions.
        lower_tri_indices = torch.tril_indices(row=self.action_size, col=self.action_size, offset=0,
                                               device=L.device)
        L[:, lower_tri_indices[0], lower_tri_indices[1]] = matrix_entries
        # Exponentiate the diagonal in place so that it is strictly positive.
        L.diagonal(dim1=1, dim2=2).exp_()
        # State-dependent, positive-definite square matrix P = L . Lt.
        # This must be a matrix product: an element-wise product would discard all
        # off-diagonal terms and leave P diagonal.
        P = torch.matmul(L, L.transpose(2, 1))

        # ============================ CALCULATE Q-VALUE ===================================== #
        Q = None
        if action is not None:
            # Advantage: A = -0.5 * (u - mu)t . P . (u - mu)
            diff = action.unsqueeze(-1) - action_value
            A = (-0.5 * torch.matmul(torch.matmul(diff.transpose(2, 1), P), diff)).squeeze(-1)
            Q = A + V

        # =========================== ADD NOISE TO ACTION ==================================== #
        # Exploration noise: sample around mu with covariance P^-1, then clamp to [-1, 1].
        dist = MultivariateNormal(action_value.squeeze(-1), torch.inverse(P))
        noisy_action = torch.clamp(dist.sample(), min=-1, max=1)
        return noisy_action, Q, V
Ancestors
- torch.nn.modules.module.Module
Class variables
var dump_patches : bool
var training : bool
Methods
def forward(self, input_: torch.Tensor, action: Optional[torch.Tensor] = None) ‑> Tuple[torch.Tensor, Optional[Any], Any]
-
Forward propagation. It feeds the NN with the input, and gets the output for the mu, V and L networks.
-
Output from the L network is used to create the P matrix.
-
Output from the V network is used to calculate the Q value: Q = A + V
-
Output from the mu network is used to calculate A. The action output of mu nn is considered the action that maximizes Q-function.
Args
input_
- Input for the neural network's input layer.
action
- Current action, used for calculating the Q-Function estimate.
Returns
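Returns a tuple (action, Q, V): an action sampled from a Gaussian centred on the Q-maximizing action mu (covariance P^-1) and clamped to [-1, 1], the Q-Function estimate (None when no action is passed), and the Value Function estimate.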
Expand source code
def forward(self,
            input_: torch.Tensor,
            action: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, Optional[Any], Any]:
    """
    Forward propagation.

    Feeds the input through the shared trunk and reads the mu, V and L heads:

    - The L head fills a lower-triangular matrix L (diagonal exponentiated),
      from which P = L . Lt is built.
    - The mu head (after tanh) is the action that maximizes the Q-function.
    - If ``action`` is given, A = -0.5 * (action - mu)t . P . (action - mu)
      and Q = A + V.

    Args:
        input_: Input for the neural network's input layer; its first
            dimension is used as the number of samples.
        action: Current action, used for calculating the Q-Function estimate.
            When None, no Q value is computed.

    Returns:
        A tuple ``(action, Q, V)``:
        - action: a sample from a multivariate normal with mean mu and
          covariance P^-1, clamped to [-1, 1];
        - Q: the Q-Function estimate, or None if ``action`` was None;
        - V: the output of the value head.
    """
    # ============ FEED INPUT DATA TO THE NEURAL NETWORK =================================
    x = torch.relu(self.bn1(self.input_layer(input_)))
    x = torch.relu(self.bn2(self.hidden_layer(x)))
    # mu head, tanh-squashed and reshaped to a column vector per sample
    action_value = torch.tanh(self.action_values(x)).unsqueeze(-1)
    # L head, tanh-squashed
    matrix_entries = torch.tanh(self.matrix_entries(x))
    # V head (no activation)
    V = self.value(x)

    # ============ CREATE L MATRIX from the outputs of the L layer =======================
    # Lower-triangular matrix, size: (n_samples, action_size, action_size).
    # Allocated directly on the target device with the dtype of the L-head output
    # so that the assignment below never has to cast.
    L = torch.zeros((input_.shape[0], self.action_size, self.action_size),
                    dtype=matrix_entries.dtype, device=self.device)
    # Row 0 holds the row coordinates and row 1 the column coordinates of the
    # lower-triangular positions.
    lower_tri_indices = torch.tril_indices(row=self.action_size, col=self.action_size, offset=0,
                                           device=L.device)
    L[:, lower_tri_indices[0], lower_tri_indices[1]] = matrix_entries
    # Exponentiate the diagonal in place so that it is strictly positive.
    L.diagonal(dim1=1, dim2=2).exp_()
    # State-dependent, positive-definite square matrix P = L . Lt.
    # This must be a matrix product: an element-wise product would discard all
    # off-diagonal terms and leave P diagonal.
    P = torch.matmul(L, L.transpose(2, 1))

    # ============================ CALCULATE Q-VALUE ===================================== #
    Q = None
    if action is not None:
        # Advantage: A = -0.5 * (u - mu)t . P . (u - mu)
        diff = action.unsqueeze(-1) - action_value
        A = (-0.5 * torch.matmul(torch.matmul(diff.transpose(2, 1), P), diff)).squeeze(-1)
        Q = A + V

    # =========================== ADD NOISE TO ACTION ==================================== #
    # Exploration noise: sample around mu with covariance P^-1, then clamp to [-1, 1].
    dist = MultivariateNormal(action_value.squeeze(-1), torch.inverse(P))
    noisy_action = torch.clamp(dist.sample(), min=-1, max=1)
    return noisy_action, Q, V
-
-