Module ilpyt.agents.imitation_agent

An implementation of a simple behavioral cloning (BC) agent, in the style of ALVINN. The BC approach was described by Dean A. Pomerleau in the paper "An Autonomous Land Vehicle in a Neural Network" (ALVINN), presented at NIPS 1988.

For more details, please refer to the paper: https://papers.nips.cc/paper/1988/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Paper.pdf
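
In short, BC treats imitation as supervised learning: the policy is trained to minimize the negative log-likelihood of the expert's actions under the policy's action distribution, which is the loss computed in update() below. A minimal sketch of that objective for a discrete-action policy (the stand-in network, shapes, and action space are illustrative, not part of ilpyt):

import torch
from torch.distributions import Categorical

# Illustrative expert data: 32 state-action pairs, 4-dimensional states,
# 2 discrete actions.
expert_states = torch.randn(32, 4)
expert_actions = torch.randint(0, 2, (32,))

policy = torch.nn.Linear(4, 2)  # stand-in for the policy network
dist = Categorical(logits=policy(expert_states))

# Behavioral cloning objective: negative mean log-likelihood of expert actions.
loss = -dist.log_prob(expert_actions).mean()
loss.backward()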

Source code
"""
An implementation of a simple behavioral cloning (BC) agent, in the style of
ALVINN. The BC approach was described by Dean A. Pomerleau in the paper
"An Autonomous Land Vehicle in a Neural Network" (ALVINN), presented at NIPS 1988.

For more details, please refer to the paper: https://papers.nips.cc/paper/1988/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Paper.pdf
"""

from typing import Dict, Union

import numpy as np
import torch
from torch.optim import Adam

from ilpyt.agents.base_agent import BaseAgent
from ilpyt.nets.base_net import BaseNetwork
from ilpyt.utils.agent_utils import *


class ImitationAgent(BaseAgent):
    def initialize(
        self, net: Union[BaseNetwork, None] = None, lr: float = 0.001
    ) -> None:
        """
        Initialization function for a simple BC agent.

        Parameters
        ----------
        net: BaseNetwork, default=None
            policy network
        lr: float, default=0.001
            learning rate

        Raises
        ------
        ValueError:
            if `net` is not specified
        """
        if net is None:
            raise ValueError(
                'Please provide input value for net. Currently set to None.'
            )
        self.net = net
        self.nets = {'net': self.net}
        self.opt = Adam(self.net.parameters(), lr=lr)

    @torch.no_grad()
    def step(self, state: torch.Tensor) -> np.ndarray:
        """
        Find best action for the given state.

        Parameters
        ----------
        state: torch.Tensor
            state tensor, of size (batch_size, state_shape)

        Returns
        -------
        np.ndarray:
            selected actions, of size (batch_size, action_shape)
        """
        _, actions = self.net.get_action(state)

        if self.device == 'gpu':
            actions = actions.cpu().numpy()
        else:
            actions = actions.numpy()
        return actions

    def update(self, batch: Dict[str, torch.Tensor]) -> Dict[str, float]:
        """
        Update agent policy based on batch of experiences.

        Parameters
        ----------
        batch: Dict[str, torch.Tensor]
            batch of transitions, with keys `states`, `actions`. Values 
            should be of size (num_steps, num_env, item_shape)

        Returns
        -------
        Dict[str, float]:
            losses for the update step, key strings and loss values can be 
            automatically recorded to TensorBoard
        """
        actions = batch['actions']
        if self.device == 'gpu':
            actions = actions.cuda()
        dist, _ = self.net.get_action(batch['states'])
        log_action_probs = dist.log_prob(actions)
        if len(log_action_probs.shape) > 1:
            log_action_probs = log_action_probs.sum(axis=-1)
        loss = -(log_action_probs.mean())

        self.opt.zero_grad()
        loss.backward(retain_graph=True)
        torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1.5)
        self.opt.step()

        loss_dict = {'loss/total': loss.item()}
        return loss_dict

Classes

class ImitationAgent (**kwargs)

By default, the agent is in train mode and uses the CPU for step and update calls.

Parameters

**kwargs: arbitrary keyword arguments that will be passed to the initialize function
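
For example, a minimal construction sketch (my_policy_net is a placeholder for a BaseNetwork subclass supplied by the caller, not something defined by ilpyt):

from ilpyt.agents.imitation_agent import ImitationAgent

# Keyword arguments are forwarded to initialize(), so the policy network
# and learning rate can be passed directly to the constructor.
agent = ImitationAgent(net=my_policy_net, lr=1e-3)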


Ancestors

ilpyt.agents.base_agent.BaseAgent

Methods

def initialize(self, net: Union[BaseNetwork, None] = None, lr: float = 0.001) -> None

Initialization function for a simple BC agent.

Parameters

net : BaseNetwork, default=None
policy network
lr : float, default=0.001
learning rate

Raises

ValueError

if net is not specified
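
As the source above shows, initialize() only uses net.parameters() (to build the Adam optimizer), and the agent later calls net.get_action(states), which is expected to return a (distribution, actions) pair. In practice net should be a BaseNetwork subclass from ilpyt.nets; the toy module below only illustrates that interface and is not part of the library:

import torch
import torch.nn as nn
from torch.distributions import Categorical

class ToyDiscretePolicy(nn.Module):
    # Illustrative stand-in: maps states to a categorical action
    # distribution plus a sampled action, mirroring how the agent
    # uses get_action() in step() and update().
    def __init__(self, state_dim: int = 4, num_actions: int = 2):
        super().__init__()
        self.fc = nn.Linear(state_dim, num_actions)

    def get_action(self, states: torch.Tensor):
        dist = Categorical(logits=self.fc(states))
        return dist, dist.sample()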

def step(self, state: torch.Tensor) -> numpy.ndarray

Find best action for the given state.

Parameters

state : torch.Tensor
state tensor, of size (batch_size, state_shape)

Returns

np.ndarray:
selected actions, of size (batch_size, action_shape)
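
A short usage sketch (shapes are illustrative; agent is an already-initialized ImitationAgent such as the one constructed above):

import torch

# Batch of 8 states from a 4-dimensional observation space.
states = torch.randn(8, 4)

# Gradients are disabled inside step(); the result is a NumPy array of
# 8 selected actions, ready to pass to a (vectorized) environment.
actions = agent.step(states)
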
def update(self, batch: Dict[str, torch.Tensor]) -> Dict[str, float]

Update agent policy based on batch of experiences.

Parameters

batch : Dict[str, torch.Tensor]
batch of transitions, with keys states, actions. Values should be of size (num_steps, num_env, item_shape)

Returns

Dict[str, float]:
losses for the update step, key strings and loss values can be automatically recorded to TensorBoard
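
A sketch of one update step on a batch of expert transitions (num_steps=16 and num_env=4 are illustrative; agent wraps a discrete-action policy like the toy network above):

import torch

# Expert transitions collected over 16 steps of 4 parallel environments.
batch = {
    'states': torch.randn(16, 4, 4),          # (num_steps, num_env, state_shape)
    'actions': torch.randint(0, 2, (16, 4)),  # (num_steps, num_env)
}

losses = agent.update(batch)
print(losses['loss/total'])  # scalar BC loss, loggable to TensorBoard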
