Module `ilpyt.utils.replay_memory`

Expand source code

from typing import Dict, List

import numpy as np
import torch


class ReplayMemory:
    def __init__(self, capacity: int):
        """
        Experience replay memory to store transitions. Used in DQN.
        Adapted from:
        https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html

        Parameters
        ----------
        capacity: int
            maximum number of experiences to store in replay memory.
        """
        self.states: List[torch.Tensor] = []
        self.actions: List[torch.Tensor] = []
        self.next_states: List[torch.Tensor] = []
        self.dones: List[bool] = []
        self.rewards: List[float] = []

        self.size = 0
        self.capacity = capacity

    def add(self, batch: Dict[str, torch.Tensor]):
        """
        Add an experience to replay memory.

        Parameters
        ----------
        batch: dict
            single experience
        """
        self.states.append(batch['states'].to('cpu'))
        self.next_states.append(batch['next_states'].to('cpu'))
        self.actions.append(batch['actions'].to('cpu'))
        self.rewards.append(batch['rewards'].to('cpu'))
        self.dones.append(batch['dones'].to('cpu'))
        self.size += 1

        # Remove oldest experience if replay memory full
        if self.size > self.capacity:
            self.states.pop(0)
            self.next_states.pop(0)
            self.actions.pop(0)
            self.rewards.pop(0)
            self.dones.pop(0)
            self.size -= 1

    def sample(self, batch_size: int) -> Dict[str, torch.Tensor]:
        """
        Sample some transitions from replay memory.

        Parameters
        ----------
        batch_size: int
            number of experiences to sample from replay memory

        Returns
        -------
        Dict[str, torch.Tensor]:
            dictionary of random experiences if there are enough available, 
            else None
        """
        if batch_size > self.size:
            return None

        # Sample a batch
        idxs = np.random.randint(0, self.size, batch_size)
        states = [self.states[i] for i in idxs]
        next_states = [self.next_states[i] for i in idxs]
        actions = [self.actions[i] for i in idxs]
        rewards = [self.rewards[i] for i in idxs]
        dones = [self.dones[i] for i in idxs]

        sample_batch = {
            'states': torch.cat(states, dim=0),
            'actions': torch.cat(actions, dim=0),
            'rewards': torch.cat(rewards, dim=0),
            'next_states': torch.cat(next_states, dim=0),
            'dones': torch.cat(dones, dim=0),
        }

        return sample_batch

    def save(self, path: str) -> None:
        """
        Save the ReplayMemory buffer to a numpy file.

        Parameters
        ----------
        path: str
            save path for buffer array
        """
        b = np.asarray(self.buffer)
        np.save(path, b)

    def load(self, path: str) -> None:
        """
        Load a numpy file to the ReplayMemory buffer.

        Parameters
        ----------
        path: str
            load path for buffer array
        """
        b = np.load(path + '.npy', allow_pickle=True)
        assert b.shape[0] == self.memory_size

        for i in range(b.shape[0]):
            self.add(b[i])

Classes

class ReplayMemory (capacity: int)

Experience replay memory to store transitions. Used in DQN. Adapted from: https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html

Parameters

capacity : int: maximum number of experiences to store in replay memory.

Expand source code

class ReplayMemory:
    def __init__(self, capacity: int):
        """
        Experience replay memory to store transitions. Used in DQN.
        Adapted from:
        https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html

        Parameters
        ----------
        capacity: int
            maximum number of experiences to store in replay memory.
        """
        self.states: List[torch.Tensor] = []
        self.actions: List[torch.Tensor] = []
        self.next_states: List[torch.Tensor] = []
        self.dones: List[bool] = []
        self.rewards: List[float] = []

        self.size = 0
        self.capacity = capacity

    def add(self, batch: Dict[str, torch.Tensor]):
        """
        Add an experience to replay memory.

        Parameters
        ----------
        batch: dict
            single experience
        """
        self.states.append(batch['states'].to('cpu'))
        self.next_states.append(batch['next_states'].to('cpu'))
        self.actions.append(batch['actions'].to('cpu'))
        self.rewards.append(batch['rewards'].to('cpu'))
        self.dones.append(batch['dones'].to('cpu'))
        self.size += 1

        # Remove oldest experience if replay memory full
        if self.size > self.capacity:
            self.states.pop(0)
            self.next_states.pop(0)
            self.actions.pop(0)
            self.rewards.pop(0)
            self.dones.pop(0)
            self.size -= 1

    def sample(self, batch_size: int) -> Dict[str, torch.Tensor]:
        """
        Sample some transitions from replay memory.

        Parameters
        ----------
        batch_size: int
            number of experiences to sample from replay memory

        Returns
        -------
        Dict[str, torch.Tensor]:
            dictionary of random experiences if there are enough available, 
            else None
        """
        if batch_size > self.size:
            return None

        # Sample a batch
        idxs = np.random.randint(0, self.size, batch_size)
        states = [self.states[i] for i in idxs]
        next_states = [self.next_states[i] for i in idxs]
        actions = [self.actions[i] for i in idxs]
        rewards = [self.rewards[i] for i in idxs]
        dones = [self.dones[i] for i in idxs]

        sample_batch = {
            'states': torch.cat(states, dim=0),
            'actions': torch.cat(actions, dim=0),
            'rewards': torch.cat(rewards, dim=0),
            'next_states': torch.cat(next_states, dim=0),
            'dones': torch.cat(dones, dim=0),
        }

        return sample_batch

    def save(self, path: str) -> None:
        """
        Save the ReplayMemory buffer to a numpy file.

        Parameters
        ----------
        path: str
            save path for buffer array
        """
        b = np.asarray(self.buffer)
        np.save(path, b)

    def load(self, path: str) -> None:
        """
        Load a numpy file to the ReplayMemory buffer.

        Parameters
        ----------
        path: str
            load path for buffer array
        """
        b = np.load(path + '.npy', allow_pickle=True)
        assert b.shape[0] == self.memory_size

        for i in range(b.shape[0]):
            self.add(b[i])

Methods

def add(self, batch: Dict[str, torch.Tensor])

Add an experience to replay memory.

Parameters

batch : dict: single experience

Expand source code

def add(self, batch: Dict[str, torch.Tensor]):
    """
    Add an experience to replay memory.

    Parameters
    ----------
    batch: dict
        single experience
    """
    self.states.append(batch['states'].to('cpu'))
    self.next_states.append(batch['next_states'].to('cpu'))
    self.actions.append(batch['actions'].to('cpu'))
    self.rewards.append(batch['rewards'].to('cpu'))
    self.dones.append(batch['dones'].to('cpu'))
    self.size += 1

    # Remove oldest experience if replay memory full
    if self.size > self.capacity:
        self.states.pop(0)
        self.next_states.pop(0)
        self.actions.pop(0)
        self.rewards.pop(0)
        self.dones.pop(0)
        self.size -= 1

def load(self, path: str) ‑> NoneType

Load a numpy file to the ReplayMemory buffer.

Parameters

path : str: load path for buffer array

Expand source code

def load(self, path: str) -> None:
    """
    Load a numpy file to the ReplayMemory buffer.

    Parameters
    ----------
    path: str
        load path for buffer array
    """
    b = np.load(path + '.npy', allow_pickle=True)
    assert b.shape[0] == self.memory_size

    for i in range(b.shape[0]):
        self.add(b[i])

def sample(self, batch_size: int) ‑> Dict[str, torch.Tensor]

Sample some transitions from replay memory.

Parameters

batch_size : int: number of experiences to sample from replay memory

Returns

Dict[str, torch.Tensor]:: dictionary of random experiences if there are enough available, else None

Expand source code

def sample(self, batch_size: int) -> Dict[str, torch.Tensor]:
    """
    Sample some transitions from replay memory.

    Parameters
    ----------
    batch_size: int
        number of experiences to sample from replay memory

    Returns
    -------
    Dict[str, torch.Tensor]:
        dictionary of random experiences if there are enough available, 
        else None
    """
    if batch_size > self.size:
        return None

    # Sample a batch
    idxs = np.random.randint(0, self.size, batch_size)
    states = [self.states[i] for i in idxs]
    next_states = [self.next_states[i] for i in idxs]
    actions = [self.actions[i] for i in idxs]
    rewards = [self.rewards[i] for i in idxs]
    dones = [self.dones[i] for i in idxs]

    sample_batch = {
        'states': torch.cat(states, dim=0),
        'actions': torch.cat(actions, dim=0),
        'rewards': torch.cat(rewards, dim=0),
        'next_states': torch.cat(next_states, dim=0),
        'dones': torch.cat(dones, dim=0),
    }

    return sample_batch

def save(self, path: str) ‑> NoneType

Save the ReplayMemory buffer to a numpy file.

Parameters

path : str: save path for buffer array

Expand source code

def save(self, path: str) -> None:
    """
    Save the ReplayMemory buffer to a numpy file.

    Parameters
    ----------
    path: str
        save path for buffer array
    """
    b = np.asarray(self.buffer)
    np.save(path, b)