DQN experiment

This commit is contained in:
Varuna Jayasiri
2021-10-02 14:14:26 +05:30
parent 8d1be06af5
commit ec9a58c658
10 changed files with 527 additions and 387 deletions

View File

@@ -18,10 +18,8 @@ This is a [PyTorch](https://pytorch.org) implementation of paper
Here is the [experiment](experiment.html) and [model](model.html) implementation.
\(
\def\green#1{{\color{yellowgreen}{#1}}}
\)
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""
from typing import Tuple
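The `\green` macro defined above is used to highlight terms in the Q-learning update that this module implements. As a quick orientation, here is a minimal sketch of a double Q-learning style target in PyTorch; the function and argument names (`double_q_target`, `q_net`, `target_net`) are assumptions for illustration, not this module's API.

import torch

def double_q_target(reward: torch.Tensor, done: torch.Tensor,
                    next_obs: torch.Tensor, q_net, target_net,
                    gamma: float = 0.99) -> torch.Tensor:
    # `done` is a 0/1 float mask marking terminal transitions
    with torch.no_grad():
        # choose the greedy next action with the online network ...
        best_action = q_net(next_obs).argmax(dim=-1)
        # ... but evaluate that action with the frozen target network
        next_q = target_net(next_obs).gather(-1, best_action.unsqueeze(-1)).squeeze(-1)
        # bootstrap only from non-terminal states
        return reward + gamma * next_q * (1 - done)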

View File

@@ -8,6 +8,9 @@ summary: Implementation of DQN experiment with Atari Breakout
This experiment trains a Deep Q Network (DQN) to play the Atari Breakout game on OpenAI Gym.
It runs the [game environments on multiple processes](../game.html) to sample efficiently.
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""
import numpy as np
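Before the `Trainer` changes below, here is a rough sketch of the pipe-based worker pattern that "multiple processes" refers to; the names (`worker_process`, `Worker`) and the environment id are assumptions, and the real [game wrapper](../game.html) also preprocesses frames into the 4×84×84 observations used later.

import multiprocessing
import multiprocessing.connection
import gym

def worker_process(remote: multiprocessing.connection.Connection, seed: int):
    # each process owns one environment and answers commands sent over the pipe
    env = gym.make('BreakoutNoFrameskip-v4')  # assumed env id, older Gym API
    env.seed(seed)
    while True:
        cmd, data = remote.recv()
        if cmd == 'reset':
            remote.send(env.reset())
        elif cmd == 'step':
            remote.send(env.step(data))  # (obs, reward, done, info)
        elif cmd == 'close':
            remote.close()
            break

class Worker:
    def __init__(self, seed: int):
        # the trainer keeps `child` and talks to the process through it
        self.child, parent = multiprocessing.Pipe()
        self.process = multiprocessing.Process(target=worker_process, args=(parent, seed))
        self.process.start()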
@@ -44,8 +47,6 @@ class Trainer:
update_target_model: int,
learning_rate: FloatDynamicHyperParam,
):
# #### Configurations
# number of workers
self.n_workers = n_workers
# steps sampled on each update
@@ -92,8 +93,12 @@ class Trainer:
# initialize tensors for observations
self.obs = np.zeros((self.n_workers, 4, 84, 84), dtype=np.uint8)
# reset the workers
for worker in self.workers:
worker.child.send(("reset", None))
# get the initial observations
for i, worker in enumerate(self.workers):
self.obs[i] = worker.child.recv()
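The reset loop above shows one half of the send/recv protocol. A hedged sketch of how the same protocol could drive a single environment step inside `Trainer` follows; the method name and return shapes are assumptions, and the actual sampling code in this file may differ.

def _step_all(self, actions: np.ndarray):
    # send one action to every worker over its pipe
    for worker, action in zip(self.workers, actions):
        worker.child.send(("step", action))
    # collect the resulting transition from each worker
    rewards = np.zeros(self.n_workers, dtype=np.float32)
    dones = np.zeros(self.n_workers, dtype=bool)
    for i, worker in enumerate(self.workers):
        obs, rewards[i], dones[i], _ = worker.child.recv()
        self.obs[i] = obs
    return rewards, dones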

View File

@@ -5,6 +5,9 @@ summary: Implementation of neural network model for Deep Q Network (DQN).
---
# Deep Q Network (DQN) Model
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""
import torch
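For orientation, a minimal sketch of the kind of convolutional Q-network such a model uses: it maps a stack of four 84×84 frames to one Q value per action. The layer sizes follow the original DQN paper and are not copied from this file, which may differ (for example, with a dueling head); the class name is an assumption.

from torch import nn

class QNetworkSketch(nn.Module):
    def __init__(self, n_actions: int):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),
        )
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 512), nn.ReLU(),
            nn.Linear(512, n_actions),
        )

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        # scale uint8 pixels to [0, 1] before the convolutions
        return self.head(self.conv(obs / 255.))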

View File

@@ -8,6 +8,9 @@ summary: Annotated implementation of prioritized experience replay using a binary segment tree
This implements the paper [Prioritized experience replay](https://papers.labml.ai/paper/1511.05952),
using a binary segment tree.
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""
import random
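As a quick illustration of the binary segment tree idea behind proportional sampling (leaves hold priorities, internal nodes hold sums, so a prefix-sum lookup finds a sample in O(log n)); the `SumTree` name and its methods are assumptions for this sketch, not the API of the buffer implemented below.

class SumTree:
    def __init__(self, capacity: int):
        self.capacity = capacity
        # node 0 is unused; leaves live at indexes capacity .. 2 * capacity - 1
        self.tree = [0.] * (2 * capacity)

    def update(self, idx: int, priority: float):
        # set the leaf, then refresh the sums on the path back to the root
        i = idx + self.capacity
        self.tree[i] = priority
        i //= 2
        while i >= 1:
            self.tree[i] = self.tree[2 * i] + self.tree[2 * i + 1]
            i //= 2

    def find(self, prefix_sum: float) -> int:
        # walk down from the root, descending into the child that contains `prefix_sum`
        i = 1
        while i < self.capacity:
            if self.tree[2 * i] > prefix_sum:
                i = 2 * i
            else:
                prefix_sum -= self.tree[2 * i]
                i = 2 * i + 1
        return i - self.capacity  # leaf index == buffer slot

Sampling proportional to priority then reduces to `tree.find(random.random() * tree.tree[1])`, since `tree.tree[1]` holds the total priority.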