dqn experiment
@@ -18,10 +8,8 @@ This is a [PyTorch](https://pytorch.org) implementation of paper

Here is the [experiment](experiment.html) and [model](model.html) implementation.

\(
\def\green#1{{\color{yellowgreen}{#1}}}
\)

[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""

from typing import Tuple
@@ -8,6 +8,9 @@ summary: Implementation of DQN experiment with Atari Breakout

This experiment trains a Deep Q Network (DQN) to play the Atari Breakout game on OpenAI Gym.
It runs the [game environments on multiple processes](../game.html) to sample efficiently.

[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""

import numpy as np
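The multi-process sampling referenced here lives in [game.html](../game.html) and is not part of this diff. Purely as an illustration of the pattern, here is a minimal sketch, assuming a hypothetical `DummyGame` environment and the same `("reset", ...)` / `("step", ...)` pipe protocol that the trainer uses further down; none of these names are taken from the repository.

```python
import multiprocessing
import multiprocessing.connection

import numpy as np


class DummyGame:
    """Hypothetical stand-in for the Atari Breakout wrapper in game.py."""

    def reset(self):
        # Return a stack of 4 blank 84x84 frames
        return np.zeros((4, 84, 84), dtype=np.uint8)

    def step(self, action):
        obs = np.zeros((4, 84, 84), dtype=np.uint8)
        reward, done, info = 0.0, False, {}
        return obs, reward, done, info


def worker_process(remote: multiprocessing.connection.Connection, seed: int):
    """Run a game on a separate process and answer commands sent over the pipe."""
    game = DummyGame()
    while True:
        cmd, data = remote.recv()
        if cmd == "reset":
            remote.send(game.reset())
        elif cmd == "step":
            remote.send(game.step(data))
        elif cmd == "close":
            remote.close()
            break


class Worker:
    """Parent-side handle; `child` is the pipe end the trainer talks to."""

    def __init__(self, seed: int):
        self.child, parent_end = multiprocessing.Pipe()
        self.process = multiprocessing.Process(target=worker_process, args=(parent_end, seed))
        self.process.start()


if __name__ == "__main__":
    workers = [Worker(seed=i) for i in range(4)]
    for w in workers:
        w.child.send(("reset", None))
    first_obs = [w.child.recv() for w in workers]
    for w in workers:
        w.child.send(("close", None))
```

Each worker owns its own process, so environment stepping can proceed in parallel while the main process trains the network.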
@@ -44,8 +47,6 @@ class Trainer:
                 update_target_model: int,
                 learning_rate: FloatDynamicHyperParam,
                 ):
        # #### Configurations

        # number of workers
        self.n_workers = n_workers
        # steps sampled on each update
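This hunk only stores the configuration values; how they are consumed is not shown here. Below is a minimal sketch of the typical usage, assuming the dynamic hyper-parameter is callable and returns its current value, and that `update_target_model` counts the number of updates between target-network syncs; `q_net`, `target_net`, and `optimizer` are placeholders, not names from the repository.

```python
import torch
from torch import nn

# Placeholder networks and optimizer; the real ones come from model.py.
q_net = nn.Linear(4, 2)
target_net = nn.Linear(4, 2)
optimizer = torch.optim.Adam(q_net.parameters(), lr=2.5e-4)

update_target_model = 250       # assumed: updates between target-network syncs
learning_rate = lambda: 2.5e-4  # stand-in for a dynamic hyper-parameter, assumed callable

for update in range(1_000):
    # Refresh the optimizer's learning rate from the dynamic hyper-parameter
    for pg in optimizer.param_groups:
        pg['lr'] = learning_rate()

    # ... compute the loss and take an optimizer step here ...

    # Periodically copy the online network's weights into the target network
    if update % update_target_model == 0:
        target_net.load_state_dict(q_net.state_dict())
```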
@@ -92,8 +93,12 @@ class Trainer:

        # initialize tensors for observations
        self.obs = np.zeros((self.n_workers, 4, 84, 84), dtype=np.uint8)

        # reset the workers
        for worker in self.workers:
            worker.child.send(("reset", None))

        # get the initial observations
        for i, worker in enumerate(self.workers):
            self.obs[i] = worker.child.recv()
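The loop above only performs the reset handshake. A sampling step would presumably mirror it, first sending a `("step", action)` message to every worker and then collecting the replies. The sketch below assumes each reply follows the usual Gym `(obs, reward, done, info)` convention; it pairs with the worker sketch earlier and is not the repository's sampling code.

```python
import numpy as np


def sample_step(workers, obs, actions, rewards_buf, done_buf, t):
    """One environment step across all workers, updating `obs` in place.

    `workers` follow the assumed ("step", action) -> (obs, reward, done, info)
    protocol; `obs` is the (n_workers, 4, 84, 84) uint8 array from above.
    """
    # Send the chosen action to every worker first, so they all step in parallel
    for w, worker in enumerate(workers):
        worker.child.send(("step", actions[w]))

    # Then collect the results in the same order
    for w, worker in enumerate(workers):
        next_obs, reward, done, info = worker.child.recv()
        obs[w] = next_obs
        rewards_buf[w, t] = reward
        done_buf[w, t] = done
```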
@@ -5,6 +5,9 @@ summary: Implementation of neural network model for Deep Q Network (DQN).
---

# Deep Q Network (DQN) Model

[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""

import torch
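For orientation, here is a minimal sketch of a convolutional Q-network for the 4-frame, 84×84 observations used in this experiment. It is not the model defined on this page: the layer sizes follow the common Atari DQN setup and the value/advantage (dueling-style) split is included only as a typical design choice; both are assumptions here.

```python
import torch
from torch import nn


class QNetworkSketch(nn.Module):
    """Illustrative Q-network for stacked 4x84x84 Atari frames (not the repo's model)."""

    def __init__(self, n_actions: int = 4):
        super().__init__()
        # Convolutional trunk sized like the classic Atari DQN
        self.conv = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),
            nn.Flatten(),
        )
        # Dueling-style heads: a scalar state value and per-action advantages
        self.value = nn.Sequential(nn.Linear(64 * 7 * 7, 512), nn.ReLU(), nn.Linear(512, 1))
        self.advantage = nn.Sequential(nn.Linear(64 * 7 * 7, 512), nn.ReLU(), nn.Linear(512, n_actions))

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        # Scale uint8 pixels to [0, 1]
        h = self.conv(obs.float() / 255.0)
        value = self.value(h)
        advantage = self.advantage(h)
        # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
        return value + advantage - advantage.mean(dim=-1, keepdim=True)


if __name__ == "__main__":
    q = QNetworkSketch()(torch.zeros(2, 4, 84, 84, dtype=torch.uint8))
    print(q.shape)  # torch.Size([2, 4])
```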
@@ -8,6 +8,9 @@ summary: Annotated implementation of prioritized experience replay using a binary segment tree.

This implements the paper [Prioritized experience replay](https://papers.labml.ai/paper/1511.05952),
using a binary segment tree.

[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""

import random
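The replay buffer on this page keeps priorities in a binary segment tree so that sampling in proportion to priority and updating a priority both cost O(log n). The sketch below illustrates just that idea with a fixed-capacity sum tree; `SumTreeSketch`, `set`, and `find_prefix_sum` are illustrative names, not the annotated buffer's API.

```python
import random


class SumTreeSketch:
    """Minimal binary segment tree over priorities (illustrative, not the repo's buffer)."""

    def __init__(self, capacity: int):
        # Full binary tree stored in an array; the last `capacity` entries are the leaves
        self.capacity = capacity
        self.tree = [0.0] * (2 * capacity)

    def set(self, idx: int, priority: float):
        """Set the priority of slot `idx` and update the sums up to the root: O(log n)."""
        i = idx + self.capacity
        self.tree[i] = priority
        i //= 2
        while i >= 1:
            self.tree[i] = self.tree[2 * i] + self.tree[2 * i + 1]
            i //= 2

    def total(self) -> float:
        # Root holds the sum of all priorities
        return self.tree[1]

    def find_prefix_sum(self, prefix: float) -> int:
        """Return the smallest idx whose cumulative priority exceeds `prefix` (prefix < total)."""
        i = 1
        while i < self.capacity:
            if self.tree[2 * i] > prefix:
                i = 2 * i                 # go left
            else:
                prefix -= self.tree[2 * i]
                i = 2 * i + 1             # go right
        return i - self.capacity


if __name__ == "__main__":
    tree = SumTreeSketch(capacity=8)
    for idx, p in enumerate([1.0, 0.5, 2.0, 0.1]):
        tree.set(idx, p)
    # Sample an index with probability proportional to its priority
    sampled = tree.find_prefix_sum(random.random() * tree.total())
    print(sampled)
```

Sampling draws a uniform prefix in [0, total) and walks down the tree, which selects leaf i with probability proportional to its priority.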