dqn experiment
@@ -18,10 +8,8 @@ This is a [PyTorch](https://pytorch.org) implementation of paper

Here is the [experiment](experiment.html) and [model](model.html) implementation.

\(
\def\green#1{{\color{yellowgreen}{#1}}}
\)

[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""

from typing import Tuple
@@ -8,6 +8,9 @@ summary: Implementation of DQN experiment with Atari Breakout

This experiment trains a Deep Q Network (DQN) to play the Atari Breakout game on OpenAI Gym.
It runs the [game environments on multiple processes](../game.html) to sample efficiently.

[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""

import numpy as np
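The multi-process sampling referenced here lives in [game.html](../game.html) and is not part of this diff. Purely as an illustration of the pattern, here is a minimal sketch, assuming a hypothetical `DummyGame` environment and the same `("reset", ...)` / `("step", ...)` pipe protocol that the trainer uses further down; none of these names are taken from the repository.

```python
import multiprocessing
import multiprocessing.connection

import numpy as np


class DummyGame:
    """Hypothetical stand-in for the Atari Breakout wrapper in game.py."""

    def reset(self):
        # Return a stack of 4 blank 84x84 frames
        return np.zeros((4, 84, 84), dtype=np.uint8)

    def step(self, action):
        obs = np.zeros((4, 84, 84), dtype=np.uint8)
        reward, done, info = 0.0, False, {}
        return obs, reward, done, info


def worker_process(remote: multiprocessing.connection.Connection, seed: int):
    """Run a game on a separate process and answer commands sent over the pipe."""
    game = DummyGame()
    while True:
        cmd, data = remote.recv()
        if cmd == "reset":
            remote.send(game.reset())
        elif cmd == "step":
            remote.send(game.step(data))
        elif cmd == "close":
            remote.close()
            break


class Worker:
    """Parent-side handle; `child` is the pipe end the trainer talks to."""

    def __init__(self, seed: int):
        self.child, parent_end = multiprocessing.Pipe()
        self.process = multiprocessing.Process(target=worker_process, args=(parent_end, seed))
        self.process.start()


if __name__ == "__main__":
    workers = [Worker(seed=i) for i in range(4)]
    for w in workers:
        w.child.send(("reset", None))
    first_obs = [w.child.recv() for w in workers]
    for w in workers:
        w.child.send(("close", None))
```

Each worker owns its own process, so environment stepping can proceed in parallel while the main process trains the network.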
@@ -44,8 +47,6 @@ class Trainer:
                 update_target_model: int,
                 learning_rate: FloatDynamicHyperParam,
                 ):
        # #### Configurations

        # number of workers
        self.n_workers = n_workers
        # steps sampled on each update
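This hunk only stores the configuration values; how they are consumed is not shown here. Below is a minimal sketch of the typical usage, assuming the dynamic hyper-parameter is callable and returns its current value, and that `update_target_model` counts the number of updates between target-network syncs; `q_net`, `target_net`, and `optimizer` are placeholders, not names from the repository.

```python
import torch
from torch import nn

# Placeholder networks and optimizer; the real ones come from model.py.
q_net = nn.Linear(4, 2)
target_net = nn.Linear(4, 2)
optimizer = torch.optim.Adam(q_net.parameters(), lr=2.5e-4)

update_target_model = 250       # assumed: updates between target-network syncs
learning_rate = lambda: 2.5e-4  # stand-in for a dynamic hyper-parameter, assumed callable

for update in range(1_000):
    # Refresh the optimizer's learning rate from the dynamic hyper-parameter
    for pg in optimizer.param_groups:
        pg['lr'] = learning_rate()

    # ... compute the loss and take an optimizer step here ...

    # Periodically copy the online network's weights into the target network
    if update % update_target_model == 0:
        target_net.load_state_dict(q_net.state_dict())
```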
@@ -92,8 +93,12 @@ class Trainer:

        # initialize tensors for observations
        self.obs = np.zeros((self.n_workers, 4, 84, 84), dtype=np.uint8)

        # reset the workers
        for worker in self.workers:
            worker.child.send(("reset", None))

        # get the initial observations
        for i, worker in enumerate(self.workers):
            self.obs[i] = worker.child.recv()
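The loop above only performs the reset handshake. A sampling step would presumably mirror it, first sending a `("step", action)` message to every worker and then collecting the replies. The sketch below assumes each reply follows the usual Gym `(obs, reward, done, info)` convention; it pairs with the worker sketch earlier and is not the repository's sampling code.

```python
import numpy as np


def sample_step(workers, obs, actions, rewards_buf, done_buf, t):
    """One environment step across all workers, updating `obs` in place.

    `workers` follow the assumed ("step", action) -> (obs, reward, done, info)
    protocol; `obs` is the (n_workers, 4, 84, 84) uint8 array from above.
    """
    # Send the chosen action to every worker first, so they all step in parallel
    for w, worker in enumerate(workers):
        worker.child.send(("step", actions[w]))

    # Then collect the results in the same order
    for w, worker in enumerate(workers):
        next_obs, reward, done, info = worker.child.recv()
        obs[w] = next_obs
        rewards_buf[w, t] = reward
        done_buf[w, t] = done
```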
@@ -5,6 +5,9 @@ summary: Implementation of neural network model for Deep Q Network (DQN).
---

# Deep Q Network (DQN) Model

[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""

import torch
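For orientation, here is a minimal sketch of a convolutional Q-network for the 4-frame, 84×84 observations used in this experiment. It is not the model defined on this page: the layer sizes follow the common Atari DQN setup and the value/advantage (dueling-style) split is included only as a typical design choice; both are assumptions here.

```python
import torch
from torch import nn


class QNetworkSketch(nn.Module):
    """Illustrative Q-network for stacked 4x84x84 Atari frames (not the repo's model)."""

    def __init__(self, n_actions: int = 4):
        super().__init__()
        # Convolutional trunk sized like the classic Atari DQN
        self.conv = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),
            nn.Flatten(),
        )
        # Dueling-style heads: a scalar state value and per-action advantages
        self.value = nn.Sequential(nn.Linear(64 * 7 * 7, 512), nn.ReLU(), nn.Linear(512, 1))
        self.advantage = nn.Sequential(nn.Linear(64 * 7 * 7, 512), nn.ReLU(), nn.Linear(512, n_actions))

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        # Scale uint8 pixels to [0, 1]
        h = self.conv(obs.float() / 255.0)
        value = self.value(h)
        advantage = self.advantage(h)
        # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
        return value + advantage - advantage.mean(dim=-1, keepdim=True)


if __name__ == "__main__":
    q = QNetworkSketch()(torch.zeros(2, 4, 84, 84, dtype=torch.uint8))
    print(q.shape)  # torch.Size([2, 4])
```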
@@ -8,6 +8,9 @@ summary: Annotated implementation of prioritized experience replay using a binary segment tree.

This implements the paper [Prioritized experience replay](https://papers.labml.ai/paper/1511.05952),
using a binary segment tree.

[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
[](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
"""

import random
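The replay buffer on this page keeps priorities in a binary segment tree so that sampling in proportion to priority and updating a priority both cost O(log n). The sketch below illustrates just that idea with a fixed-capacity sum tree; `SumTreeSketch`, `set`, and `find_prefix_sum` are illustrative names, not the annotated buffer's API.

```python
import random


class SumTreeSketch:
    """Minimal binary segment tree over priorities (illustrative, not the repo's buffer)."""

    def __init__(self, capacity: int):
        # Full binary tree stored in an array; the last `capacity` entries are the leaves
        self.capacity = capacity
        self.tree = [0.0] * (2 * capacity)

    def set(self, idx: int, priority: float):
        """Set the priority of slot `idx` and update the sums up to the root: O(log n)."""
        i = idx + self.capacity
        self.tree[i] = priority
        i //= 2
        while i >= 1:
            self.tree[i] = self.tree[2 * i] + self.tree[2 * i + 1]
            i //= 2

    def total(self) -> float:
        # Root holds the sum of all priorities
        return self.tree[1]

    def find_prefix_sum(self, prefix: float) -> int:
        """Return the smallest idx whose cumulative priority exceeds `prefix` (prefix < total)."""
        i = 1
        while i < self.capacity:
            if self.tree[2 * i] > prefix:
                i = 2 * i                 # go left
            else:
                prefix -= self.tree[2 * i]
                i = 2 * i + 1             # go right
        return i - self.capacity


if __name__ == "__main__":
    tree = SumTreeSketch(capacity=8)
    for idx, p in enumerate([1.0, 0.5, 2.0, 0.1]):
        tree.set(idx, p)
    # Sample an index with probability proportional to its priority
    sampled = tree.find_prefix_sum(random.random() * tree.total())
    print(sampled)
```

Sampling draws a uniform prefix in [0, total) and walks down the tree, which selects leaf i with probability proportional to its priority.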