Mirror of https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
(synced 2025-08-14 17:41:37 +08:00)

Commit: dqn experiment link
@@ -71,7 +71,7 @@
 <p>This experiment trains a Deep Q Network (DQN) to play Atari Breakout game on OpenAI Gym.
 It runs the <a href="../game.html">game environments on multiple processes</a> to sample efficiently.</p>
 <p><a href="https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg" /></a>
-<a href="https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
+<a href="https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
 </div>
 <div class='code'>
 <div class="highlight"><pre><span class="lineno">16</span><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
@@ -986,7 +986,7 @@ Gradients shouldn’t propagate for these</p>
 <p>Learning rate.</p>
 </div>
 <div class='code'>
-<div class="highlight"><pre><span class="lineno">274</span> <span class="s1">'learning_rate'</span><span class="p">:</span> <span class="n">FloatDynamicHyperParam</span><span class="p">(</span><span class="mf">2.5e-4</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mf">1e-3</span><span class="p">)),</span>
+<div class="highlight"><pre><span class="lineno">274</span> <span class="s1">'learning_rate'</span><span class="p">:</span> <span class="n">FloatDynamicHyperParam</span><span class="p">(</span><span class="mf">1e-4</span><span class="p">,</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mf">1e-3</span><span class="p">)),</span>
 <span class="lineno">275</span> <span class="p">}</span></pre></div>
 </div>
 </div>
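The change above lowers the initial learning rate of the experiment's dynamic hyperparameter from 2.5e-4 to 1e-4 while keeping its adjustable range (0, 1e-3). As a rough sketch of how such a value is typically consumed during training, assuming labml's `FloatDynamicHyperParam` is callable and returns its current value (the import path and the `apply_learning_rate` helper below are illustrative, not code from this commit):

```python
# Illustrative sketch only (not this commit's code): pushing a dynamically
# adjustable learning rate into the optimizer on every update.
# Assumption: FloatDynamicHyperParam(initial, (low, high)) is callable and
# returns its current value; the labml import path is assumed.
import torch
from labml.configs import FloatDynamicHyperParam

configs = {
    # Initial value 1e-4, adjustable within (0, 1e-3) while the run is live.
    'learning_rate': FloatDynamicHyperParam(1e-4, (0, 1e-3)),
}

def apply_learning_rate(optimizer: torch.optim.Optimizer):
    # Read the hyperparameter's current value and set it on every param group,
    # so the rate can be changed from the monitoring app mid-training.
    lr = configs['learning_rate']()
    for pg in optimizer.param_groups:
        pg['lr'] = lr
```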
@@ -74,7 +74,7 @@
 and Double Q Network.</p>
 <p>Here is the <a href="experiment.html">experiment</a> and <a href="model.html">model</a> implementation.</p>
 <p><a href="https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg" /></a>
-<a href="https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
+<a href="https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
 </div>
 <div class='code'>
 <div class="highlight"><pre><span class="lineno">25</span><span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Tuple</span>
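This index page documents the DQN implementation together with Double Q Network. For readers skimming the diff, here is a minimal sketch of the double Q-learning target it refers to; tensor names are illustrative and this is not the repository's exact function:

```python
import torch

@torch.no_grad()
def double_q_target(reward: torch.Tensor,
                    done: torch.Tensor,
                    next_q_online: torch.Tensor,   # Q(s', .) from the online network
                    next_q_target: torch.Tensor,   # Q(s', .) from the target network
                    gamma: float = 0.99) -> torch.Tensor:
    # Double Q-learning: the online network picks the action, the target
    # network evaluates it, which reduces the over-estimation that comes
    # from taking a max over the target network alone.
    best_action = next_q_online.argmax(dim=-1, keepdim=True)
    next_value = next_q_target.gather(-1, best_action).squeeze(-1)
    return reward + gamma * next_value * (1.0 - done)
```

In training code this target is typically compared against Q(s, a) from the online network under a Huber (smooth L1) loss.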
@@ -69,7 +69,7 @@
 </div>
 <h1>Deep Q Network (DQN) Model</h1>
 <p><a href="https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg" /></a>
-<a href="https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
+<a href="https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
 </div>
 <div class='code'>
 <div class="highlight"><pre><span class="lineno">13</span><span></span><span class="kn">import</span> <span class="nn">torch</span>
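The page above introduces the DQN model. Below is a small illustrative sketch of a dueling-style convolutional Q-network for stacked 84x84 Atari frames; layer sizes follow the common DQN convention and may not match the repository's `model.py` exactly:

```python
import torch
from torch import nn

class QNetworkSketch(nn.Module):
    """Illustrative dueling-style Q-network for 4x84x84 observations.
    Layer sizes follow the usual Atari DQN convention, not necessarily model.py."""

    def __init__(self, n_actions: int):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),
            nn.Flatten(),
        )
        # Separate value and advantage streams (dueling architecture)
        self.value = nn.Sequential(nn.Linear(64 * 7 * 7, 512), nn.ReLU(), nn.Linear(512, 1))
        self.advantage = nn.Sequential(nn.Linear(64 * 7 * 7, 512), nn.ReLU(), nn.Linear(512, n_actions))

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        h = self.conv(obs)
        v = self.value(h)        # state value V(s)
        a = self.advantage(h)    # advantages A(s, a)
        # Center the advantages so V and A are identifiable
        return v + a - a.mean(dim=-1, keepdim=True)
```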
@@ -71,7 +71,7 @@
 <p>This implements paper <a href="https://papers.labml.ai/paper/1511.05952">Prioritized experience replay</a>,
 using a binary segment tree.</p>
 <p><a href="https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg" /></a>
-<a href="https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
+<a href="https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a></p>
 </div>
 <div class='code'>
 <div class="highlight"><pre><span class="lineno">16</span><span></span><span class="kn">import</span> <span class="nn">random</span>
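The replay buffer page describes prioritized experience replay implemented with a binary segment tree. A compact sketch of the core idea, a sum tree that supports priority updates and priority-proportional sampling in O(log n), assuming a power-of-two capacity (a generic illustration, not the repository's class):

```python
import random

class SumTreeSketch:
    """Binary segment tree over priorities: update and sample in O(log n)."""

    def __init__(self, capacity: int):
        self.capacity = capacity              # number of leaves (replay slots)
        self.tree = [0.0] * (2 * capacity)    # node 1 is the root; leaves start at `capacity`

    def set_priority(self, idx: int, priority: float):
        # Write the leaf, then propagate the new sums up to the root.
        i = idx + self.capacity
        self.tree[i] = priority
        i //= 2
        while i >= 1:
            self.tree[i] = self.tree[2 * i] + self.tree[2 * i + 1]
            i //= 2

    def sample(self) -> int:
        # Pick a point uniformly in [0, total priority) and descend the tree,
        # so each slot is chosen with probability proportional to its priority.
        s = random.random() * self.tree[1]
        i = 1
        while i < self.capacity:
            if s < self.tree[2 * i]:
                i = 2 * i
            else:
                s -= self.tree[2 * i]
                i = 2 * i + 1
        return i - self.capacity              # leaf index == replay slot
```

The full prioritized replay buffer also computes importance-sampling weights for the sampled transitions; that bookkeeping is omitted from this sketch.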
@@ -876,14 +876,14 @@

 <url>
 <loc>https://nn.labml.ai/rl/dqn/index.html</loc>
-<lastmod>2021-08-19T16:30:00+00:00</lastmod>
+<lastmod>2021-10-02T16:30:00+00:00</lastmod>
 <priority>1.00</priority>
 </url>


 <url>
 <loc>https://nn.labml.ai/rl/dqn/model.html</loc>
-<lastmod>2021-08-19T16:30:00+00:00</lastmod>
+<lastmod>2021-10-02T16:30:00+00:00</lastmod>
 <priority>1.00</priority>
 </url>

@@ -897,7 +897,7 @@

 <url>
 <loc>https://nn.labml.ai/rl/dqn/replay_buffer.html</loc>
-<lastmod>2021-08-17T16:30:00+00:00</lastmod>
+<lastmod>2021-10-02T16:30:00+00:00</lastmod>
 <priority>1.00</priority>
 </url>

@@ -19,7 +19,7 @@ This is a [PyTorch](https://pytorch.org) implementation of paper
 Here is the [experiment](experiment.html) and [model](model.html) implementation.

 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
-[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
+[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710)
 """

 from typing import Tuple
@@ -146,7 +146,7 @@
 "    # Target model updating interval\n",
 "    'update_target_model': 250,\n",
 "    # Learning rate.\n",
-"    'learning_rate': FloatDynamicHyperParam(2.5e-4, (0, 1e-3)),\n",
+"    'learning_rate': FloatDynamicHyperParam(1e-4, (0, 1e-3)),\n",
 "}"
 ]
 },
@@ -10,7 +10,7 @@ This experiment trains a Deep Q Network (DQN) to play Atari Breakout game on Ope
 It runs the [game environments on multiple processes](../game.html) to sample efficiently.

 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
-[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
+[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710)
 """

 import numpy as np
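The experiment's docstring notes that the game environments run on multiple processes to sample efficiently. A hedged sketch of that pattern, one worker process per environment driven over a pipe, follows; `Worker`, `worker_process`, and the Gym environment id are illustrative assumptions rather than the repository's exact `game.py` API:

```python
import multiprocessing
import multiprocessing.connection

def worker_process(remote: multiprocessing.connection.Connection):
    # Each worker owns one environment and answers requests over its pipe end.
    # The environment id here is an assumption for illustration.
    import gym
    env = gym.make('BreakoutNoFrameskip-v4')
    while True:
        cmd, data = remote.recv()
        if cmd == 'step':
            remote.send(env.step(data))
        elif cmd == 'reset':
            remote.send(env.reset())
        elif cmd == 'close':
            remote.close()
            break

class Worker:
    """Handle to one environment running in its own process (illustrative)."""
    def __init__(self):
        self.child, parent = multiprocessing.Pipe()
        self.process = multiprocessing.Process(target=worker_process, args=(parent,))
        self.process.start()

# Usage sketch: several environments stepped in lockstep from the trainer.
# workers = [Worker() for _ in range(8)]
# for w in workers:
#     w.child.send(('reset', None))
# observations = [w.child.recv() for w in workers]
```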
@@ -7,7 +7,7 @@ summary: Implementation of neural network model for Deep Q Network (DQN).
 # Deep Q Network (DQN) Model

 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
-[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
+[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710)
 """

 import torch
@@ -10,7 +10,7 @@ This implements paper [Prioritized experience replay](https://papers.labml.ai/pa
 using a binary segment tree.

 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb)
-[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/a0da8048235511ecb9affd797fa27714)
+[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710)
 """

 import random