From f80737e8049a1d43c1c51143ec19584c26a312d9 Mon Sep 17 00:00:00 2001 From: Varuna Jayasiri Date: Wed, 21 Oct 2020 15:16:22 +0530 Subject: [PATCH] ppo links --- labml_nn/__init__.py | 1 + labml_nn/rl/__init__.py | 7 +++++++ labml_nn/rl/ppo/__init__.py | 9 +++------ labml_nn/rl/ppo/experiment.py | 5 +++++ labml_nn/rl/ppo/gae.py | 4 ++++ readme.md | 1 + 6 files changed, 21 insertions(+), 6 deletions(-) diff --git a/labml_nn/__init__.py b/labml_nn/__init__.py index 38445dda..201ab98b 100644 --- a/labml_nn/__init__.py +++ b/labml_nn/__init__.py @@ -34,6 +34,7 @@ and #### ✨ [Sketch RNN](http://lab-ml.com/labml_nn/sketch_rnn/) +#### ✨ [Reinforcement Learning](http://lab-ml.com/labml_nn/rl/) ### Installation diff --git a/labml_nn/rl/__init__.py b/labml_nn/rl/__init__.py index e69de29b..12ae2b1b 100644 --- a/labml_nn/rl/__init__.py +++ b/labml_nn/rl/__init__.py @@ -0,0 +1,7 @@ +""" +# RL Algorithms + +* [Proximal Policy Optimization](ppo) +[This is an experiment](ppo/experiment.html) that runs a PPO agent on Atari Breakout. +* [Generalized advantage estimation](ppo/gae.html) +""" \ No newline at end of file diff --git a/labml_nn/rl/ppo/__init__.py b/labml_nn/rl/ppo/__init__.py index 2d85f8ee..b6129501 100644 --- a/labml_nn/rl/ppo/__init__.py +++ b/labml_nn/rl/ppo/__init__.py @@ -1,11 +1,8 @@ """ -This is a an implementation of [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347) - clipped version for Atari Breakout game on OpenAI Gym. -It runs the game environments on multiple processes to sample efficiently. -Advantages are calculated using [Generalized Advantage Estimation](https://arxiv.org/abs/1506.02438). +This is an implementation of [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347). -*This is based on my original implementation -[on my blog](http://blog.varunajayasiri.com/ml/ppo_pytorch.html)*. +You can find an experiment that uses it [here](experiment.html). +The experiment uses [Generalized Advantage Estimation](gae.html). 
""" import torch diff --git a/labml_nn/rl/ppo/experiment.py b/labml_nn/rl/ppo/experiment.py index b5325bf8..6318fbe3 100644 --- a/labml_nn/rl/ppo/experiment.py +++ b/labml_nn/rl/ppo/experiment.py @@ -1,3 +1,8 @@ +""" +This experiment runs PPO on the Atari Breakout game on OpenAI Gym. +It runs the [game environments on multiple processes](game.html) to sample efficiently. +""" + from typing import Dict, List import numpy as np diff --git a/labml_nn/rl/ppo/gae.py b/labml_nn/rl/ppo/gae.py index b53159b2..1ad63600 100644 --- a/labml_nn/rl/ppo/gae.py +++ b/labml_nn/rl/ppo/gae.py @@ -1,3 +1,7 @@ +""" +This is an implementation of the paper [Generalized Advantage Estimation](https://arxiv.org/abs/1506.02438). +""" + import numpy as np diff --git a/readme.md b/readme.md index 3a72e7c3..15d8111e 100644 --- a/readme.md +++ b/readme.md @@ -31,6 +31,7 @@ and #### ✨ [Sketch RNN](http://lab-ml.com/labml_nn/sketch_rnn/) +#### ✨ [Reinforcement Learning](http://lab-ml.com/labml_nn/rl/) ### Installation