From f80737e8049a1d43c1c51143ec19584c26a312d9 Mon Sep 17 00:00:00 2001
From: Varuna Jayasiri <vpjayasiri@gmail.com>
Date: Wed, 21 Oct 2020 15:16:22 +0530
Subject: [PATCH] ppo links

---
 labml_nn/__init__.py          | 1 +
 labml_nn/rl/__init__.py       | 7 +++++++
 labml_nn/rl/ppo/__init__.py   | 9 +++------
 labml_nn/rl/ppo/experiment.py | 5 +++++
 labml_nn/rl/ppo/gae.py        | 4 ++++
 readme.md                     | 1 +
 6 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/labml_nn/__init__.py b/labml_nn/__init__.py
index 38445dda..201ab98b 100644
--- a/labml_nn/__init__.py
+++ b/labml_nn/__init__.py
@@ -34,6 +34,7 @@ and
 
 #### ✨ [Sketch RNN](http://lab-ml.com/labml_nn/sketch_rnn/)
 
+#### ✨ [Reinforcement Learning](http://lab-ml.com/labml_nn/rl/)
 
 ### Installation
 
diff --git a/labml_nn/rl/__init__.py b/labml_nn/rl/__init__.py
index e69de29b..12ae2b1b 100644
--- a/labml_nn/rl/__init__.py
+++ b/labml_nn/rl/__init__.py
@@ -0,0 +1,7 @@
+"""
+# RL Algorithms
+
+* [Proximal Policy Optimization](ppo)
+[This is an experiment](ppo/experiment.html) that runs a PPO agent on Atari Breakout.
+* [Generalized advantage estimation](ppo/gae.html)
+"""
\ No newline at end of file
diff --git a/labml_nn/rl/ppo/__init__.py b/labml_nn/rl/ppo/__init__.py
index 2d85f8ee..b6129501 100644
--- a/labml_nn/rl/ppo/__init__.py
+++ b/labml_nn/rl/ppo/__init__.py
@@ -1,11 +1,8 @@
 """
-This is a an implementation of [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347)
- clipped version for Atari Breakout game on OpenAI Gym.
-It runs the game environments on multiple processes to sample efficiently.
-Advantages are calculated using [Generalized Advantage Estimation](https://arxiv.org/abs/1506.02438).
+This is an implementation of [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347).
 
-*This is based on my original implementation
-[on my blog](http://blog.varunajayasiri.com/ml/ppo_pytorch.html)*.
+You can find an experiment that uses it [here](experiment.html).
+The experiment uses [Generalized Advantage Estimation](gae.html).
 """
 
 import torch
diff --git a/labml_nn/rl/ppo/experiment.py b/labml_nn/rl/ppo/experiment.py
index b5325bf8..6318fbe3 100644
--- a/labml_nn/rl/ppo/experiment.py
+++ b/labml_nn/rl/ppo/experiment.py
@@ -1,3 +1,8 @@
+"""
+This experiment runs PPO on the Atari Breakout game from OpenAI Gym.
+It runs the [game environments on multiple processes](game.html) to sample efficiently.
+"""
+
 from typing import Dict, List
 
 import numpy as np
diff --git a/labml_nn/rl/ppo/gae.py b/labml_nn/rl/ppo/gae.py
index b53159b2..1ad63600 100644
--- a/labml_nn/rl/ppo/gae.py
+++ b/labml_nn/rl/ppo/gae.py
@@ -1,3 +1,7 @@
+"""
+This is an implementation of the paper [Generalized Advantage Estimation](https://arxiv.org/abs/1506.02438).
+"""
+
 import numpy as np
 
 
diff --git a/readme.md b/readme.md
index 3a72e7c3..15d8111e 100644
--- a/readme.md
+++ b/readme.md
@@ -31,6 +31,7 @@ and
 
 #### ✨ [Sketch RNN](http://lab-ml.com/labml_nn/sketch_rnn/)
 
+#### ✨ [Reinforcement Learning](http://lab-ml.com/labml_nn/rl/)
 
 ### Installation