mirror of https://github.com/labmlai/annotated_deep_learning_paper_implementations.git · synced 2025-10-29 09:38:56 +08:00
ppo links
@@ -34,6 +34,7 @@ and
#### ✨ [Sketch RNN](http://lab-ml.com/labml_nn/sketch_rnn/)
#### ✨ [Reinforcement Learning](http://lab-ml.com/labml_nn/rl/)
### Installation
@@ -0,0 +1,7 @@
"""
# RL Algorithms
* [Proximal Policy Optimization](ppo)
[This is an experiment](ppo/experiment.html) that runs a PPO agent on Atari Breakout.
* [Generalized advantage estimation](ppo/gae.html)
"""
@@ -1,11 +1,8 @@
"""
This is an implementation of [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347)
clipped version for the Atari Breakout game on OpenAI Gym.
It runs the game environments on multiple processes to sample efficiently.
Advantages are calculated using [Generalized Advantage Estimation](https://arxiv.org/abs/1506.02438).
This is an implementation of [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347).
*This is based on my original implementation
[on my blog](http://blog.varunajayasiri.com/ml/ppo_pytorch.html)*.
You can find an experiment that uses it [here](experiment.html).
The experiment uses [Generalized Advantage Estimation](gae.html).
"""
import torch
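As a rough illustration of the clipped surrogate objective the docstring refers to, a minimal PyTorch sketch is shown below; the function name, signature, and the `clip` default are assumptions for illustration, not this module's actual API.

```python
import torch

def clipped_ppo_loss(log_pi: torch.Tensor,
                     sampled_log_pi: torch.Tensor,
                     advantage: torch.Tensor,
                     clip: float = 0.1) -> torch.Tensor:
    # Probability ratio r_t = pi(a|s) / pi_old(a|s), computed in log space for stability
    ratio = torch.exp(log_pi - sampled_log_pi)
    # Unclipped and clipped surrogate objectives
    surrogate = ratio * advantage
    clipped = ratio.clamp(1.0 - clip, 1.0 + clip) * advantage
    # PPO maximizes the minimum of the two; negate to obtain a loss to minimize
    return -torch.min(surrogate, clipped).mean()
```

The pointwise minimum keeps the policy update pessimistic: once the ratio moves outside `[1 - clip, 1 + clip]` in the direction that would increase the objective, the gradient through it vanishes.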
@@ -1,3 +1,8 @@
"""
This experiment runs a PPO agent on the Atari Breakout game on OpenAI Gym.
It runs the [game environments on multiple processes](game.html) to sample efficiently.
"""
from typing import Dict, List
import numpy as np
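The docstring above points to game.html for sampling from several game environments in parallel. A rough sketch of that pattern follows, assuming the classic Gym `step`/`reset` API and a hypothetical worker/pipe protocol; it is not the repository's actual worker code.

```python
import multiprocessing
import gym

def worker(conn, env_id: str):
    """Run one game environment in a child process, driven by commands over a pipe."""
    env = gym.make(env_id)
    obs = env.reset()
    while True:
        cmd, data = conn.recv()
        if cmd == 'step':
            obs, reward, done, info = env.step(data)
            if done:
                obs = env.reset()
            conn.send((obs, reward, done, info))
        elif cmd == 'close':
            conn.close()
            break

if __name__ == '__main__':
    # Parent side: one pipe per worker, so several environments can step concurrently
    parent_conns, processes = [], []
    for _ in range(4):
        parent_conn, child_conn = multiprocessing.Pipe()
        process = multiprocessing.Process(target=worker,
                                          args=(child_conn, 'BreakoutNoFrameskip-v4'))
        process.start()
        parent_conns.append(parent_conn)
        processes.append(process)
```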
@@ -1,3 +1,7 @@
"""
This is an implementation of the paper [Generalized Advantage Estimation](https://arxiv.org/abs/1506.02438).
"""
import numpy as np
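For reference, the paper's estimator follows the recursion A_t = delta_t + gamma * lambda * A_{t+1}, with the TD error delta_t = r_t + gamma * V(s_{t+1}) - V(s_t). A hedged NumPy sketch of that computation is given below; the function name and array layout are assumptions for illustration rather than this file's interface.

```python
import numpy as np

def gae(rewards: np.ndarray, values: np.ndarray, dones: np.ndarray,
        gamma: float = 0.99, lambda_: float = 0.95) -> np.ndarray:
    """Compute advantages backwards in time.

    rewards, dones: shape [T]; values: shape [T + 1] (the extra entry is the bootstrap value).
    """
    T = len(rewards)
    advantages = np.zeros(T, dtype=np.float32)
    last_advantage = 0.0
    for t in reversed(range(T)):
        mask = 1.0 - dones[t]  # stop bootstrapping across episode ends
        # TD error: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
        delta = rewards[t] + gamma * values[t + 1] * mask - values[t]
        # GAE recursion: A_t = delta_t + gamma * lambda * A_{t+1}
        last_advantage = delta + gamma * lambda_ * mask * last_advantage
        advantages[t] = last_advantage
    return advantages
```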