mirror of https://github.com/labmlai/annotated_deep_learning_paper_implementations.git · synced 2025-10-29 09:38:56 +08:00
ppo links
@@ -34,6 +34,7 @@ and
#### ✨ [Sketch RNN](http://lab-ml.com/labml_nn/sketch_rnn/)
#### ✨ [Reinforcement Learning](http://lab-ml.com/labml_nn/rl/)
### Installation
@@ -0,0 +1,7 @@
"""
# RL Algorithms
* [Proximal Policy Optimization](ppo)
[This is an experiment](ppo/experiment.html) that runs a PPO agent on Atari Breakout.
* [Generalized advantage estimation](ppo/gae.html)
"""
@@ -1,11 +1,8 @@
"""
This is an implementation of [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347)
clipped version for the Atari Breakout game on OpenAI Gym.
It runs the game environments on multiple processes to sample efficiently.
Advantages are calculated using [Generalized Advantage Estimation](https://arxiv.org/abs/1506.02438).
This is an implementation of [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347).
*This is based on my original implementation
[on my blog](http://blog.varunajayasiri.com/ml/ppo_pytorch.html)*.
You can find an experiment that uses it [here](experiment.html).
The experiment uses [Generalized Advantage Estimation](gae.html).
"""
import torch
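As a rough illustration of the clipped surrogate objective the docstring refers to, a minimal PyTorch sketch is shown below; the function name, signature, and the `clip` default are assumptions for illustration, not this module's actual API.

```python
import torch

def clipped_ppo_loss(log_pi: torch.Tensor,
                     sampled_log_pi: torch.Tensor,
                     advantage: torch.Tensor,
                     clip: float = 0.1) -> torch.Tensor:
    # Probability ratio r_t = pi(a|s) / pi_old(a|s), computed in log space for stability
    ratio = torch.exp(log_pi - sampled_log_pi)
    # Unclipped and clipped surrogate objectives
    surrogate = ratio * advantage
    clipped = ratio.clamp(1.0 - clip, 1.0 + clip) * advantage
    # PPO maximizes the minimum of the two; negate to obtain a loss to minimize
    return -torch.min(surrogate, clipped).mean()
```

The pointwise minimum keeps the policy update pessimistic: once the ratio moves outside `[1 - clip, 1 + clip]` in the direction that would increase the objective, the gradient through it vanishes.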
@@ -1,3 +1,8 @@
"""
This experiment runs a PPO agent on the Atari Breakout game on OpenAI Gym.
It runs the [game environments on multiple processes](game.html) to sample efficiently.
"""
from typing import Dict, List
import numpy as np
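The docstring above points to game.html for sampling from several game environments in parallel. A rough sketch of that pattern follows, assuming the classic Gym `step`/`reset` API and a hypothetical worker/pipe protocol; it is not the repository's actual worker code.

```python
import multiprocessing
import gym

def worker(conn, env_id: str):
    """Run one game environment in a child process, driven by commands over a pipe."""
    env = gym.make(env_id)
    obs = env.reset()
    while True:
        cmd, data = conn.recv()
        if cmd == 'step':
            obs, reward, done, info = env.step(data)
            if done:
                obs = env.reset()
            conn.send((obs, reward, done, info))
        elif cmd == 'close':
            conn.close()
            break

if __name__ == '__main__':
    # Parent side: one pipe per worker, so several environments can step concurrently
    parent_conns, processes = [], []
    for _ in range(4):
        parent_conn, child_conn = multiprocessing.Pipe()
        process = multiprocessing.Process(target=worker,
                                          args=(child_conn, 'BreakoutNoFrameskip-v4'))
        process.start()
        parent_conns.append(parent_conn)
        processes.append(process)
```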
@@ -1,3 +1,7 @@
"""
This is an implementation of the paper [Generalized Advantage Estimation](https://arxiv.org/abs/1506.02438).
"""
import numpy as np
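For reference, the paper's estimator follows the recursion A_t = delta_t + gamma * lambda * A_{t+1}, with the TD error delta_t = r_t + gamma * V(s_{t+1}) - V(s_t). A hedged NumPy sketch of that computation is given below; the function name and array layout are assumptions for illustration rather than this file's interface.

```python
import numpy as np

def gae(rewards: np.ndarray, values: np.ndarray, dones: np.ndarray,
        gamma: float = 0.99, lambda_: float = 0.95) -> np.ndarray:
    """Compute advantages backwards in time.

    rewards, dones: shape [T]; values: shape [T + 1] (the extra entry is the bootstrap value).
    """
    T = len(rewards)
    advantages = np.zeros(T, dtype=np.float32)
    last_advantage = 0.0
    for t in reversed(range(T)):
        mask = 1.0 - dones[t]  # stop bootstrapping across episode ends
        # TD error: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
        delta = rewards[t] + gamma * values[t + 1] * mask - values[t]
        # GAE recursion: A_t = delta_t + gamma * lambda * A_{t+1}
        last_advantage = delta + gamma * lambda_ * mask * last_advantage
        advantages[t] = last_advantage
    return advantages
```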