mirror of https://github.com/labmlai/annotated_deep_learning_paper_implementations.git (synced 2025-08-06 15:22:21 +08:00)
📇 version
@@ -3,24 +3,24 @@
 <head>
     <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
     <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
-    <meta name="description" content="An annotated implementation of Proximal Policy Optimization (PPO) algorithm in PyTorch."/>
+    <meta name="description" content="An annotated implementation of Proximal Policy Optimization - PPO algorithm in PyTorch."/>

     <meta name="twitter:card" content="summary"/>
     <meta name="twitter:image:src" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
-    <meta name="twitter:title" content="Proximal Policy Optimization (PPO)"/>
+    <meta name="twitter:title" content="Proximal Policy Optimization - PPO"/>
-    <meta name="twitter:description" content="An annotated implementation of Proximal Policy Optimization (PPO) algorithm in PyTorch."/>
+    <meta name="twitter:description" content="An annotated implementation of Proximal Policy Optimization - PPO algorithm in PyTorch."/>
     <meta name="twitter:site" content="@labmlai"/>
     <meta name="twitter:creator" content="@labmlai"/>

     <meta property="og:url" content="https://nn.labml.ai/rl/ppo/index.html"/>
-    <meta property="og:title" content="Proximal Policy Optimization (PPO)"/>
+    <meta property="og:title" content="Proximal Policy Optimization - PPO"/>
     <meta property="og:image" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
     <meta property="og:site_name" content="LabML Neural Networks"/>
     <meta property="og:type" content="object"/>
-    <meta property="og:title" content="Proximal Policy Optimization (PPO)"/>
+    <meta property="og:title" content="Proximal Policy Optimization - PPO"/>
-    <meta property="og:description" content="An annotated implementation of Proximal Policy Optimization (PPO) algorithm in PyTorch."/>
+    <meta property="og:description" content="An annotated implementation of Proximal Policy Optimization - PPO algorithm in PyTorch."/>

-    <title>Proximal Policy Optimization (PPO)</title>
+    <title>Proximal Policy Optimization - PPO</title>
     <link rel="shortcut icon" href="/icon.png"/>
     <link rel="stylesheet" href="../../pylit.css">
     <link rel="canonical" href="https://nn.labml.ai/rl/ppo/index.html"/>
@@ -72,7 +72,7 @@
     <div class='section-link'>
         <a href='#section-0'>#</a>
     </div>
-    <h1>Proximal Policy Optimization (PPO)</h1>
+    <h1>Proximal Policy Optimization - PPO</h1>
     <p>This is a <a href="https://pytorch.org">PyTorch</a> implementation of
     <a href="https://arxiv.org/abs/1707.06347">Proximal Policy Optimization - PPO</a>.</p>
     <p>PPO is a policy gradient method for reinforcement learning.
@@ -7,20 +7,20 @@

     <meta name="twitter:card" content="summary"/>
     <meta name="twitter:image:src" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
-    <meta name="twitter:title" content="Proximal Policy Optimization (PPO)"/>
+    <meta name="twitter:title" content="Proximal Policy Optimization - PPO"/>
     <meta name="twitter:description" content=""/>
     <meta name="twitter:site" content="@labmlai"/>
     <meta name="twitter:creator" content="@labmlai"/>

     <meta property="og:url" content="https://nn.labml.ai/rl/ppo/readme.html"/>
-    <meta property="og:title" content="Proximal Policy Optimization (PPO)"/>
+    <meta property="og:title" content="Proximal Policy Optimization - PPO"/>
     <meta property="og:image" content="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4"/>
     <meta property="og:site_name" content="LabML Neural Networks"/>
     <meta property="og:type" content="object"/>
-    <meta property="og:title" content="Proximal Policy Optimization (PPO)"/>
+    <meta property="og:title" content="Proximal Policy Optimization - PPO"/>
     <meta property="og:description" content=""/>

-    <title>Proximal Policy Optimization (PPO)</title>
+    <title>Proximal Policy Optimization - PPO</title>
     <link rel="shortcut icon" href="/icon.png"/>
     <link rel="stylesheet" href="../../pylit.css">
     <link rel="canonical" href="https://nn.labml.ai/rl/ppo/readme.html"/>
@@ -72,7 +72,7 @@
     <div class='section-link'>
         <a href='#section-0'>#</a>
     </div>
-    <h1><a href="https://nn.labml.ai/rl/ppo/index.html">Proximal Policy Optimization (PPO)</a></h1>
+    <h1><a href="https://nn.labml.ai/rl/ppo/index.html">Proximal Policy Optimization - PPO</a></h1>
     <p>This is a <a href="https://pytorch.org">PyTorch</a> implementation of
     <a href="https://arxiv.org/abs/1707.06347">Proximal Policy Optimization - PPO</a>.</p>
     <p>PPO is a policy gradient method for reinforcement learning.
@@ -1,11 +1,11 @@
 """
 ---
-title: Proximal Policy Optimization (PPO)
+title: Proximal Policy Optimization - PPO
 summary: >
-  An annotated implementation of Proximal Policy Optimization (PPO) algorithm in PyTorch.
+  An annotated implementation of Proximal Policy Optimization - PPO algorithm in PyTorch.
 ---

-# Proximal Policy Optimization (PPO)
+# Proximal Policy Optimization - PPO

 This is a [PyTorch](https://pytorch.org) implementation of
 [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347).
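The module docstring in the hunk above describes PPO as a policy gradient method for reinforcement learning, implemented in PyTorch. For orientation while reading the diff, the key idea of the paper (arXiv:1707.06347) is the clipped surrogate objective; the sketch below is a minimal, illustrative PyTorch version of that loss, not the API of labml_nn.rl.ppo (the function name and arguments are assumptions):

    import torch

    def clipped_ppo_loss(log_pi, sampled_log_pi, advantage, clip=0.2):
        # Probability ratio between the current policy and the policy that
        # sampled the data, computed in log space for numerical stability.
        ratio = torch.exp(log_pi - sampled_log_pi)
        # Unclipped and clipped surrogate terms from the PPO paper.
        surrogate = ratio * advantage
        clipped_surrogate = ratio.clamp(1.0 - clip, 1.0 + clip) * advantage
        # PPO maximizes the minimum of the two terms, so the loss is its negation.
        return -torch.min(surrogate, clipped_surrogate).mean()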
@@ -1,4 +1,4 @@
-# [Proximal Policy Optimization (PPO)](https://nn.labml.ai/rl/ppo/index.html)
+# [Proximal Policy Optimization - PPO](https://nn.labml.ai/rl/ppo/index.html)

 This is a [PyTorch](https://pytorch.org) implementation of
 [Proximal Policy Optimization - PPO](https://arxiv.org/abs/1707.06347).
setup.py
@@ -5,7 +5,7 @@ with open("readme.md", "r") as f:

 setuptools.setup(
     name='labml-nn',
-    version='0.4.91',
+    version='0.4.93',
     author="Varuna Jayasiri, Nipun Wijerathne",
     author_email="vpjayasiri@gmail.com, hnipun@gmail.com",
     description="A collection of PyTorch implementations of neural network architectures and layers.",
@@ -20,7 +20,7 @@ setuptools.setup(
                   'labml_helpers', 'labml_helpers.*',
                   'test',
                   'test.*')),
-    install_requires=['labml>=0.4.103',
+    install_requires=['labml>=0.4.109',
                       'labml-helpers>=0.4.76',
                       'torch',
                       'einops',