Varuna Jayasiri
2021-03-30 11:37:06 +05:30
parent 6fba4c0957
commit ac40d0a7c9
2 changed files with 228 additions and 271 deletions

labml_nn/rl/ppo/experiment.ipynb (View File)

@@ -1,270 +1,226 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "AYV_dMVDxyc2"
   },
   "source": [
    "[![Github](https://img.shields.io/github/stars/lab-ml/nn?style=social)](https://github.com/lab-ml/nn)\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/rl/ppo/experiment.ipynb) \n",
    "\n",
    "## Proximal Policy Optimization - PPO\n",
    "\n",
    "This is an experiment training an agent to play Atari Breakout game using Proximal Policy Optimization - PPO"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "AahG_i2y5tY9"
   },
   "source": [
    "Install the `labml-nn` package"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "ZCzmCrAIVg0L",
    "outputId": "028e759e-0c9f-472e-b4b8-fdcf3e4604ee"
   },
   "outputs": [],
   "source": [
    "!pip install labml-nn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "SE2VUQ6L5zxI"
   },
   "source": [
    "Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0hJXx_g0wS2C"
   },
   "outputs": [],
   "source": [
    "from labml import experiment\n",
    "from labml.configs import FloatDynamicHyperParam, IntDynamicHyperParam\n",
    "from labml_nn.rl.ppo.experiment import Trainer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Lpggo0wM6qb-"
   },
   "source": [
    "Create an experiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "bFcr9k-l4cAg"
   },
   "outputs": [],
   "source": [
    "experiment.create(name=\"ppo\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "-OnHLi626tJt"
   },
   "source": [
    "Configurations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Piz0c5f44hRo"
   },
   "outputs": [],
   "source": [
    "configs = {\n",
    "    # number of updates\n",
    "    'updates': 10000,\n",
    "    # number of epochs to train the model with sampled data\n",
    "    'epochs': IntDynamicHyperParam(8),\n",
    "    # number of worker processes\n",
    "    'n_workers': 8,\n",
    "    # number of steps to run on each process for a single update\n",
    "    'worker_steps': 128,\n",
    "    # number of mini batches\n",
    "    'batches': 4,\n",
    "    # Value loss coefficient\n",
    "    'value_loss_coef': FloatDynamicHyperParam(0.5),\n",
    "    # Entropy bonus coefficient\n",
    "    'entropy_bonus_coef': FloatDynamicHyperParam(0.01),\n",
    "    # Clip range\n",
    "    'clip_range': FloatDynamicHyperParam(0.1),\n",
    "    # Learning rate\n",
    "    'learning_rate': FloatDynamicHyperParam(2.5e-4, (0, 1e-3)),\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "wwMzCqpD6vkL"
   },
   "source": [
    "Set experiment configurations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 17
    },
    "id": "e6hmQhTw4nks",
    "outputId": "0e978879-5dcd-4140-ec53-24a3fbd547de"
   },
   "outputs": [],
   "source": [
    "experiment.configs(configs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "qYQCFt_JYsjd"
   },
   "source": [
    "Create trainer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "8LB7XVViYuPG"
   },
   "outputs": [],
   "source": [
    "trainer = Trainer(\n",
    "    updates=configs['updates'],\n",
    "    epochs=configs['epochs'],\n",
    "    n_workers=configs['n_workers'],\n",
    "    worker_steps=configs['worker_steps'],\n",
    "    batches=configs['batches'],\n",
    "    value_loss_coef=configs['value_loss_coef'],\n",
    "    entropy_bonus_coef=configs['entropy_bonus_coef'],\n",
    "    clip_range=configs['clip_range'],\n",
    "    learning_rate=configs['learning_rate'],\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "KJZRf8527GxL"
   },
   "source": [
    "Start the experiment and run the training loop."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "aIAWo7Fw5DR8"
   },
   "outputs": [],
   "source": [
    "with experiment.start():\n",
    "    trainer.run_training_loop()"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "Proximal Policy Optimization - PPO",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
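
The notable change in this notebook is that `epochs` joins the other dynamic hyper-parameters: it is now an `IntDynamicHyperParam`, so, like the loss coefficients, clip range, and learning rate, it can be adjusted while the run is live. As a minimal sketch of how these objects behave (values are the notebook's defaults; the construction and call pattern follow the code in this commit, and external updates are assumed to arrive through labml's app/API):

from labml.configs import FloatDynamicHyperParam, IntDynamicHyperParam

# A dynamic hyper-parameter holds a current value that can be changed
# externally while the experiment is running.
epochs = IntDynamicHyperParam(8)
learning_rate = FloatDynamicHyperParam(2.5e-4, (0, 1e-3))  # initial value, allowed range

# Reading is done by calling the object, which returns the current value.
print(epochs())         # 8, until changed externally
print(learning_rate())  # 0.00025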

labml_nn/rl/ppo/experiment.py (View File)

@@ -19,7 +19,7 @@ from torch import optim
 from torch.distributions import Categorical
 from labml import monit, tracker, logger, experiment
-from labml.configs import FloatDynamicHyperParam
+from labml.configs import FloatDynamicHyperParam, IntDynamicHyperParam
 from labml_helpers.module import Module
 from labml_nn.rl.game import Worker
 from labml_nn.rl.ppo import ClippedPPOLoss, ClippedValueFunctionLoss
@@ -91,7 +91,8 @@ class Trainer:
     """
     def __init__(self, *,
-                 updates: int, epochs: int, n_workers: int, worker_steps: int, batches: int,
+                 updates: int, epochs: IntDynamicHyperParam,
+                 n_workers: int, worker_steps: int, batches: int,
                  value_loss_coef: FloatDynamicHyperParam,
                  entropy_bonus_coef: FloatDynamicHyperParam,
                  clip_range: FloatDynamicHyperParam,
@@ -231,7 +232,7 @@ class Trainer:
         # the average episode reward does not monotonically increase
         # over time.
         # May be reducing the clipping range might solve it.
-        for _ in range(self.epochs):
+        for _ in range(self.epochs()):
             # shuffle for each epoch
             indexes = torch.randperm(self.batch_size)
@@ -356,7 +357,7 @@ def main():
         # number of updates
         'updates': 10000,
         # number of epochs to train the model with sampled data
-        'epochs': 4,
+        'epochs': IntDynamicHyperParam(8),
         # number of worker processes
         'n_workers': 8,
         # number of steps to run on each process for a single update
@@ -370,7 +371,7 @@ def main():
         # Clip range
         'clip_range': FloatDynamicHyperParam(0.1),
         # Learning rate
-        'learning_rate': FloatDynamicHyperParam(2.5e-4, (0, 1e-3)),
+        'learning_rate': FloatDynamicHyperParam(1e-3, (0, 1e-3)),
     }
     experiment.configs(configs)
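
The substantive change in the trainer is the switch from `self.epochs` to `self.epochs()`: a dynamic hyper-parameter is re-read at its point of use, so the number of training epochs per update can change between updates of a running experiment. A minimal sketch of that pattern, with a hypothetical `train_on_samples` standing in for the real mini-batch loop:

from labml.configs import IntDynamicHyperParam

epochs = IntDynamicHyperParam(8)

def train_on_samples():
    # placeholder for one pass over the sampled trajectories
    pass

def run_training_loop(updates: int):
    for update in range(updates):
        # Calling epochs() here picks up any value pushed externally
        # while the previous update was still running.
        for _ in range(epochs()):
            train_on_samples()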