mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-15 02:07:56 +08:00
130 lines
3.3 KiB
Plaintext
130 lines
3.3 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "HyperLSTM",
|
|
"provenance": [],
|
|
"collapsed_sections": []
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"accelerator": "GPU"
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "AYV_dMVDxyc2"
|
|
},
|
|
"source": [
|
|
"[GitHub](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
|
|
"[Open In Colab](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/hypernetworks/experiment.ipynb) \n",
|
|
"\n",
|
|
"## HyperLSTM\n",
|
|
"\n",
|
|
"This is an experiment that trains a HyperLSTM, from the paper HyperNetworks, on the Tiny Shakespeare dataset."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "ZCzmCrAIVg0L"
|
|
},
|
|
"source": [
|
|
"!pip install labml-nn"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "0hJXx_g0wS2C"
|
|
},
|
|
"source": [
|
|
"from labml import experiment\n",
|
|
"from labml_nn.hypernetworks.experiment import Configs"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 255
|
|
},
|
|
"id": "WQ8VGpMGwZuj",
|
|
"outputId": "5833cc50-26a8-496e-e729-88f42b3f4651"
|
|
},
|
|
"source": [
|
|
"# Create experiment\n",
|
|
"experiment.create(name=\"hyper_lstm\", comment='')\n",
|
|
"# Create configs\n",
|
|
"conf = Configs()\n",
|
|
"# Load configurations\n",
|
|
"experiment.configs(conf,\n",
|
|
" # A dictionary of configurations to override\n",
|
|
" {'tokenizer': 'character',\n",
|
|
" 'text': 'tiny_shakespeare',\n",
|
|
" 'optimizer.learning_rate': 2.5e-4,\n",
|
|
" 'optimizer.optimizer': 'Adam',\n",
|
|
" 'prompt': 'It is',\n",
|
|
" 'prompt_separator': '',\n",
|
|
"\n",
|
|
" 'rnn_model': 'hyper_lstm',\n",
|
|
"\n",
|
|
" 'train_loader': 'shuffled_train_loader',\n",
|
|
" 'valid_loader': 'shuffled_valid_loader',\n",
|
|
"\n",
|
|
" 'seq_len': 512,\n",
|
|
" 'epochs': 128,\n",
|
|
" 'batch_size': 2,\n",
|
|
" 'inner_iterations': 25})\n",
|
|
"\n",
|
|
"\n",
|
|
"# Set models for saving and loading\n",
|
|
"experiment.add_pytorch_models({'model': conf.model})\n",
|
|
"\n",
|
|
"conf.init()"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 425
|
|
},
|
|
"id": "f07vAOaHwumr",
|
|
"outputId": "6b51205e-3852-4dce-f7a7-f3ba4066ba21"
|
|
},
|
|
"source": [
|
|
"# Start the experiment\n",
|
|
"with experiment.start():\n",
|
|
" # `TrainValidConfigs.run`\n",
|
|
" conf.run()"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "crH6MzKmw-SY"
|
|
},
|
|
"source": [
|
|
""
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
}
|
|
]
|
|
}
|