{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "HyperLSTM", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "AYV_dMVDxyc2" }, "source": [ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n", "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/hypernetworks/experiment.ipynb) \n", "\n", "## HyperLSTM\n", "\n", "This is an experiment training Shakespear dataset with HyperLSTM from paper HyperNetworks." ] }, { "cell_type": "code", "metadata": { "id": "ZCzmCrAIVg0L" }, "source": [ "!pip install labml-nn" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "0hJXx_g0wS2C" }, "source": [ "from labml import experiment\n", "from labml_nn.hypernetworks.experiment import Configs" ], "execution_count": 3, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 255 }, "id": "WQ8VGpMGwZuj", "outputId": "5833cc50-26a8-496e-e729-88f42b3f4651" }, "source": [ "# Create experiment\n", "experiment.create(name=\"hyper_lstm\", comment='')\n", "# Create configs\n", "conf = Configs()\n", "# Load configurations\n", "experiment.configs(conf,\n", " # A dictionary of configurations to override\n", " {'tokenizer': 'character',\n", " 'text': 'tiny_shakespeare',\n", " 'optimizer.learning_rate': 2.5e-4,\n", " 'optimizer.optimizer': 'Adam',\n", " 'prompt': 'It is',\n", " 'prompt_separator': '',\n", "\n", " 'rnn_model': 'hyper_lstm',\n", "\n", " 'train_loader': 'shuffled_train_loader',\n", " 'valid_loader': 'shuffled_valid_loader',\n", "\n", " 'seq_len': 512,\n", " 'epochs': 128,\n", " 'batch_size': 2,\n", " 'inner_iterations': 25})\n", "\n", "\n", "# Set models for saving and loading\n", "experiment.add_pytorch_models({'model': conf.model})\n", "\n", "conf.init()" ], "execution_count": 5, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "
\n",
"Prepare model...\n",
" Prepare n_tokens...\n",
" Prepare text...\n",
" Prepare tokenizer...[DONE]\t3.07ms\n",
" Load data...[DONE]\t2.85ms\n",
" Tokenize...[DONE]\t33.69ms\n",
" Build vocabulary...[DONE]\t103.52ms\n",
" Prepare text...[DONE]\t153.38ms\n",
" Prepare n_tokens...[DONE]\t160.21ms\n",
" Prepare rnn_model...[DONE]\t13.84ms\n",
"Prepare model...[DONE]\t195.08ms\n",
"Prepare mode...[DONE]\t1.78ms\n",
""
],
"text/plain": [
"\n",
"hyper_lstm: 5004f5724d8611eba84a0242ac1c0002\n",
"\t[dirty]: \"\"\n",
"Initialize...[DONE]\t1.12ms\n",
"Prepare validator...\n",
" Prepare valid_loader...[DONE]\t76.72ms\n",
"\n",
"--------------------------------------------------\n",
"LABML WARNING\n",
"LabML App Warning: empty_token: Please create a valid token at https://app.labml.ai.\n",
"Click on the experiment link to monitor the experiment and add it to your experiments list.\n",
"--------------------------------------------------\n",
"Monitor experiment at https://app.labml.ai/run?uuid=5004f5724d8611eba84a0242ac1c0002\n",
"Prepare validator...[DONE]\t174.93ms\n",
"Prepare trainer...\n",
" Prepare train_loader...[DONE]\t100.16ms\n",
"Prepare trainer...[DONE]\t137.49ms\n",
"Prepare training_loop...\n",
" Prepare loop_count...[DONE]\t37.12ms\n",
"Prepare training_loop...[DONE]\t301.04ms\n",
"It is????????nn?n?n?n???nn?n?n\n",
"It is the and the and the and \n",
"It is the the the the the the \n",
"It is the the the the the the \n",
" 65,536: Sample: 100% 1,288ms Train: 13% 4,212,862ms Valid: 11% 132,056ms accuracy.train: 0.301926 loss.train: 2.25940 accuracy.valid: 0.330679 loss.valid: 2.48882 4,346,206ms 0:08m/154:23m "
] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "text/plain": [
"