From e38f9af96805f24a60d9b1fb6ace14fe6a86c90c Mon Sep 17 00:00:00 2001
From: Varuna Jayasiri
Date: Sun, 8 Aug 2021 08:32:39 +0530
Subject: [PATCH] repo name
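
The repository moved from lab-ml/nn to
labmlai/annotated_deep_learning_paper_implementations, so this patch
rewrites every GitHub, Colab, and local-path reference to the old name
across the docs, Python sources, and notebooks. The change is a plain
string substitution over the working tree; below is a minimal sketch of
how it could be generated (illustrative only, not part of the patch;
OLD, NEW, and SUFFIXES are assumed names):

    # Sketch: rewrite the old repository name across the working tree.
    # Run from the repository root; review `git diff` before committing.
    from pathlib import Path

    OLD = "lab-ml/nn"
    NEW = "labmlai/annotated_deep_learning_paper_implementations"
    SUFFIXES = {".html", ".py", ".md", ".ipynb"}

    for path in Path(".").rglob("*"):
        # Only touch text files of known types; skip .git internals.
        if path.is_file() and path.suffix in SUFFIXES and ".git" not in path.parts:
            text = path.read_text(encoding="utf-8")
            if OLD in text:
                path.write_text(text.replace(OLD, NEW), encoding="utf-8")
                print(f"rewrote {path}")
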
---
docs/capsule_networks/index.html | 2 +-
docs/capsule_networks/readme.html | 2 +-
docs/cfr/index.html | 2 +-
docs/cfr/kuhn/index.html | 2 +-
docs/gan/cycle_gan/index.html | 2 +-
docs/gan/wasserstein/index.html | 2 +-
docs/hypernetworks/hyper_lstm.html | 2 +-
docs/index.html | 2 +-
.../batch_channel_norm/index.html | 2 +-
docs/normalization/batch_norm/index.html | 2 +-
docs/normalization/batch_norm/readme.html | 2 +-
docs/normalization/group_norm/index.html | 2 +-
docs/normalization/group_norm/readme.html | 2 +-
.../weight_standardization/index.html | 2 +-
docs/rl/ppo/experiment.html | 2 +-
docs/rl/ppo/index.html | 2 +-
docs/rl/ppo/readme.html | 2 +-
docs/transformers/compressive/index.html | 2 +-
docs/transformers/compressive/readme.html | 2 +-
.../transformers/fast_weights/experiment.html | 2 +-
docs/transformers/fast_weights/index.html | 2 +-
docs/transformers/fast_weights/readme.html | 2 +-
docs/transformers/feedback/experiment.html | 2 +-
docs/transformers/feedback/index.html | 2 +-
docs/transformers/feedback/readme.html | 4 ++--
docs/transformers/glu_variants/simple.html | 2 +-
docs/transformers/gpt/index.html | 2 +-
docs/transformers/switch/index.html | 2 +-
docs/transformers/switch/readme.html | 2 +-
docs/transformers/xl/index.html | 2 +-
docs/transformers/xl/readme.html | 2 +-
labml_nn/__init__.py | 2 +-
labml_nn/capsule_networks/__init__.py | 2 +-
labml_nn/capsule_networks/mnist.ipynb | 4 ++--
labml_nn/capsule_networks/readme.md | 2 +-
labml_nn/cfr/__init__.py | 2 +-
labml_nn/cfr/kuhn/__init__.py | 2 +-
labml_nn/cfr/kuhn/experiment.ipynb | 4 ++--
labml_nn/gan/cycle_gan/__init__.py | 2 +-
labml_nn/gan/cycle_gan/experiment.ipynb | 4 ++--
labml_nn/gan/dcgan/experiment.ipynb | 24 +++++++++----------
labml_nn/gan/original/experiment.ipynb | 16 ++++++-------
labml_nn/gan/wasserstein/__init__.py | 2 +-
labml_nn/gan/wasserstein/experiment.ipynb | 14 +++++------
labml_nn/hypernetworks/experiment.ipynb | 4 ++--
labml_nn/hypernetworks/hyper_lstm.py | 2 +-
.../batch_channel_norm/__init__.py | 2 +-
labml_nn/normalization/batch_norm/__init__.py | 2 +-
labml_nn/normalization/batch_norm/mnist.ipynb | 4 ++--
labml_nn/normalization/batch_norm/readme.md | 2 +-
labml_nn/normalization/group_norm/__init__.py | 2 +-
.../normalization/group_norm/experiment.ipynb | 4 ++--
labml_nn/normalization/group_norm/readme.md | 2 +-
.../weight_standardization/__init__.py | 2 +-
.../weight_standardization/experiment.ipynb | 4 ++--
labml_nn/rl/ppo/__init__.py | 2 +-
labml_nn/rl/ppo/experiment.ipynb | 4 ++--
labml_nn/rl/ppo/experiment.py | 2 +-
labml_nn/rl/ppo/readme.md | 2 +-
labml_nn/transformers/compressive/__init__.py | 2 +-
.../transformers/compressive/experiment.ipynb | 4 ++--
labml_nn/transformers/compressive/readme.md | 2 +-
.../transformers/fast_weights/__init__.py | 2 +-
.../fast_weights/experiment.ipynb | 4 ++--
.../transformers/fast_weights/experiment.py | 2 +-
labml_nn/transformers/fast_weights/readme.md | 2 +-
labml_nn/transformers/feedback/__init__.py | 2 +-
.../transformers/feedback/experiment.ipynb | 4 ++--
labml_nn/transformers/feedback/experiment.py | 2 +-
labml_nn/transformers/feedback/readme.md | 4 ++--
.../transformers/glu_variants/simple.ipynb | 4 ++--
labml_nn/transformers/glu_variants/simple.py | 2 +-
labml_nn/transformers/gpt/__init__.py | 2 +-
labml_nn/transformers/gpt/experiment.ipynb | 4 ++--
labml_nn/transformers/switch/__init__.py | 2 +-
labml_nn/transformers/switch/experiment.ipynb | 4 ++--
labml_nn/transformers/switch/readme.md | 2 +-
labml_nn/transformers/xl/__init__.py | 2 +-
labml_nn/transformers/xl/experiment.ipynb | 4 ++--
labml_nn/transformers/xl/readme.md | 2 +-
80 files changed, 121 insertions(+), 121 deletions(-)
diff --git a/docs/capsule_networks/index.html b/docs/capsule_networks/index.html
index 2dc4c9bb..e563ece7 100644
--- a/docs/capsule_networks/index.html
+++ b/docs/capsule_networks/index.html
@@ -78,7 +78,7 @@ it is difficult to understand some concepts with just the modules.
I used jindongwang/Pytorch-CapsuleNet to clarify some
confusions I had with the paper.
Here’s a notebook for training a Capsule Network on MNIST dataset.
-
+

diff --git a/docs/capsule_networks/readme.html b/docs/capsule_networks/readme.html
index 319ce05b..4d6e991f 100644
--- a/docs/capsule_networks/readme.html
+++ b/docs/capsule_networks/readme.html
@@ -78,7 +78,7 @@ it is difficult to understand some concepts with just the modules.
I used jindongwang/Pytorch-CapsuleNet to clarify some
confusions I had with the paper.
Here’s a notebook for training a Capsule Network on MNIST dataset.
-
+

diff --git a/docs/cfr/index.html b/docs/cfr/index.html
index 9dde09ce..56ff1fbe 100644
--- a/docs/cfr/index.html
+++ b/docs/cfr/index.html
@@ -78,7 +78,7 @@ introduces Monte Carlo Counterfactual Regret Minimization (
MCCFR
We tried to keep our Python implementation easy to understand, like a tutorial.
We run it on a very simple imperfect information game called Kuhn poker.
-

+

Twitter thread
Introduction
diff --git a/docs/cfr/kuhn/index.html b/docs/cfr/kuhn/index.html
index 6585a61b..06b04c15 100644
--- a/docs/cfr/kuhn/index.html
+++ b/docs/cfr/kuhn/index.html
@@ -88,7 +88,7 @@ This game is played repeatedly and a good strategy will optimize for the long te
Here we extend the InfoSet
class and History
class defined in __init__.py
with Kuhn Poker specifics.
-
+

diff --git a/docs/gan/cycle_gan/index.html b/docs/gan/cycle_gan/index.html
index c41e5592..9ae58422 100644
--- a/docs/gan/cycle_gan/index.html
+++ b/docs/gan/cycle_gan/index.html
@@ -84,7 +84,7 @@ One generator translates images from A to B and the other from B to A.
The discriminators test whether the generated images look real.
This file contains the model code as well as the training code.
We also have a Google Colab notebook.
-
+

diff --git a/docs/gan/wasserstein/index.html b/docs/gan/wasserstein/index.html
index 537fd5b0..abe191ef 100644
--- a/docs/gan/wasserstein/index.html
+++ b/docs/gan/wasserstein/index.html
@@ -133,7 +133,7 @@ to minimize above formula.
while keeping $K$ bounded.
One way to keep $K$ bounded is to clip all weights in the neural
network that defines $f$ within a range.
Here is the code to try this on a simple MNIST generation experiment.
-

+

87import torch.utils.data
diff --git a/docs/hypernetworks/hyper_lstm.html b/docs/hypernetworks/hyper_lstm.html
index abee2723..0256286d 100644
--- a/docs/hypernetworks/hyper_lstm.html
+++ b/docs/hypernetworks/hyper_lstm.html
@@ -74,7 +74,7 @@ using PyTorch.
by David Ha gives a good explanation of HyperNetworks.
We have an experiment that trains a HyperLSTM to predict text on Shakespeare dataset.
Here’s the link to code: experiment.py
-
+

HyperNetworks use a smaller network to generate weights of a larger network.
There are two variants: static hyper-networks and dynamic hyper-networks.
diff --git a/docs/index.html b/docs/index.html
index 8a5a85ac..07fdb9c4 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -68,7 +68,7 @@
This is a collection of simple PyTorch implementations of
neural networks and related algorithms.
-These implementations are documented with explanations,
+These implementations are documented with explanations,
and the website
renders these as side-by-side formatted notes.
We believe these would help you understand these algorithms better.
diff --git a/docs/normalization/batch_channel_norm/index.html b/docs/normalization/batch_channel_norm/index.html
index f060a63b..d79f5882 100644
--- a/docs/normalization/batch_channel_norm/index.html
+++ b/docs/normalization/batch_channel_norm/index.html
@@ -77,7 +77,7 @@ When the batch size is small a running mean and variance is used for
batch normalization.
Here is the training code for training
a VGG network that uses weight standardization to classify CIFAR-10 data.
-
+

diff --git a/docs/normalization/batch_norm/index.html b/docs/normalization/batch_norm/index.html
index 69e2b5d1..f9ffef8e 100644
--- a/docs/normalization/batch_norm/index.html
+++ b/docs/normalization/batch_norm/index.html
@@ -132,7 +132,7 @@ The usual practice is to calculate an exponential moving average of
mean and variance during the training phase and use that for inference.
Here’s the training code and a notebook for training
a CNN classifier that uses batch normalization for MNIST dataset.
-
+

diff --git a/docs/normalization/batch_norm/readme.html b/docs/normalization/batch_norm/readme.html
index b5468b75..0e7aa7dc 100644
--- a/docs/normalization/batch_norm/readme.html
+++ b/docs/normalization/batch_norm/readme.html
@@ -132,7 +132,7 @@ The usual practice is to calculate an exponential moving average of
mean and variance during the training phase and use that for inference.
Here’s the training code and a notebook for training
a CNN classifier that uses batch normalization for MNIST dataset.
-
+

diff --git a/docs/normalization/group_norm/index.html b/docs/normalization/group_norm/index.html
index 2eb0f4dc..92f7d291 100644
--- a/docs/normalization/group_norm/index.html
+++ b/docs/normalization/group_norm/index.html
@@ -127,7 +127,7 @@ $m$ is the size of the set $\mathcal{S}_i$ which is the same for all $i$.
where $G$ is the number of groups and $C$ is the number of channels.
Group normalization normalizes values of the same sample and the same group of channels together.
Here’s a CIFAR 10 classification model that uses group normalization.
-
+

diff --git a/docs/normalization/group_norm/readme.html b/docs/normalization/group_norm/readme.html
index b0a956f2..b0f21981 100644
--- a/docs/normalization/group_norm/readme.html
+++ b/docs/normalization/group_norm/readme.html
@@ -81,7 +81,7 @@ This is based on the observation that classical features such as
The paper proposes dividing feature channels into groups and then separately normalizing
all channels within each group.
Here’s a CIFAR 10 classification model that uses group normalization.
-
+

diff --git a/docs/normalization/weight_standardization/index.html b/docs/normalization/weight_standardization/index.html
index 9889bd90..fd4d62b9 100644
--- a/docs/normalization/weight_standardization/index.html
+++ b/docs/normalization/weight_standardization/index.html
@@ -95,7 +95,7 @@ This avoids outputs of nodes from always falling beyond the active range of the
Here is the training code for training
a VGG network that uses weight standardization to classify CIFAR-10 data.
This uses a 2D-Convolution Layer with Weight Standardization.
-
+

diff --git a/docs/rl/ppo/experiment.html b/docs/rl/ppo/experiment.html
index 2662b965..1de56e36 100644
--- a/docs/rl/ppo/experiment.html
+++ b/docs/rl/ppo/experiment.html
@@ -70,7 +70,7 @@
PPO Experiment with Atari Breakout
This experiment trains a Proximal Policy Optimization (PPO) agent on the Atari Breakout game using OpenAI Gym.
It runs the game environments on multiple processes to sample efficiently.
-
+

diff --git a/docs/rl/ppo/index.html b/docs/rl/ppo/index.html
index adedd32d..36e8c4b2 100644
--- a/docs/rl/ppo/index.html
+++ b/docs/rl/ppo/index.html
@@ -80,7 +80,7 @@ It does so by clipping gradient flow if the updated policy
is not close to the policy used to sample the data.
You can find an experiment that uses it here.
The experiment uses Generalized Advantage Estimation.
-
+

diff --git a/docs/rl/ppo/readme.html b/docs/rl/ppo/readme.html
index 67453117..bd384de2 100644
--- a/docs/rl/ppo/readme.html
+++ b/docs/rl/ppo/readme.html
@@ -80,7 +80,7 @@ It does so by clipping gradient flow if the updated policy
is not close to the policy used to sample the data.
You can find an experiment that uses it here.
The experiment uses Generalized Advantage Estimation.
-
+

diff --git a/docs/transformers/compressive/index.html b/docs/transformers/compressive/index.html
index b24f46f4..892d00f1 100644
--- a/docs/transformers/compressive/index.html
+++ b/docs/transformers/compressive/index.html
@@ -99,7 +99,7 @@ self-attention, and the pass-through in the residual connection is not normalize
This is supposed to be more stable in standard transformer setups.
Here are the training code and a notebook for training a compressive transformer
model on the Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/compressive/readme.html b/docs/transformers/compressive/readme.html
index 298fa520..73075774 100644
--- a/docs/transformers/compressive/readme.html
+++ b/docs/transformers/compressive/readme.html
@@ -99,7 +99,7 @@ self-attention, and the pass-through in the residual connection is not normalize
This is supposed to be more stable in standard transformer setups.
Here are the training code and a notebook for training a compressive transformer
model on the Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/fast_weights/experiment.html b/docs/transformers/fast_weights/experiment.html
index ea4cdca0..378e7fb1 100644
--- a/docs/transformers/fast_weights/experiment.html
+++ b/docs/transformers/fast_weights/experiment.html
@@ -70,7 +70,7 @@
Train Fast Weights Transformer
This trains a fast weights transformer model for auto-regression.
Here’s a Colab notebook for training a fast weights transformer on Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/fast_weights/index.html b/docs/transformers/fast_weights/index.html
index 15534e86..f78cf2c1 100644
--- a/docs/transformers/fast_weights/index.html
+++ b/docs/transformers/fast_weights/index.html
@@ -139,7 +139,7 @@ a new update rule for $\color{cyan}{W^{(i)}} = f(\color{cyan}{W^{(i-1)}})$ and c
$\frac{1}{z^{(i)} \cdot \color{lightgreen}{\phi(q^{(i)})}}$
Here are the training code and a notebook for training a fast weights
transformer on the Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/fast_weights/readme.html b/docs/transformers/fast_weights/readme.html
index a28207cd..48763408 100644
--- a/docs/transformers/fast_weights/readme.html
+++ b/docs/transformers/fast_weights/readme.html
@@ -73,7 +73,7 @@
Here is the annotated implementation.
Here are the training code
and a notebook for training a fast weights transformer on the Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/feedback/experiment.html b/docs/transformers/feedback/experiment.html
index 764ddc2a..e718abd3 100644
--- a/docs/transformers/feedback/experiment.html
+++ b/docs/transformers/feedback/experiment.html
@@ -72,7 +72,7 @@
You can pick the original feedback transformer or the new version
where the keys and values are precalculated.
Here’s a Colab notebook for training a feedback transformer on Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/feedback/index.html b/docs/transformers/feedback/index.html
index 04df6d3d..99516990 100644
--- a/docs/transformers/feedback/index.html
+++ b/docs/transformers/feedback/index.html
@@ -91,7 +91,7 @@ them cached.
The
second half of this file implements this.
We implemented a custom PyTorch function to improve performance.
Here’s the training code and a notebook for training a feedback transformer on Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/feedback/readme.html b/docs/transformers/feedback/readme.html
index 39035b98..077924c0 100644
--- a/docs/transformers/feedback/readme.html
+++ b/docs/transformers/feedback/readme.html
@@ -91,8 +91,8 @@ them cached.
The
second half of this file implements this.
We implemented a custom PyTorch function to improve performance.
Here’s the training code and a notebook for training a feedback transformer on Tiny Shakespeare dataset.
-
Colab Notebook
-
+
Colab Notebook
+

diff --git a/docs/transformers/glu_variants/simple.html b/docs/transformers/glu_variants/simple.html
index b01943de..0df3839b 100644
--- a/docs/transformers/glu_variants/simple.html
+++ b/docs/transformers/glu_variants/simple.html
@@ -72,7 +72,7 @@
We try different variants for the
position-wise feedforward network.
This is a simpler implementation that doesn’t use labml.configs
module.
We decided to write a simpler implementation to make it easier for readers who are not familiar with it.
-
+

diff --git a/docs/transformers/gpt/index.html b/docs/transformers/gpt/index.html
index d805326b..12a84ed7 100644
--- a/docs/transformers/gpt/index.html
+++ b/docs/transformers/gpt/index.html
@@ -85,7 +85,7 @@ are the parameter initialization, weight decay, and learning rate schedule.
For the transformer we reuse the
existing labml/nn transformer implementation.
Here’s a notebook for training a GPT model on Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/switch/index.html b/docs/transformers/switch/index.html
index ebe258c8..68196176 100644
--- a/docs/transformers/switch/index.html
+++ b/docs/transformers/switch/index.html
@@ -89,7 +89,7 @@ In a distributed setup you would have each FFN (each very large) on a different
The paper introduces another loss term to balance load among the experts (FFNs) and
discusses dropping tokens when routing is not balanced.
Here’s the training code and a notebook for training a switch transformer on Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/switch/readme.html b/docs/transformers/switch/readme.html
index ec208925..90892c2d 100644
--- a/docs/transformers/switch/readme.html
+++ b/docs/transformers/switch/readme.html
@@ -89,7 +89,7 @@ In a distributed setup you would have each FFN (each very large) on a different
The paper introduces another loss term to balance load among the experts (FFNs) and
discusses dropping tokens when routing is not balanced.
Here’s the training code and a notebook for training a switch transformer on Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/xl/index.html b/docs/transformers/xl/index.html
index afc987ef..dd4fc7ed 100644
--- a/docs/transformers/xl/index.html
+++ b/docs/transformers/xl/index.html
@@ -84,7 +84,7 @@ They introduce relative positional encoding, where the positional encodings
are introduced at the attention calculation.
Annotated implementation of relative multi-headed attention is in relative_mha.py
.
Here’s the training code and a notebook for training a transformer XL model on Tiny Shakespeare dataset.
-
+

diff --git a/docs/transformers/xl/readme.html b/docs/transformers/xl/readme.html
index 351a7355..0f8aa57c 100644
--- a/docs/transformers/xl/readme.html
+++ b/docs/transformers/xl/readme.html
@@ -84,7 +84,7 @@ They introduce relative positional encoding, where the positional encodings
are introduced at the attention calculation.
Annotated implementation of relative multi-headed attention is in relative_mha.py
.
Here’s the training code and a notebook for training a transformer XL model on Tiny Shakespeare dataset.
-
+

diff --git a/labml_nn/__init__.py b/labml_nn/__init__.py
index a371695d..bae8d843 100644
--- a/labml_nn/__init__.py
+++ b/labml_nn/__init__.py
@@ -3,7 +3,7 @@
This is a collection of simple PyTorch implementations of
neural networks and related algorithms.
-[These implementations](https://github.com/lab-ml/nn) are documented with explanations,
+[These implementations](https://github.com/labmlai/annotated_deep_learning_paper_implementations) are documented with explanations,
and the [website](index.html)
renders these as side-by-side formatted notes.
We believe these would help you understand these algorithms better.
diff --git a/labml_nn/capsule_networks/__init__.py b/labml_nn/capsule_networks/__init__.py
index 40b70fa9..ef90ab1c 100644
--- a/labml_nn/capsule_networks/__init__.py
+++ b/labml_nn/capsule_networks/__init__.py
@@ -26,7 +26,7 @@ confusions I had with the paper.
Here's a notebook for training a Capsule Network on MNIST dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/capsule_networks/mnist.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/capsule_networks/mnist.ipynb)
[](https://app.labml.ai/run/e7c08e08586711ebb3e30242ac1c0002)
"""
diff --git a/labml_nn/capsule_networks/mnist.ipynb b/labml_nn/capsule_networks/mnist.ipynb
index bf0502c0..bc6e70c9 100644
--- a/labml_nn/capsule_networks/mnist.ipynb
+++ b/labml_nn/capsule_networks/mnist.ipynb
@@ -20,8 +20,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/capsule_networks/mnist.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/capsule_networks/mnist.ipynb) \n",
"\n",
"## Training a Capsule Network to classify MNIST digits\n",
"\n",
diff --git a/labml_nn/capsule_networks/readme.md b/labml_nn/capsule_networks/readme.md
index f144f985..637b75d7 100644
--- a/labml_nn/capsule_networks/readme.md
+++ b/labml_nn/capsule_networks/readme.md
@@ -17,5 +17,5 @@ confusions I had with the paper.
Here's a notebook for training a Capsule Network on MNIST dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/capsule_networks/mnist.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/capsule_networks/mnist.ipynb)
[](https://app.labml.ai/run/e7c08e08586711ebb3e30242ac1c0002)
diff --git a/labml_nn/cfr/__init__.py b/labml_nn/cfr/__init__.py
index 578b375a..d599c02e 100644
--- a/labml_nn/cfr/__init__.py
+++ b/labml_nn/cfr/__init__.py
@@ -21,7 +21,7 @@ where we sample from the game tree and estimate the regrets.
We tried to keep our Python implementation easy to understand, like a tutorial.
We run it on [a very simple imperfect information game called Kuhn poker](kuhn/index.html).
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/cfr/kuhn/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/cfr/kuhn/experiment.ipynb)
[](https://twitter.com/labmlai/status/1407186002255380484)
Twitter thread
diff --git a/labml_nn/cfr/kuhn/__init__.py b/labml_nn/cfr/kuhn/__init__.py
index ab8886b0..fe94190d 100644
--- a/labml_nn/cfr/kuhn/__init__.py
+++ b/labml_nn/cfr/kuhn/__init__.py
@@ -31,7 +31,7 @@ Here's some example games:
Here we extend the `InfoSet` class and `History` class defined in [`__init__.py`](../index.html)
with Kuhn Poker specifics.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/cfr/kuhn/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/cfr/kuhn/experiment.ipynb)
[](https://app.labml.ai/run/7c35d3fad29711eba588acde48001122)
"""
diff --git a/labml_nn/cfr/kuhn/experiment.ipynb b/labml_nn/cfr/kuhn/experiment.ipynb
index 569d1c59..aa5d7a24 100644
--- a/labml_nn/cfr/kuhn/experiment.ipynb
+++ b/labml_nn/cfr/kuhn/experiment.ipynb
@@ -33,8 +33,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/cfr/kuhn/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/cfr/kuhn/experiment.ipynb) \n",
"\n",
"## [Counterfactual Regret Minimization (CFR)](https://nn.labml.ai/cfr/index.html) on Kuhn Poker\n",
"\n",
diff --git a/labml_nn/gan/cycle_gan/__init__.py b/labml_nn/gan/cycle_gan/__init__.py
index 22ab1085..d3b3630b 100644
--- a/labml_nn/gan/cycle_gan/__init__.py
+++ b/labml_nn/gan/cycle_gan/__init__.py
@@ -29,7 +29,7 @@ The discriminators test whether the generated images look real.
This file contains the model code as well as the training code.
We also have a Google Colab notebook.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/gan/cycle_gan/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/gan/cycle_gan/experiment.ipynb)
[](https://app.labml.ai/run/93b11a665d6811ebaac80242ac1c0002)
"""
diff --git a/labml_nn/gan/cycle_gan/experiment.ipynb b/labml_nn/gan/cycle_gan/experiment.ipynb
index b00b10ce..929615f7 100644
--- a/labml_nn/gan/cycle_gan/experiment.ipynb
+++ b/labml_nn/gan/cycle_gan/experiment.ipynb
@@ -21,8 +21,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/gan/cycle_gan/experiment.ipynb)\n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/gan/cycle_gan/experiment.ipynb)\n",
"\n",
"## Cycle GAN\n",
"\n",
diff --git a/labml_nn/gan/dcgan/experiment.ipynb b/labml_nn/gan/dcgan/experiment.ipynb
index 47907e34..f04cf5d2 100644
--- a/labml_nn/gan/dcgan/experiment.ipynb
+++ b/labml_nn/gan/dcgan/experiment.ipynb
@@ -22,8 +22,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/gan/dcgan/experiment.ipynb)\n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/gan/dcgan/experiment.ipynb)\n",
"\n",
"## DCGAN\n",
"\n",
@@ -256,16 +256,16 @@
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)",
"\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mexperiment\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstart\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 2\u001B[0;31m \u001B[0mconf\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrun\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 3\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 246\u001B[0m \u001B[0m_\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 247\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0m_\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtraining_loop\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 248\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrun_step\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 249\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 250\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0msample\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun_step\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 234\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mupdate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 235\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'train'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 236\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 237\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mvalidator\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 238\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'valid'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 136\u001B[0m \u001B[0msm\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mon_epoch_start\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 137\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mset_grad_enabled\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 138\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 139\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 140\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcompleted\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__iterate\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 149\u001B[0m \u001B[0mbatch\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mnext\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterable\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 150\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 151\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mbatch\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 152\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 153\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_nn/gan/original/experiment.py\u001B[0m in \u001B[0;36mstep\u001B[0;34m(self, batch, batch_idx)\u001B[0m\n\u001B[1;32m 149\u001B[0m \u001B[0;31m# Log stuff\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 150\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0madd\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'generated'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mgenerated_images\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;36m5\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 151\u001B[0;31m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0madd\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"loss.generator.\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mloss\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 152\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 153\u001B[0m \u001B[0;31m# Train\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml/tracker.py\u001B[0m in \u001B[0;36madd\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 131\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0misinstance\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mstr\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 132\u001B[0m \u001B[0;32mraise\u001B[0m \u001B[0mTypeError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'tracker.add should be called as add(name, value), add(dictionary) or add(k=v,k2=v2...)'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 133\u001B[0;31m \u001B[0m_internal\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstore\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0margs\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m1\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 134\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 135\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml/internal/tracker/__init__.py\u001B[0m in \u001B[0;36mstore\u001B[0;34m(self, key, value)\u001B[0m\n\u001B[1;32m 165\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 166\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_create_indicator\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkey\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mvalue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 167\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mindicators\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mkey\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcollect_value\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvalue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 168\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 169\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mnew_line\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml/internal/tracker/indicators/numeric.py\u001B[0m in \u001B[0;36mcollect_value\u001B[0;34m(self, value)\u001B[0m\n\u001B[1;32m 79\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 80\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mcollect_value\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mvalue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 81\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_values\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mappend\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mto_numpy\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvalue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mravel\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 82\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 83\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mclear\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml/internal/util/values.py\u001B[0m in \u001B[0;36mto_numpy\u001B[0;34m(value)\u001B[0m\n\u001B[1;32m 20\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mvalue\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdata\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcpu\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnumpy\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 21\u001B[0m \u001B[0;32melif\u001B[0m \u001B[0misinstance\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvalue\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mTensor\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 22\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mvalue\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdata\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcpu\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnumpy\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 23\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 24\u001B[0m \u001B[0;32mraise\u001B[0m \u001B[0mValueError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34mf\"Unknown type {type(value)}\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/training_loop.py\u001B[0m in \u001B[0;36m__handler\u001B[0;34m(self, sig, frame)\u001B[0m\n\u001B[1;32m 162\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__finish\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 163\u001B[0m \u001B[0mlogger\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mlog\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'Killing loop...'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mText\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdanger\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 164\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mold_handler\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msig\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mframe\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 165\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 166\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m__str__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 246\u001B[0m \u001B[0m_\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 247\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0m_\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtraining_loop\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 248\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrun_step\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 249\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 250\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0msample\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun_step\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 234\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mupdate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 235\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'train'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 236\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 237\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mvalidator\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 238\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'valid'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 136\u001B[0m \u001B[0msm\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mon_epoch_start\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 137\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mset_grad_enabled\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 138\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 139\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 140\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcompleted\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__iterate\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 149\u001B[0m \u001B[0mbatch\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mnext\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterable\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 150\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 151\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mbatch\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 152\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 153\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_nn/gan/original/experiment.py\u001B[0m in \u001B[0;36mstep\u001B[0;34m(self, batch, batch_idx)\u001B[0m\n\u001B[1;32m 149\u001B[0m \u001B[0;31m# Log stuff\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 150\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0madd\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'generated'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mgenerated_images\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;36m5\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 151\u001B[0;31m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0madd\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"loss.generator.\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mloss\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 152\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 153\u001B[0m \u001B[0;31m# Train\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml/tracker.py\u001B[0m in \u001B[0;36madd\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 131\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0misinstance\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mstr\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 132\u001B[0m \u001B[0;32mraise\u001B[0m \u001B[0mTypeError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'tracker.add should be called as add(name, value), add(dictionary) or add(k=v,k2=v2...)'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 133\u001B[0;31m \u001B[0m_internal\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstore\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0margs\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m1\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 134\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 135\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml/internal/tracker/__init__.py\u001B[0m in \u001B[0;36mstore\u001B[0;34m(self, key, value)\u001B[0m\n\u001B[1;32m 165\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 166\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_create_indicator\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkey\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mvalue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 167\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mindicators\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mkey\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcollect_value\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvalue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 168\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 169\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mnew_line\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml/internal/tracker/indicators/numeric.py\u001B[0m in \u001B[0;36mcollect_value\u001B[0;34m(self, value)\u001B[0m\n\u001B[1;32m 79\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 80\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mcollect_value\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mvalue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 81\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_values\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mappend\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mto_numpy\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvalue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mravel\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 82\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 83\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mclear\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml/internal/util/values.py\u001B[0m in \u001B[0;36mto_numpy\u001B[0;34m(value)\u001B[0m\n\u001B[1;32m 20\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mvalue\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdata\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcpu\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnumpy\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 21\u001B[0m \u001B[0;32melif\u001B[0m \u001B[0misinstance\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvalue\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mTensor\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 22\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mvalue\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdata\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcpu\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnumpy\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 23\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 24\u001B[0m \u001B[0;32mraise\u001B[0m \u001B[0mValueError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34mf\"Unknown type {type(value)}\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/training_loop.py\u001B[0m in \u001B[0;36m__handler\u001B[0;34m(self, sig, frame)\u001B[0m\n\u001B[1;32m 162\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__finish\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 163\u001B[0m \u001B[0mlogger\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mlog\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'Killing loop...'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mText\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdanger\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 164\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mold_handler\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msig\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mframe\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 165\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 166\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m__str__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;31mKeyboardInterrupt\u001B[0m: "
]
}
diff --git a/labml_nn/gan/original/experiment.ipynb b/labml_nn/gan/original/experiment.ipynb
index 424b4d23..c1034aa7 100644
--- a/labml_nn/gan/original/experiment.ipynb
+++ b/labml_nn/gan/original/experiment.ipynb
@@ -22,8 +22,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/gan/original/experiment.ipynb)\n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/gan/original/experiment.ipynb)\n",
"\n",
"## DCGAN\n",
"\n",
@@ -235,15 +235,15 @@
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)",
"\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mexperiment\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstart\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 2\u001B[0;31m \u001B[0mconf\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrun\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 3\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 246\u001B[0m \u001B[0m_\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 247\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0m_\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtraining_loop\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 248\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrun_step\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 249\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 250\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0msample\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun_step\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 234\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mupdate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 235\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'train'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 236\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 237\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mvalidator\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 238\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'valid'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 136\u001B[0m \u001B[0msm\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mon_epoch_start\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 137\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mset_grad_enabled\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 138\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 139\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 140\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcompleted\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__iterate\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 149\u001B[0m \u001B[0mbatch\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mnext\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterable\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 150\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 151\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mbatch\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 152\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 153\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_nn/gan/original/experiment.py\u001B[0m in \u001B[0;36mstep\u001B[0;34m(self, batch, batch_idx)\u001B[0m\n\u001B[1;32m 157\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mbatch_idx\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mis_last\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 158\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0madd\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'generator'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mgenerator\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 159\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mgenerator_optimizer\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 160\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 161\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msave\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 246\u001B[0m \u001B[0m_\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 247\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0m_\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtraining_loop\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 248\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrun_step\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 249\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 250\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0msample\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun_step\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 234\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mupdate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 235\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'train'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 236\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 237\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mvalidator\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 238\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'valid'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 136\u001B[0m \u001B[0msm\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mon_epoch_start\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 137\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mset_grad_enabled\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 138\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 139\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 140\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcompleted\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__iterate\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 149\u001B[0m \u001B[0mbatch\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mnext\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterable\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 150\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 151\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mbatch\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 152\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 153\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_nn/gan/original/experiment.py\u001B[0m in \u001B[0;36mstep\u001B[0;34m(self, batch, batch_idx)\u001B[0m\n\u001B[1;32m 157\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mbatch_idx\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mis_last\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 158\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0madd\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'generator'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mgenerator\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 159\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mgenerator_optimizer\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 160\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 161\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msave\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda/envs/torch/lib/python3.8/site-packages/torch/autograd/grad_mode.py\u001B[0m in \u001B[0;36mdecorate_context\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 24\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mdecorate_context\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 25\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__class__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 26\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mfunc\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 27\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mcast\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mF\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdecorate_context\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 28\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda/envs/torch/lib/python3.8/site-packages/torch/optim/adam.py\u001B[0m in \u001B[0;36mstep\u001B[0;34m(self, closure)\u001B[0m\n\u001B[1;32m 106\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 107\u001B[0m \u001B[0mbeta1\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mbeta2\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mgroup\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'betas'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 108\u001B[0;31m F.adam(params_with_grad,\n\u001B[0m\u001B[1;32m 109\u001B[0m \u001B[0mgrads\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 110\u001B[0m \u001B[0mexp_avgs\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda/envs/torch/lib/python3.8/site-packages/torch/optim/functional.py\u001B[0m in \u001B[0;36madam\u001B[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, amsgrad, beta1, beta2, lr, weight_decay, eps)\u001B[0m\n\u001B[1;32m 92\u001B[0m \u001B[0mdenom\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m(\u001B[0m\u001B[0mmax_exp_avg_sq\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msqrt\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0mmath\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msqrt\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mbias_correction2\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0madd_\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0meps\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 93\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 94\u001B[0;31m \u001B[0mdenom\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m(\u001B[0m\u001B[0mexp_avg_sq\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msqrt\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0mmath\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msqrt\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mbias_correction2\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0madd_\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0meps\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 95\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 96\u001B[0m \u001B[0mstep_size\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mlr\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0mbias_correction1\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/training_loop.py\u001B[0m in \u001B[0;36m__handler\u001B[0;34m(self, sig, frame)\u001B[0m\n\u001B[1;32m 162\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__finish\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 163\u001B[0m \u001B[0mlogger\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mlog\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'Killing loop...'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mText\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdanger\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 164\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mold_handler\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msig\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mframe\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 165\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 166\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m__str__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/training_loop.py\u001B[0m in \u001B[0;36m__handler\u001B[0;34m(self, sig, frame)\u001B[0m\n\u001B[1;32m 162\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__finish\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 163\u001B[0m \u001B[0mlogger\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mlog\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'Killing loop...'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mText\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdanger\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 164\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mold_handler\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msig\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mframe\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 165\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 166\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m__str__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;31mKeyboardInterrupt\u001B[0m: "
]
}
diff --git a/labml_nn/gan/wasserstein/__init__.py b/labml_nn/gan/wasserstein/__init__.py
index d51de134..496639d1 100644
--- a/labml_nn/gan/wasserstein/__init__.py
+++ b/labml_nn/gan/wasserstein/__init__.py
@@ -81,7 +81,7 @@ network that defines $f$ clipped within a range.*
Here is the code to try this on a [simple MNIST generation experiment](experiment.html).
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/gan/wasserstein/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/gan/wasserstein/experiment.ipynb)
"""
import torch.utils.data
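The docstring patched in this hunk refers to the critic — the network defining $f$ — being clipped within a range so that it stays approximately Lipschitz. As a rough illustration of that idea, here is a minimal weight-clipping sketch; the clip range `0.01`, layer sizes, and optimizer are assumptions, not this repository's implementation.

```python
import torch
import torch.nn as nn

# Minimal WGAN critic step with weight clipping (illustrative sketch).
critic = nn.Sequential(nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 1))
opt = torch.optim.RMSprop(critic.parameters(), lr=5e-5)

def critic_step(real: torch.Tensor, fake: torch.Tensor) -> float:
    # Wasserstein critic loss: maximize f(real) - f(fake),
    # i.e. minimize its negative.
    loss = -(critic(real).mean() - critic(fake).mean())
    opt.zero_grad()
    loss.backward()
    opt.step()
    # Clip every parameter into [-c, c] so f stays within a family of
    # functions with a bounded Lipschitz constant.
    with torch.no_grad():
        for p in critic.parameters():
            p.clamp_(-0.01, 0.01)
    return loss.item()

print(critic_step(torch.randn(64, 784), torch.randn(64, 784)))
```

Clipping is the original paper's blunt way of enforcing the constraint; later work replaces it with a gradient penalty.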
diff --git a/labml_nn/gan/wasserstein/experiment.ipynb b/labml_nn/gan/wasserstein/experiment.ipynb
index 462905b2..fca837e9 100644
--- a/labml_nn/gan/wasserstein/experiment.ipynb
+++ b/labml_nn/gan/wasserstein/experiment.ipynb
@@ -22,8 +22,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/gan/wasserstein/experiment.ipynb)\n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/gan/wasserstein/experiment.ipynb)\n",
"\n",
"## DCGAN\n",
"\n",
@@ -251,10 +251,10 @@
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)",
"\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mexperiment\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstart\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 2\u001B[0;31m \u001B[0mconf\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrun\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 3\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 246\u001B[0m \u001B[0m_\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 247\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0m_\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtraining_loop\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 248\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrun_step\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 249\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 250\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0msample\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun_step\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 234\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mupdate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 235\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'train'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 236\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 237\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mvalidator\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 238\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'valid'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 136\u001B[0m \u001B[0msm\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mon_epoch_start\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 137\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mset_grad_enabled\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 138\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 139\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 140\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcompleted\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__iterate\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 147\u001B[0m \u001B[0mmonit\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mprogress\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 148\u001B[0m \u001B[0;32mwhile\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0miteration_completed\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 149\u001B[0;31m \u001B[0mbatch\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mnext\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterable\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 150\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 151\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mbatch\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 246\u001B[0m \u001B[0m_\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 247\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0m_\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtraining_loop\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 248\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrun_step\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 249\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 250\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0msample\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36mrun_step\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 234\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mupdate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 235\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'train'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 236\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtrainer\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 237\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mvalidator\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 238\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtracker\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnamespace\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'valid'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 136\u001B[0m \u001B[0msm\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mon_epoch_start\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 137\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mset_grad_enabled\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmode\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mis_train\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 138\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 139\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 140\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcompleted\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/train_valid.py\u001B[0m in \u001B[0;36m__iterate\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 147\u001B[0m \u001B[0mmonit\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mprogress\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 148\u001B[0m \u001B[0;32mwhile\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0miteration_completed\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 149\u001B[0;31m \u001B[0mbatch\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mnext\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__iterable\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 150\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 151\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mstep\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mbatch\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_batch_index\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda/envs/torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001B[0m in \u001B[0;36m__next__\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 433\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_sampler_iter\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 434\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_reset\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 435\u001B[0;31m \u001B[0mdata\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_next_data\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 436\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_num_yielded\u001B[0m \u001B[0;34m+=\u001B[0m \u001B[0;36m1\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 437\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_dataset_kind\u001B[0m \u001B[0;34m==\u001B[0m \u001B[0m_DatasetKind\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mIterable\u001B[0m \u001B[0;32mand\u001B[0m\u001B[0;31m \u001B[0m\u001B[0;31m\\\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda/envs/torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001B[0m in \u001B[0;36m_next_data\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 473\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m_next_data\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 474\u001B[0m \u001B[0mindex\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_next_index\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;31m# may raise StopIteration\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 475\u001B[0;31m \u001B[0mdata\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_dataset_fetcher\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mfetch\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mindex\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;31m# may raise StopIteration\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 476\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_pin_memory\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 477\u001B[0m \u001B[0mdata\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0m_utils\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mpin_memory\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mpin_memory\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdata\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda/envs/torch/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\u001B[0m in \u001B[0;36mfetch\u001B[0;34m(self, possibly_batched_index)\u001B[0m\n\u001B[1;32m 42\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mfetch\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mpossibly_batched_index\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 43\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mauto_collation\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 44\u001B[0;31m \u001B[0mdata\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m[\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0midx\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0midx\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mpossibly_batched_index\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 45\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 46\u001B[0m \u001B[0mdata\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdataset\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mpossibly_batched_index\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
@@ -267,7 +267,7 @@
"\u001B[0;32m~/miniconda/envs/torch/lib/python3.8/site-packages/torch/tensor.py\u001B[0m in \u001B[0;36mwrapped\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 22\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mwrapped\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 23\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0moverrides\u001B[0m \u001B[0;32mimport\u001B[0m \u001B[0mhas_torch_function\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mhandle_torch_function\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 24\u001B[0;31m \u001B[0;32mif\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0mall\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mtype\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mt\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0mTensor\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0mt\u001B[0m \u001B[0;32min\u001B[0m \u001B[0margs\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mand\u001B[0m \u001B[0mhas_torch_function\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 25\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mhandle_torch_function\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mwrapped\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m*\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 26\u001B[0m \u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda/envs/torch/lib/python3.8/site-packages/torch/overrides.py\u001B[0m in \u001B[0;36mhas_torch_function\u001B[0;34m(relevant_args)\u001B[0m\n\u001B[1;32m 1081\u001B[0m \u001B[0mimplementations\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;32mFalse\u001B[0m \u001B[0motherwise\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1082\u001B[0m \"\"\"\n\u001B[0;32m-> 1083\u001B[0;31m return _is_torch_function_enabled() and any(\n\u001B[0m\u001B[1;32m 1084\u001B[0m \u001B[0mtype\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0ma\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mTensor\u001B[0m \u001B[0;32mand\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1085\u001B[0m \u001B[0mgetattr\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0ma\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m'__torch_function__'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_disabled_torch_function_impl\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda/envs/torch/lib/python3.8/site-packages/torch/overrides.py\u001B[0m in \u001B[0;36m\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m 1081\u001B[0m \u001B[0mimplementations\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;32mFalse\u001B[0m \u001B[0motherwise\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1082\u001B[0m \"\"\"\n\u001B[0;32m-> 1083\u001B[0;31m return _is_torch_function_enabled() and any(\n\u001B[0m\u001B[1;32m 1084\u001B[0m \u001B[0mtype\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0ma\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0mtorch\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mTensor\u001B[0m \u001B[0;32mand\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1085\u001B[0m \u001B[0mgetattr\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0ma\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m'__torch_function__'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_disabled_torch_function_impl\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;32m~/ml/lab-ml/nn/labml_helpers/training_loop.py\u001B[0m in \u001B[0;36m__handler\u001B[0;34m(self, sig, frame)\u001B[0m\n\u001B[1;32m 162\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__finish\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 163\u001B[0m \u001B[0mlogger\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mlog\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'Killing loop...'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mText\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdanger\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 164\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mold_handler\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msig\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mframe\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 165\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 166\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m__str__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+ "\u001B[0;32m~/ml/labmlai/annotated_deep_learning_paper_implementations/labml_helpers/training_loop.py\u001B[0m in \u001B[0;36m__handler\u001B[0;34m(self, sig, frame)\u001B[0m\n\u001B[1;32m 162\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__finish\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 163\u001B[0m \u001B[0mlogger\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mlog\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'Killing loop...'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mText\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdanger\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 164\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mold_handler\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msig\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mframe\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 165\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 166\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m__str__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;31mKeyboardInterrupt\u001B[0m: "
]
}
diff --git a/labml_nn/hypernetworks/experiment.ipynb b/labml_nn/hypernetworks/experiment.ipynb
index bcdc6187..3dbcf743 100644
--- a/labml_nn/hypernetworks/experiment.ipynb
+++ b/labml_nn/hypernetworks/experiment.ipynb
@@ -20,8 +20,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/hypernetworks/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/hypernetworks/experiment.ipynb) \n",
"\n",
"## HyperLSTM\n",
"\n",
diff --git a/labml_nn/hypernetworks/hyper_lstm.py b/labml_nn/hypernetworks/hyper_lstm.py
index e7becb01..d39137dd 100644
--- a/labml_nn/hypernetworks/hyper_lstm.py
+++ b/labml_nn/hypernetworks/hyper_lstm.py
@@ -15,7 +15,7 @@ by David Ha gives a good explanation of HyperNetworks.
We have an experiment that trains a HyperLSTM to predict text on the Shakespeare dataset.
Here's the link to code: [`experiment.py`](experiment.html)
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/hypernetworks/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/hypernetworks/experiment.ipynb)
[](https://app.labml.ai/run/9e7f39e047e811ebbaff2b26e3148b3d)
HyperNetworks use a smaller network to generate weights of a larger network.
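The sentence above is the whole idea in one line: a smaller network generates the weights of a larger one. A minimal sketch of the pattern with a hypothetical `HyperLinear` layer (not the HyperLSTM implemented in this file):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class HyperLinear(nn.Module):
    """A linear layer whose weight matrix is produced by a smaller network."""
    def __init__(self, in_features: int, out_features: int, z_dim: int = 8):
        super().__init__()
        self.in_features, self.out_features = in_features, out_features
        # Learned embedding that conditions the hypernetwork.
        self.z = nn.Parameter(torch.randn(z_dim))
        # The (much smaller) hypernetwork maps z to a full weight matrix.
        self.hyper = nn.Linear(z_dim, in_features * out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        w = self.hyper(self.z).view(self.out_features, self.in_features)
        return F.linear(x, w)

layer = HyperLinear(16, 4)
print(layer(torch.randn(2, 16)).shape)  # torch.Size([2, 4])
```

HyperLSTM applies the same trick per time step, so the generated weights can vary with the input.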
diff --git a/labml_nn/normalization/batch_channel_norm/__init__.py b/labml_nn/normalization/batch_channel_norm/__init__.py
index 66714424..7617ea8a 100644
--- a/labml_nn/normalization/batch_channel_norm/__init__.py
+++ b/labml_nn/normalization/batch_channel_norm/__init__.py
@@ -19,7 +19,7 @@ batch normalization.
Here is [the training code](../weight_standardization/experiment.html) for training
a VGG network that uses weight standardization to classify CIFAR-10 data.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/weight_standardization/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/weight_standardization/experiment.ipynb)
[](https://app.labml.ai/run/f4a783a2a7df11eb921d0242ac1c0002)
[](https://wandb.ai/vpj/cifar10/runs/3flr4k8w)
"""
diff --git a/labml_nn/normalization/batch_norm/__init__.py b/labml_nn/normalization/batch_norm/__init__.py
index 746caef6..100c96cc 100644
--- a/labml_nn/normalization/batch_norm/__init__.py
+++ b/labml_nn/normalization/batch_norm/__init__.py
@@ -91,7 +91,7 @@ mean and variance during the training phase and use that for inference.
Here's [the training code](mnist.html) and a notebook for training
a CNN classifier that uses batch normalization for the MNIST dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/batch_norm/mnist.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/batch_norm/mnist.ipynb)
[](https://app.labml.ai/run/011254fe647011ebbb8e0242ac1c0002)
"""
diff --git a/labml_nn/normalization/batch_norm/mnist.ipynb b/labml_nn/normalization/batch_norm/mnist.ipynb
index b2009021..8b7976fb 100644
--- a/labml_nn/normalization/batch_norm/mnist.ipynb
+++ b/labml_nn/normalization/batch_norm/mnist.ipynb
@@ -1005,8 +1005,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/batch_norm/mnist.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/batch_norm/mnist.ipynb) \n",
"\n",
"## Batch Normaliztion\n",
"\n",
diff --git a/labml_nn/normalization/batch_norm/readme.md b/labml_nn/normalization/batch_norm/readme.md
index 3e2a5d12..ece7ba8d 100644
--- a/labml_nn/normalization/batch_norm/readme.md
+++ b/labml_nn/normalization/batch_norm/readme.md
@@ -84,5 +84,5 @@ mean and variance during the training phase and use that for inference.
Here's [the training code](mnist.html) and a notebook for training
a CNN classifier that uses batch normalization for the MNIST dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/batch_norm/mnist.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/batch_norm/mnist.ipynb)
[](https://app.labml.ai/run/011254fe647011ebbb8e0242ac1c0002)
diff --git a/labml_nn/normalization/group_norm/__init__.py b/labml_nn/normalization/group_norm/__init__.py
index df81b851..ec48d5c6 100644
--- a/labml_nn/normalization/group_norm/__init__.py
+++ b/labml_nn/normalization/group_norm/__init__.py
@@ -78,7 +78,7 @@ Group normalization normalizes values of the same sample and the same group of c
Here's a [CIFAR 10 classification model](experiment.html) that uses group normalization.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/group_norm/experiment.ipynb)
[](https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002)
[](https://wandb.ai/vpj/cifar10/runs/310etthp)
"""
diff --git a/labml_nn/normalization/group_norm/experiment.ipynb b/labml_nn/normalization/group_norm/experiment.ipynb
index 0bec0875..fc8eac0a 100644
--- a/labml_nn/normalization/group_norm/experiment.ipynb
+++ b/labml_nn/normalization/group_norm/experiment.ipynb
@@ -269,8 +269,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/group_norm/experiment.ipynb) \n",
"\n",
"## Group Norm - CIFAR 10\n",
"\n",
diff --git a/labml_nn/normalization/group_norm/readme.md b/labml_nn/normalization/group_norm/readme.md
index 06afdb0d..f6f2c269 100644
--- a/labml_nn/normalization/group_norm/readme.md
+++ b/labml_nn/normalization/group_norm/readme.md
@@ -17,6 +17,6 @@ all channels within each group.
Here's a [CIFAR 10 classification model](https://nn.labml.ai/normalization/group_norm/experiment.html) that uses group normalization.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/group_norm/experiment.ipynb)
[](https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002)
[](https://wandb.ai/vpj/cifar10/runs/310etthp)
\ No newline at end of file
diff --git a/labml_nn/normalization/weight_standardization/__init__.py b/labml_nn/normalization/weight_standardization/__init__.py
index 2cfbab4e..6756f50a 100644
--- a/labml_nn/normalization/weight_standardization/__init__.py
+++ b/labml_nn/normalization/weight_standardization/__init__.py
@@ -42,7 +42,7 @@ Here is [the training code](experiment.html) for training
a VGG network that uses weight standardization to classify CIFAR-10 data.
This uses a [2D-Convolution Layer with Weight Standardization](conv2d.html).
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/weight_standardization/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/weight_standardization/experiment.ipynb)
[](https://app.labml.ai/run/f4a783a2a7df11eb921d0242ac1c0002)
[](https://wandb.ai/vpj/cifar10/runs/3flr4k8w)
"""
diff --git a/labml_nn/normalization/weight_standardization/experiment.ipynb b/labml_nn/normalization/weight_standardization/experiment.ipynb
index 0f35ba28..d8e7f620 100644
--- a/labml_nn/normalization/weight_standardization/experiment.ipynb
+++ b/labml_nn/normalization/weight_standardization/experiment.ipynb
@@ -269,8 +269,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/group_norm/experiment.ipynb) \n",
"\n",
"## Weight Standardization & Batch-Channel Normalization - CIFAR 10\n",
"\n",
diff --git a/labml_nn/rl/ppo/__init__.py b/labml_nn/rl/ppo/__init__.py
index 462bbe03..9a99c4cb 100644
--- a/labml_nn/rl/ppo/__init__.py
+++ b/labml_nn/rl/ppo/__init__.py
@@ -22,7 +22,7 @@ is not close to the policy used to sample the data.
You can find an experiment that uses it [here](experiment.html).
The experiment uses [Generalized Advantage Estimation](gae.html).
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/rl/ppo/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/ppo/experiment.ipynb)
[](https://app.labml.ai/run/6eff28a0910e11eb9b008db315936e2f)
"""
diff --git a/labml_nn/rl/ppo/experiment.ipynb b/labml_nn/rl/ppo/experiment.ipynb
index 15d98731..c185a1fe 100644
--- a/labml_nn/rl/ppo/experiment.ipynb
+++ b/labml_nn/rl/ppo/experiment.ipynb
@@ -6,8 +6,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/rl/ppo/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/ppo/experiment.ipynb) \n",
"\n",
"## Proximal Policy Optimization - PPO\n",
"\n",
diff --git a/labml_nn/rl/ppo/experiment.py b/labml_nn/rl/ppo/experiment.py
index fce12631..f9c2ab55 100644
--- a/labml_nn/rl/ppo/experiment.py
+++ b/labml_nn/rl/ppo/experiment.py
@@ -9,7 +9,7 @@ summary: Annotated implementation to train a PPO agent on Atari Breakout game.
This experiment trains a Proximal Policy Optimization (PPO) agent on the Atari Breakout game using OpenAI Gym.
It runs the [game environments on multiple processes](../game.html) to sample efficiently.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/rl/ppo/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/ppo/experiment.ipynb)
[](https://app.labml.ai/run/6eff28a0910e11eb9b008db315936e2f)
"""
diff --git a/labml_nn/rl/ppo/readme.md b/labml_nn/rl/ppo/readme.md
index 759addfb..63c219d3 100644
--- a/labml_nn/rl/ppo/readme.md
+++ b/labml_nn/rl/ppo/readme.md
@@ -15,5 +15,5 @@ is not close to the policy used to sample the data.
You can find an experiment that uses it [here](https://nn.labml.ai/rl/ppo/experiment.html).
The experiment uses [Generalized Advantage Estimation](https://nn.labml.ai/rl/ppo/gae.html).
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/rl/ppo/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/ppo/experiment.ipynb)
[](https://app.labml.ai/run/6eff28a0910e11eb9b008db315936e2f)
diff --git a/labml_nn/transformers/compressive/__init__.py b/labml_nn/transformers/compressive/__init__.py
index 1fe71056..846e1755 100644
--- a/labml_nn/transformers/compressive/__init__.py
+++ b/labml_nn/transformers/compressive/__init__.py
@@ -47,7 +47,7 @@ This is supposed to be more stable in standard transformer setups.
Here are [the training code](experiment.html) and a notebook for training a compressive transformer
model on the Tiny Shakespeare dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/compressive/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/compressive/experiment.ipynb)
[](https://app.labml.ai/run/0d9b5338726c11ebb7c80242ac1c0002)
"""
diff --git a/labml_nn/transformers/compressive/experiment.ipynb b/labml_nn/transformers/compressive/experiment.ipynb
index 6ce329d4..d99735e3 100644
--- a/labml_nn/transformers/compressive/experiment.ipynb
+++ b/labml_nn/transformers/compressive/experiment.ipynb
@@ -20,8 +20,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/compressive/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/compressive/experiment.ipynb) \n",
"\n",
"## Compressive Transformer\n",
"\n",
diff --git a/labml_nn/transformers/compressive/readme.md b/labml_nn/transformers/compressive/readme.md
index 0d154273..e0ea82e4 100644
--- a/labml_nn/transformers/compressive/readme.md
+++ b/labml_nn/transformers/compressive/readme.md
@@ -39,5 +39,5 @@ This is supposed to be more stable in standard transformer setups.
Here are [the training code](https://nn.labml.ai/transformers/compressive/experiment.html) and a notebook for training a compressive transformer
model on the Tiny Shakespeare dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/compressive/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/compressive/experiment.ipynb)
[](https://app.labml.ai/run/0d9b5338726c11ebb7c80242ac1c0002)
diff --git a/labml_nn/transformers/fast_weights/__init__.py b/labml_nn/transformers/fast_weights/__init__.py
index 851dbb62..8c539ab3 100644
--- a/labml_nn/transformers/fast_weights/__init__.py
+++ b/labml_nn/transformers/fast_weights/__init__.py
@@ -88,7 +88,7 @@ $\frac{1}{z^{(i)} \cdot \color{lightgreen}{\phi(q^{(i)})}}$
Here are [the training code](experiment.html) and a notebook for training a fast weights
transformer on the Tiny Shakespeare dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/fast_weights/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/fast_weights/experiment.ipynb)
[](https://app.labml.ai/run/928aadc0846c11eb85710242ac1c0002)
"""
diff --git a/labml_nn/transformers/fast_weights/experiment.ipynb b/labml_nn/transformers/fast_weights/experiment.ipynb
index e6c517dd..85e53567 100644
--- a/labml_nn/transformers/fast_weights/experiment.ipynb
+++ b/labml_nn/transformers/fast_weights/experiment.ipynb
@@ -20,8 +20,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/fast_weights/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/fast_weights/experiment.ipynb) \n",
"\n",
"## Fast Weights Transformer\n",
"\n",
diff --git a/labml_nn/transformers/fast_weights/experiment.py b/labml_nn/transformers/fast_weights/experiment.py
index b2d4a727..eda909fe 100644
--- a/labml_nn/transformers/fast_weights/experiment.py
+++ b/labml_nn/transformers/fast_weights/experiment.py
@@ -10,7 +10,7 @@ This trains a fast weights transformer model for auto-regression.
Here’s a Colab notebook for training a fast weights transformer on the Tiny Shakespeare dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/fast_weights/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/fast_weights/experiment.ipynb)
[](https://app.labml.ai/run/928aadc0846c11eb85710242ac1c0002)
"""
diff --git a/labml_nn/transformers/fast_weights/readme.md b/labml_nn/transformers/fast_weights/readme.md
index 3bf43ac9..0addfefc 100644
--- a/labml_nn/transformers/fast_weights/readme.md
+++ b/labml_nn/transformers/fast_weights/readme.md
@@ -7,5 +7,5 @@ Here is the [annotated implementation](https://nn.labml.ai/transformers/fast_wei
Here are [the training code](https://nn.labml.ai/transformers/fast_weights/experiment.html)
and a notebook for training a fast weights transformer on the Tiny Shakespeare dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/fast_weights/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/fast_weights/experiment.ipynb)
[](https://app.labml.ai/run/928aadc0846c11eb85710242ac1c0002)
diff --git a/labml_nn/transformers/feedback/__init__.py b/labml_nn/transformers/feedback/__init__.py
index ede9f8d9..6ec3a2b2 100644
--- a/labml_nn/transformers/feedback/__init__.py
+++ b/labml_nn/transformers/feedback/__init__.py
@@ -36,7 +36,7 @@ We implemented a custom PyTorch function to improve performance.
Here's [the training code](experiment.html) and a notebook for training a feedback transformer on the Tiny Shakespeare dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/feedback/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/feedback/experiment.ipynb)
[](https://app.labml.ai/run/d8eb9416530a11eb8fb50242ac1c0002)
"""
diff --git a/labml_nn/transformers/feedback/experiment.ipynb b/labml_nn/transformers/feedback/experiment.ipynb
index f606ff1d..5c9665b8 100644
--- a/labml_nn/transformers/feedback/experiment.ipynb
+++ b/labml_nn/transformers/feedback/experiment.ipynb
@@ -6,8 +6,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/feedback/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/feedback/experiment.ipynb) \n",
"\n",
"## Feedback Transformer\n",
"\n",
diff --git a/labml_nn/transformers/feedback/experiment.py b/labml_nn/transformers/feedback/experiment.py
index b2eeafa6..e9741b7f 100644
--- a/labml_nn/transformers/feedback/experiment.py
+++ b/labml_nn/transformers/feedback/experiment.py
@@ -12,7 +12,7 @@ where the keys and values are precalculated.
Here's a Colab notebook for training a feedback transformer on the Tiny Shakespeare dataset.
-[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/feedback/experiment.ipynb)
+[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/feedback/experiment.ipynb)
[](https://app.labml.ai/run/d8eb9416530a11eb8fb50242ac1c0002)
"""
diff --git a/labml_nn/transformers/feedback/readme.md b/labml_nn/transformers/feedback/readme.md
index 12d2ca85..f6b2dc78 100644
--- a/labml_nn/transformers/feedback/readme.md
+++ b/labml_nn/transformers/feedback/readme.md
@@ -29,7 +29,7 @@ We implemented a custom PyTorch function to improve performance.
Here's [the training code](experiment.html) and a notebook for training a feedback transformer on the Tiny Shakespeare dataset.
-[Colab Notebook](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/feedback/experiment.ipynb)
+[Colab Notebook](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/feedback/experiment.ipynb)
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/feedback/experiment.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/feedback/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/d8eb9416530a11eb8fb50242ac1c0002)
diff --git a/labml_nn/transformers/glu_variants/simple.ipynb b/labml_nn/transformers/glu_variants/simple.ipynb
index b03fcf77..34e59e96 100644
--- a/labml_nn/transformers/glu_variants/simple.ipynb
+++ b/labml_nn/transformers/glu_variants/simple.ipynb
@@ -21,8 +21,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/glu_variants/simple.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/glu_variants/simple.ipynb) \n",
"\n",
"## Gated Linear Units and Variants\n",
"\n",
diff --git a/labml_nn/transformers/glu_variants/simple.py b/labml_nn/transformers/glu_variants/simple.py
index 10c801e9..302f9a3a 100644
--- a/labml_nn/transformers/glu_variants/simple.py
+++ b/labml_nn/transformers/glu_variants/simple.py
@@ -14,7 +14,7 @@ We try different variants for the [position-wise feedforward network](../feed_fo
*This is a simpler implementation that doesn't use the [`labml.configs`](experiment.html) module.
We decided to write a simpler implementation to make it easier for readers who are not familiar with that module.*
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/glu_variants/simple.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/glu_variants/simple.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/86b773f65fc911ebb2ac0242ac1c0002)
"""
import dataclasses
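For reference, the GLU variants that `simple.py` trains replace the position-wise feedforward network's first transformation with a gated product, as in Shazeer (2020). A minimal sketch under that assumption; the class name and constructor here are illustrative, not the `labml` implementation:

```python
import torch
import torch.nn as nn

class GatedFFN(nn.Module):
    """Position-wise FFN whose hidden layer is a gated product of two projections."""

    def __init__(self, d_model: int, d_ff: int, activation: nn.Module = nn.GELU()):
        super().__init__()
        self.activation = activation
        self.gate = nn.Linear(d_model, d_ff)  # half that receives the activation
        self.up = nn.Linear(d_model, d_ff)    # purely linear half
        self.down = nn.Linear(d_ff, d_model)  # project back to the model size

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # e.g. GEGLU: GELU(x W1) * (x W2), then W3 projects back down
        return self.down(self.activation(self.gate(x)) * self.up(x))
```

Choosing `nn.GELU()` gives GEGLU, `nn.SiLU()` gives SwiGLU, `nn.ReLU()` gives ReGLU, and `nn.Sigmoid()` recovers the original GLU.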
diff --git a/labml_nn/transformers/gpt/__init__.py b/labml_nn/transformers/gpt/__init__.py
index 2e6a08d5..e9af05ee 100644
--- a/labml_nn/transformers/gpt/__init__.py
+++ b/labml_nn/transformers/gpt/__init__.py
@@ -28,7 +28,7 @@ For the transformer we reuse the
Here's a notebook for training a GPT model on the Tiny Shakespeare dataset.
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/gpt/experiment.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/gpt/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/0324c6d0562111eba65d0242ac1c0002)
"""
diff --git a/labml_nn/transformers/gpt/experiment.ipynb b/labml_nn/transformers/gpt/experiment.ipynb
index 04424825..3b705c22 100644
--- a/labml_nn/transformers/gpt/experiment.ipynb
+++ b/labml_nn/transformers/gpt/experiment.ipynb
@@ -20,8 +20,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/gpt/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/gpt/experiment.ipynb) \n",
"\n",
"## Training a model with GPT architecture\n",
"\n",
diff --git a/labml_nn/transformers/switch/__init__.py b/labml_nn/transformers/switch/__init__.py
index 4ebbd502..fa74705b 100644
--- a/labml_nn/transformers/switch/__init__.py
+++ b/labml_nn/transformers/switch/__init__.py
@@ -33,7 +33,7 @@ discusses dropping tokens when routing is not balanced.
Here's [the training code](experiment.html) and a notebook for training a switch transformer on the Tiny Shakespeare dataset.
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/switch/experiment.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/switch/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/c4656c605b9311eba13d0242ac1c0002)
"""
diff --git a/labml_nn/transformers/switch/experiment.ipynb b/labml_nn/transformers/switch/experiment.ipynb
index 095ea783..30d00409 100644
--- a/labml_nn/transformers/switch/experiment.ipynb
+++ b/labml_nn/transformers/switch/experiment.ipynb
@@ -20,8 +20,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/switch/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/switch/experiment.ipynb) \n",
"\n",
"## Switch Transformer\n",
"\n",
diff --git a/labml_nn/transformers/switch/readme.md b/labml_nn/transformers/switch/readme.md
index 2f47406e..72780f30 100644
--- a/labml_nn/transformers/switch/readme.md
+++ b/labml_nn/transformers/switch/readme.md
@@ -26,5 +26,5 @@ discusses dropping tokens when routing is not balanced.
Here's [the training code](experiment.html) and a notebook for training a switch transformer on the Tiny Shakespeare dataset.
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/switch/experiment.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/switch/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/c4656c605b9311eba13d0242ac1c0002)
diff --git a/labml_nn/transformers/xl/__init__.py b/labml_nn/transformers/xl/__init__.py
index b80b2cca..b37ad7dd 100644
--- a/labml_nn/transformers/xl/__init__.py
+++ b/labml_nn/transformers/xl/__init__.py
@@ -28,7 +28,7 @@ Annotated implementation of relative multi-headed attention is in [`relative_mha
Here's [the training code](experiment.html) and a notebook for training a Transformer XL model on the Tiny Shakespeare dataset.
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/xl/experiment.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/xl/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/d3b6760c692e11ebb6a70242ac1c0002)
"""
diff --git a/labml_nn/transformers/xl/experiment.ipynb b/labml_nn/transformers/xl/experiment.ipynb
index b8ea66bd..9ca21fa8 100644
--- a/labml_nn/transformers/xl/experiment.ipynb
+++ b/labml_nn/transformers/xl/experiment.ipynb
@@ -20,8 +20,8 @@
"id": "AYV_dMVDxyc2"
},
"source": [
- "[](https://github.com/lab-ml/nn)\n",
- "[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/xl/experiment.ipynb) \n",
+ "[](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
+ "[](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/xl/experiment.ipynb) \n",
"\n",
"## Transformer XL\n",
"\n",
diff --git a/labml_nn/transformers/xl/readme.md b/labml_nn/transformers/xl/readme.md
index c10342c5..24d50a3a 100644
--- a/labml_nn/transformers/xl/readme.md
+++ b/labml_nn/transformers/xl/readme.md
@@ -20,5 +20,5 @@ Annotated implementation of relative multi-headed attention is in [`relative_mha
Here's [the training code](https://nn.labml.ai/transformers/xl/experiment.html) and a notebook for training a Transformer XL model on the Tiny Shakespeare dataset.
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/transformers/xl/experiment.ipynb)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/xl/experiment.ipynb)
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/d3b6760c692e11ebb6a70242ac1c0002)