This is an annotated PyTorch experiment to train a gMLP model.

The paper also applies Stochastic Depth regularization, where some layers are removed randomly during training. We have not implemented that here.
This is based on the training loop and configurations for a simple transformer auto-regressive NLP task.
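As a side note, since Stochastic Depth is not part of this experiment, here is a minimal sketch of the idea, assuming a stack of residual gMLP blocks and a hypothetical survival probability parameter (this is illustrative only, not the paper's or the library's implementation):

import torch
import torch.nn as nn


class StochasticDepthStack(nn.Module):
    """Hypothetical stack of residual blocks that randomly skips whole blocks during training."""

    def __init__(self, blocks: nn.ModuleList, survival_prob: float = 0.9):
        super().__init__()
        self.blocks = blocks
        self.survival_prob = survival_prob

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for block in self.blocks:
            # Skip the whole block with probability 1 - survival_prob, during training only.
            # Because each block is assumed to be residual, skipping it leaves x unchanged.
            if self.training and torch.rand(1).item() > self.survival_prob:
                continue
            x = block(x)
        return x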
from labml import experiment
from labml.configs import option
from labml_nn.transformers import TransformerConfigs
from labml_nn.transformers.basic.autoregressive_experiment import Configs as BasicAutoRegressionConfigs
from labml_nn.transformers.gmlp import GMLPBlock
Configurations for the gMLP experiment, extending the basic auto-regressive experiment configurations

class Configs(BasicAutoRegressionConfigs):
Transformer
    transformer: TransformerConfigs = 'gMLP'
gMLP Block
    gmlp: GMLPBlock
d_ffn for the gMLP projection layer
    d_ffn: int = 2048
Create a gMLP block

@option(Configs.gmlp, 'gMLP')
def _gmlp_configs(c: Configs):
    return GMLPBlock(c.d_model, c.d_ffn, c.seq_len)
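GMLPBlock is constructed from the embedding size d_model, the projection size d_ffn, and the sequence length seq_len. As a rough, hedged sketch of what such a block computes (this is not the labml_nn implementation), each position is projected from d_model up to d_ffn, passed through a spatial gating unit that mixes information across the seq_len positions, and projected back to d_model with a residual connection:

import torch
import torch.nn as nn


class GMLPBlockSketch(nn.Module):
    """Illustrative gMLP block; the real GMLPBlock is in labml_nn.transformers.gmlp."""

    def __init__(self, d_model: int, d_ffn: int, seq_len: int):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)
        self.proj_in = nn.Linear(d_model, d_ffn)
        self.activation = nn.GELU()
        # Spatial gating unit: one half of the channels gates the other half
        # after a learned seq_len x seq_len mixing across positions.
        self.spatial_norm = nn.LayerNorm(d_ffn // 2)
        self.spatial_proj = nn.Linear(seq_len, seq_len)
        self.proj_out = nn.Linear(d_ffn // 2, d_model)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [seq_len, batch_size, d_model]
        shortcut = x
        z = self.activation(self.proj_in(self.norm(x)))
        # Split channels into the gated path and the gate
        z1, z2 = z.chunk(2, dim=-1)
        # Mix z2 across the sequence dimension, then use it to gate z1
        z2 = self.spatial_norm(z2)
        z2 = self.spatial_proj(z2.permute(1, 2, 0)).permute(2, 0, 1)
        return shortcut + self.proj_out(z1 * z2)

With the values used below (d_model of $512$, d_ffn of $2048$, seq_len of $256$), such a gating step would learn a $256 \times 256$ position-mixing matrix per block.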
Transformer configurations

@option(Configs.transformer, 'gMLP')
def _transformer_configs(c: Configs):
    conf = TransformerConfigs()
Set the vocabulary sizes for embeddings and generating logits
    conf.n_src_vocab = c.n_tokens
    conf.n_tgt_vocab = c.n_tokens
Set model size
    conf.d_model = c.d_model
Replace the encoder layer with a gMLP layer
    conf.encoder_layer = c.gmlp

    return conf
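TransformerConfigs uses encoder_layer as the prototype for every layer of the encoder, so swapping it for the gMLP block yields an attention-free, all-MLP stack. Conceptually (a hedged sketch, not the labml_nn code), the encoder ends up holding copies of this block:

import copy
import torch.nn as nn


def stack_layers(prototype: nn.Module, n_layers: int) -> nn.ModuleList:
    # Sketch of what a transformer encoder does with its layer prototype:
    # keep n_layers deep copies; here the prototype is the gMLP block,
    # so no self-attention layers remain in the model.
    return nn.ModuleList([copy.deepcopy(prototype) for _ in range(n_layers)])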
def main():
Create experiment
    experiment.create(name="gMLP")
Create configs
    conf = Configs()
Override configurations
    experiment.configs(conf, {
Use a character-level tokenizer
        'tokenizer': 'character',
Prompt separator is blank
        'prompt_separator': '',
Starting prompt for sampling
        'prompt': 'It is ',
Use the Tiny Shakespeare dataset
        'text': 'tiny_shakespeare',
Use a context size of $256$
        'seq_len': 256,
Train for $128$ epochs
        'epochs': 128,
Batch size $32$
        'batch_size': 32,
Switch between training and validation $10$ times per epoch

        'inner_iterations': 10,
Model size
        'd_model': 512,
        'd_ffn': 2048,
Use the Noam optimizer
        'optimizer.optimizer': 'Noam',
        'optimizer.learning_rate': 1.,
    })
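The Noam optimizer follows the learning rate schedule from the original Transformer paper, with 'optimizer.learning_rate' acting as an overall scaling factor (hence the value of $1.$): at step $s$ the rate is roughly $d_{model}^{-0.5} \cdot \min(s^{-0.5}, s \cdot w^{-1.5})$ for warmup $w$. A small sketch of that schedule, assuming the standard form and a hypothetical warmup of $4000$ steps:

def noam_lr(step: int, d_model: int = 512, warmup: int = 4000, factor: float = 1.0) -> float:
    # Learning rate grows linearly for `warmup` steps, then decays as step^-0.5,
    # scaled by d_model^-0.5 and the configured factor.
    step = max(step, 1)
    return factor * d_model ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)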
Set models for saving and loading
    experiment.add_pytorch_models({'model': conf.model})
Start the experiment
    with experiment.start():
Run training
        conf.run()
if __name__ == '__main__':
    main()