mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-10-30 18:27:03 +08:00
auto regression common exp
@@ -44,13 +44,14 @@ class NLPAutoRegressionConfigs(TrainValidConfigs):

     is_save_models = True

-    loss_func: CrossEntropyLoss()
+    loss_func = CrossEntropyLoss()
+    accuracy = Accuracy()

     def init(self):
         tracker.set_queue("loss.*", 20, True)
         tracker.set_scalar("accuracy.*", True)
         hook_model_outputs(self.mode, self.model, 'model')
-        self.state_modules = [Accuracy()]
+        self.state_modules = [self.accuracy]

     def step(self, batch: any, batch_idx: BatchIndex):
         data, target = batch[0].to(self.device), batch[1].to(self.device)
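This commit factors the common auto-regression experiment setup into the shared NLPAutoRegressionConfigs class; the hunks above and below touch that class, and the later hunks rewrite the HyperLSTM experiment on top of it. The fix in this first hunk is that `loss_func: CrossEntropyLoss()` was only an annotation, so no `loss_func` attribute was ever bound; switching `:` to `=` actually assigns the loss instance, and the new class-level `accuracy` is the same object that goes into `state_modules`. A minimal sketch of the Python semantics behind the one-character change (the class names here are illustrative only):

# Annotation vs. assignment at class level: only the assignment binds a value.
class Annotated:
    loss_func: int()          # an annotation whose "type" is the value 0; no attribute is created

class Assigned:
    loss_func = int()         # binds the attribute loss_func = 0

print(hasattr(Annotated, 'loss_func'))  # False, the old `loss_func: CrossEntropyLoss()` behaved like this
print(hasattr(Assigned, 'loss_func'))   # True, the new `loss_func = CrossEntropyLoss()` behaves like this
print(Annotated.__annotations__)        # {'loss_func': 0}

In labml config classes annotations declare configurable options, so the annotated form with an instance in the annotation slot was presumably unintended.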
@@ -62,8 +63,8 @@ class NLPAutoRegressionConfigs(TrainValidConfigs):
         output, *_ = self.model(data)

         loss = self.loss_func(output, target)
-        self.accuracy_func(output, target)
-        self.accuracy_func.track()
+        self.accuracy(output, target)
+        self.accuracy.track()
         tracker.add("loss.", loss)

         if self.mode.is_train:
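Here the per-batch metric switches from `accuracy_func` to the shared `accuracy` attribute declared in the previous hunk, so a single stateful object is called on every batch and then reports via `track()`. The following sketch is not the labml_helpers `Accuracy` implementation, just an illustration of that call/track pattern with the sequence-first shapes assumed elsewhere in the file:

import torch

class RunningAccuracy:
    """Accumulates token-level accuracy across calls; track() reports the running value."""
    def __init__(self):
        self.correct, self.total = 0, 0

    def __call__(self, output: torch.Tensor, target: torch.Tensor):
        # output: [seq_len, batch_size, n_tokens], target: [seq_len, batch_size]
        pred = output.argmax(dim=-1)
        self.correct += (pred == target).sum().item()
        self.total += target.numel()

    def track(self) -> float:
        # labml's Accuracy would push this to the tracker under "accuracy.*";
        # here we simply return the running ratio.
        return self.correct / max(self.total, 1)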
@@ -88,7 +89,7 @@ class NLPAutoRegressionConfigs(TrainValidConfigs):
             data = self.text.text_to_i(prompt).unsqueeze(-1)
             data = data.to(self.device)
             # Get the model output
-            output = self.model(data)
+            output, *_ = self.model(data)
             # Get the model prediction (greedy)
             output = output.argmax(dim=-1).squeeze()
             # Add the prediction to prompt
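The sampling loop now unpacks the model output because the models driven by this config class return their recurrent state along with the logits (see `return self.generator(res), state` further down); `output, *_ =` keeps the logits and drops everything else. A small illustration with made-up shapes:

import torch

# Hypothetical return value of an autoregressive RNN wrapper: (logits, state).
logits = torch.randn(5, 1, 65)        # [seq_len, batch_size, n_tokens]
state = (torch.zeros(1, 1, 16),)      # whatever recurrent state the model carries

output, *_ = (logits, state)          # output is the logits; the state is ignored
next_token = output.argmax(dim=-1).squeeze()[-1]   # greedy choice at the last position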
@@ -1,18 +1,11 @@
-from typing import Callable, Any
-
 import torch
 import torch.nn as nn
-from labml import lab, experiment, monit, tracker, logger
+from labml import experiment
 from labml.configs import option
-from labml.logger import Text
 from labml.utils.pytorch import get_modules
-from labml_helpers.datasets.text import TextDataset, TextFileDataset, SequentialUnBatchedDataset
-from labml_helpers.metrics.accuracy import Accuracy
 from labml_helpers.module import Module
-from labml_helpers.optimizer import OptimizerConfigs
-from labml_helpers.train_valid import SimpleTrainValidConfigs, BatchIndex
-from torch.utils.data import DataLoader

+from labml_nn.experiments.nlp_autoregression import NLPAutoRegressionConfigs
 from labml_nn.hypernetworks.hyper_lstm import HyperLSTM


@@ -36,28 +29,7 @@ class AutoregressiveModel(Module):
         return self.generator(res), state


-class CrossEntropyLoss(Module):
-    """
-    Cross entropy loss
-    """
-
-    def __init__(self):
-        super().__init__()
-        self.loss = nn.CrossEntropyLoss()
-
-    def __call__(self, outputs, targets):
-        return self.loss(outputs.view(-1, outputs.shape[-1]), targets.view(-1))
-
-
-def transpose_batch(batch):
-    transposed_data = list(zip(*batch))
-    src = torch.stack(transposed_data[0], 1)
-    tgt = torch.stack(transposed_data[1], 1)
-
-    return src, tgt
-
-
-class Configs(SimpleTrainValidConfigs):
+class Configs(NLPAutoRegressionConfigs):
     """
     ## Configurations

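The two deleted helpers encode the sequence-first conventions that the shared NLPAutoRegressionConfigs now supplies: `transpose_batch` stacks samples along dimension 1 to produce `[seq_len, batch_size]` tensors, and the `CrossEntropyLoss` wrapper flattens the time and batch dimensions before calling `nn.CrossEntropyLoss`. A short worked example with toy shapes (all values made up):

import torch
import torch.nn as nn

# Two (src, tgt) samples of length 4, as a DataLoader collate function would receive them.
batch = [(torch.arange(4), torch.arange(4) + 1),
         (torch.arange(4) + 10, torch.arange(4) + 11)]
src = torch.stack([s for s, _ in batch], dim=1)   # [seq_len=4, batch_size=2], sequence-first
tgt = torch.stack([t for _, t in batch], dim=1)   # [4, 2]

# Flattening for the loss: [seq_len, batch_size, n_tokens] -> [seq_len * batch_size, n_tokens]
n_tokens = 16
outputs = torch.randn(4, 2, n_tokens)
loss = nn.CrossEntropyLoss()(outputs.view(-1, n_tokens), tgt.view(-1))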
@@ -65,119 +37,6 @@ class Configs(SimpleTrainValidConfigs):
     """

     model: AutoregressiveModel
-    text: TextDataset
-    batch_size: int = 20
-    seq_len: int = 512
-    n_tokens: int
-    tokenizer: Callable = 'character'
-
-    is_save_models = True
-
-    optimizer: torch.optim.Adam = 'transformer_optimizer'
-
-    accuracy = Accuracy()
-    loss_func = CrossEntropyLoss()
-
-    def init(self):
-        # Create a configurable optimizer.
-        # Parameters like learning rate can be changed by passing a dictionary when starting the experiment.
-        optimizer = OptimizerConfigs()
-        optimizer.parameters = self.model.parameters()
-        optimizer.optimizer = 'Adam'
-        self.optimizer = optimizer
-
-        # Create a sequential data loader for training
-        self.train_loader = DataLoader(SequentialUnBatchedDataset(text=self.text.train,
-                                                                  dataset=self.text,
-                                                                  seq_len=self.seq_len),
-                                       batch_size=self.batch_size,
-                                       collate_fn=transpose_batch,
-                                       shuffle=True)
-
-        # Create a sequential data loader for validation
-        self.valid_loader = DataLoader(SequentialUnBatchedDataset(text=self.text.valid,
-                                                                  dataset=self.text,
-                                                                  seq_len=self.seq_len),
-                                       batch_size=self.batch_size,
-                                       collate_fn=transpose_batch,
-                                       shuffle=True)
-
-        self.state_modules = [self.accuracy]
-
-    def sample(self):
-        """
-        Sampling function to generate samples periodically while training
-        """
-        prompt = 'It is'
-        log = [(prompt, Text.subtle)]
-        # Sample 25 tokens
-        for i in monit.iterate('Sample', 25):
-            # Tokenize the prompt
-            data = self.text.text_to_i(prompt).unsqueeze(-1)
-            data = data.to(self.device)
-            # Get the model output
-            output, state = self.model(data)
-            output = output.cpu()
-            # Get the model prediction (greedy)
-            output = output.argmax(dim=-1).squeeze()
-            # Add the prediction to prompt
-            prompt += self.text.itos[output[-1]]
-            # Add the prediction for logging
-            log += [(self.text.itos[output[-1]], Text.value)]
-
-        logger.log(log)
-
-    def step(self, batch: Any, batch_idx: BatchIndex):
-        """
-        This method is called for each batch
-        """
-        self.model.train(self.mode.is_train)
-
-        # Get data and target labels
-        data, target = batch[0].to(self.device), batch[1].to(self.device)
-
-        if self.mode.is_train:
-            tracker.add_global_step(data.shape[0] * data.shape[1])
-
-        # Run the model
-        output, state = self.model(data)
-
-        # Calculate loss
-        loss = self.loss_func(output, target)
-        # Calculate accuracy
-        self.accuracy(output, target)
-
-        # Log the loss
-        tracker.add("loss.", loss)
-
-        # If we are in training mode, calculate the gradients
-        if self.mode.is_train:
-            loss.backward()
-            self.optimizer.step()
-            if batch_idx.is_last:
-                tracker.add('model', self.model)
-            self.optimizer.zero_grad()
-
-        tracker.save()
-
-
-def character_tokenizer(x: str):
-    return list(x)
-
-
-@option(Configs.tokenizer)
-def character():
-    """
-    Character level tokenizer
-    """
-    return character_tokenizer
-
-
-@option(Configs.text)
-def tiny_shakespeare(c: Configs):
-    return TextFileDataset(
-        lab.get_data_path() / 'tiny_shakespeare.txt', c.tokenizer,
-        url='https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt')


 @option(Configs.model)
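Everything removed here (dataset, tokenizer, data loaders, optimizer setup, sampling and the training step) is now inherited from NLPAutoRegressionConfigs, so an experiment module shrinks to a model definition plus a model option. A rough sketch of that pattern, assuming the shared config class exposes `n_tokens` and `device` as the deleted code did; `MyModel` and its sizes are placeholders, not the actual HyperLSTM wiring:

import torch.nn as nn

from labml.configs import option
from labml_nn.experiments.nlp_autoregression import NLPAutoRegressionConfigs


class MyModel(nn.Module):
    """Placeholder model; returns logits shaped [seq_len, batch_size, n_tokens] and a state."""
    def __init__(self, n_tokens: int, d_model: int = 128):
        super().__init__()
        self.embedding = nn.Embedding(n_tokens, d_model)
        self.rnn = nn.LSTM(d_model, d_model)          # sequence-first by default
        self.generator = nn.Linear(d_model, n_tokens)

    def forward(self, x, state=None):
        res, state = self.rnn(self.embedding(x), state)
        return self.generator(res), state


class Configs(NLPAutoRegressionConfigs):
    model: MyModel          # the only experiment-specific config left in the file


@option(Configs.model)
def my_model(c: Configs):
    # n_tokens and device come from the shared config class.
    return MyModel(c.n_tokens).to(c.device)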
@@ -191,7 +50,7 @@ def autoregressive_model(c: Configs):

 def main():
     # Create experiment
-    experiment.create(name="knn_lm", comment='')
+    experiment.create(name="hyper_lstm", comment='')
     # Create configs
     conf = Configs()
     # Load configurations
@@ -200,6 +59,12 @@ def main():
                        {'tokenizer': 'character',
                         'text': 'tiny_shakespeare',
                         'optimizer.learning_rate': 2.5e-4,
                         'optimizer.optimizer': 'Adam',
+                        'prompt': 'It is',
+                        'prompt_separator': '',
+
+                        'train_loader': 'shuffled_train_loader',
+                        'valid_loader': 'shuffled_valid_loader',
+
                         'seq_len': 512,
                         'epochs': 128,
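The added keys point the shared configs at the sampling prompt and at the shuffled train/valid loaders provided by NLPAutoRegressionConfigs (the old name "knn_lm" in the previous hunk was left over from the experiment this file was copied from). For context, a typical labml launch around this dictionary looks roughly like the following; the `add_pytorch_models`, `experiment.start()` and `conf.run()` lines are the usual pattern in these experiment scripts and are assumed here rather than shown in the diff:

from labml import experiment
from labml.utils.pytorch import get_modules


def main():
    # Create the experiment and the configurations (as in the hunks above)
    experiment.create(name="hyper_lstm", comment='')
    conf = Configs()
    # Override the defaults declared in NLPAutoRegressionConfigs
    experiment.configs(conf,
                       {'tokenizer': 'character',
                        'text': 'tiny_shakespeare',
                        'optimizer.learning_rate': 2.5e-4,
                        'optimizer.optimizer': 'Adam',
                        'prompt': 'It is',
                        'prompt_separator': '',
                        'train_loader': 'shuffled_train_loader',
                        'valid_loader': 'shuffled_valid_loader',
                        'seq_len': 512,
                        'epochs': 128})
    # Register the models and run (assumed standard labml usage, not part of this diff)
    experiment.add_pytorch_models(get_modules(conf))
    with experiment.start():
        conf.run()


if __name__ == '__main__':
    main()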