deep norm fix

2025-08-16 10:51:23 +08:00 · 2022-08-26 15:26:38 +05:30
parent 25ad4d6750
commit 0eb74d7b20
3 changed files with 10 additions and 3 deletions
--- a/labml_nn/normalization/deep_norm/__init__.py
+++ b/labml_nn/normalization/deep_norm/__init__.py
@@ -110,7 +110,7 @@ class DeepNorm(nn.Module):
        :param gx: is the output of the current sub-layer $\mathop{G}_l (x_l, \theta_l)$
        """
        # $$x_{l + 1} = \mathop{LN}\Big( \alpha x_l + \mathop{G}_l \big(x_l, \theta_l \big)\Big)$$
-        return x + self.alpha * gx
+        return self.layer_norm(x + self.alpha * gx)


 class DeepNormTransformerLayer(nn.Module):
--- a/labml_nn/normalization/deep_norm/experiment.py
+++ b/labml_nn/normalization/deep_norm/experiment.py
@@ -8,7 +8,6 @@ summary: >
 # [DeepNorm](index.html) Experiment

 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/normalization/deep_norm/experiment.ipynb)
-[![Open In Comet](https://images.labml.ai/images/comet.svg?experiment=deep_norm&file=experiment)](https://www.comet.ml/labml/deep-norm/61d817f80ff143c8825fba4aacd431d4?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&xAxis=step)
 """

 import copy
@@ -131,7 +130,7 @@ def main():
    #### Create and run the experiment
    """
    # Create experiment
-    experiment.create(name="deep_norm", writers={'screen', 'web_api', 'comet'})
+    experiment.create(name="deep_norm", writers={'screen', 'web_api'})
    # Create configs
    conf = Configs()
    # Override configurations
--- a/labml_nn/normalization/layer_norm/__init__.py
+++ b/labml_nn/normalization/layer_norm/__init__.py
@@ -83,6 +83,14 @@ class LayerNorm(Module):
        """
        super().__init__()

+        # Convert `normalized_shape` to `torch.Size`
+        if isinstance(normalized_shape, int):
+            normalized_shape = torch.Size([normalized_shape])
+        elif isinstance(normalized_shape, list):
+            normalized_shape = torch.Size(normalized_shape)
+        assert isinstance(normalized_shape, torch.Size)
+
+        #
        self.normalized_shape = normalized_shape
        self.eps = eps
        self.elementwise_affine = elementwise_affine