mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-14 17:41:37 +08:00
📚 group norm links
This commit is contained in:
@ -133,8 +133,8 @@ $m$ is the size of the set $\mathcal{S}_i$ which is same for all $i$.</p>
|
|||||||
<p>Group normalization normalizes values of the same sample and the same group of channels together.</p>
|
<p>Group normalization normalizes values of the same sample and the same group of channels together.</p>
|
||||||
<p>Here’s a <a href="experiment.html">CIFAR 10 classification model</a> that uses group normalization.</p>
|
<p>Here’s a <a href="experiment.html">CIFAR 10 classification model</a> that uses group normalization.</p>
|
||||||
<p><a href="https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg" /></a>
|
<p><a href="https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg" /></a>
|
||||||
<a href="https://app.labml.ai/run/011254fe647011ebbb8e0242ac1c0002"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a>
|
<a href="https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002"><img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen" /></a>
|
||||||
<a href="https://app.labml.ai/run/011254fe647011ebbb8e0242ac1c0002"><img alt="WandB" src="https://img.shields.io/badge/wandb-run-yellow" /></a></p>
|
<a href="https://wandb.ai/vpj/cifar10/runs/310etthp"><img alt="WandB" src="https://img.shields.io/badge/wandb-run-yellow" /></a></p>
|
||||||
</div>
|
</div>
|
||||||
<div class='code'>
|
<div class='code'>
|
||||||
<div class="highlight"><pre><span class="lineno">87</span><span></span><span class="kn">import</span> <span class="nn">torch</span>
|
<div class="highlight"><pre><span class="lineno">87</span><span></span><span class="kn">import</span> <span class="nn">torch</span>
|
||||||
|
@ -79,8 +79,8 @@ Group normalization normalizes values of the same sample and the same group of c
|
|||||||
Here's a [CIFAR 10 classification model](experiment.html) that uses group normalization.
|
Here's a [CIFAR 10 classification model](experiment.html) that uses group normalization.
|
||||||
|
|
||||||
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb)
|
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb)
|
||||||
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/011254fe647011ebbb8e0242ac1c0002)
|
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002)
|
||||||
[![WandB](https://img.shields.io/badge/wandb-run-yellow)](https://app.labml.ai/run/011254fe647011ebbb8e0242ac1c0002)
|
[![WandB](https://img.shields.io/badge/wandb-run-yellow)](https://wandb.ai/vpj/cifar10/runs/310etthp)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
620
labml_nn/normalization/group_norm/experiment.ipynb
Normal file
620
labml_nn/normalization/group_norm/experiment.ipynb
Normal file
@ -0,0 +1,620 @@
|
|||||||
|
{
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 0,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"name": "Group Norm",
|
||||||
|
"provenance": [],
|
||||||
|
"collapsed_sections": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"name": "python3",
|
||||||
|
"display_name": "Python 3"
|
||||||
|
},
|
||||||
|
"accelerator": "GPU",
|
||||||
|
"widgets": {
|
||||||
|
"application/vnd.jupyter.widget-state+json": {
|
||||||
|
"14841e99103e41f69dd9b709301d3204": {
|
||||||
|
"model_module": "@jupyter-widgets/controls",
|
||||||
|
"model_name": "HBoxModel",
|
||||||
|
"state": {
|
||||||
|
"_view_name": "HBoxView",
|
||||||
|
"_dom_classes": [],
|
||||||
|
"_model_name": "HBoxModel",
|
||||||
|
"_view_module": "@jupyter-widgets/controls",
|
||||||
|
"_model_module_version": "1.5.0",
|
||||||
|
"_view_count": null,
|
||||||
|
"_view_module_version": "1.5.0",
|
||||||
|
"box_style": "",
|
||||||
|
"layout": "IPY_MODEL_60f10c7bff5c4eea845a14f3f3075e8d",
|
||||||
|
"_model_module": "@jupyter-widgets/controls",
|
||||||
|
"children": [
|
||||||
|
"IPY_MODEL_cf2f7c0f10454901bc5b48872b364dbf",
|
||||||
|
"IPY_MODEL_8755fc8b0f6b40b3b08822e0a705d403"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"60f10c7bff5c4eea845a14f3f3075e8d": {
|
||||||
|
"model_module": "@jupyter-widgets/base",
|
||||||
|
"model_name": "LayoutModel",
|
||||||
|
"state": {
|
||||||
|
"_view_name": "LayoutView",
|
||||||
|
"grid_template_rows": null,
|
||||||
|
"right": null,
|
||||||
|
"justify_content": null,
|
||||||
|
"_view_module": "@jupyter-widgets/base",
|
||||||
|
"overflow": null,
|
||||||
|
"_model_module_version": "1.2.0",
|
||||||
|
"_view_count": null,
|
||||||
|
"flex_flow": null,
|
||||||
|
"width": null,
|
||||||
|
"min_width": null,
|
||||||
|
"border": null,
|
||||||
|
"align_items": null,
|
||||||
|
"bottom": null,
|
||||||
|
"_model_module": "@jupyter-widgets/base",
|
||||||
|
"top": null,
|
||||||
|
"grid_column": null,
|
||||||
|
"overflow_y": null,
|
||||||
|
"overflow_x": null,
|
||||||
|
"grid_auto_flow": null,
|
||||||
|
"grid_area": null,
|
||||||
|
"grid_template_columns": null,
|
||||||
|
"flex": null,
|
||||||
|
"_model_name": "LayoutModel",
|
||||||
|
"justify_items": null,
|
||||||
|
"grid_row": null,
|
||||||
|
"max_height": null,
|
||||||
|
"align_content": null,
|
||||||
|
"visibility": null,
|
||||||
|
"align_self": null,
|
||||||
|
"height": null,
|
||||||
|
"min_height": null,
|
||||||
|
"padding": null,
|
||||||
|
"grid_auto_rows": null,
|
||||||
|
"grid_gap": null,
|
||||||
|
"max_width": null,
|
||||||
|
"order": null,
|
||||||
|
"_view_module_version": "1.2.0",
|
||||||
|
"grid_template_areas": null,
|
||||||
|
"object_position": null,
|
||||||
|
"object_fit": null,
|
||||||
|
"grid_auto_columns": null,
|
||||||
|
"margin": null,
|
||||||
|
"display": null,
|
||||||
|
"left": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cf2f7c0f10454901bc5b48872b364dbf": {
|
||||||
|
"model_module": "@jupyter-widgets/controls",
|
||||||
|
"model_name": "FloatProgressModel",
|
||||||
|
"state": {
|
||||||
|
"_view_name": "ProgressView",
|
||||||
|
"style": "IPY_MODEL_c6c192b58fa242008fbc2983c7866c5f",
|
||||||
|
"_dom_classes": [],
|
||||||
|
"description": "",
|
||||||
|
"_model_name": "FloatProgressModel",
|
||||||
|
"bar_style": "success",
|
||||||
|
"max": 170498071,
|
||||||
|
"_view_module": "@jupyter-widgets/controls",
|
||||||
|
"_model_module_version": "1.5.0",
|
||||||
|
"value": 170498071,
|
||||||
|
"_view_count": null,
|
||||||
|
"_view_module_version": "1.5.0",
|
||||||
|
"orientation": "horizontal",
|
||||||
|
"min": 0,
|
||||||
|
"description_tooltip": null,
|
||||||
|
"_model_module": "@jupyter-widgets/controls",
|
||||||
|
"layout": "IPY_MODEL_6e5c4becab6b40aaafce1a4575d3199c"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"8755fc8b0f6b40b3b08822e0a705d403": {
|
||||||
|
"model_module": "@jupyter-widgets/controls",
|
||||||
|
"model_name": "HTMLModel",
|
||||||
|
"state": {
|
||||||
|
"_view_name": "HTMLView",
|
||||||
|
"style": "IPY_MODEL_328dbbbc3cdb4163896913308059c23c",
|
||||||
|
"_dom_classes": [],
|
||||||
|
"description": "",
|
||||||
|
"_model_name": "HTMLModel",
|
||||||
|
"placeholder": "",
|
||||||
|
"_view_module": "@jupyter-widgets/controls",
|
||||||
|
"_model_module_version": "1.5.0",
|
||||||
|
"value": " 170499072/? [00:03<00:00, 54808451.08it/s]",
|
||||||
|
"_view_count": null,
|
||||||
|
"_view_module_version": "1.5.0",
|
||||||
|
"description_tooltip": null,
|
||||||
|
"_model_module": "@jupyter-widgets/controls",
|
||||||
|
"layout": "IPY_MODEL_3bff44b4205f40119715fae60d4a04a9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"c6c192b58fa242008fbc2983c7866c5f": {
|
||||||
|
"model_module": "@jupyter-widgets/controls",
|
||||||
|
"model_name": "ProgressStyleModel",
|
||||||
|
"state": {
|
||||||
|
"_view_name": "StyleView",
|
||||||
|
"_model_name": "ProgressStyleModel",
|
||||||
|
"description_width": "initial",
|
||||||
|
"_view_module": "@jupyter-widgets/base",
|
||||||
|
"_model_module_version": "1.5.0",
|
||||||
|
"_view_count": null,
|
||||||
|
"_view_module_version": "1.2.0",
|
||||||
|
"bar_color": null,
|
||||||
|
"_model_module": "@jupyter-widgets/controls"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"6e5c4becab6b40aaafce1a4575d3199c": {
|
||||||
|
"model_module": "@jupyter-widgets/base",
|
||||||
|
"model_name": "LayoutModel",
|
||||||
|
"state": {
|
||||||
|
"_view_name": "LayoutView",
|
||||||
|
"grid_template_rows": null,
|
||||||
|
"right": null,
|
||||||
|
"justify_content": null,
|
||||||
|
"_view_module": "@jupyter-widgets/base",
|
||||||
|
"overflow": null,
|
||||||
|
"_model_module_version": "1.2.0",
|
||||||
|
"_view_count": null,
|
||||||
|
"flex_flow": null,
|
||||||
|
"width": null,
|
||||||
|
"min_width": null,
|
||||||
|
"border": null,
|
||||||
|
"align_items": null,
|
||||||
|
"bottom": null,
|
||||||
|
"_model_module": "@jupyter-widgets/base",
|
||||||
|
"top": null,
|
||||||
|
"grid_column": null,
|
||||||
|
"overflow_y": null,
|
||||||
|
"overflow_x": null,
|
||||||
|
"grid_auto_flow": null,
|
||||||
|
"grid_area": null,
|
||||||
|
"grid_template_columns": null,
|
||||||
|
"flex": null,
|
||||||
|
"_model_name": "LayoutModel",
|
||||||
|
"justify_items": null,
|
||||||
|
"grid_row": null,
|
||||||
|
"max_height": null,
|
||||||
|
"align_content": null,
|
||||||
|
"visibility": null,
|
||||||
|
"align_self": null,
|
||||||
|
"height": null,
|
||||||
|
"min_height": null,
|
||||||
|
"padding": null,
|
||||||
|
"grid_auto_rows": null,
|
||||||
|
"grid_gap": null,
|
||||||
|
"max_width": null,
|
||||||
|
"order": null,
|
||||||
|
"_view_module_version": "1.2.0",
|
||||||
|
"grid_template_areas": null,
|
||||||
|
"object_position": null,
|
||||||
|
"object_fit": null,
|
||||||
|
"grid_auto_columns": null,
|
||||||
|
"margin": null,
|
||||||
|
"display": null,
|
||||||
|
"left": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"328dbbbc3cdb4163896913308059c23c": {
|
||||||
|
"model_module": "@jupyter-widgets/controls",
|
||||||
|
"model_name": "DescriptionStyleModel",
|
||||||
|
"state": {
|
||||||
|
"_view_name": "StyleView",
|
||||||
|
"_model_name": "DescriptionStyleModel",
|
||||||
|
"description_width": "",
|
||||||
|
"_view_module": "@jupyter-widgets/base",
|
||||||
|
"_model_module_version": "1.5.0",
|
||||||
|
"_view_count": null,
|
||||||
|
"_view_module_version": "1.2.0",
|
||||||
|
"_model_module": "@jupyter-widgets/controls"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"3bff44b4205f40119715fae60d4a04a9": {
|
||||||
|
"model_module": "@jupyter-widgets/base",
|
||||||
|
"model_name": "LayoutModel",
|
||||||
|
"state": {
|
||||||
|
"_view_name": "LayoutView",
|
||||||
|
"grid_template_rows": null,
|
||||||
|
"right": null,
|
||||||
|
"justify_content": null,
|
||||||
|
"_view_module": "@jupyter-widgets/base",
|
||||||
|
"overflow": null,
|
||||||
|
"_model_module_version": "1.2.0",
|
||||||
|
"_view_count": null,
|
||||||
|
"flex_flow": null,
|
||||||
|
"width": null,
|
||||||
|
"min_width": null,
|
||||||
|
"border": null,
|
||||||
|
"align_items": null,
|
||||||
|
"bottom": null,
|
||||||
|
"_model_module": "@jupyter-widgets/base",
|
||||||
|
"top": null,
|
||||||
|
"grid_column": null,
|
||||||
|
"overflow_y": null,
|
||||||
|
"overflow_x": null,
|
||||||
|
"grid_auto_flow": null,
|
||||||
|
"grid_area": null,
|
||||||
|
"grid_template_columns": null,
|
||||||
|
"flex": null,
|
||||||
|
"_model_name": "LayoutModel",
|
||||||
|
"justify_items": null,
|
||||||
|
"grid_row": null,
|
||||||
|
"max_height": null,
|
||||||
|
"align_content": null,
|
||||||
|
"visibility": null,
|
||||||
|
"align_self": null,
|
||||||
|
"height": null,
|
||||||
|
"min_height": null,
|
||||||
|
"padding": null,
|
||||||
|
"grid_auto_rows": null,
|
||||||
|
"grid_gap": null,
|
||||||
|
"max_width": null,
|
||||||
|
"order": null,
|
||||||
|
"_view_module_version": "1.2.0",
|
||||||
|
"grid_template_areas": null,
|
||||||
|
"object_position": null,
|
||||||
|
"object_fit": null,
|
||||||
|
"grid_auto_columns": null,
|
||||||
|
"margin": null,
|
||||||
|
"display": null,
|
||||||
|
"left": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "AYV_dMVDxyc2"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"[](https://github.com/lab-ml/nn)\n",
|
||||||
|
"[](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb) \n",
|
||||||
|
"\n",
|
||||||
|
"## Group Norm - CIFAR 10\n",
|
||||||
|
"\n",
|
||||||
|
"This is an experiment training a model with group norm to classify the CIFAR-10 dataset."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "AahG_i2y5tY9"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Install the `labml-nn` package and, optionally, the `wandb` package for experiment stats."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"id": "ZCzmCrAIVg0L"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"!pip install labml-nn wandb"
|
||||||
|
],
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "SE2VUQ6L5zxI"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Imports"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"id": "0hJXx_g0wS2C"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"import torch.nn as nn\n",
|
||||||
|
"\n",
|
||||||
|
"from labml import experiment\n",
|
||||||
|
"from labml_nn.normalization.group_norm.experiment import Configs"
|
||||||
|
],
|
||||||
|
"execution_count": 6,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "Lpggo0wM6qb-"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Create an experiment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"id": "bFcr9k-l4cAg"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"experiment.create(name=\"cifar10\", comment=\"group norm\")"
|
||||||
|
],
|
||||||
|
"execution_count": 7,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "-OnHLi626tJt"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Initialize configurations"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"id": "Piz0c5f44hRo"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"conf = Configs()"
|
||||||
|
],
|
||||||
|
"execution_count": 8,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "wwMzCqpD6vkL"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Set experiment configurations and assign a configurations dictionary to override configurations"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 17
|
||||||
|
},
|
||||||
|
"id": "e6hmQhTw4nks",
|
||||||
|
"outputId": "50ad9e07-84f4-47cf-9d26-034448eb611b"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"experiment.configs(conf, {\n",
|
||||||
|
" 'optimizer.optimizer': 'Adam',\n",
|
||||||
|
" 'optimizer.learning_rate': 2.5e-4,\n",
|
||||||
|
"})"
|
||||||
|
],
|
||||||
|
"execution_count": 9,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "display_data",
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<pre style=\"overflow-x: scroll;\"></pre>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.HTML object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "KJZRf8527GxL"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Start the experiment and run the training loop."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/",
|
||||||
|
"height": 882,
|
||||||
|
"referenced_widgets": [
|
||||||
|
"14841e99103e41f69dd9b709301d3204",
|
||||||
|
"60f10c7bff5c4eea845a14f3f3075e8d",
|
||||||
|
"cf2f7c0f10454901bc5b48872b364dbf",
|
||||||
|
"8755fc8b0f6b40b3b08822e0a705d403",
|
||||||
|
"c6c192b58fa242008fbc2983c7866c5f",
|
||||||
|
"6e5c4becab6b40aaafce1a4575d3199c",
|
||||||
|
"328dbbbc3cdb4163896913308059c23c",
|
||||||
|
"3bff44b4205f40119715fae60d4a04a9"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"id": "aIAWo7Fw5DR8",
|
||||||
|
"outputId": "2e82cce8-eaad-4cab-88d3-efd5d095b5b7"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"with experiment.start():\n",
|
||||||
|
" conf.run()"
|
||||||
|
],
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "display_data",
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<pre style=\"overflow-x: scroll;\">\n",
|
||||||
|
"<strong><span style=\"text-decoration: underline\">cifar10</span></strong>: <span style=\"color: #208FFB\">081d950aa4e011eb8f9f0242ac1c0002</span>\n",
|
||||||
|
"\t<strong><span style=\"color: #DDB62B\">group norm</span></strong>\n",
|
||||||
|
"\t[dirty]: <strong><span style=\"color: #DDB62B\">\"\"</span></strong>\n",
|
||||||
|
"<span style=\"color: #C5C1B4\"></span>\n",
|
||||||
|
"<span style=\"color: #C5C1B4\">--------------------------------------------------</span><span style=\"color: #DDB62B\"><strong><span style=\"text-decoration: underline\"></span></strong></span>\n",
|
||||||
|
"<span style=\"color: #DDB62B\"><strong><span style=\"text-decoration: underline\">LABML WARNING</span></strong></span>\n",
|
||||||
|
"<span style=\"color: #DDB62B\"><strong><span style=\"text-decoration: underline\"></span></strong></span>LabML App Warning: <span style=\"color: #60C6C8\">empty_token: </span><strong>Please create a valid token at https://app.labml.ai.</strong>\n",
|
||||||
|
"<strong>Click on the experiment link to monitor the experiment and add it to your experiments list.</strong><span style=\"color: #C5C1B4\"></span>\n",
|
||||||
|
"<span style=\"color: #C5C1B4\">--------------------------------------------------</span>\n",
|
||||||
|
"<span style=\"color: #208FFB\">Monitor experiment at </span><a href='https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002' target='blank'>https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002</a>\n",
|
||||||
|
"Initialize...\n",
|
||||||
|
" Prepare mode<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t4.29ms</span>\n",
|
||||||
|
" Prepare model...\n",
|
||||||
|
" Prepare device...\n",
|
||||||
|
" Prepare device_info<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t67.86ms</span>\n",
|
||||||
|
" Prepare device<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t79.38ms</span>\n",
|
||||||
|
" Prepare model<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t10,940.55ms</span>\n",
|
||||||
|
"Initialize<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t11,069.97ms</span>\n",
|
||||||
|
"Prepare validator...\n",
|
||||||
|
" Prepare valid_loader...\n",
|
||||||
|
" Prepare valid_dataset...\n",
|
||||||
|
" Prepare dataset_transforms<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t8.51ms</span>\n",
|
||||||
|
" Prepare valid_dataset<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t4,699.14ms</span>\n",
|
||||||
|
" Prepare valid_loader<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t4,813.97ms</span>\n",
|
||||||
|
"Prepare validator<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t4,920.79ms</span>\n",
|
||||||
|
"Prepare trainer...\n",
|
||||||
|
" Prepare train_loader...\n",
|
||||||
|
" Prepare train_dataset<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t978.39ms</span>\n",
|
||||||
|
" Prepare train_loader<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t1,079.91ms</span>\n",
|
||||||
|
"Prepare trainer<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t1,133.44ms</span>\n",
|
||||||
|
"Prepare training_loop...\n",
|
||||||
|
" Prepare loop_count<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t47.45ms</span>\n",
|
||||||
|
"Prepare training_loop<span style=\"color: #00A250\">...[DONE]</span><span style=\"color: #208FFB\">\t288.33ms</span>\n",
|
||||||
|
"<strong><span style=\"color: #DDB62B\"> 50,000: </span></strong>Train:<span style=\"color: #C5C1B4\"> 100%</span><span style=\"color: #208FFB\"> 52,404ms </span>Valid:<span style=\"color: #C5C1B4\"> 100%</span><span style=\"color: #208FFB\"> 4,279ms </span> loss.train: <span style=\"color: #C5C1B4\"> 2.34893</span> accuracy.train: <span style=\"color: #C5C1B4\">0.220780</span> loss.valid: <span style=\"color: #C5C1B4\"> 1.82418</span> accuracy.valid: <span style=\"color: #C5C1B4\">0.224600</span> <span style=\"color: #208FFB\">57,111ms</span><span style=\"color: #D160C4\"> 0:00m/ 0:08m </span>\n",
|
||||||
|
"<strong><span style=\"color: #DDB62B\"> 89,168: </span></strong>Train:<span style=\"color: #C5C1B4\"> 78%</span><span style=\"color: #208FFB\"> 57,275ms </span>Valid:<span style=\"color: #C5C1B4\"> 70%</span><span style=\"color: #208FFB\"> 4,571ms </span> loss.train: <strong> 1.67616</strong> accuracy.train: <strong>0.300985</strong> loss.valid: <span style=\"color: #C5C1B4\"> 1.66962</span> accuracy.valid: <span style=\"color: #C5C1B4\">0.288923</span> <span style=\"color: #208FFB\">57,111ms</span><span style=\"color: #D160C4\"> 0:01m/ 0:07m </span></pre>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.HTML object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "display_data",
|
||||||
|
"data": {
|
||||||
|
"application/javascript": [
|
||||||
|
"\n",
|
||||||
|
" window._wandbApiKey = new Promise((resolve, reject) => {\n",
|
||||||
|
" function loadScript(url) {\n",
|
||||||
|
" return new Promise(function(resolve, reject) {\n",
|
||||||
|
" let newScript = document.createElement(\"script\");\n",
|
||||||
|
" newScript.onerror = reject;\n",
|
||||||
|
" newScript.onload = resolve;\n",
|
||||||
|
" document.body.appendChild(newScript);\n",
|
||||||
|
" newScript.src = url;\n",
|
||||||
|
" });\n",
|
||||||
|
" }\n",
|
||||||
|
" loadScript(\"https://cdn.jsdelivr.net/npm/postmate/build/postmate.min.js\").then(() => {\n",
|
||||||
|
" const iframe = document.createElement('iframe')\n",
|
||||||
|
" iframe.style.cssText = \"width:0;height:0;border:none\"\n",
|
||||||
|
" document.body.appendChild(iframe)\n",
|
||||||
|
" const handshake = new Postmate({\n",
|
||||||
|
" container: iframe,\n",
|
||||||
|
" url: 'https://wandb.ai/authorize'\n",
|
||||||
|
" });\n",
|
||||||
|
" const timeout = setTimeout(() => reject(\"Couldn't auto authenticate\"), 5000)\n",
|
||||||
|
" handshake.then(function(child) {\n",
|
||||||
|
" child.on('authorize', data => {\n",
|
||||||
|
" clearTimeout(timeout)\n",
|
||||||
|
" resolve(data)\n",
|
||||||
|
" });\n",
|
||||||
|
" });\n",
|
||||||
|
" })\n",
|
||||||
|
" });\n",
|
||||||
|
" "
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.Javascript object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n"
|
||||||
|
],
|
||||||
|
"name": "stderr"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"wandb: Paste an API key from your profile and hit enter: ··········\n"
|
||||||
|
],
|
||||||
|
"name": "stdout"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n"
|
||||||
|
],
|
||||||
|
"name": "stderr"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "display_data",
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"\n",
|
||||||
|
" Tracking run with wandb version 0.10.27<br/>\n",
|
||||||
|
" Syncing run <strong style=\"color:#cdcd00\">firm-snowflake-1</strong> to <a href=\"https://wandb.ai\" target=\"_blank\">Weights & Biases</a> <a href=\"https://docs.wandb.com/integrations/jupyter.html\" target=\"_blank\">(Documentation)</a>.<br/>\n",
|
||||||
|
" Project page: <a href=\"https://wandb.ai/vpj/cifar10\" target=\"_blank\">https://wandb.ai/vpj/cifar10</a><br/>\n",
|
||||||
|
" Run page: <a href=\"https://wandb.ai/vpj/cifar10/runs/310etthp\" target=\"_blank\">https://wandb.ai/vpj/cifar10/runs/310etthp</a><br/>\n",
|
||||||
|
" Run data is saved locally in <code>/content/logs/cifar10/081d950aa4e011eb8f9f0242ac1c0002/wandb/run-20210424_093315-310etthp</code><br/><br/>\n",
|
||||||
|
" "
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.core.display.HTML object>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/data/cifar-10-python.tar.gz\n"
|
||||||
|
],
|
||||||
|
"name": "stdout"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "display_data",
|
||||||
|
"data": {
|
||||||
|
"application/vnd.jupyter.widget-view+json": {
|
||||||
|
"model_id": "14841e99103e41f69dd9b709301d3204",
|
||||||
|
"version_minor": 0,
|
||||||
|
"version_major": 2
|
||||||
|
},
|
||||||
|
"text/plain": [
|
||||||
|
"HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
"Extracting /content/data/cifar-10-python.tar.gz to /content/data\n",
|
||||||
|
"Files already downloaded and verified\n"
|
||||||
|
],
|
||||||
|
"name": "stdout"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"metadata": {
|
||||||
|
"id": "oBXXlP2b7XZO"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
76
labml_nn/normalization/group_norm/readme.md
Normal file
76
labml_nn/normalization/group_norm/readme.md
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
# [Group Normalization](https://nn.labml.ai/normalization/group_norm/index.html)
|
||||||
|
|
||||||
|
This is a [PyTorch](https://pytorch.org) implementation of
|
||||||
|
the paper [Group Normalization](https://arxiv.org/abs/1803.08494).
|
||||||
|
|
||||||
|
[Batch Normalization](https://nn.labml.ai/normalization/batch_norm/index.html) works well for sufficiently large batch sizes,
|
||||||
|
but does not perform well for small batch sizes, because it normalizes across the batch.
|
||||||
|
Training large models with large batch sizes is not possible due to the memory capacity of the
|
||||||
|
devices.
|
||||||
|
|
||||||
|
This paper introduces Group Normalization, which normalizes a set of features together as a group.
|
||||||
|
This is based on the observation that classical features such as
|
||||||
|
[SIFT](https://en.wikipedia.org/wiki/Scale-invariant_feature_transform) and
|
||||||
|
[HOG](https://en.wikipedia.org/wiki/Histogram_of_oriented_gradients) are group-wise features.
|
||||||
|
The paper proposes dividing feature channels into groups and then separately normalizing
|
||||||
|
all channels within each group.
|
||||||
|
|
||||||
|
## Formulation
|
||||||
|
|
||||||
|
All normalization layers can be defined by the following computation.
|
||||||
|
|
||||||
|
$$\hat{x}_i = \frac{1}{\sigma_i} (x_i - \mu_i)$$
|
||||||
|
|
||||||
|
where $x$ is the tensor representing the batch,
|
||||||
|
and $i$ is the index of a single value.
|
||||||
|
For instance, when the input is a batch of 2D images,
|
||||||
|
$i = (i_N, i_C, i_H, i_W)$ is a 4-d vector for indexing
|
||||||
|
the image within the batch, the feature channel, the vertical coordinate, and the horizontal coordinate.
|
||||||
|
$\mu_i$ and $\sigma_i$ are mean and standard deviation.
|
||||||
|
|
||||||
|
\begin{align}
|
||||||
|
\mu_i &= \frac{1}{m} \sum_{k \in \mathcal{S}_i} x_k \\
|
||||||
|
\sigma_i &= \sqrt{\frac{1}{m} \sum_{k \in \mathcal{S}_i} (x_k - \mu_i)^2 + \epsilon}
|
||||||
|
\end{align}
|
||||||
|
|
||||||
|
$\mathcal{S}_i$ is the set of indexes across which the mean and standard deviation
|
||||||
|
are calculated for index $i$.
|
||||||
|
$m$ is the size of the set $\mathcal{S}_i$, which is the same for all $i$.
|
||||||
|
|
||||||
|
The definition of $\mathcal{S}_i$ is different for
|
||||||
|
[Batch normalization](https://nn.labml.ai/normalization/batch_norm/index.html),
|
||||||
|
[Layer normalization](https://nn.labml.ai/normalization/layer_norm/index.html), and
|
||||||
|
[Instance normalization](https://nn.labml.ai/normalization/instance_norm/index.html).
|
||||||
|
|
||||||
|
### [Batch Normalization](https://nn.labml.ai/normalization/batch_norm/index.html)
|
||||||
|
|
||||||
|
$$\mathcal{S}_i = \{k | k_C = i_C\}$$
|
||||||
|
|
||||||
|
The values that share the same feature channel are normalized together.
|
||||||
|
|
||||||
|
### [Layer Normalization](https://nn.labml.ai/normalization/layer_norm/index.html)
|
||||||
|
|
||||||
|
$$\mathcal{S}_i = \{k | k_N = i_N\}$$
|
||||||
|
|
||||||
|
The values from the same sample in the batch are normalized together.
|
||||||
|
|
||||||
|
### [Instance Normalization](https://nn.labml.ai/normalization/instance_norm/index.html)
|
||||||
|
|
||||||
|
$$\mathcal{S}_i = \{k | k_N = i_N, k_C = i_C\}$$
|
||||||
|
|
||||||
|
The values from the same sample and same feature channel are normalized together.
|
||||||
|
|
||||||
|
### Group Normalization
|
||||||
|
|
||||||
|
$$\mathcal{S}_i = \{k | k_N = i_N,
|
||||||
|
\bigg \lfloor \frac{k_C}{C/G} \bigg \rfloor = \bigg \lfloor \frac{i_C}{C/G} \bigg \rfloor\}$$
|
||||||
|
|
||||||
|
where $G$ is the number of groups and $C$ is the number of channels.
|
||||||
|
|
||||||
|
Group normalization normalizes values of the same sample and the same group of channels together.
|
||||||
|
|
||||||
|
Here's a [CIFAR 10 classification model](https://nn.labml.ai/normalization/group_norm/experiment.html) that uses group normalization.
|
||||||
|
|
||||||
|
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lab-ml/nn/blob/master/labml_nn/normalization/group_norm/experiment.ipynb)
|
||||||
|
[![View Run](https://img.shields.io/badge/labml-experiment-brightgreen)](https://app.labml.ai/run/081d950aa4e011eb8f9f0242ac1c0002)
|
||||||
|
[![WandB](https://img.shields.io/badge/wandb-run-yellow)](https://wandb.ai/vpj/cifar10/runs/310etthp)
|
Reference in New Issue
Block a user