Varuna Jayasiri
2023-02-28 08:40:22 +05:30
parent ef7268e89c
commit 1c14551a19
275 changed files with 12791 additions and 3588 deletions

View File

@@ -20,7 +20,7 @@
"<h4>\u2728 <a href=\"recurrent_highway_networks/index.html\">Recurrent Highway Networks</a></h4>\n": "<h4>\u2728 <a href=\"recurrent_highway_networks/index.html\">\u5faa\u73af\u9ad8\u901f\u516c\u8def\u7f51\u7edc</a></h4>\n",
"<h4>\u2728 <a href=\"resnet/index.html\">ResNet</a></h4>\n": "<h4>\u2728 <a href=\"resnet/index.html\">ResNet</a></h4>\n",
"<h4>\u2728 <a href=\"rl/index.html\">Reinforcement Learning</a></h4>\n": "<h4>\u2728 <a href=\"rl/index.html\">\u5f3a\u5316\u5b66\u4e60</a></h4>\n",
"<h4>\u2728 <a href=\"sampling/index.html\">Sampling Techniques</a></h4>\n": "<h4>\u2728 <a href=\"sampling/index.html\">\u91c7\u6837\u6280\u5de7</a></h4>\n",
"<h4>\u2728 <a href=\"sampling/index.html\">Language Model Sampling Techniques</a></h4>\n": "<h4>\u2728 <a href=\"sampling/index.html\">\u8bed\u8a00\u6a21\u578b\u91c7\u6837\u6280\u672f</a></h4>\n",
"<h4>\u2728 <a href=\"scaling/index.html\">Scalable Training/Inference</a></h4>\n": "<h4>\u2728 <a href=\"scaling/index.html\">\u53ef\u6269\u5c55\u7684\u8bad\u7ec3/\u63a8\u7406</a></h4>\n",
"<h4>\u2728 <a href=\"sketch_rnn/index.html\">Sketch RNN</a></h4>\n": "<h4>\u2728 <a href=\"sketch_rnn/index.html\">\u7d20\u63cf RNN</a></h4>\n",
"<h4>\u2728 <a href=\"transformers/index.html\">Transformers</a></h4>\n": "<h4>\u2728 <a href=\"transformers/index.html\">\u53d8\u5f62\u91d1\u521a</a></h4>\n",
@@ -36,7 +36,7 @@
"<ul><li><a href=\"activations/fta/index.html\">Fuzzy Tiling Activations</a></li></ul>\n": "<ul><li><a href=\"activations/fta/index.html\">\u6a21\u7cca\u5e73\u94fa\u6fc0\u6d3b</a></li></ul>\n",
"<ul><li><a href=\"adaptive_computation/ponder_net/index.html\">PonderNet</a></li></ul>\n": "<ul><li><a href=\"adaptive_computation/ponder_net/index.html\">PonderNet</a></li></ul>\n",
"<ul><li><a href=\"cfr/kuhn/index.html\">Kuhn Poker</a></li></ul>\n": "<ul><li><a href=\"cfr/kuhn/index.html\">\u5e93\u6069\u6251\u514b</a></li></ul>\n",
"<ul><li><a href=\"diffusion/ddpm/index.html\">Denoising Diffusion Probabilistic Models (DDPM)</a></li></ul>\n": "<ul><li><a href=\"diffusion/ddpm/index.html\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM)</a></li></ul>\n",
"<ul><li><a href=\"diffusion/ddpm/index.html\">Denoising Diffusion Probabilistic Models (DDPM)</a> </li>\n<li><a href=\"diffusion/stable_diffusion/sampler/ddim.html\">Denoising Diffusion Implicit Models (DDIM)</a> </li>\n<li><a href=\"diffusion/stable_diffusion/latent_diffusion.html\">Latent Diffusion Models</a> </li>\n<li><a href=\"diffusion/stable_diffusion/index.html\">Stable Diffusion</a></li></ul>\n": "<ul><li><a href=\"diffusion/ddpm/index.html\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM)</a></li>\n<li><a href=\"diffusion/stable_diffusion/sampler/ddim.html\">\u964d\u566a\u6269\u6563\u9690\u542b\u6a21\u578b (DDIM)</a></li>\n<li><a href=\"diffusion/stable_diffusion/latent_diffusion.html\">\u6f5c\u5728\u6269\u6563\u6a21\u578b</a></li>\n<li><a href=\"diffusion/stable_diffusion/index.html\">\u7a33\u5b9a\u7684\u6269\u6563</a></li></ul>\n",
"<ul><li><a href=\"gan/original/index.html\">Original GAN</a> </li>\n<li><a href=\"gan/dcgan/index.html\">GAN with deep convolutional network</a> </li>\n<li><a href=\"gan/cycle_gan/index.html\">Cycle GAN</a> </li>\n<li><a href=\"gan/wasserstein/index.html\">Wasserstein GAN</a> </li>\n<li><a href=\"gan/wasserstein/gradient_penalty/index.html\">Wasserstein GAN with Gradient Penalty</a> </li>\n<li><a href=\"gan/stylegan/index.html\">StyleGAN 2</a></li></ul>\n": "<ul><li><a href=\"gan/original/index.html\">\u539f\u88c5 GAN</a></li>\n<li><a href=\"gan/dcgan/index.html\">\u5177\u6709\u6df1\u5ea6\u5377\u79ef\u7f51\u7edc\u7684 GAN</a></li>\n<li><a href=\"gan/cycle_gan/index.html\">\u5faa\u73af\u589e\u76ca</a></li>\n<li><a href=\"gan/wasserstein/index.html\">Wasserstein GAN</a></li>\n<li><a href=\"gan/wasserstein/gradient_penalty/index.html\">Wasserstein GAN \u5e26\u68af\u5ea6\u60e9\u7f5a</a></li>\n<li><a href=\"gan/stylegan/index.html\">StyleGan 2</a></li></ul>\n",
"<ul><li><a href=\"graphs/gat/index.html\">Graph Attention Networks (GAT)</a> </li>\n<li><a href=\"graphs/gatv2/index.html\">Graph Attention Networks v2 (GATv2)</a></li></ul>\n": "<ul><li><a href=\"graphs/gat/index.html\">\u56fe\u5173\u6ce8\u7f51\u7edc (GAT)</a></li>\n<li><a href=\"graphs/gatv2/index.html\">Graph \u6ce8\u610f\u529b\u7f51\u7edc v2 (GATv2)</a></li></ul>\n",
"<ul><li><a href=\"neox/samples/generate.html\">Generate on a 48GB GPU</a> </li>\n<li><a href=\"neox/samples/finetune.html\">Finetune on two 48GB GPUs</a> </li>\n<li><a href=\"neox/utils/llm_int8.html\">LLM.int8()</a></li></ul>\n": "<li><a href=\"neox/samples/generate.html\">\u5728 48GB GPU \u4e0a\u751f\u6210</a></li> <ul>\n<li><a href=\"neox/samples/finetune.html\">\u4e24\u4e2a 48GB GPU \u4e0a\u7684 Finetune</a></li>\n<li><a href=\"neox/utils/llm_int8.html\">llm.int8 ()</a></li></ul>\n",

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,5 @@
{
"<h1><a href=\"index.html\">Fuzzy Tiling Activation</a> Experiment</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/activations/fta/experiment.ipynb\"><span translate=no>_^_0_^_</span></a> <a href=\"https://www.comet.ml/labml/fta/69be11f83693407f82a86dcbb232bcfe?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&viewId=rlJOpXDGtL8zbkcX66R77P5me&xAxis=step\"><span translate=no>_^_1_^_</span></a></p>\n<p>Here we train a transformer that uses <a href=\"index.html\">Fuzzy Tiling Activation</a> in the <a href=\"../../transformers/feed_forward.html\">Feed-Forward Network</a>. We use it for a language model and train it on Tiny Shakespeare dataset for demonstration.</p>\n<p>However, this is probably not the ideal task for FTA, and we believe FTA is more suitable for modeling data with continuous variables.</p>\n": "<h1><a href=\"index.html\">\u6a21\u7cca\u5e73\u94fa\u6fc0\u6d3b</a>\u5b9e\u9a8c</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/activations/fta/experiment.ipynb\"><span translate=no>_^_0_^_</span></a><a href=\"https://www.comet.ml/labml/fta/69be11f83693407f82a86dcbb232bcfe?experiment-tab=chart&showOutliers=true&smoothing=0&transformY=smoothing&viewId=rlJOpXDGtL8zbkcX66R77P5me&xAxis=step\"><span translate=no>_^_1_^_</span></a></p>\n<p>\u5728\u8fd9\u91cc\uff0c\u6211\u4eec\u8bad\u7ec3\u4e00\u4e2a\u5728<a href=\"../../transformers/feed_forward.html\">\u524d\u9988\u7f51\u7edc</a>\u4e2d\u4f7f\u7528<a href=\"index.html\">\u6a21\u7cca\u5e73\u94fa\u6fc0\u6d3b</a>\u7684\u53d8\u538b\u5668\u3002\u6211\u4eec\u7528\u5b83\u6765\u5236\u4f5c\u8bed\u8a00\u6a21\u578b\uff0c\u7136\u540e\u5728 Tiny Shakespeare \u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u5b83\u8fdb\u884c\u6f14\u793a\u3002</p>\n<p>\u4f46\u662f\uff0c\u8fd9\u53ef\u80fd\u4e0d\u662f\u81ea\u7531\u8d38\u6613\u534f\u5b9a\u7684\u7406\u60f3\u4efb\u52a1\uff0c\u6211\u4eec\u8ba4\u4e3a\u81ea\u7531\u8d38\u6613\u534f\u5b9a\u66f4\u9002\u5408\u4f7f\u7528\u8fde\u7eed\u53d8\u91cf\u5bf9\u6570\u636e\u8fdb\u884c\u5efa\u6a21\u3002</p>\n",
"<h1><a href=\"index.html\">Fuzzy Tiling Activation</a> Experiment</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/activations/fta/experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n<p>Here we train a transformer that uses <a href=\"index.html\">Fuzzy Tiling Activation</a> in the <a href=\"../../transformers/feed_forward.html\">Feed-Forward Network</a>. We use it for a language model and train it on Tiny Shakespeare dataset for demonstration.</p>\n<p>However, this is probably not the ideal task for FTA, and we believe FTA is more suitable for modeling data with continuous variables.</p>\n": "<h1><a href=\"index.html\">\u6a21\u7cca\u62fc\u8d34\u6fc0\u6d3b</a>\u5b9e\u9a8c</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/activations/fta/experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n<p>\u5728\u8fd9\u91cc\uff0c\u6211\u4eec\u8bad\u7ec3\u4e00\u53f0\u5728<a href=\"../../transformers/feed_forward.html\">\u524d\u9988\u7f51\u7edc</a>\u4e2d\u4f7f\u7528<a href=\"index.html\">\u6a21\u7cca\u5207\u7247\u6fc0\u6d3b</a>\u7684\u53d8\u538b\u5668\u3002\u6211\u4eec\u5c06\u5176\u7528\u4f5c\u8bed\u8a00\u6a21\u578b\uff0c\u5e76\u5728\u5c0f\u838e\u58eb\u6bd4\u4e9a\u6570\u636e\u96c6\u4e0a\u5bf9\u5176\u8fdb\u884c\u8bad\u7ec3\u4ee5\u8fdb\u884c\u6f14\u793a\u3002</p>\n<p>\u4f46\u662f\uff0c\u5bf9\u4e8e FTA \u6765\u8bf4\uff0c\u8fd9\u53ef\u80fd\u4e0d\u662f\u7406\u60f3\u7684\u4efb\u52a1\uff0c\u6211\u4eec\u8ba4\u4e3a FTA \u66f4\u9002\u5408\u5bf9\u5177\u6709\u8fde\u7eed\u53d8\u91cf\u7684\u6570\u636e\u8fdb\u884c\u5efa\u6a21\u3002</p>\n",
"<h2>Auto-Regressive model</h2>\n<p>This is an autoregressive transformer model that uses Feed-Forward Networks with (Fuzzy Tiling Activations)(index.html).</p>\n": "<h2>\u81ea\u56de\u5f52\u6a21\u578b</h2>\n<p>\u8fd9\u662f\u4e00\u4e2a\u81ea\u56de\u5f52\u53d8\u538b\u5668\u6a21\u578b\uff0c\u5b83\u4f7f\u7528\u524d\u9988\u7f51\u7edc\u548c\uff08\u6a21\u7cca\u5e73\u94fa\u6fc0\u6d3b\uff09\uff08index.html\uff09\u3002</p>\n",
"<h2>Configurations</h2>\n<p>This inherits from <a href=\"../../experiments/nlp_autoregression.html#NLPAutoRegressionConfigs\"><span translate=no>_^_0_^_</span></a></p>\n": "<h2>\u914d\u7f6e</h2>\n<p>\u8fd9\u7ee7\u627f\u81ea <a href=\"../../experiments/nlp_autoregression.html#NLPAutoRegressionConfigs\"><span translate=no>_^_0_^_</span></a></p>\n",
"<h2>FFN module with <a href=\"index.html\">FTA</a> activation</h2>\n": "<h2>\u5e26\u6709 F <a href=\"index.html\">TA \u6fc0\u6d3b\u529f\u80fd\u7684 FF</a> N \u6a21\u5757</h2>\n",

View File

@@ -1,5 +1,5 @@
{
"<h1>Diffusion models</h1>\n<ul><li><a href=\"ddpm/index.html\">Denoising Diffusion Probabilistic Models (DDPM)</a></li></ul>\n": "<h1>\u6269\u6563\u6a21\u578b</h1>\n<ul><li><a href=\"ddpm/index.html\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM)</a></li></ul>\n",
"<h1>Diffusion models</h1>\n<ul><li><a href=\"ddpm/index.html\">Denoising Diffusion Probabilistic Models (DDPM)</a> </li>\n<li><a href=\"stable_diffusion/index.html\">Stable Diffusion</a> </li>\n<li><a href=\"stable_diffusion/latent_diffusion.html\">Latent Diffusion Model</a> </li>\n<li><a href=\"stable_diffusion/sampler/ddim.html\">Denoising Diffusion Implicit Models (DDIM) Sampling</a></li></ul>\n": "<h1>\u6269\u6563\u6a21\u578b</h1>\n<ul><li><a href=\"ddpm/index.html\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM)</a></li>\n<li><a href=\"stable_diffusion/index.html\">\u7a33\u5b9a\u7684\u6269\u6563</a></li>\n<li><a href=\"stable_diffusion/latent_diffusion.html\">\u6f5c\u5728\u6269\u6563\u6a21\u578b</a></li>\n<li><a href=\"stable_diffusion/sampler/ddim.html\">\u964d\u566a\u6269\u6563\u9690\u542b\u6a21\u578b (DDIM) \u91c7\u6837</a></li></ul>\n",
"A set of PyTorch implementations/tutorials of diffusion models.": "\u4e00\u7ec4\u5173\u4e8e\u6269\u6563\u6a21\u578b\u7684 PyTorch \u5b9e\u73b0/\u6559\u7a0b\u3002",
"Diffusion models": "\u6269\u6563\u6a21\u578b"
}

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,5 @@
{
"<h1><a href=\"index.html\">Denoising Diffusion Probabilistic Models (DDPM)</a> training</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/diffusion/ddpm/experiment.ipynb\"><span translate=no>_^_0_^_</span></a> <a href=\"https://www.comet.com/labml/diffuse/view/FknjSiKWotr8fgZerpC1sV1cy/panels?utm_source=referral&utm_medium=partner&utm_campaign=labml\"><span translate=no>_^_1_^_</span></a></p>\n<p>This trains a DDPM based model on CelebA HQ dataset. You can find the download instruction in this <a href=\"https://forums.fast.ai/t/download-celeba-hq-dataset/45873/3\">discussion on fast.ai</a>. Save the images inside <a href=\"#dataset_path\"><span translate=no>_^_2_^_</span> folder</a>.</p>\n<p>The paper had used a exponential moving average of the model with a decay of <span translate=no>_^_3_^_</span>. We have skipped this for simplicity.</p>\n": "<h1><a href=\"index.html\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM)</a> \u8bad\u7ec3</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/diffusion/ddpm/experiment.ipynb\"><span translate=no>_^_0_^_</span></a><a href=\"https://www.comet.com/labml/diffuse/view/FknjSiKWotr8fgZerpC1sV1cy/panels?utm_source=referral&utm_medium=partner&utm_campaign=labml\"><span translate=no>_^_1_^_</span></a></p>\n<p>\u8fd9\u5c06\u5728 CeleBA HQ \u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u57fa\u4e8e DDPM \u7684\u6a21\u578b\u3002\u4f60\u53ef\u4ee5\u5728\u8fd9\u7bc7\u5173\u4e8e <a href=\"https://forums.fast.ai/t/download-celeba-hq-dataset/45873/3\">fast.ai \u7684\u8ba8\u8bba</a>\u4e2d\u627e\u5230\u4e0b\u8f7d\u8bf4\u660e\u3002\u5c06\u56fe\u50cf\u4fdd\u5b58\u5728<a href=\"#dataset_path\"><span translate=no>_^_2_^_</span>\u6587\u4ef6\u5939\u4e2d</a>\u3002</p>\n<p>\u8be5\u8bba\u6587\u4f7f\u7528\u4e86\u8870\u51cf\u4e3a\u7684\u6a21\u578b\u7684\u6307\u6570\u79fb\u52a8\u5e73\u5747\u7ebf<span translate=no>_^_3_^_</span>\u3002\u4e3a\u4e86\u7b80\u5355\u8d77\u89c1\uff0c\u6211\u4eec\u8df3\u8fc7\u4e86\u8fd9\u4e2a\u3002</p>\n",
"<h1><a href=\"index.html\">Denoising Diffusion Probabilistic Models (DDPM)</a> training</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/diffusion/ddpm/experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n<p>This trains a DDPM based model on CelebA HQ dataset. You can find the download instruction in this <a href=\"https://forums.fast.ai/t/download-celeba-hq-dataset/45873/3\">discussion on fast.ai</a>. Save the images inside <a href=\"#dataset_path\"><span translate=no>_^_1_^_</span> folder</a>.</p>\n<p>The paper had used a exponential moving average of the model with a decay of <span translate=no>_^_2_^_</span>. We have skipped this for simplicity.</p>\n": "<h1><a href=\"index.html\">\u964d\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM)</a> \u8bad\u7ec3</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/diffusion/ddpm/experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n<p>\u8fd9\u5c06\u57fa\u4e8e CeleBA HQ \u6570\u636e\u96c6\u8bad\u7ec3\u57fa\u4e8e DDPM \u7684\u6a21\u578b\u3002\u4f60\u53ef\u4ee5\u5728 <a href=\"https://forums.fast.ai/t/download-celeba-hq-dataset/45873/3\">fast.ai \u7684\u8ba8\u8bba</a>\u4e2d\u627e\u5230\u4e0b\u8f7d\u8bf4\u660e\u3002\u5c06\u56fe\u50cf\u4fdd\u5b58\u5728<a href=\"#dataset_path\"><span translate=no>_^_1_^_</span>\u6587\u4ef6\u5939\u4e2d</a>\u3002</p>\n<p>\u8be5\u8bba\u6587\u4f7f\u7528\u4e86\u8be5\u6a21\u578b\u7684\u6307\u6570\u79fb\u52a8\u5e73\u5747\u7ebf\uff0c\u5176\u8870\u51cf\u91cf\u4e3a<span translate=no>_^_2_^_</span>\u3002\u4e3a\u7b80\u5355\u8d77\u89c1\uff0c\u6211\u4eec\u8df3\u8fc7\u4e86\u8fd9\u4e2a\u3002</p>\n",
"<h2>Configurations</h2>\n": "<h2>\u914d\u7f6e</h2>\n",
"<h3>CelebA HQ dataset</h3>\n": "<h3>CeleBA HQ \u6570\u636e\u96c6</h3>\n",
"<h3>MNIST dataset</h3>\n": "<h3>MNIST \u6570\u636e\u96c6</h3>\n",

View File

@@ -1,4 +1,4 @@
{
"<h1><a href=\"https://nn.labml.ai/diffusion/ddpm/index.html\">Denoising Diffusion Probabilistic Models (DDPM)</a></h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/diffusion/ddpm/experiment.ipynb\"><span translate=no>_^_0_^_</span></a> <a href=\"https://www.comet.com/labml/diffuse/view/FknjSiKWotr8fgZerpC1sV1cy/panels?utm_source=referral&utm_medium=partner&utm_campaign=labml\"><span translate=no>_^_1_^_</span></a></p>\n<p>This is a <a href=\"https://pytorch.org\">PyTorch</a> implementation/tutorial of the paper <a href=\"https://papers.labml.ai/paper/2006.11239\">Denoising Diffusion Probabilistic Models</a>.</p>\n<p>In simple terms, we get an image from data and add noise step by step. Then We train a model to predict that noise at each step and use the model to generate images.</p>\n<p>Here is the <a href=\"https://nn.labml.ai/diffusion/ddpm/unet.html\">UNet model</a> that predicts the noise and <a href=\"https://nn.labml.ai/diffusion/ddpm/experiment.html\">training code</a>. <a href=\"https://nn.labml.ai/diffusion/ddpm/evaluate.html\">This file</a> can generate samples and interpolations from a trained model. </p>\n": "<h1><a href=\"https://nn.labml.ai/diffusion/ddpm/index.html\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM)</a></h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/diffusion/ddpm/experiment.ipynb\"><span translate=no>_^_0_^_</span></a><a href=\"https://www.comet.com/labml/diffuse/view/FknjSiKWotr8fgZerpC1sV1cy/panels?utm_source=referral&utm_medium=partner&utm_campaign=labml\"><span translate=no>_^_1_^_</span></a></p>\n<p>\u8fd9\u662f\u8bba\u6587\u300a<a href=\"https://papers.labml.ai/paper/2006.11239\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b\u300b\u7684 PyTorc</a> <a href=\"https://pytorch.org\">h</a> \u5b9e\u73b0/\u6559\u7a0b\u3002</p>\n<p>\u7b80\u800c\u8a00\u4e4b\uff0c\u6211\u4eec\u4ece\u6570\u636e\u4e2d\u83b7\u53d6\u56fe\u50cf\u5e76\u9010\u6b65\u6dfb\u52a0\u566a\u70b9\u3002\u7136\u540e\u6211\u4eec\u8bad\u7ec3\u4e00\u4e2a\u6a21\u578b\u6765\u9884\u6d4b\u6bcf\u4e00\u6b65\u7684\u566a\u58f0\uff0c\u7136\u540e\u4f7f\u7528\u8be5\u6a21\u578b\u751f\u6210\u56fe\u50cf\u3002</p>\n<p>\u8fd9\u662f\u9884\u6d4b\u566a\u58f0\u548c<a href=\"https://nn.labml.ai/diffusion/ddpm/experiment.html\">\u8bad\u7ec3\u4ee3\u7801</a>\u7684 <a href=\"https://nn.labml.ai/diffusion/ddpm/unet.html\">unET \u6a21\u578b</a>\u3002<a href=\"https://nn.labml.ai/diffusion/ddpm/evaluate.html\">\u8be5\u6587\u4ef6</a>\u53ef\u4ee5\u6839\u636e\u8bad\u7ec3\u540e\u7684\u6a21\u578b\u751f\u6210\u6837\u672c\u548c\u63d2\u503c\u3002</p>\n",
"<h1><a href=\"https://nn.labml.ai/diffusion/ddpm/index.html\">Denoising Diffusion Probabilistic Models (DDPM)</a></h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/diffusion/ddpm/experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n<p>This is a <a href=\"https://pytorch.org\">PyTorch</a> implementation/tutorial of the paper <a href=\"https://papers.labml.ai/paper/2006.11239\">Denoising Diffusion Probabilistic Models</a>.</p>\n<p>In simple terms, we get an image from data and add noise step by step. Then We train a model to predict that noise at each step and use the model to generate images.</p>\n<p>Here is the <a href=\"https://nn.labml.ai/diffusion/ddpm/unet.html\">UNet model</a> that predicts the noise and <a href=\"https://nn.labml.ai/diffusion/ddpm/experiment.html\">training code</a>. <a href=\"https://nn.labml.ai/diffusion/ddpm/evaluate.html\">This file</a> can generate samples and interpolations from a trained model. </p>\n": "<h1><a href=\"https://nn.labml.ai/diffusion/ddpm/index.html\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM)</a></h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/diffusion/ddpm/experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n<p>\u8fd9\u662f\u300a<a href=\"https://papers.labml.ai/paper/2006.11239\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b</a>\u300b\u8bba\u6587\u7684 <a href=\"https://pytorch.org\">PyTorch</a> \u5b9e\u73b0/\u6559\u7a0b\u3002</p>\n<p>\u7b80\u800c\u8a00\u4e4b\uff0c\u6211\u4eec\u4ece\u6570\u636e\u4e2d\u83b7\u53d6\u56fe\u50cf\u5e76\u9010\u6b65\u6dfb\u52a0\u566a\u70b9\u3002\u7136\u540e\uff0c\u6211\u4eec\u8bad\u7ec3\u4e00\u4e2a\u6a21\u578b\u6765\u9884\u6d4b\u6bcf\u4e2a\u6b65\u9aa4\u7684\u566a\u58f0\uff0c\u5e76\u4f7f\u7528\u8be5\u6a21\u578b\u751f\u6210\u56fe\u50cf\u3002</p>\n<p>\u8fd9\u662f\u9884\u6d4b\u566a\u58f0\u548c<a href=\"https://nn.labml.ai/diffusion/ddpm/experiment.html\">\u8bad\u7ec3\u4ee3\u7801</a>\u7684 <a href=\"https://nn.labml.ai/diffusion/ddpm/unet.html\">UNet \u6a21\u578b</a>\u3002<a href=\"https://nn.labml.ai/diffusion/ddpm/evaluate.html\">\u6b64\u6587\u4ef6</a>\u53ef\u4ee5\u4ece\u7ecf\u8fc7\u8bad\u7ec3\u7684\u6a21\u578b\u751f\u6210\u6837\u672c\u548c\u63d2\u503c\u3002</p>\n",
"Denoising Diffusion Probabilistic Models (DDPM)": "\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM)"
}
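The DDPM page above describes the core idea: add noise to an image step by step and train a model to predict that noise. A minimal sketch of the forward-noising step and the simplified training loss, assuming a linear beta schedule and a hypothetical `eps_model(x_t, t)` noise predictor standing in for the repository's U-Net:

```python
import torch
import torch.nn.functional as F

n_steps = 1_000
beta = torch.linspace(1e-4, 0.02, n_steps)    # linear beta schedule (assumed)
alpha_bar = torch.cumprod(1. - beta, dim=0)   # \bar\alpha_t = prod_s (1 - beta_s)

def ddpm_loss(eps_model, x0: torch.Tensor) -> torch.Tensor:
    """Simplified DDPM objective: predict the noise added at a random step t."""
    t = torch.randint(0, n_steps, (x0.shape[0],))
    eps = torch.randn_like(x0)
    ab = alpha_bar[t].view(-1, 1, 1, 1)
    # q(x_t | x_0): x_t = sqrt(abar_t) * x_0 + sqrt(1 - abar_t) * eps
    xt = ab.sqrt() * x0 + (1. - ab).sqrt() * eps
    return F.mse_loss(eps_model(xt, t), eps)
```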

View File

@@ -70,7 +70,7 @@
"<ul><li><span translate=no>_^_0_^_</span> is the number of channels in the image. <span translate=no>_^_1_^_</span> for RGB. </li>\n<li><span translate=no>_^_2_^_</span> is number of channels in the initial feature map that we transform the image into </li>\n<li><span translate=no>_^_3_^_</span> is the list of channel numbers at each resolution. The number of channels is <span translate=no>_^_4_^_</span> </li>\n<li><span translate=no>_^_5_^_</span> is a list of booleans that indicate whether to use attention at each resolution </li>\n<li><span translate=no>_^_6_^_</span> is the number of <span translate=no>_^_7_^_</span> at each resolution</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u56fe\u50cf\u4e2d\u7684\u901a\u9053\u6570\u3002<span translate=no>_^_1_^_</span>\u5bf9\u4e8e RGB\u3002</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u521d\u59cb\u7279\u5f81\u56fe\u4e2d\u6211\u4eec\u5c06\u56fe\u50cf\u8f6c\u6362\u4e3a\u7684\u901a\u9053\u6570</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u6bcf\u79cd\u5206\u8fa8\u7387\u4e0b\u7684\u901a\u9053\u7f16\u53f7\u5217\u8868\u3002\u9891\u9053\u7684\u6570\u91cf\u662f<span translate=no>_^_4_^_</span></li>\n<li><span translate=no>_^_5_^_</span>\u662f\u4e00\u4e2a\u5e03\u5c14\u503c\u5217\u8868\uff0c\u7528\u4e8e\u6307\u793a\u662f\u5426\u5728\u6bcf\u4e2a\u5206\u8fa8\u7387\u4e0b\u4f7f\u7528\u6ce8\u610f\u529b</li>\n<li><span translate=no>_^_6_^_</span>\u662f\u6bcf\u79cd\u5206\u8fa8<span translate=no>_^_7_^_</span>\u7387\u7684\u6570\u5b57</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of channels in the input </li>\n<li><span translate=no>_^_1_^_</span> is the number of heads in multi-head attention </li>\n<li><span translate=no>_^_2_^_</span> is the number of dimensions in each head </li>\n<li><span translate=no>_^_3_^_</span> is the number of groups for <a href=\"../../normalization/group_norm/index.html\">group normalization</a></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8f93\u5165\u4e2d\u7684\u58f0\u9053\u6570</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u591a\u5934\u5173\u6ce8\u4e2d\u7684\u5934\u90e8\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u6bcf\u4e2a\u5934\u90e8\u7684\u5c3a\u5bf8\u6570</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u7ec4\u5f52\u4e00<a href=\"../../normalization/group_norm/index.html\">\u5316\u7684\u7ec4</a>\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of dimensions in the embedding</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5d4c\u5165\u4e2d\u7684\u7ef4\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of input channels </li>\n<li><span translate=no>_^_1_^_</span> is the number of input channels </li>\n<li><span translate=no>_^_2_^_</span> is the number channels in the time step (<span translate=no>_^_3_^_</span>) embeddings </li>\n<li><span translate=no>_^_4_^_</span> is the number of groups for <a href=\"../../normalization/group_norm/index.html\">group normalization</a></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8f93\u5165\u58f0\u9053\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u8f93\u5165\u58f0\u9053\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f time step (<span translate=no>_^_3_^_</span>) \u5d4c\u5165\u4e2d\u7684\u6570\u5b57\u901a\u9053</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u7ec4\u5f52\u4e00<a href=\"../../normalization/group_norm/index.html\">\u5316\u7684\u7ec4</a>\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of input channels </li>\n<li><span translate=no>_^_1_^_</span> is the number of input channels </li>\n<li><span translate=no>_^_2_^_</span> is the number channels in the time step (<span translate=no>_^_3_^_</span>) embeddings </li>\n<li><span translate=no>_^_4_^_</span> is the number of groups for <a href=\"../../normalization/group_norm/index.html\">group normalization</a> </li>\n<li><span translate=no>_^_5_^_</span> is the dropout rate</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8f93\u5165\u901a\u9053\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u8f93\u5165\u901a\u9053\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u65f6\u95f4\u6b65 (<span translate=no>_^_3_^_</span>) \u5d4c\u5165\u4e2d\u7684\u901a\u9053\u6570</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u7528\u4e8e\u7ec4<a href=\"../../normalization/group_norm/index.html\">\u6807\u51c6\u5316\u7684\u7ec4</a>\u6570</li>\n<li><span translate=no>_^_5_^_</span>\u662f\u8f8d\u5b66\u7387</li></ul>\n",
"U-Net model for Denoising Diffusion Probabilistic Models (DDPM)": "\u7528\u4e8e\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM) \u7684 U-Net \u6a21\u578b",
"UNet model for Denoising Diffusion Probabilistic Models (DDPM)": "\u7528\u4e8e\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM) \u7684 unET \u6a21\u578b"
}
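The bullet lists above document the U-Net building blocks' constructor arguments (input channels, time-embedding channels, group-norm groups, dropout). A hedged sketch of the residual block they describe; parameter names and the SiLU activation are illustrative choices, not necessarily the repository's exact code:

```python
import torch
from torch import nn

class ResidualBlock(nn.Module):
    """DDPM-style residual block conditioned on the time-step embedding."""
    def __init__(self, in_channels: int, out_channels: int, time_channels: int,
                 n_groups: int = 32, dropout: float = 0.1):
        super().__init__()
        self.norm1 = nn.GroupNorm(n_groups, in_channels)
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, padding=1)
        self.time_emb = nn.Linear(time_channels, out_channels)
        self.norm2 = nn.GroupNorm(n_groups, out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
        self.dropout = nn.Dropout(dropout)
        # 1x1 mapping layer for the residual connection when channel counts differ
        self.shortcut = (nn.Conv2d(in_channels, out_channels, 1)
                         if in_channels != out_channels else nn.Identity())
        self.act = nn.SiLU()

    def forward(self, x: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
        h = self.conv1(self.act(self.norm1(x)))
        h = h + self.time_emb(self.act(t))[:, :, None, None]  # add time-step embedding
        h = self.conv2(self.dropout(self.act(self.norm2(h))))
        return h + self.shortcut(x)  # residual connection
```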

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,19 @@
{
"<h1>Latent Diffusion Models</h1>\n<p>Latent diffusion models use an auto-encoder to map between image space and latent space. The diffusion model works on the latent space, which makes it a lot easier to train. It is based on paper <a href=\"https://papers.labml.ai/paper/2112.10752\">High-Resolution Image Synthesis with Latent Diffusion Models</a>.</p>\n<p>They use a pre-trained auto-encoder and train the diffusion U-Net on the latent space of the pre-trained auto-encoder.</p>\n<p>For a simpler diffusion implementation refer to our <a href=\"../ddpm/index.html\">DDPM implementation</a>. We use same notations for <span translate=no>_^_0_^_</span>, <span translate=no>_^_1_^_</span> schedules, etc.</p>\n": "<h1>\u6f5c\u5728\u6269\u6563\u6a21\u578b</h1>\n<p>\u6f5c\u5728\u6269\u6563\u6a21\u578b\u4f7f\u7528\u81ea\u52a8\u7f16\u7801\u5668\u5728\u56fe\u50cf\u7a7a\u95f4\u548c\u6f5c\u5728\u7a7a\u95f4\u4e4b\u95f4\u8fdb\u884c\u6620\u5c04\u3002\u6269\u6563\u6a21\u578b\u9002\u7528\u4e8e\u6f5c\u5728\u7a7a\u95f4\uff0c\u8fd9\u4f7f\u5f97\u8bad\u7ec3\u53d8\u5f97\u5bb9\u6613\u5f97\u591a\u3002\u5b83\u57fa\u4e8e<a href=\"https://papers.labml.ai/paper/2112.10752\">\u5e26\u6709\u6f5c\u5728\u6269\u6563\u6a21\u578b\u7684\u7eb8\u8d28\u9ad8\u5206\u8fa8\u7387\u56fe\u50cf\u5408\u6210</a>\u3002</p>\n<p>\u5b83\u4eec\u4f7f\u7528\u9884\u8bad\u7ec3\u7684\u81ea\u52a8\u7f16\u7801\u5668\uff0c\u5728\u9884\u8bad\u7ec3\u7684\u81ea\u52a8\u7f16\u7801\u5668\u7684\u6f5c\u5728\u7a7a\u95f4\u4e0a\u8bad\u7ec3\u6269\u6563 U-Net\u3002</p>\n<p>\u6709\u5173\u66f4\u7b80\u5355\u7684\u6269\u6563\u5b9e\u73b0\uff0c\u8bf7\u53c2\u9605\u6211\u4eec\u7684 <a href=\"../ddpm/index.html\">DDPM \u5b9e\u73b0</a>\u3002\u6211\u4eec\u5bf9<span translate=no>_^_0_^_</span><span translate=no>_^_1_^_</span>\u65f6\u95f4\u8868\u7b49\u4f7f\u7528\u76f8\u540c\u7684\u7b26\u53f7\u3002</p>\n",
"<h2>Latent diffusion model</h2>\n<p>This contains following components:</p>\n<ul><li><a href=\"model/autoencoder.html\">AutoEncoder</a> </li>\n<li><a href=\"model/unet.html\">U-Net</a> with <a href=\"model/unet_attention.html\">attention</a> </li>\n<li><a href=\"model/clip_embedder.html\">CLIP embeddings generator</a></li></ul>\n": "<h2>\u6f5c\u5728\u6269\u6563\u6a21\u578b</h2>\n<p>\u5b83\u5305\u542b\u4ee5\u4e0b\u7ec4\u4ef6\uff1a</p>\n<ul><li><a href=\"model/autoencoder.html\">\u81ea\u52a8\u7f16\u7801\u5668</a></li>\n<li><a href=\"model/unet_attention.html\">\u5907\u53d7\u5173\u6ce8</a>\u7684 <a href=\"model/unet.html\">U-Net</a></li>\n<li><a href=\"model/clip_embedder.html\">CLIP \u5d4c\u5165\u5f0f\u751f\u6210\u5668</a></li></ul>\n",
"<h3>Get <a href=\"model/clip_embedder.html\">CLIP embeddings</a> for a list of text prompts</h3>\n": "<h3>\u83b7\u53d6 <a href=\"model/clip_embedder.html\">CLIP \u5d4c\u5165</a>\u4ee5\u83b7\u53d6\u6587\u672c\u63d0\u793a\u5217\u8868</h3>\n",
"<h3>Get image from the latent representation</h3>\n<p>We scale down by the scaling factor and then decode.</p>\n": "<h3>\u4ece\u6f5c\u5728\u8868\u793a\u4e2d\u83b7\u53d6\u56fe\u50cf</h3>\n<p>\u6211\u4eec\u6309\u7f29\u653e\u7cfb\u6570\u5411\u4e0b\u7f29\u653e\uff0c\u7136\u540e\u89e3\u7801\u3002</p>\n",
"<h3>Get model device</h3>\n": "<h3>\u83b7\u53d6\u8bbe\u5907\u6a21\u578b</h3>\n",
"<h3>Get scaled latent space representation of the image</h3>\n<p>The encoder output is a distribution. We sample from that and multiply by the scaling factor.</p>\n": "<h3>\u83b7\u53d6\u56fe\u50cf\u7684\u7f29\u653e\u6f5c\u5728\u7a7a\u95f4\u8868\u793a</h3>\n<p>\u7f16\u7801\u5668\u8f93\u51fa\u662f\u5206\u5e03\u5f0f\u3002\u6211\u4eec\u4ece\u4e2d\u53d6\u6837\u5e76\u4e58\u4ee5\u7f29\u653e\u7cfb\u6570\u3002</p>\n",
"<h3>Predict noise</h3>\n<p>Predict noise given the latent representation <span translate=no>_^_0_^_</span>, time step <span translate=no>_^_1_^_</span>, and the conditioning context <span translate=no>_^_2_^_</span>.</p>\n<p><span translate=no>_^_3_^_</span></p>\n": "<h3>\u9884\u6d4b\u566a\u97f3</h3>\n<p>\u6839\u636e\u6f5c\u5728\u8868\u793a<span translate=no>_^_0_^_</span>\u3001\u65f6\u95f4\u6b65<span translate=no>_^_1_^_</span>\u957f\u548c\u6761\u4ef6\u73af\u5883\u9884\u6d4b\u566a\u58f0<span translate=no>_^_2_^_</span>\u3002</p>\n<p><span translate=no>_^_3_^_</span></p>\n",
"<p> <em>This is an empty wrapper class around the <a href=\"model/unet.html\">U-Net</a>. We keep this to have the same model structure as <a href=\"https://github.com/CompVis/stable-diffusion\">CompVis/stable-diffusion</a> so that we do not have to map the checkpoint weights explicitly</em>.</p>\n": "<p><em>\u8fd9\u662f\u56f4\u7ed5 <a href=\"model/unet.html\">U-Net</a> \u7684\u7a7a\u5305\u88c5\u7c7b\u3002\u6211\u4eec\u4fdd\u6301\u5b83\u4e0e <a href=\"https://github.com/CompVis/stable-diffusion\">compVIS/Stable-</a> Difusion \u76f8\u540c\u7684\u6a21\u578b\u7ed3\u6784\uff0c\u8fd9\u6837\u6211\u4eec\u5c31\u4e0d\u5fc5\u660e\u786e\u5730\u6620\u5c04\u68c0\u67e5\u70b9\u6743\u91cd</em>\u3002</p>\n",
"<p><a href=\"model/clip_embedder.html\">CLIP embeddings generator</a> </p>\n": "<p><a href=\"model/clip_embedder.html\">CLIP \u5d4c\u5165\u5f0f\u751f\u6210\u5668</a></p>\n",
"<p><span translate=no>_^_0_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span></p>\n",
"<p><span translate=no>_^_0_^_</span> schedule </p>\n": "<p><span translate=no>_^_0_^_</span>\u65f6\u95f4\u8868</p>\n",
"<p>Auto-encoder and scaling factor </p>\n": "<p>\u81ea\u52a8\u7f16\u7801\u5668\u548c\u7f29\u653e\u7cfb\u6570</p>\n",
"<p>Number of steps <span translate=no>_^_0_^_</span> </p>\n": "<p>\u6b65\u6570<span translate=no>_^_0_^_</span></p>\n",
"<p>Wrap the <a href=\"model/unet.html\">U-Net</a> to keep the same model structure as <a href=\"https://github.com/CompVis/stable-diffusion\">CompVis/stable-diffusion</a>. </p>\n": "<p>\u5c01\u88c5 <a href=\"model/unet.html\">U-Net</a> \u4ee5\u4fdd\u6301\u4e0e <a href=\"https://github.com/CompVis/stable-diffusion\">compVIS/Stable-</a> Difusion \u76f8\u540c\u7684\u6a21\u578b\u7ed3\u6784\u3002</p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the <a href=\"model/unet.html\">U-Net</a> that predicts noise <span translate=no>_^_1_^_</span>, in latent space </li>\n<li><span translate=no>_^_2_^_</span> is the <a href=\"model/autoencoder.html\">AutoEncoder</a> </li>\n<li><span translate=no>_^_3_^_</span> is the <a href=\"model/clip_embedder.html\">CLIP embeddings generator</a> </li>\n<li><span translate=no>_^_4_^_</span> is the scaling factor for the latent space. The encodings of the autoencoder are scaled by this before feeding into the U-Net. </li>\n<li><span translate=no>_^_5_^_</span> is the number of diffusion steps <span translate=no>_^_6_^_</span>. </li>\n<li><span translate=no>_^_7_^_</span> is the start of the <span translate=no>_^_8_^_</span> schedule. </li>\n<li><span translate=no>_^_9_^_</span> is the end of the <span translate=no>_^_10_^_</span> schedule.</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u9884\u6d4b\u6f5c\u5728\u7a7a\u95f4\u4e2d\u566a\u58f0<span translate=no>_^_1_^_</span>\u7684 <a href=\"model/unet.html\">U-Ne</a> t</li>\n<li><span translate=no>_^_2_^_</span>\u662f<a href=\"model/autoencoder.html\">\u81ea\u52a8\u7f16\u7801\u5668</a></li>\n<li><span translate=no>_^_3_^_</span>\u662f <a href=\"model/clip_embedder.html\">CLIP \u5d4c\u5165\u751f\u6210\u5668</a></li>\n<li><span translate=no>_^_4_^_</span>\u662f\u6f5c\u5728\u7a7a\u95f4\u7684\u7f29\u653e\u7cfb\u6570\u3002\u5728\u9988\u5165 U-Net \u4e4b\u524d\uff0c\u81ea\u52a8\u7f16\u7801\u5668\u7684\u7f16\u7801\u4f1a\u6309\u6b64\u8fdb\u884c\u7f29\u653e\u3002</li>\n<li><span translate=no>_^_5_^_</span>\u662f\u6269\u6563\u6b65\u9aa4\u7684\u6570\u91cf<span translate=no>_^_6_^_</span>\u3002</li>\n<li><span translate=no>_^_7_^_</span>\u662f<span translate=no>_^_8_^_</span>\u65f6\u95f4\u8868\u7684\u5f00\u59cb\u3002</li>\n<li><span translate=no>_^_9_^_</span>\u662f<span translate=no>_^_10_^_</span>\u65f6\u95f4\u8868\u7684\u7ed3\u675f\u3002</li></ul>\n",
"Annotated PyTorch implementation/tutorial of latent diffusion models from paper High-Resolution Image Synthesis with Latent Diffusion Models": "\u5e26\u6ce8\u91ca\u7684 PyTorch \u5b9e\u73b0/\u6559\u7a0b\u6765\u81ea\u8bba\u6587\u7684\u6f5c\u5728\u6269\u6563\u6a21\u578b\u4f7f\u7528\u6f5c\u5728\u6269\u6563\u6a21\u578b\u8fdb\u884c\u9ad8\u5206\u8fa8\u7387\u56fe\u50cf\u5408\u6210",
"Latent Diffusion Models": "\u6f5c\u5728\u6269\u6563\u6a21\u578b"
}
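The strings above describe the latent diffusion wrapper: encode with the autoencoder and multiply by the scaling factor, predict noise in latent space, and divide by the factor before decoding. A hedged sketch of that flow; class and attribute names are illustrative stand-ins for the repository's components:

```python
import torch
from torch import nn

class LatentDiffusion(nn.Module):
    """Ties together the U-Net, the autoencoder and the CLIP conditioning."""
    def __init__(self, unet: nn.Module, autoencoder: nn.Module,
                 clip_embedder: nn.Module, latent_scaling_factor: float):
        super().__init__()
        self.model = unet
        self.autoencoder = autoencoder
        self.clip_embedder = clip_embedder
        self.latent_scaling_factor = latent_scaling_factor

    def get_text_conditioning(self, prompts: list[str]) -> torch.Tensor:
        # CLIP embeddings for a list of text prompts
        return self.clip_embedder(prompts)

    def autoencoder_encode(self, image: torch.Tensor) -> torch.Tensor:
        # The encoder output is a distribution; sample and scale
        return self.latent_scaling_factor * self.autoencoder.encode(image).sample()

    def autoencoder_decode(self, z: torch.Tensor) -> torch.Tensor:
        # Scale down by the scaling factor, then decode
        return self.autoencoder.decode(z / self.latent_scaling_factor)

    def forward(self, x: torch.Tensor, t: torch.Tensor, context: torch.Tensor) -> torch.Tensor:
        # Predict noise eps(x_t, t, c) in latent space
        return self.model(x, t, context)
```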

View File

@@ -0,0 +1,5 @@
{
"<h1><a href=\"../index.html\">Stable Diffusion</a> Models</h1>\n<ul><li><a href=\"autoencoder.html\">AutoEncoder</a> </li>\n<li><a href=\"unet.html\">U-Net</a> with <a href=\"unet_attention.html\">attention</a> </li>\n<li><a href=\"clip_embedder.html\">CLIP embedder</a>.</li></ul>\n": "<h1><a href=\"../index.html\">\u7a33\u5b9a\u7684\u6269\u6563</a>\u6a21\u578b</h1>\n<ul><li><a href=\"autoencoder.html\">\u81ea\u52a8\u7f16\u7801\u5668</a></li>\n<li><a href=\"unet_attention.html\">\u5907\u53d7\u5173\u6ce8</a>\u7684 <a href=\"unet.html\">U-Net</a></li>\n<li><a href=\"clip_embedder.html\">CLIP \u5d4c\u5165\u5668</a>\u3002</li></ul>\n",
"Models and components for stable diffusion.": "\u7528\u4e8e\u7a33\u5b9a\u6269\u6563\u7684\u6a21\u578b\u548c\u7ec4\u4ef6\u3002",
"Modules used in stable diffusion": "\u7528\u4e8e\u7a33\u5b9a\u6269\u6563\u7684\u6a21\u5757"
}

View File

@@ -0,0 +1,84 @@
{
"<h1>Autoencoder for <a href=\"../index.html\">Stable Diffusion</a></h1>\n<p>This implements the auto-encoder model used to map between image space and latent space.</p>\n<p>We have kept to the model definition and naming unchanged from <a href=\"https://github.com/CompVis/stable-diffusion\">CompVis/stable-diffusion</a> so that we can load the checkpoints directly.</p>\n": "<h1>\u7528\u4e8e<a href=\"../index.html\">\u7a33\u5b9a\u6269\u6563</a>\u7684\u81ea\u52a8\u7f16\u7801\u5668</h1>\n<p>\u8fd9\u5b9e\u73b0\u4e86\u7528\u4e8e\u5728\u56fe\u50cf\u7a7a\u95f4\u548c\u6f5c\u5728\u7a7a\u95f4\u4e4b\u95f4\u8fdb\u884c\u6620\u5c04\u7684\u81ea\u52a8\u7f16\u7801\u5668\u6a21\u578b\u3002</p>\n<p>\u6211\u4eec\u4fdd\u6301\u4e86 <a href=\"https://github.com/CompVis/stable-diffusion\">compvis/Stable-Difusi</a> on \u7684\u6a21\u578b\u5b9a\u4e49\u548c\u547d\u540d\u4e0d\u53d8\uff0c\u8fd9\u6837\u6211\u4eec\u5c31\u53ef\u4ee5\u76f4\u63a5\u52a0\u8f7d\u68c0\u67e5\u70b9\u3002</p>\n",
"<h2>Attention block</h2>\n": "<h2>\u6ce8\u610f\u65b9\u5757</h2>\n",
"<h2>Autoencoder</h2>\n<p>This consists of the encoder and decoder modules.</p>\n": "<h2>\u81ea\u52a8\u7f16\u7801\u5668</h2>\n<p>\u5b83\u7531\u7f16\u7801\u5668\u548c\u89e3\u7801\u5668\u6a21\u5757\u7ec4\u6210\u3002</p>\n",
"<h2>Decoder module</h2>\n": "<h2>\u89e3\u7801\u5668\u6a21\u5757</h2>\n",
"<h2>Down-sampling layer</h2>\n": "<h2>\u5411\u4e0b\u91c7\u6837\u5c42</h2>\n",
"<h2>Encoder module</h2>\n": "<h2>\u7f16\u7801\u5668\u6a21\u5757</h2>\n",
"<h2>Gaussian Distribution</h2>\n": "<h2>\u9ad8\u65af\u5206\u5e03</h2>\n",
"<h2>ResNet Block</h2>\n": "<h2>ResNet \u533a\u5757</h2>\n",
"<h2>Up-sampling layer</h2>\n": "<h2>\u5411\u4e0a\u91c7\u6837\u5c42</h2>\n",
"<h3>Decode images from latent representation</h3>\n<ul><li><span translate=no>_^_0_^_</span> is the latent representation with shape <span translate=no>_^_1_^_</span></li></ul>\n": "<h3>\u4ece\u6f5c\u5728\u8868\u73b0\u4e2d\u89e3\u7801\u56fe\u50cf</h3>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u5e26\u6709\u5f62\u72b6\u7684\u6f5c\u5728\u8868\u793a\u5f62\u5f0f<span translate=no>_^_1_^_</span></li></ul>\n",
"<h3>Encode images to latent representation</h3>\n<ul><li><span translate=no>_^_0_^_</span> is the image tensor with shape <span translate=no>_^_1_^_</span></li></ul>\n": "<h3>\u5c06\u56fe\u50cf\u7f16\u7801\u4e3a\u6f5c\u5728\u8868\u793a</h3>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u5e26\u6709\u5f62\u72b6\u7684\u56fe\u50cf\u5f20\u91cf<span translate=no>_^_1_^_</span></li></ul>\n",
"<h3>Group normalization</h3>\n<p>This is a helper function, with fixed number of groups and <span translate=no>_^_0_^_</span>.</p>\n": "<h3>\u7fa4\u7ec4\u6807\u51c6\u5316</h3>\n<p>\u8fd9\u662f\u4e00\u4e2a\u8f85\u52a9\u51fd\u6570\uff0c\u5177\u6709\u56fa\u5b9a\u6570\u91cf\u7684\u7ec4\u548c<span translate=no>_^_0_^_</span>\u3002</p>\n",
"<h3>Swish activation</h3>\n<p><span translate=no>_^_0_^_</span></p>\n": "<h3>Swish \u6fc0\u6d3b</h3>\n<p><span translate=no>_^_0_^_</span></p>\n",
"<p> </p>\n": "<p></p>\n",
"<p><span translate=no>_^_0_^_</span> convolution mapping </p>\n": "<p><span translate=no>_^_0_^_</span>\u5377\u79ef\u6620\u5c04</p>\n",
"<p><span translate=no>_^_0_^_</span> convolution with stride length of <span translate=no>_^_1_^_</span> to down-sample by a factor of <span translate=no>_^_2_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span>\u5377\u79ef\uff0c\u6b65\u957f\u4e3a<span translate=no>_^_1_^_</span>\u5411\u4e0b\u91c7\u6837\u7684\u7cfb\u6570\u4e3a<span translate=no>_^_2_^_</span></p>\n",
"<p><span translate=no>_^_0_^_</span> to <span translate=no>_^_1_^_</span> mapping layer for residual connection </p>\n": "<p><span translate=no>_^_0_^_</span>\u5230\u5269\u4f59\u8fde\u63a5\u7684<span translate=no>_^_1_^_</span>\u6620\u5c04\u5c42</p>\n",
"<p>Add ResNet Blocks </p>\n": "<p>\u6dfb\u52a0 ResNet \u533a\u5757</p>\n",
"<p>Add padding </p>\n": "<p>\u6dfb\u52a0\u5185\u8fb9\u8ddd</p>\n",
"<p>Add residual connection </p>\n": "<p>\u6dfb\u52a0\u5269\u4f59\u8fde\u63a5</p>\n",
"<p>Apply convolution </p>\n": "<p>\u5e94\u7528\u5377\u79ef</p>\n",
"<p>Attention scaling factor </p>\n": "<p>\u6ce8\u610f\u529b\u7f29\u653e\u7cfb\u6570</p>\n",
"<p>Calculate standard deviation </p>\n": "<p>\u8ba1\u7b97\u6807\u51c6\u5dee</p>\n",
"<p>Clamp the log of variances </p>\n": "<p>\u9650\u5236\u65b9\u5dee\u65e5\u5fd7</p>\n",
"<p>Compute <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8ba1\u7b97<span translate=no>_^_0_^_</span></p>\n",
"<p>Convolution to map from embedding space to quantized embedding space moments (mean and log variance) </p>\n": "<p>\u4ece\u5d4c\u5165\u7a7a\u95f4\u5230\u91cf\u5316\u5d4c\u5165\u7a7a\u95f4\u77e9\u7684\u5377\u79ef\u5230\u6620\u5c04\uff08\u5747\u503c\u548c\u5bf9\u6570\u65b9\u5dee\uff09</p>\n",
"<p>Convolution to map from quantized embedding space back to embedding space </p>\n": "<p>\u5377\u79ef\u5c06\u4ece\u91cf\u5316\u5d4c\u5165\u7a7a\u95f4\u6620\u5c04\u56de\u5d4c\u5165\u7a7a\u95f4</p>\n",
"<p>Create top-level blocks </p>\n": "<p>\u521b\u5efa\u9876\u7ea7\u533a\u5757</p>\n",
"<p>Decode the image of shape <span translate=no>_^_0_^_</span> </p>\n": "<p>\u89e3\u7801\u5f62\u72b6\u7684\u56fe\u50cf<span translate=no>_^_0_^_</span></p>\n",
"<p>Down-sampling </p>\n": "<p>\u5411\u4e0b\u91c7\u6837</p>\n",
"<p>Down-sampling at the end of each top level block except the last </p>\n": "<p>\u5728\u6bcf\u4e2a\u9876\u7ea7\u533a\u5757\u7684\u672b\u5c3e\u5904\u5411\u4e0b\u91c7\u6837\uff08\u6700\u540e\u4e00\u4e2a\u533a\u5757\u9664\u5916\uff09</p>\n",
"<p>Each top level block consists of multiple ResNet Blocks and down-sampling </p>\n": "<p>\u6bcf\u4e2a\u9876\u7ea7\u533a\u5757\u7531\u591a\u4e2a ResNet \u6a21\u5757\u548c\u5411\u4e0b\u91c7\u6837\u7ec4\u6210</p>\n",
"<p>Each top level block consists of multiple ResNet Blocks and up-sampling </p>\n": "<p>\u6bcf\u4e2a\u9876\u7ea7\u533a\u5757\u7531\u591a\u4e2a ResNet \u6a21\u5757\u548c\u5411\u4e0a\u91c7\u6837\u7ec4\u6210</p>\n",
"<p>Final <span translate=no>_^_0_^_</span> convolution layer </p>\n": "<p>\u6700\u7ec8<span translate=no>_^_0_^_</span>\u5377\u79ef\u5c42</p>\n",
"<p>Final ResNet blocks with attention </p>\n": "<p>\u6700\u540e\u4e00\u4e2a\u503c\u5f97\u6ce8\u610f\u7684 ResNet \u5c01\u9501</p>\n",
"<p>First normalization and convolution layer </p>\n": "<p>\u7b2c\u4e00\u4e2a\u5f52\u4e00\u5316\u548c\u5377\u79ef\u5c42</p>\n",
"<p>Get embeddings with shape <span translate=no>_^_0_^_</span> </p>\n": "<p>\u83b7\u53d6\u5e26\u6709\u5f62\u72b6\u7684\u5d4c\u5165\u7269<span translate=no>_^_0_^_</span></p>\n",
"<p>Get query, key and vector embeddings </p>\n": "<p>\u83b7\u53d6\u67e5\u8be2\u3001\u952e\u548c\u5411\u91cf\u5d4c\u5165</p>\n",
"<p>Get the moments in the quantized embedding space </p>\n": "<p>\u83b7\u53d6\u91cf\u5316\u5d4c\u5165\u7a7a\u95f4\u4e2d\u7684\u77ac\u95f4</p>\n",
"<p>Group normalization </p>\n": "<p>\u7fa4\u7ec4\u6807\u51c6\u5316</p>\n",
"<p>Initial <span translate=no>_^_0_^_</span> convolution layer that maps the embedding space to <span translate=no>_^_1_^_</span> </p>\n": "<p>\u5c06\u5d4c\u5165\u7a7a\u95f4\u6620\u5c04\u5230\u7684\u521d\u59cb<span translate=no>_^_0_^_</span>\u5377\u79ef\u5c42<span translate=no>_^_1_^_</span></p>\n",
"<p>Initial <span translate=no>_^_0_^_</span> convolution layer that maps the image to <span translate=no>_^_1_^_</span> </p>\n": "<p>\u5c06\u56fe\u50cf\u6620\u5c04\u5230\u7684\u521d\u59cb<span translate=no>_^_0_^_</span>\u5377\u79ef\u5c42<span translate=no>_^_1_^_</span></p>\n",
"<p>List of top-level blocks </p>\n": "<p>\u9876\u7ea7\u533a\u5757\u5217\u8868</p>\n",
"<p>Map and add residual </p>\n": "<p>\u6620\u5c04\u5e76\u6dfb\u52a0\u6b8b\u5dee</p>\n",
"<p>Map to <span translate=no>_^_0_^_</span> with the initial convolution </p>\n": "<p><span translate=no>_^_0_^_</span>\u4f7f\u7528\u521d\u59cb\u5377\u79ef\u6620\u5c04\u5230</p>\n",
"<p>Map to embedding space from the quantized representation </p>\n": "<p>\u4ece\u91cf\u5316\u8868\u793a\u6620\u5c04\u5230\u5d4c\u5165\u7a7a\u95f4</p>\n",
"<p>Map to embedding space with a <span translate=no>_^_0_^_</span> convolution </p>\n": "<p>\u7528<span translate=no>_^_0_^_</span>\u5377\u79ef\u6620\u5c04\u5230\u5d4c\u5165\u7a7a\u95f4</p>\n",
"<p>Map to image space with a <span translate=no>_^_0_^_</span> convolution </p>\n": "<p>\u4f7f\u7528<span translate=no>_^_0_^_</span>\u5377\u79ef\u6620\u5c04\u5230\u56fe\u50cf\u7a7a\u95f4</p>\n",
"<p>Normalize <span translate=no>_^_0_^_</span> </p>\n": "<p>\u6807\u51c6\u5316<span translate=no>_^_0_^_</span></p>\n",
"<p>Normalize and map to embedding space </p>\n": "<p>\u5f52\u4e00\u5316\u5e76\u6620\u5c04\u5230\u5d4c\u5165\u7a7a\u95f4</p>\n",
"<p>Normalize and map to image space </p>\n": "<p>\u5f52\u4e00\u5316\u5e76\u6620\u5c04\u5230\u56fe\u50cf\u7a7a\u95f4</p>\n",
"<p>Number of blocks of different resolutions. The resolution is halved at the end each top level block </p>\n": "<p>\u4e0d\u540c\u5206\u8fa8\u7387\u7684\u533a\u5757\u6570\u3002\u6bcf\u4e2a\u9876\u5c42\u65b9\u5757\u7684\u7ed3\u5c3e\u5904\u5206\u8fa8\u7387\u51cf\u534a</p>\n",
"<p>Number of channels in each top level block </p>\n": "<p>\u6bcf\u4e2a\u9876\u7ea7\u533a\u5757\u4e2d\u7684\u9891\u9053\u6570</p>\n",
"<p>Number of channels in each top level block, in the reverse order </p>\n": "<p>\u6bcf\u4e2a\u9876\u7ea7\u5757\u4e2d\u7684\u901a\u9053\u6570\uff0c\u6309\u76f8\u53cd\u987a\u5e8f\u6392\u5217</p>\n",
"<p>Number of channels in the top-level block </p>\n": "<p>\u9876\u7ea7\u533a\u5757\u4e2d\u7684\u9891\u9053\u6570</p>\n",
"<p>Prepend to be consistent with the checkpoint </p>\n": "<p>\u9884\u5148\u8bbe\u7f6e\u4ee5\u4e0e\u68c0\u67e5\u70b9\u4fdd\u6301\u4e00\u81f4</p>\n",
"<p>Query, key and value mappings </p>\n": "<p>\u67e5\u8be2\u3001\u952e\u548c\u503c\u6620\u5c04</p>\n",
"<p>ResNet Blocks </p>\n": "<p>ResNet \u533a\u5757</p>\n",
"<p>ResNet blocks with attention </p>\n": "<p>ResNet \u8981\u6ce8\u610f\u5c01\u9501</p>\n",
"<p>Reshape back to <span translate=no>_^_0_^_</span> </p>\n": "<p>\u91cd\u5851\u56de\u539f\u72b6<span translate=no>_^_0_^_</span></p>\n",
"<p>Reshape to query, key and vector embeedings from <span translate=no>_^_0_^_</span> to <span translate=no>_^_1_^_</span> </p>\n": "<p>\u91cd\u5851\u4e3a\u67e5\u8be2\uff0c\u952e\u5d4c\u5165\u548c\u5411\u91cf\u5d4c\u5165\u4ece<span translate=no>_^_0_^_</span>\u4e3a<span translate=no>_^_1_^_</span></p>\n",
"<p>Return the distribution </p>\n": "<p>\u8fd4\u56de\u5206\u5e03</p>\n",
"<p>Sample from the distribution </p>\n": "<p>\u6765\u81ea\u5206\u5e03\u7684\u6837\u672c</p>\n",
"<p>Second normalization and convolution layer </p>\n": "<p>\u7b2c\u4e8c\u4e2a\u5f52\u4e00\u5316\u548c\u5377\u79ef\u5c42</p>\n",
"<p>Split mean and log of variance </p>\n": "<p>\u5206\u5272\u5747\u503c\u548c\u65b9\u5dee\u5bf9\u6570</p>\n",
"<p>Top-level block </p>\n": "<p>\u9876\u7ea7\u533a\u5757</p>\n",
"<p>Top-level blocks </p>\n": "<p>\u9876\u7ea7\u533a\u5757</p>\n",
"<p>Up-sample by a factor of <span translate=no>_^_0_^_</span> </p>\n": "<p>\u6309\u7cfb\u6570\u5411\u4e0a\u91c7\u6837<span translate=no>_^_0_^_</span></p>\n",
"<p>Up-sampling </p>\n": "<p>\u5411\u4e0a\u91c7\u6837</p>\n",
"<p>Up-sampling at the end of each top level block except the first </p>\n": "<p>\u5728\u6bcf\u4e2a\u9876\u7ea7\u533a\u5757\u7684\u7ed3\u5c3e\u5904\u5411\u4e0a\u91c7\u6837\uff08\u7b2c\u4e00\u4e2a\u9664\u5916\uff09</p>\n",
"<ul><li><span translate=no>_^_0_^_</span> are the means and log of variances of the embedding of shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62\u72b6\u5d4c\u5165\u7684\u65b9\u5dee\u7684\u5747\u503c\u548c\u5bf9\u6570<span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the embedding tensor with shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5e26\u6709\u5f62\u72b6\u7684\u5d4c\u5165\u5f20\u91cf<span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the encoder </li>\n<li><span translate=no>_^_1_^_</span> is the decoder </li>\n<li><span translate=no>_^_2_^_</span> is the number of dimensions in the quantized embedding space </li>\n<li><span translate=no>_^_3_^_</span> is the number of channels in the embedding space</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u7f16\u7801\u5668</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u89e3\u7801\u5668</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u91cf\u5316\u5d4c\u5165\u7a7a\u95f4\u4e2d\u7684\u7ef4\u6570</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u5d4c\u5165\u7a7a\u95f4\u4e2d\u7684\u901a\u9053\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the image tensor with shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5e26\u6709\u5f62\u72b6\u7684\u56fe\u50cf\u5f20\u91cf<span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the input feature map with shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5e26\u6709\u5f62\u72b6\u7684\u8f93\u5165\u8981\u7d20\u56fe<span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of channels in the final convolution layer </li>\n<li><span translate=no>_^_1_^_</span> are the multiplicative factors for the number of channels in the previous blocks, in reverse order </li>\n<li><span translate=no>_^_2_^_</span> is the number of resnet layers at each resolution </li>\n<li><span translate=no>_^_3_^_</span> is the number of channels in the image </li>\n<li><span translate=no>_^_4_^_</span> is the number of channels in the embedding space</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u6700\u7ec8\u5377\u79ef\u5c42\u4e2d\u7684\u901a\u9053\u6570</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u524d\u9762\u533a\u5757\u4e2d\u4fe1\u9053\u6570\u7684\u4e58\u6cd5\u56e0\u5b50\uff0c\u987a\u5e8f\u76f8\u53cd</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u6bcf\u79cd\u5206\u8fa8\u7387\u4e0b\u7684 resnet \u5c42\u6570</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u56fe\u50cf\u4e2d\u7684\u901a\u9053\u6570</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u5d4c\u5165\u7a7a\u95f4\u4e2d\u7684\u901a\u9053\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of channels in the first convolution layer </li>\n<li><span translate=no>_^_1_^_</span> are the multiplicative factors for the number of channels in the subsequent blocks </li>\n<li><span translate=no>_^_2_^_</span> is the number of resnet layers at each resolution </li>\n<li><span translate=no>_^_3_^_</span> is the number of channels in the image </li>\n<li><span translate=no>_^_4_^_</span> is the number of channels in the embedding space</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u7b2c\u4e00\u4e2a\u5377\u79ef\u5c42\u4e2d\u7684\u901a\u9053\u6570</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u540e\u7eed\u533a\u7ec4\u4e2d\u4fe1\u9053\u6570\u91cf\u7684\u4e58\u6cd5\u56e0\u5b50</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u6bcf\u79cd\u5206\u8fa8\u7387\u4e0b\u7684 resnet \u5c42\u6570</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u56fe\u50cf\u4e2d\u7684\u901a\u9053\u6570</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u5d4c\u5165\u7a7a\u95f4\u4e2d\u7684\u901a\u9053\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of channels in the input </li>\n<li><span translate=no>_^_1_^_</span> is the number of channels in the output</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8f93\u5165\u4e2d\u7684\u901a\u9053\u6570</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u8f93\u51fa\u4e2d\u7684\u901a\u9053\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of channels</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u9891\u9053\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the tensor of shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62\u72b6\u7684\u5f20\u91cf<span translate=no>_^_1_^_</span></li></ul>\n",
"Annotated PyTorch implementation/tutorial of the autoencoder for stable diffusion.": "\u5e26\u6709\u6ce8\u91ca\u7684\u81ea\u52a8\u7f16\u7801\u5668\u7684 PyTorch \u5b9e\u73b0/\u6559\u7a0b\uff0c\u7528\u4e8e\u7a33\u5b9a\u6269\u6563\u3002",
"Autoencoder for Stable Diffusion": "\u7528\u4e8e\u7a33\u5b9a\u6269\u6563\u7684\u81ea\u52a8\u7f16\u7801\u5668"
}
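The autoencoder strings above describe computing moments (mean and log variance), clamping the log variance, and sampling. A hedged sketch of that Gaussian distribution; the clamp bounds follow the usual CompVis convention and should be treated as an assumption:

```python
import torch

class GaussianDistribution:
    """Distribution returned by the encoder: sample z = mean + std * eps."""
    def __init__(self, moments: torch.Tensor):
        # Split mean and log of variance along the channel dimension
        self.mean, log_var = torch.chunk(moments, 2, dim=1)
        # Clamp the log of variances for numerical stability (bounds assumed)
        self.log_var = torch.clamp(log_var, -30.0, 20.0)
        self.std = torch.exp(0.5 * self.log_var)

    def sample(self) -> torch.Tensor:
        # Reparameterized sample from N(mean, std^2)
        return self.mean + self.std * torch.randn_like(self.std)

# Usage: moments come from the encoder's final convolution with 2 * z_channels channels
moments = torch.randn(1, 8, 32, 32)          # e.g. z_channels = 4
z = GaussianDistribution(moments).sample()   # shape [1, 4, 32, 32]
```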

View File

@@ -0,0 +1,13 @@
{
"<h1>CLIP Text Embedder</h1>\n<p>This is used to get prompt embeddings for <a href=\"../index.html\">stable diffusion</a>. It uses HuggingFace Transformers CLIP model.</p>\n": "<h1>CLIP \u6587\u672c\u5d4c\u5165\u5668</h1>\n<p>\u8fd9\u7528\u4e8e\u83b7\u53d6\u63d0\u793a\u5d4c\u5165\u4ee5\u5b9e\u73b0<a href=\"../index.html\">\u7a33\u5b9a\u7684\u6269\u6563</a>\u3002\u5b83\u4f7f\u7528 HuggingFace \u53d8\u5f62\u91d1\u521a CLIP \u6a21\u578b\u3002</p>\n",
"<h2>CLIP Text Embedder</h2>\n": "<h2>CLIP \u6587\u672c\u5d4c\u5165\u5668</h2>\n",
"<p>Get CLIP embeddings </p>\n": "<p>\u83b7\u53d6 CLIP \u5d4c\u5165\u5185\u5bb9</p>\n",
"<p>Get token ids </p>\n": "<p>\u83b7\u53d6\u4ee3\u5e01 ID</p>\n",
"<p>Load the CLIP transformer </p>\n": "<p>\u52a0\u8f7d CLIP \u53d8\u538b\u5668</p>\n",
"<p>Load the tokenizer </p>\n": "<p>\u52a0\u8f7d\u4ee3\u5e01\u751f\u6210\u5668</p>\n",
"<p>Tokenize the prompts </p>\n": "<p>\u5bf9\u63d0\u793a\u8fdb\u884c\u6807\u8bb0\u5316</p>\n",
"<ul><li><span translate=no>_^_0_^_</span> are the list of prompts to embed</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8981\u5d4c\u5165\u7684\u63d0\u793a\u5217\u8868</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the model version </li>\n<li><span translate=no>_^_1_^_</span> is the device </li>\n<li><span translate=no>_^_2_^_</span> is the max length of the tokenized prompt</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u6a21\u578b\u7248\u672c</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u8bbe\u5907\u5417</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u6807\u8bb0\u5316\u63d0\u793a\u7684\u6700\u5927\u957f\u5ea6</li></ul>\n",
"CLIP Text Embedder": "CLIP \u6587\u672c\u5d4c\u5165\u5668",
"CLIP embedder to get prompt embeddings for stable diffusion": "CLIP \u5d4c\u5165\u5668\u53ef\u83b7\u5f97\u63d0\u793a\u6027\u5d4c\u5165\u4ee5\u5b9e\u73b0\u7a33\u5b9a\u7684\u6269\u6563"
}
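Per the strings above, the embedder tokenizes the prompts and runs a HuggingFace Transformers CLIP text model. A minimal sketch, assuming the `openai/clip-vit-large-patch14` checkpoint and a max prompt length of 77; both values are assumptions here, not taken from this diff:

```python
import torch
from torch import nn
from transformers import CLIPTokenizer, CLIPTextModel

class CLIPTextEmbedder(nn.Module):
    def __init__(self, version: str = "openai/clip-vit-large-patch14",
                 device: str = "cpu", max_length: int = 77):
        super().__init__()
        self.tokenizer = CLIPTokenizer.from_pretrained(version)           # load the tokenizer
        self.transformer = CLIPTextModel.from_pretrained(version).eval()  # load the CLIP transformer
        self.device = device
        self.max_length = max_length

    @torch.no_grad()
    def forward(self, prompts: list[str]) -> torch.Tensor:
        # Tokenize the prompts, padding/truncating to max_length
        batch = self.tokenizer(prompts, truncation=True, max_length=self.max_length,
                               padding="max_length", return_tensors="pt")
        # Get CLIP embeddings: one vector per token position
        return self.transformer(input_ids=batch["input_ids"].to(self.device)).last_hidden_state

# embedder = CLIPTextEmbedder()
# cond = embedder(["a photograph of an astronaut riding a horse"])  # [1, 77, 768]
```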

View File

@@ -0,0 +1,60 @@
{
"<h1>U-Net for <a href=\"../index.html\">Stable Diffusion</a></h1>\n<p>This implements the U-Net that gives <span translate=no>_^_0_^_</span></p>\n<p>We have kept to the model definition and naming unchanged from <a href=\"https://github.com/CompVis/stable-diffusion\">CompVis/stable-diffusion</a> so that we can load the checkpoints directly.</p>\n": "<h1>U-Net \u7528\u4e8e<a href=\"../index.html\">\u7a33\u5b9a\u6269\u6563</a></h1>\n<p>\u8fd9\u5b9e\u73b0\u4e86 U-Net\uff0c\u5b83\u53ef\u4ee5\u7ed9\u51fa<span translate=no>_^_0_^_</span></p>\n<p>\u6211\u4eec\u4fdd\u6301\u4e86 <a href=\"https://github.com/CompVis/stable-diffusion\">compvis/Stable-Difusi</a> on \u7684\u6a21\u578b\u5b9a\u4e49\u548c\u547d\u540d\u4e0d\u53d8\uff0c\u8fd9\u6837\u6211\u4eec\u5c31\u53ef\u4ee5\u76f4\u63a5\u52a0\u8f7d\u68c0\u67e5\u70b9\u3002</p>\n",
"<h2>Create sinusoidal time step embeddings</h2>\n<ul><li><span translate=no>_^_0_^_</span> are the time steps of shape <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> controls the minimum frequency of the embeddings.</li></ul>\n": "<h2>\u521b\u5efa\u6b63\u5f26\u65f6\u95f4\u6b65\u957f\u5d4c\u5165</h2>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62\u72b6\u7684\u65f6\u95f4\u6b65\u957f<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u63a7\u5236\u5d4c\u5165\u7684\u6700\u5c0f\u9891\u7387\u3002</li></ul>\n",
"<h2>Down-sampling layer</h2>\n": "<h2>\u5411\u4e0b\u91c7\u6837\u5c42</h2>\n",
"<h2>ResNet Block</h2>\n": "<h2>ResNet \u533a\u5757</h2>\n",
"<h2>U-Net model</h2>\n": "<h2>U-Net \u6a21\u578b</h2>\n",
"<h3>Group normalization with float32 casting</h3>\n": "<h3>\u4f7f\u7528 float32 \u5f3a\u5236\u8f6c\u6362\u8fdb\u884c\u5206\u7ec4\u5f52\u4e00\u5316</h3>\n",
"<h3>Group normalization</h3>\n<p>This is a helper function, with fixed number of groups..</p>\n": "<h3>\u7fa4\u7ec4\u6807\u51c6\u5316</h3>\n<p>\u8fd9\u662f\u4e00\u4e2a\u8f85\u52a9\u51fd\u6570\uff0c\u5177\u6709\u56fa\u5b9a\u6570\u91cf\u7684\u7ec4\u3002</p>\n",
"<h3>Sequential block for modules with different inputs</h3>\n<p>This sequential module can compose of different modules suck as <span translate=no>_^_0_^_</span>, <span translate=no>_^_1_^_</span> and <span translate=no>_^_2_^_</span> and calls them with the matching signatures</p>\n": "<h3>\u5177\u6709\u4e0d\u540c\u8f93\u5165\u7684\u6a21\u5757\u7684\u987a\u5e8f\u6a21\u5757</h3>\n<p>\u8fd9\u4e2a\u987a\u5e8f\u6a21\u5757\u53ef\u4ee5\u7531\u4e0d\u540c\u7684\u6a21\u5757\u7ec4\u6210\uff0c\u5f88\u7cdf\u7cd5<span translate=no>_^_0_^_</span>\uff0c<span translate=no>_^_1_^_</span><span translate=no>_^_2_^_</span>\u5e76\u4f7f\u7528\u5339\u914d\u7684\u7b7e\u540d\u8c03\u7528\u5b83\u4eec</p>\n",
"<h3>Up-sampling layer</h3>\n": "<h3>\u5411\u4e0a\u91c7\u6837\u5c42</h3>\n",
"<p> </p>\n": "<p></p>\n",
"<p> Test sinusoidal time step embeddings</p>\n": "<p>\u6d4b\u8bd5\u6b63\u5f26\u65f6\u95f4\u6b65\u957f\u5d4c\u5165</p>\n",
"<p><span translate=no>_^_0_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span></p>\n",
"<p><span translate=no>_^_0_^_</span> and <span translate=no>_^_1_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span>\u548c<span translate=no>_^_1_^_</span></p>\n",
"<p><span translate=no>_^_0_^_</span> convolution mapping </p>\n": "<p><span translate=no>_^_0_^_</span>\u5377\u79ef\u6620\u5c04</p>\n",
"<p><span translate=no>_^_0_^_</span> convolution with stride length of <span translate=no>_^_1_^_</span> to down-sample by a factor of <span translate=no>_^_2_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span>\u5377\u79ef\uff0c\u6b65\u957f\u4e3a<span translate=no>_^_1_^_</span>\u5411\u4e0b\u91c7\u6837\u7684\u7cfb\u6570\u4e3a<span translate=no>_^_2_^_</span></p>\n",
"<p><span translate=no>_^_0_^_</span> not specified </p>\n": "<p><span translate=no>_^_0_^_</span>\u672a\u6307\u5b9a</p>\n",
"<p><span translate=no>_^_0_^_</span> to <span translate=no>_^_1_^_</span> mapping layer for residual connection </p>\n": "<p><span translate=no>_^_0_^_</span>\u5230\u5269\u4f59\u8fde\u63a5\u7684<span translate=no>_^_1_^_</span>\u6620\u5c04\u5c42</p>\n",
"<p><span translate=no>_^_0_^_</span>; half the channels are sin and the other half is cos, </p>\n": "<p><span translate=no>_^_0_^_</span>; \u4e00\u534a\u7684\u9891\u9053\u662f\u7f6a\u6076\u53e6\u4e00\u534a\u662f cos\uff0c</p>\n",
"<p>Add skip connection </p>\n": "<p>\u6dfb\u52a0\u8df3\u8fc7\u8fde\u63a5</p>\n",
"<p>Add the residual blocks and attentions </p>\n": "<p>\u6dfb\u52a0\u6b8b\u7559\u65b9\u5757\u548c\u6ce8\u610f\u529b</p>\n",
"<p>Add them to the input half of the U-Net and keep track of the number of channels of its output </p>\n": "<p>\u5c06\u5b83\u4eec\u52a0\u5230 U-Net \u7684\u8f93\u5165\u534a\u90e8\u5206\uff0c\u5e76\u8ddf\u8e2a\u5176\u8f93\u51fa\u7684\u901a\u9053\u6570</p>\n",
"<p>Add time step embeddings </p>\n": "<p>\u6dfb\u52a0\u65f6\u95f4\u6b65\u957f\u5d4c\u5165</p>\n",
"<p>Add to the output half of the U-Net </p>\n": "<p>\u5c06 U-Net \u7684\u4e00\u534a\u52a0\u5230\u8f93\u51fa\u4e2d</p>\n",
"<p>Add transformer </p>\n": "<p>\u6dfb\u52a0\u53d8\u538b\u5668</p>\n",
"<p>Apply convolution </p>\n": "<p>\u5e94\u7528\u5377\u79ef</p>\n",
"<p>Down sample at all levels except last </p>\n": "<p>\u9664\u6700\u540e\u4e00\u4e2a\u5173\u5361\u5916\uff0c\u6240\u6709\u7ea7\u522b\u5747\u5411\u4e0b\u91c7\u6837</p>\n",
"<p>Final convolution </p>\n": "<p>\u6700\u540e\u7684\u5377\u79ef</p>\n",
"<p>Final convolution layer </p>\n": "<p>\u6700\u7ec8\u5377\u79ef\u5c42</p>\n",
"<p>Final normalization and <span translate=no>_^_0_^_</span> convolution </p>\n": "<p>\u6700\u7ec8\u6807\u51c6\u5316\u548c<span translate=no>_^_0_^_</span>\u5377\u79ef</p>\n",
"<p>First normalization and convolution </p>\n": "<p>\u7b2c\u4e00\u6b21\u5f52\u4e00\u5316\u548c\u5377\u79ef</p>\n",
"<p>Get time step embeddings </p>\n": "<p>\u83b7\u53d6\u65f6\u95f4\u6b65\u957f\u5d4c\u5165\u4fe1\u606f</p>\n",
"<p>Initial <span translate=no>_^_0_^_</span> convolution that maps the input to <span translate=no>_^_1_^_</span>. The blocks are wrapped in <span translate=no>_^_2_^_</span> module because different modules have different forward function signatures; for example, convolution only accepts the feature map and residual blocks accept the feature map and time embedding. <span translate=no>_^_3_^_</span> calls them accordingly. </p>\n": "\u5c06@@ <p>\u8f93\u5165\u6620\u5c04\u5230\u7684\u521d\u59cb<span translate=no>_^_0_^_</span>\u5377\u79ef<span translate=no>_^_1_^_</span>\u3002\u8fd9\u4e9b\u65b9\u5757\u88ab\u5c01\u88c5\u5728<span translate=no>_^_2_^_</span>\u6a21\u5757\u4e2d\uff0c\u56e0\u4e3a\u4e0d\u540c\u7684\u6a21\u5757\u5177\u6709\u4e0d\u540c\u7684\u6b63\u5411\u51fd\u6570\u7b7e\u540d\uff1b\u4f8b\u5982\uff0c\u5377\u79ef\u4ec5\u63a5\u53d7\u7279\u5f81\u56fe\uff0c\u800c\u5269\u4f59\u5757\u63a5\u53d7\u7279\u5f81\u56fe\u548c\u65f6\u95f4\u5d4c\u5165\u3002<span translate=no>_^_3_^_</span>\u76f8\u5e94\u5730\u7ed9\u4ed6\u4eec\u6253\u7535\u8bdd\u3002</p>\n",
"<p>Initial convolution </p>\n": "<p>\u521d\u59cb\u5377\u79ef</p>\n",
"<p>Input half of the U-Net </p>\n": "<p>\u8f93\u5165 U-Net \u7684\u4e00\u534a</p>\n",
"<p>Middle of the U-Net </p>\n": "<p>U-Net \u7684\u4e2d\u95f4</p>\n",
"<p>Number of channels at each block in the input half of U-Net </p>\n": "<p>U-Net \u8f93\u5165\u534a\u90e8\u5206\u4e2d\u6bcf\u4e2a\u6a21\u5757\u7684\u4fe1\u9053\u6570</p>\n",
"<p>Number of channels at each level </p>\n": "<p>\u6bcf\u4e2a\u7ea7\u522b\u7684\u9891\u9053\u6570</p>\n",
"<p>Number of levels </p>\n": "<p>\u5173\u5361\u6570</p>\n",
"<p>Output half of the U-Net </p>\n": "<p>\u8f93\u51fa U-Net \u7684\u4e00\u534a</p>\n",
"<p>Prepare levels </p>\n": "<p>\u51c6\u5907\u5173\u5361</p>\n",
"<p>Prepare levels in reverse order </p>\n": "<p>\u6309\u76f8\u53cd\u7684\u987a\u5e8f\u51c6\u5907\u5173\u5361</p>\n",
"<p>Residual block maps from previous number of channels plus the skip connections from the input half of U-Net to the number of channels in the current level. </p>\n": "<p>\u6b8b\u5dee\u65b9\u5757\u4ece\u5148\u524d\u7684\u4fe1\u9053\u6570\u52a0\u4e0a\u4ece U-Net \u7684\u8f93\u5165\u4e00\u534a\u7684\u8df3\u8fc7\u8fde\u63a5\u6620\u5c04\u5230\u5f53\u524d\u5173\u5361\u4e2d\u7684\u4fe1\u9053\u6570\u3002</p>\n",
"<p>Residual block maps from previous number of channels to the number of channels in the current level </p>\n": "<p>\u6b8b\u5dee\u65b9\u5757\u4ece\u5148\u524d\u7684\u901a\u9053\u6570\u6620\u5c04\u5230\u5f53\u524d\u5173\u5361\u4e2d\u7684\u901a\u9053\u6570</p>\n",
"<p>Second half of the U-Net </p>\n": "<p>U-Net \u7684\u540e\u534a\u90e8\u5206</p>\n",
"<p>Size time embeddings </p>\n": "<p>\u8c03\u6574\u65f6\u95f4\u5d4c\u5165\u7684\u5927\u5c0f</p>\n",
"<p>The middle of the U-Net </p>\n": "<p>U-Net \u7684\u4e2d\u95f4</p>\n",
"<p>Time step embeddings </p>\n": "<p>\u65f6\u95f4\u6b65\u957f\u5d4c\u5165</p>\n",
"<p>To store the input half outputs for skip connections </p>\n": "<p>\u5b58\u50a8\u8df3\u8fc7\u8fde\u63a5\u7684\u8f93\u5165\u534a\u8f93\u51fa</p>\n",
"<p>Up-sample at every level after last residual block except the last one. Note that we are iterating in reverse; i.e. <span translate=no>_^_0_^_</span> is the last. </p>\n": "<p>\u5728\u6700\u540e\u4e00\u4e2a\u6b8b\u5dee\u65b9\u5757\u4e4b\u540e\u7684\u6bcf\u4e2a\u7b49\u7ea7\u4e0a\u91c7\u6837\uff0c\u6700\u540e\u4e00\u4e2a\u533a\u5757\u9664\u5916\u3002\u8bf7\u6ce8\u610f\uff0c\u6211\u4eec\u5728\u53cd\u5411\u8fed\u4ee3\uff1b<span translate=no>_^_0_^_</span>\u5373\u6700\u540e\u4e00\u6b21\u3002</p>\n",
"<p>Up-sample by a factor of <span translate=no>_^_0_^_</span> </p>\n": "<p>\u6309\u7cfb\u6570\u5411\u4e0a\u91c7\u6837<span translate=no>_^_0_^_</span></p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the input feature map of shape <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> are the time steps of shape <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> conditioning of shape <span translate=no>_^_5_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62\u72b6\u7684\u8f93\u5165\u7279\u5f81\u56fe<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u5f62\u72b6\u7684\u65f6\u95f4\u6b65\u957f<span translate=no>_^_3_^_</span></li>\n</ul><li><span translate=no>_^_4_^_</span>\u5f62\u72b6\u8c03\u8282<span translate=no>_^_5_^_</span></li>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the input feature map with shape <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> is the time step embeddings of shape <span translate=no>_^_3_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5e26\u6709\u5f62\u72b6\u7684\u8f93\u5165\u8981\u7d20\u56fe<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u5f62\u72b6\u7684\u65f6\u95f4\u6b65\u957f\u5d4c\u5165<span translate=no>_^_3_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the input feature map with shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5e26\u6709\u5f62\u72b6\u7684\u8f93\u5165\u8981\u7d20\u56fe<span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of channels in the input feature map </li>\n<li><span translate=no>_^_1_^_</span> is the number of channels in the output feature map </li>\n<li><span translate=no>_^_2_^_</span> is the base channel count for the model </li>\n<li><span translate=no>_^_3_^_</span> number of residual blocks at each level </li>\n<li><span translate=no>_^_4_^_</span> are the levels at which attention should be performed </li>\n<li><span translate=no>_^_5_^_</span> are the multiplicative factors for number of channels for each level </li>\n<li><span translate=no>_^_6_^_</span> the number of attention heads in the transformers</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8f93\u5165\u7279\u5f81\u56fe\u4e2d\u7684\u901a\u9053\u6570</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u8f93\u51fa\u7279\u5f81\u56fe\u4e2d\u7684\u901a\u9053\u6570</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u8be5\u6a21\u578b\u7684\u57fa\u672c\u4fe1\u9053\u6570</li>\n<li><span translate=no>_^_3_^_</span>\u6bcf\u7ea7\u5269\u4f59\u65b9\u5757\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u5e94\u6ce8\u610f\u7684\u7ea7\u522b</li>\n<li><span translate=no>_^_5_^_</span>\u662f\u6bcf\u4e2a\u7ea7\u522b\u4fe1\u9053\u6570\u7684\u4e58\u6cd5\u56e0\u5b50</li>\n<li><span translate=no>_^_6_^_</span>\u53d8\u538b\u5668\u4e2d\u6ce8\u610f\u529b\u7684\u6570\u91cf</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of channels</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u9891\u9053\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> the number of input channels </li>\n<li><span translate=no>_^_1_^_</span> the size of timestep embeddings </li>\n<li><span translate=no>_^_2_^_</span> is the number of out channels. defaults to `channels.</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u8f93\u5165\u901a\u9053\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_1_^_</span>\u65f6\u95f4\u6b65\u5d4c\u5165\u7684\u5927\u5c0f</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u8f93\u51fa\u4fe1\u9053\u7684\u6570\u91cf\u3002\u9ed8\u8ba4\u4e3a `channels\u3002</li></ul>\n",
"Annotated PyTorch implementation/tutorial of the U-Net in stable diffusion.": "\u5e26\u6ce8\u91ca\u7684 U-Net \u7a33\u5b9a\u6269\u6563\u7248 PyTorch \u5b9e\u73b0/\u6559\u7a0b\u3002",
"U-Net for Stable Diffusion": "U-Net \u7528\u4e8e\u7a33\u5b9a\u6269\u6563"
}
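
The entries above document the U-Net's sinusoidal time step embeddings; a minimal sketch under the usual DDPM formulation (illustrative only; the repository's exact sin/cos ordering may differ):

import math
import torch

def timestep_embedding(time_steps: torch.Tensor, channels: int,
                       max_period: int = 10000) -> torch.Tensor:
    # time_steps has shape [batch_size]; `max_period` controls the
    # minimum frequency of the embeddings
    half = channels // 2
    # Geometrically spaced frequencies from 1 down to 1/max_period
    frequencies = torch.exp(
        -math.log(max_period) * torch.arange(half, dtype=torch.float32) / half
    ).to(time_steps.device)
    args = time_steps[:, None].float() * frequencies[None, :]
    # Half the channels are cos and the other half sin: [batch_size, channels]
    return torch.cat([torch.cos(args), torch.sin(args)], dim=-1)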

View File

@ -0,0 +1,63 @@
{
"<h1>Transformer for Stable Diffusion <a href=\"unet.html\">U-Net</a></h1>\n<p>This implements the transformer module used in <a href=\"unet.html\">U-Net</a> that gives <span translate=no>_^_0_^_</span></p>\n<p>We have kept to the model definition and naming unchanged from <a href=\"https://github.com/CompVis/stable-diffusion\">CompVis/stable-diffusion</a> so that we can load the checkpoints directly.</p>\n": "<h1>\u7528\u4e8e\u7a33\u5b9a\u6269\u6563 <a href=\"unet.html\">U-Net</a> \u7684\u53d8\u538b\u5668</h1>\n<p>\u8fd9\u5b9e\u73b0\u4e86 <a href=\"unet.html\">U-Net</a> \u4e2d\u4f7f\u7528\u7684\u53d8\u538b\u5668\u6a21\u5757\uff0c\u5b83\u63d0\u4f9b<span translate=no>_^_0_^_</span></p>\n<p>\u6211\u4eec\u4fdd\u6301\u4e86 <a href=\"https://github.com/CompVis/stable-diffusion\">compvis/Stable-Difusi</a> on \u7684\u6a21\u578b\u5b9a\u4e49\u548c\u547d\u540d\u4e0d\u53d8\uff0c\u8fd9\u6837\u6211\u4eec\u5c31\u53ef\u4ee5\u76f4\u63a5\u52a0\u8f7d\u68c0\u67e5\u70b9\u3002</p>\n",
"<h2>Spatial Transformer</h2>\n": "<h2>\u7a7a\u95f4\u53d8\u538b\u5668</h2>\n",
"<h3>Cross Attention Layer</h3>\n<p>This falls-back to self-attention when conditional embeddings are not specified.</p>\n": "<h3>\u4ea4\u53c9\u6ce8\u610f\u529b\u5c42</h3>\n<p>\u5f53\u672a\u6307\u5b9a\u6761\u4ef6\u5d4c\u5165\u65f6\uff0c\u8fd9\u4f1a\u56de\u5f52\u5230\u81ea\u6211\u6ce8\u610f\u529b\u3002</p>\n",
"<h3>Feed-Forward Network</h3>\n": "<h3>\u524d\u9988\u7f51\u7edc</h3>\n",
"<h3>GeGLU Activation</h3>\n<p><span translate=no>_^_0_^_</span></p>\n": "<h3>\u6fc0\u6d3b GegLU</h3>\n<p><span translate=no>_^_0_^_</span></p>\n",
"<h3>Transformer Layer</h3>\n": "<h3>\u53d8\u538b\u5668\u5c42</h3>\n",
"<h4>Flash Attention</h4>\n<ul><li><span translate=no>_^_0_^_</span> are the query vectors before splitting heads, of shape <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> are the query vectors before splitting heads, of shape <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> are the query vectors before splitting heads, of shape <span translate=no>_^_5_^_</span></li></ul>\n": "<h4>Flash \u6ce8\u610f</h4>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u5206\u5272\u5934\u90e8\u4e4b\u524d\u7684\u67e5\u8be2\u5411\u91cf\uff0c\u5f62\u72b6\u4e3a<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u5206\u5272\u5934\u90e8\u4e4b\u524d\u7684\u67e5\u8be2\u5411\u91cf\uff0c\u5f62\u72b6\u4e3a<span translate=no>_^_3_^_</span></li>\n</ul><li><span translate=no>_^_4_^_</span>\u662f\u5206\u5272\u5934\u90e8\u4e4b\u524d\u7684\u67e5\u8be2\u5411\u91cf\uff0c\u5f62\u72b6\u4e3a<span translate=no>_^_5_^_</span></li>\n",
"<h4>Normal Attention</h4>\n<ul><li><span translate=no>_^_0_^_</span> are the query vectors before splitting heads, of shape <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> are the query vectors before splitting heads, of shape <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> are the query vectors before splitting heads, of shape <span translate=no>_^_5_^_</span></li></ul>\n": "<h4>\u6b63\u5e38\u6ce8\u610f\u529b</h4>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u5206\u5272\u5934\u90e8\u4e4b\u524d\u7684\u67e5\u8be2\u5411\u91cf\uff0c\u5f62\u72b6\u4e3a<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u5206\u5272\u5934\u90e8\u4e4b\u524d\u7684\u67e5\u8be2\u5411\u91cf\uff0c\u5f62\u72b6\u4e3a<span translate=no>_^_3_^_</span></li>\n</ul><li><span translate=no>_^_4_^_</span>\u662f\u5206\u5272\u5934\u90e8\u4e4b\u524d\u7684\u67e5\u8be2\u5411\u91cf\uff0c\u5f62\u72b6\u4e3a<span translate=no>_^_5_^_</span></li>\n",
"<p> </p>\n": "<p></p>\n",
"<p><span translate=no>_^_0_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span></p>\n",
"<p>Add residual </p>\n": "<p>\u6dfb\u52a0\u6b8b\u5dee</p>\n",
"<p>Apply the transformer layers </p>\n": "<p>\u5e94\u7528\u53d8\u538b\u5668\u5c42</p>\n",
"<p>Attention scaling factor </p>\n": "<p>\u6ce8\u610f\u529b\u7f29\u653e\u7cfb\u6570</p>\n",
"<p>Calculate attention <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8ba1\u7b97\u6ce8\u610f\u529b<span translate=no>_^_0_^_</span></p>\n",
"<p>Combined linear projections <span translate=no>_^_0_^_</span> and <span translate=no>_^_1_^_</span> </p>\n": "<p>\u7ec4\u5408\u7ebf\u6027\u6295\u5f71<span translate=no>_^_0_^_</span>\u548c<span translate=no>_^_1_^_</span></p>\n",
"<p>Compute attention <span translate=no>_^_0_^_</span> This gives a tensor of shape <span translate=no>_^_1_^_</span> </p>\n": "<p>\u8ba1\u7b97\u6ce8\u610f\u529b<span translate=no>_^_0_^_</span>\u8fd9\u7ed9\u51fa\u4e86\u5f62\u72b6\u7684\u5f20\u91cf<span translate=no>_^_1_^_</span></p>\n",
"<p>Compute attention output <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8ba1\u7b97\u6ce8\u610f\u529b\u8f93\u51fa<span translate=no>_^_0_^_</span></p>\n",
"<p>Compute softmax <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8ba1\u7b97 softmax<span translate=no>_^_0_^_</span></p>\n",
"<p>Cross attention layer and pre-norm layer </p>\n": "<p>\u4ea4\u53c9\u6ce8\u610f\u529b\u5c42\u548c\u9884\u89c4\u8303\u5c42</p>\n",
"<p>Cross-attention with conditioning </p>\n": "<p>\u4ea4\u53c9\u6ce8\u610f\u529b\u4e0e\u8c03\u8282</p>\n",
"<p>Feed-forward network </p>\n": "<p>\u524d\u9988\u7f51\u7edc</p>\n",
"<p>Feed-forward network and pre-norm layer </p>\n": "<p>\u524d\u9988\u7f51\u7edc\u548c\u9884\u89c4\u8303\u5c42</p>\n",
"<p>Final <span translate=no>_^_0_^_</span> convolution </p>\n": "<p>\u6700\u540e\u7684<span translate=no>_^_0_^_</span>\u5377\u79ef</p>\n",
"<p>Final linear layer </p>\n": "<p>\u6700\u540e\u7684\u7ebf\u6027\u5c42</p>\n",
"<p>Flash attention works for head sizes <span translate=no>_^_0_^_</span>, <span translate=no>_^_1_^_</span> and <span translate=no>_^_2_^_</span>, so we have to pad the heads to fit this size. </p>\n": "<p>Flash \u6ce8\u610f\u529b\u9002\u7528\u4e8e\u5934\u90e8\u5c3a\u5bf8<span translate=no>_^_0_^_</span><span translate=no>_^_2_^_</span>\uff0c<span translate=no>_^_1_^_</span>\u800c\u4e14\uff0c\u56e0\u6b64\u6211\u4eec\u5fc5\u987b\u57ab\u4f4f\u5934\u90e8\u624d\u80fd\u9002\u5408\u8fd9\u4e2a\u5c3a\u5bf8\u3002</p>\n",
"<p>For residual connection </p>\n": "<p>\u7528\u4e8e\u5269\u4f59\u8fde\u63a5</p>\n",
"<p>Get <span translate=no>_^_0_^_</span> and <span translate=no>_^_1_^_</span> </p>\n": "<p>\u83b7\u53d6<span translate=no>_^_0_^_</span>\u548c<span translate=no>_^_1_^_</span></p>\n",
"<p>Get batch size and number of elements along sequence axis (<span translate=no>_^_0_^_</span>) </p>\n": "<p>\u6cbf\u5e8f\u5217\u8f74\u83b7\u53d6\u6279\u91cf\u5927\u5c0f\u548c\u5143\u7d20\u6570\u91cf (<span translate=no>_^_0_^_</span>)</p>\n",
"<p>Get query, key and value vectors </p>\n": "<p>\u83b7\u53d6\u67e5\u8be2\u5411\u91cf\u3001\u952e\u5411\u91cf\u548c\u503c\u5411\u91cf</p>\n",
"<p>Get shape <span translate=no>_^_0_^_</span> </p>\n": "<p>\u5851\u9020\u8eab\u5f62<span translate=no>_^_0_^_</span></p>\n",
"<p>If <span translate=no>_^_0_^_</span> is <span translate=no>_^_1_^_</span> we perform self attention </p>\n": "<p>\u5982\u679c<span translate=no>_^_0_^_</span>\u662f\uff0c<span translate=no>_^_1_^_</span>\u6211\u4eec\u8fdb\u884c\u81ea\u6211\u5173\u6ce8</p>\n",
"<p>Initial <span translate=no>_^_0_^_</span> convolution </p>\n": "<p>\u521d\u59cb<span translate=no>_^_0_^_</span>\u5377\u79ef</p>\n",
"<p>Initial group normalization </p>\n": "<p>\u521d\u59cb\u7fa4\u7ec4\u6807\u51c6\u5316</p>\n",
"<p>Map to <span translate=no>_^_0_^_</span> with a linear layer </p>\n": "<p><span translate=no>_^_0_^_</span>\u4f7f\u7528\u7ebf\u6027\u56fe\u5c42\u6620\u5c04\u5230</p>\n",
"<p>Normalize </p>\n": "<p>\u6807\u51c6\u5316</p>\n",
"<p>Otherwise, fallback to normal attention </p>\n": "<p>\u5426\u5219\uff0c\u56de\u9000\u5230\u6b63\u5e38\u7684\u6ce8\u610f\u529b\u4e0a</p>\n",
"<p>Pad the heads </p>\n": "<p>\u57ab\u4f4f\u5934\u90e8</p>\n",
"<p>Query, key and value mappings </p>\n": "<p>\u67e5\u8be2\u3001\u952e\u548c\u503c\u6620\u5c04</p>\n",
"<p>Reshape and transpose from <span translate=no>_^_0_^_</span> to <span translate=no>_^_1_^_</span> </p>\n": "<p>\u91cd\u5851\u5f62\u72b6\u5e76\u4ece\u53d8\u6362<span translate=no>_^_0_^_</span>\u4e3a<span translate=no>_^_1_^_</span></p>\n",
"<p>Reshape to <span translate=no>_^_0_^_</span> </p>\n": "<p>\u91cd\u5851\u4e3a<span translate=no>_^_0_^_</span></p>\n",
"<p>Self attention </p>\n": "<p>\u81ea\u6211\u6ce8\u610f\u529b</p>\n",
"<p>Self-attention layer and pre-norm layer </p>\n": "<p>\u81ea\u6211\u6ce8\u610f\u529b\u5c42\u548c\u9884\u89c4\u8303\u5c42</p>\n",
"<p>Set the scale for scaled dot-product attention. </p>\n": "<p>\u8bbe\u7f6e\u6309\u6bd4\u4f8b\u7f29\u653e\u70b9\u4ea7\u54c1\u6ce8\u610f\u529b\u7684\u6bd4\u4f8b\u3002</p>\n",
"<p>Set to <span translate=no>_^_0_^_</span> if it&#x27;s not installed </p>\n": "<p><span translate=no>_^_0_^_</span>\u5982\u679c\u672a\u5b89\u88c5\uff0c\u5219\u8bbe\u7f6e\u4e3a</p>\n",
"<p>Setup <a href=\"https://github.com/HazyResearch/flash-attention\">flash attention</a>. Flash attention is only used if it&#x27;s installed and <span translate=no>_^_0_^_</span> is set to <span translate=no>_^_1_^_</span>. </p>\n": "<p>\u8bbe\u7f6e<a href=\"https://github.com/HazyResearch/flash-attention\">\u95ea\u5149\u8b66\u793a</a>\u3002Flash \u6ce8\u610f\u53ea\u6709\u5728\u5b89\u88c5\u5e76\u8bbe\u7f6e<span translate=no>_^_0_^_</span>\u4e3a\u65f6\u624d\u4f1a\u4f7f\u7528<span translate=no>_^_1_^_</span>\u3002</p>\n",
"<p>Split the heads </p>\n": "<p>\u5206\u5f00\u8111\u888b</p>\n",
"<p>Split them to heads of shape <span translate=no>_^_0_^_</span> </p>\n": "<p>\u5c06\u5b83\u4eec\u5206\u6210\u5f62\u72b6\u7684\u5934\u90e8<span translate=no>_^_0_^_</span></p>\n",
"<p>Stack <span translate=no>_^_0_^_</span>, <span translate=no>_^_1_^_</span>, <span translate=no>_^_2_^_</span> vectors for flash attention, to get a single tensor of shape <span translate=no>_^_3_^_</span> </p>\n": "<p>\u5806\u53e0<span translate=no>_^_0_^_</span>\u3001<span translate=no>_^_1_^_</span>\u3001<span translate=no>_^_2_^_</span>\u5411\u91cf\u4ee5\u83b7\u5f97\u95ea\u5149\u6ce8\u610f\u529b\uff0c\u4ee5\u83b7\u5f97\u5355\u4e2a\u5f62\u72b6\u5f20\u91cf<span translate=no>_^_3_^_</span></p>\n",
"<p>Transformer layers </p>\n": "<p>\u53d8\u538b\u5668\u5c42</p>\n",
"<p>Transpose and reshape from <span translate=no>_^_0_^_</span> to <span translate=no>_^_1_^_</span> </p>\n": "<p>\u4ece\u5230<span translate=no>_^_0_^_</span>\u8f6c\u7f6e\u548c\u91cd\u5851<span translate=no>_^_1_^_</span></p>\n",
"<p>Truncate the extra head size </p>\n": "<p>\u622a\u65ad\u591a\u4f59\u7684\u5934\u90e8\u5c3a\u5bf8</p>\n",
"<p>Use flash attention if it&#x27;s available and the head size is less than or equal to <span translate=no>_^_0_^_</span> </p>\n": "<p>\u5982\u679c\u95ea\u5149\u706f\u6ce8\u610f\u529b\u53ef\u7528\u4e14\u5934\u90e8\u5927\u5c0f\u5c0f\u4e8e\u6216\u7b49\u4e8e\uff0c\u8bf7\u4f7f\u7528\u95ea\u5149\u8b66\u793a<span translate=no>_^_0_^_</span></p>\n",
"<p>You can install flash attention by cloning their Github repo, <a href=\"https://github.com/HazyResearch/flash-attention\">https://github.com/HazyResearch/flash-attention</a> and then running <span translate=no>_^_0_^_</span> </p>\n": "<p>\u4f60\u53ef\u4ee5\u901a\u8fc7\u514b\u9686\u4ed6\u4eec\u7684 Github \u5b58\u50a8\u5e93 <a href=\"https://github.com/HazyResearch/flash-attention\">https://github.com/HazyResearch/flash-attention</a> \u7136\u540e\u8fd0\u884c\u6765\u5b89\u88c5 Flash \u6ce8\u610f\u529b<span translate=no>_^_0_^_</span></p>\n",
"<ul><li><span translate=no>_^_0_^_</span> are the input embeddings of shape <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> is the conditional embeddings of shape <span translate=no>_^_3_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62\u72b6\u7684\u8f93\u5165\u5d4c\u5165<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u5f62\u72b6\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_3_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the feature map of shape <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> is the conditional embeddings of shape <span translate=no>_^_3_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62\u72b6\u7684\u7279\u5f81\u56fe<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u5f62\u72b6\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_3_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the input embedding size </li>\n<li><span translate=no>_^_1_^_</span> is multiplicative factor for the hidden layer size</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8f93\u5165\u5d4c\u5165\u5927\u5c0f</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u9690\u85cf\u5c42\u5927\u5c0f\u7684\u4e58\u6cd5\u56e0\u5b50</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the input embedding size </li>\n<li><span translate=no>_^_1_^_</span> is the number of attention heads </li>\n<li><span translate=no>_^_2_^_</span> is the size of a attention head </li>\n<li><span translate=no>_^_3_^_</span> is the size of the conditional embeddings </li>\n<li><span translate=no>_^_4_^_</span> specifies whether to perform the attention softmax computation inplace to save memory</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8f93\u5165\u5d4c\u5165\u5927\u5c0f</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u6ce8\u610f\u529b\u5934\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u6ce8\u610f\u529b\u5934\u7684\u5927\u5c0f</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u6761\u4ef6\u5d4c\u5165\u7684\u5927\u5c0f</li>\n<li><span translate=no>_^_4_^_</span>\u6307\u5b9a\u662f\u5426\u5c31\u5730\u6267\u884c\u6ce8\u610f\u529b softmax \u8ba1\u7b97\u4ee5\u8282\u7701\u5185\u5b58</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the input embedding size </li>\n<li><span translate=no>_^_1_^_</span> is the number of attention heads </li>\n<li><span translate=no>_^_2_^_</span> is the size of a attention head </li>\n<li><span translate=no>_^_3_^_</span> is the size of the conditional embeddings</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8f93\u5165\u5d4c\u5165\u5927\u5c0f</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u6ce8\u610f\u529b\u5934\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u6ce8\u610f\u529b\u5934\u7684\u5927\u5c0f</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u6761\u4ef6\u5d4c\u5165\u7684\u5927\u5c0f</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of channels in the feature map </li>\n<li><span translate=no>_^_1_^_</span> is the number of attention heads </li>\n<li><span translate=no>_^_2_^_</span> is the number of transformer layers </li>\n<li><span translate=no>_^_3_^_</span> is the size of the conditional embedding</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u529f\u80fd\u56fe\u4e2d\u7684\u9891\u9053\u6570</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u6ce8\u610f\u529b\u5934\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u53d8\u538b\u5668\u5c42\u6570</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u6761\u4ef6\u5d4c\u5165\u7684\u5927\u5c0f</li></ul>\n",
"Annotated PyTorch implementation/tutorial of the transformer for U-Net in stable diffusion.": "\u5e26\u6ce8\u91ca\u7684 U-Net \u7a33\u5b9a\u6269\u6563\u53d8\u538b\u5668\u7684 PyTorch \u5b9e\u73b0/\u6559\u7a0b\u3002",
"Transformer for Stable Diffusion U-Net": "\u7528\u4e8e\u7a33\u5b9a\u6269\u6563 U-Net \u7684\u53d8\u538b\u5668"
}
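
The cross-attention entries above can be summarized with a sketch; this is an illustrative module, not the repository's exact code, and it assumes d_cond equals d_model wherever the layer is used for self-attention:

import torch
import torch.nn as nn

class CrossAttention(nn.Module):
    """Falls back to self-attention when conditional embeddings are not given."""

    def __init__(self, d_model: int, d_cond: int, n_heads: int, d_head: int):
        super().__init__()
        self.n_heads, self.d_head = n_heads, d_head
        self.scale = d_head ** -0.5  # scale for scaled dot-product attention
        d_attn = n_heads * d_head
        self.to_q = nn.Linear(d_model, d_attn, bias=False)
        self.to_k = nn.Linear(d_cond, d_attn, bias=False)
        self.to_v = nn.Linear(d_cond, d_attn, bias=False)
        self.to_out = nn.Linear(d_attn, d_model)

    def forward(self, x: torch.Tensor, cond: torch.Tensor = None) -> torch.Tensor:
        # If `cond` is None we perform self-attention
        cond = x if cond is None else cond
        q, k, v = self.to_q(x), self.to_k(cond), self.to_v(cond)
        # Split the heads: [batch, seq, n_heads, d_head]
        q, k, v = (t.view(*t.shape[:2], self.n_heads, self.d_head) for t in (q, k, v))
        # Attention: softmax(Q K^T / sqrt(d_head)) V
        attn = (torch.einsum('bihd,bjhd->bhij', q, k) * self.scale).softmax(dim=-1)
        out = torch.einsum('bhij,bjhd->bihd', attn, v)
        # Merge the heads back and map to the model dimension
        return self.to_out(out.reshape(*out.shape[:2], -1))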

View File

@ -0,0 +1,19 @@
{
"<h1>Sampling algorithms for <a href=\"../index.html\">stable diffusion</a></h1>\n<p>We have implemented the following <a href=\"sampler/index.html\">sampling algorithms</a>:</p>\n<ul><li><a href=\"ddpm.html\">Denoising Diffusion Probabilistic Models (DDPM) Sampling</a> </li>\n<li><a href=\"ddim.html\">Denoising Diffusion Implicit Models (DDIM) Sampling</a></li></ul>\n": "<h1>\u7528\u4e8e<a href=\"../index.html\">\u7a33\u5b9a\u6269\u6563</a>\u7684\u91c7\u6837\u7b97\u6cd5</h1>\n<p>\u6211\u4eec\u5df2\u7ecf\u5b9e\u73b0\u4e86\u4ee5\u4e0b<a href=\"sampler/index.html\">\u91c7\u6837\u7b97\u6cd5</a>\uff1a</p>\n<ul><li><a href=\"ddpm.html\">\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM) \u91c7\u6837</a></li>\n<li><a href=\"ddim.html\">\u964d\u566a\u6269\u6563\u9690\u542b\u6a21\u578b (DDIM) \u91c7\u6837</a></li></ul>\n",
"<h2>Base class for sampling algorithms</h2>\n": "<h2>\u91c7\u6837\u7b97\u6cd5\u7684\u57fa\u7c7b</h2>\n",
"<h2>Get <span translate=no>_^_0_^_</span></h2>\n<ul><li><span translate=no>_^_1_^_</span> is <span translate=no>_^_2_^_</span> of shape <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> is <span translate=no>_^_5_^_</span> of shape <span translate=no>_^_6_^_</span> </li>\n<li><span translate=no>_^_7_^_</span> is the conditional embeddings <span translate=no>_^_8_^_</span> of shape <span translate=no>_^_9_^_</span> </li>\n<li><span translate=no>_^_10_^_</span> is the unconditional guidance scale <span translate=no>_^_11_^_</span>. This is used for <span translate=no>_^_12_^_</span> </li>\n<li><span translate=no>_^_13_^_</span> is the conditional embedding for empty prompt <span translate=no>_^_14_^_</span></li></ul>\n": "<h2>\u83b7\u53d6<span translate=no>_^_0_^_</span></h2>\n<ul><li><span translate=no>_^_1_^_</span>\u662f\u5f62<span translate=no>_^_2_^_</span>\u72b6\u7684<span translate=no>_^_3_^_</span></li>\n<li><span translate=no>_^_4_^_</span>\u662f\u5f62<span translate=no>_^_5_^_</span>\u72b6\u7684<span translate=no>_^_6_^_</span></li>\n<li><span translate=no>_^_7_^_</span>\u662f\u5f62\u72b6<span translate=no>_^_8_^_</span>\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_9_^_</span></li>\n<li><span translate=no>_^_10_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_11_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_12_^_</span></li>\n<li><span translate=no>_^_13_^_</span>\u662f\u7a7a\u63d0\u793a\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_14_^_</span></li></ul>\n",
"<h3>Painting Loop</h3>\n<ul><li><span translate=no>_^_0_^_</span> is <span translate=no>_^_1_^_</span> of shape <span translate=no>_^_2_^_</span> </li>\n<li><span translate=no>_^_3_^_</span> is the conditional embeddings <span translate=no>_^_4_^_</span> </li>\n<li><span translate=no>_^_5_^_</span> is the sampling step to start from, <span translate=no>_^_6_^_</span> </li>\n<li><span translate=no>_^_7_^_</span> is the original image in latent page which we are in paining. </li>\n<li><span translate=no>_^_8_^_</span> is the mask to keep the original image. </li>\n<li><span translate=no>_^_9_^_</span> is fixed noise to be added to the original image. </li>\n<li><span translate=no>_^_10_^_</span> is the unconditional guidance scale <span translate=no>_^_11_^_</span>. This is used for <span translate=no>_^_12_^_</span> </li>\n<li><span translate=no>_^_13_^_</span> is the conditional embedding for empty prompt <span translate=no>_^_14_^_</span></li></ul>\n": "<h3>\u7ed8\u753b\u5faa\u73af</h3>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62<span translate=no>_^_1_^_</span>\u72b6\u7684<span translate=no>_^_2_^_</span></li>\n<li><span translate=no>_^_3_^_</span>\u662f\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_4_^_</span></li>\n<li><span translate=no>_^_5_^_</span>\u662f\u5f00\u59cb\u65f6\u7684\u91c7\u6837\u6b65\u9aa4\uff0c<span translate=no>_^_6_^_</span></li>\n<li><span translate=no>_^_7_^_</span>\u662f\u6211\u4eec\u6b63\u5728\u7ed8\u5236\u7684\u6f5c\u5728\u9875\u9762\u4e2d\u7684\u539f\u59cb\u56fe\u50cf\u3002</li>\n<li><span translate=no>_^_8_^_</span>\u662f\u4fdd\u7559\u539f\u59cb\u56fe\u50cf\u7684\u63a9\u7801\u3002</li>\n<li><span translate=no>_^_9_^_</span>\u662f\u8981\u6dfb\u52a0\u5230\u539f\u59cb\u56fe\u50cf\u7684\u56fa\u5b9a\u566a\u70b9\u3002</li>\n<li><span translate=no>_^_10_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_11_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_12_^_</span></li>\n<li><span translate=no>_^_13_^_</span>\u662f\u7a7a\u63d0\u793a\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_14_^_</span></li></ul>\n",
"<h3>Sample from <span translate=no>_^_0_^_</span></h3>\n<ul><li><span translate=no>_^_1_^_</span> is <span translate=no>_^_2_^_</span> of shape <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> is the time step <span translate=no>_^_5_^_</span> index </li>\n<li><span translate=no>_^_6_^_</span> is the noise, <span translate=no>_^_7_^_</span></li></ul>\n": "<h3>\u6837\u672c\u6765\u81ea<span translate=no>_^_0_^_</span></h3>\n<ul><li><span translate=no>_^_1_^_</span>\u662f\u5f62<span translate=no>_^_2_^_</span>\u72b6\u7684<span translate=no>_^_3_^_</span></li>\n<li><span translate=no>_^_4_^_</span>\u662f\u65f6\u95f4\u6b65\u957f<span translate=no>_^_5_^_</span>\u6307\u6570</li>\n<li><span translate=no>_^_6_^_</span>\u662f\u566a\u97f3\uff0c<span translate=no>_^_7_^_</span></li></ul>\n",
"<h3>Sampling Loop</h3>\n<ul><li><span translate=no>_^_0_^_</span> is the shape of the generated images in the form <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> is the conditional embeddings <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> is the noise temperature (random noise gets multiplied by this) </li>\n<li><span translate=no>_^_5_^_</span> is <span translate=no>_^_6_^_</span>. If not provided random noise will be used. </li>\n<li><span translate=no>_^_7_^_</span> is the unconditional guidance scale <span translate=no>_^_8_^_</span>. This is used for <span translate=no>_^_9_^_</span> </li>\n<li><span translate=no>_^_10_^_</span> is the conditional embedding for empty prompt <span translate=no>_^_11_^_</span> </li>\n<li><span translate=no>_^_12_^_</span> is the number of time steps to skip.</li></ul>\n": "<h3>\u91c7\u6837\u56de\u8def</h3>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u8868\u5355\u4e2d\u751f\u6210\u7684\u56fe\u50cf\u7684\u5f62\u72b6<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_3_^_</span></li>\n<li><span translate=no>_^_4_^_</span>\u662f\u566a\u58f0\u6e29\u5ea6\uff08\u968f\u673a\u566a\u58f0\u4e58\u4ee5\u6b64\u503c\uff09</li>\n<li><span translate=no>_^_5_^_</span>\u662f<span translate=no>_^_6_^_</span>\u3002\u5982\u679c\u672a\u63d0\u4f9b\uff0c\u5c06\u4f7f\u7528\u968f\u673a\u566a\u58f0\u3002</li>\n<li><span translate=no>_^_7_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_8_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_9_^_</span></li>\n<li><span translate=no>_^_10_^_</span>\u662f\u7a7a\u63d0\u793a\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_11_^_</span></li>\n<li><span translate=no>_^_12_^_</span>\u662f\u8981\u8df3\u8fc7\u7684\u65f6\u95f4\u6b65\u6570\u3002</li></ul>\n",
"<p> </p>\n": "<p></p>\n",
"<p>Calculate <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8ba1\u7b97<span translate=no>_^_0_^_</span></p>\n",
"<p>Concatenated <span translate=no>_^_0_^_</span> and <span translate=no>_^_1_^_</span> </p>\n": "<p>\u4e32\u8054<span translate=no>_^_0_^_</span>\u548c<span translate=no>_^_1_^_</span></p>\n",
"<p>Duplicate <span translate=no>_^_0_^_</span> and <span translate=no>_^_1_^_</span> </p>\n": "<p>\u590d\u5236<span translate=no>_^_0_^_</span>\u548c<span translate=no>_^_1_^_</span></p>\n",
"<p>Get <span translate=no>_^_0_^_</span> and <span translate=no>_^_1_^_</span> </p>\n": "<p>\u83b7\u53d6<span translate=no>_^_0_^_</span>\u548c<span translate=no>_^_1_^_</span></p>\n",
"<p>Get number of steps the model was trained with <span translate=no>_^_0_^_</span> </p>\n": "<p>\u83b7\u53d6\u6a21\u578b\u8bad\u7ec3\u7684\u6b65\u6570<span translate=no>_^_0_^_</span></p>\n",
"<p>Set the model <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8bbe\u7f6e\u6a21\u578b<span translate=no>_^_0_^_</span></p>\n",
"<p>When the scale <span translate=no>_^_0_^_</span> <span translate=no>_^_1_^_</span> </p>\n": "<p>\u5f53\u4f53\u91cd\u79e4\u65f6<span translate=no>_^_0_^_</span><span translate=no>_^_1_^_</span></p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the model to predict noise <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u9884\u6d4b\u566a\u58f0\u7684\u6a21\u578b<span translate=no>_^_1_^_</span></li></ul>\n",
"Annotated PyTorch implementation/tutorial of sampling algorithms for stable diffusion model.": "\u5e26\u6ce8\u91ca\u7684 PyTorch \u5b9e\u73b0/\u7a33\u5b9a\u6269\u6563\u6a21\u578b\u91c7\u6837\u7b97\u6cd5\u6559\u7a0b\u3002",
"Sampling algorithms for stable diffusion": "\u7528\u4e8e\u7a33\u5b9a\u6269\u6563\u7684\u91c7\u6837\u7b97\u6cd5"
}
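
Several entries above refer to the unconditional guidance scale and the empty-prompt embedding; a minimal sketch of that classifier-free guidance step (illustrative names, not the repository's API):

import torch

def guided_noise_prediction(model, x: torch.Tensor, t: torch.Tensor,
                            c: torch.Tensor, c_uncond: torch.Tensor,
                            guidance_scale: float) -> torch.Tensor:
    # When the scale is 1 guidance has no effect, so run the model once
    if guidance_scale == 1.0:
        return model(x, t, c)
    # Duplicate x and t; concatenate the empty-prompt and conditional embeddings
    x_in = torch.cat([x, x])
    t_in = torch.cat([t, t])
    c_in = torch.cat([c_uncond, c])
    e_uncond, e_cond = model(x_in, t_in, c_in).chunk(2)
    # eps = e_uncond + scale * (e_cond - e_uncond)
    return e_uncond + guidance_scale * (e_cond - e_uncond)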

View File

@ -0,0 +1,38 @@
{
"<h1>Denoising Diffusion Implicit Models (DDIM) Sampling</h1>\n<p>This implements DDIM sampling from the paper <a href=\"https://papers.labml.ai/paper/2010.02502\">Denoising Diffusion Implicit Models</a></p>\n": "<h1>\u964d\u566a\u6269\u6563\u9690\u542b\u6a21\u578b (DDIM) \u91c7\u6837</h1>\n<p>\u8fd9\u5b9e\u73b0\u4e86\u6765\u81ea\u8bba\u6587 \u201c<a href=\"https://papers.labml.ai/paper/2010.02502\">\u964d\u566a\u6269\u6563\u9690\u5f0f\u6a21\u578b</a>\u201d \u7684 DDIM \u91c7\u6837</p>\n",
"<h2>DDIM Sampler</h2>\n<p>This extends the <a href=\"index.html\"><span translate=no>_^_0_^_</span> base class</a>.</p>\n<p>DDPM samples images by repeatedly removing noise by sampling step by step using,</p>\n<span translate=no>_^_1_^_</span><p>where <span translate=no>_^_2_^_</span> is random noise, <span translate=no>_^_3_^_</span> is a subsequence of <span translate=no>_^_4_^_</span> of length <span translate=no>_^_5_^_</span>, and <span translate=no>_^_6_^_</span></p>\n<p>Note that, <span translate=no>_^_7_^_</span> in DDIM paper refers to <span translate=no>_^_8_^_</span> from <a href=\"ddpm.html\">DDPM</a>.</p>\n": "<h2>DDIM \u91c7\u6837\u5668</h2>\n<p>\u8fd9\u6269\u5c55\u4e86<a href=\"index.html\"><span translate=no>_^_0_^_</span>\u57fa\u7c7b</a>\u3002</p>\n<p>DDPM \u901a\u8fc7\u9010\u6b65\u91c7\u6837\u6765\u53cd\u590d\u6d88\u9664\u566a\u70b9\u6765\u5bf9\u56fe\u50cf\u8fdb\u884c\u91c7\u6837\uff0c</p>\n<span translate=no>_^_1_^_</span><p>\u5176\u4e2d<span translate=no>_^_2_^_</span>\uff0c\u662f\u968f\u673a\u566a\u58f0\uff0c<span translate=no>_^_3_^_</span>\u662f\u957f\u5ea6\u4e3a<span translate=no>_^_4_^_</span>\u7684\u5b50\u5e8f\u5217<span translate=no>_^_5_^_</span>\uff0c<span translate=no>_^_6_^_</span></p>\n<p>\u8bf7\u6ce8\u610f\uff0c<span translate=no>_^_7_^_</span>\u5728 DDIM \u8bba\u6587\u4e2d\uff0c\u6307\u7684\u662f\u6765<span translate=no>_^_8_^_</span>\u81ea <a href=\"ddpm.html\">DDPM</a> \u7684\u8bba\u6587\u3002</p>\n",
"<h3>Painting Loop</h3>\n<ul><li><span translate=no>_^_0_^_</span> is <span translate=no>_^_1_^_</span> of shape <span translate=no>_^_2_^_</span> </li>\n<li><span translate=no>_^_3_^_</span> is the conditional embeddings <span translate=no>_^_4_^_</span> </li>\n<li><span translate=no>_^_5_^_</span> is the sampling step to start from, <span translate=no>_^_6_^_</span> </li>\n<li><span translate=no>_^_7_^_</span> is the original image in latent page which we are in paining. If this is not provided, it&#x27;ll be an image to image transformation. </li>\n<li><span translate=no>_^_8_^_</span> is the mask to keep the original image. </li>\n<li><span translate=no>_^_9_^_</span> is fixed noise to be added to the original image. </li>\n<li><span translate=no>_^_10_^_</span> is the unconditional guidance scale <span translate=no>_^_11_^_</span>. This is used for <span translate=no>_^_12_^_</span> </li>\n<li><span translate=no>_^_13_^_</span> is the conditional embedding for empty prompt <span translate=no>_^_14_^_</span></li></ul>\n": "<h3>\u7ed8\u753b\u5faa\u73af</h3>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62<span translate=no>_^_1_^_</span>\u72b6\u7684<span translate=no>_^_2_^_</span></li>\n<li><span translate=no>_^_3_^_</span>\u662f\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_4_^_</span></li>\n<li><span translate=no>_^_5_^_</span>\u662f\u5f00\u59cb\u65f6\u7684\u91c7\u6837\u6b65\u9aa4\uff0c<span translate=no>_^_6_^_</span></li>\n<li><span translate=no>_^_7_^_</span>\u662f\u6211\u4eec\u6b63\u5728\u7ed8\u5236\u7684\u6f5c\u5728\u9875\u9762\u4e2d\u7684\u539f\u59cb\u56fe\u50cf\u3002\u5982\u679c\u672a\u63d0\u4f9b\uff0c\u5219\u5c06\u662f\u56fe\u50cf\u5230\u56fe\u50cf\u7684\u8f6c\u6362\u3002</li>\n<li><span translate=no>_^_8_^_</span>\u662f\u4fdd\u7559\u539f\u59cb\u56fe\u50cf\u7684\u63a9\u7801\u3002</li>\n<li><span translate=no>_^_9_^_</span>\u662f\u8981\u6dfb\u52a0\u5230\u539f\u59cb\u56fe\u50cf\u7684\u56fa\u5b9a\u566a\u70b9\u3002</li>\n<li><span translate=no>_^_10_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_11_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_12_^_</span></li>\n<li><span translate=no>_^_13_^_</span>\u662f\u7a7a\u63d0\u793a\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_14_^_</span></li></ul>\n",
"<h3>Sample <span translate=no>_^_0_^_</span> given <span translate=no>_^_1_^_</span></h3>\n": "<h3><span translate=no>_^_0_^_</span>\u7ed9\u51fa\u7684\u6837\u672c<span translate=no>_^_1_^_</span></h3>\n",
"<h3>Sample <span translate=no>_^_0_^_</span></h3>\n<ul><li><span translate=no>_^_1_^_</span> is <span translate=no>_^_2_^_</span> of shape <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> is the conditional embeddings <span translate=no>_^_5_^_</span> of shape <span translate=no>_^_6_^_</span> </li>\n<li><span translate=no>_^_7_^_</span> is <span translate=no>_^_8_^_</span> of shape <span translate=no>_^_9_^_</span> </li>\n<li><span translate=no>_^_10_^_</span> is the step <span translate=no>_^_11_^_</span> as an integer </li>\n<li><span translate=no>_^_12_^_</span> is index <span translate=no>_^_13_^_</span> in the list <span translate=no>_^_14_^_</span> </li>\n<li><span translate=no>_^_15_^_</span> specified whether the noise should be same for all samples in the batch </li>\n<li><span translate=no>_^_16_^_</span> is the noise temperature (random noise gets multiplied by this) </li>\n<li><span translate=no>_^_17_^_</span> is the unconditional guidance scale <span translate=no>_^_18_^_</span>. This is used for <span translate=no>_^_19_^_</span> </li>\n<li><span translate=no>_^_20_^_</span> is the conditional embedding for empty prompt <span translate=no>_^_21_^_</span></li></ul>\n": "<h3>\u793a\u4f8b<span translate=no>_^_0_^_</span></h3>\n<ul><li><span translate=no>_^_1_^_</span>\u662f\u5f62<span translate=no>_^_2_^_</span>\u72b6\u7684<span translate=no>_^_3_^_</span></li>\n<li><span translate=no>_^_4_^_</span>\u662f\u5f62\u72b6<span translate=no>_^_5_^_</span>\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_6_^_</span></li>\n<li><span translate=no>_^_7_^_</span>\u662f\u5f62<span translate=no>_^_8_^_</span>\u72b6\u7684<span translate=no>_^_9_^_</span></li>\n<li><span translate=no>_^_10_^_</span>\u662f\u6574\u6570<span translate=no>_^_11_^_</span>\u5f62\u5f0f\u7684\u6b65\u957f</li>\n<li><span translate=no>_^_12_^_</span>\u662f\u5217\u8868<span translate=no>_^_13_^_</span>\u4e2d\u7684\u7d22\u5f15<span translate=no>_^_14_^_</span></li>\n<li><span translate=no>_^_15_^_</span>\u6307\u5b9a\u6279\u6b21\u4e2d\u6240\u6709\u6837\u672c\u7684\u566a\u58f0\u662f\u5426\u5e94\u76f8\u540c</li>\n<li><span translate=no>_^_16_^_</span>\u662f\u566a\u58f0\u6e29\u5ea6\uff08\u968f\u673a\u566a\u58f0\u4e58\u4ee5\u6b64\u503c\uff09</li>\n<li><span translate=no>_^_17_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_18_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_19_^_</span></li>\n<li><span translate=no>_^_20_^_</span>\u662f\u7a7a\u63d0\u793a\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_21_^_</span></li></ul>\n",
"<h3>Sample from <span translate=no>_^_0_^_</span></h3>\n<p><span translate=no>_^_1_^_</span></p>\n<ul><li><span translate=no>_^_2_^_</span> is <span translate=no>_^_3_^_</span> of shape <span translate=no>_^_4_^_</span> </li>\n<li><span translate=no>_^_5_^_</span> is the time step <span translate=no>_^_6_^_</span> index <span translate=no>_^_7_^_</span> </li>\n<li><span translate=no>_^_8_^_</span> is the noise, <span translate=no>_^_9_^_</span></li></ul>\n": "<h3>\u6837\u672c\u6765\u81ea<span translate=no>_^_0_^_</span></h3>\n<p><span translate=no>_^_1_^_</span></p>\n<ul><li><span translate=no>_^_2_^_</span>\u662f\u5f62<span translate=no>_^_3_^_</span>\u72b6\u7684<span translate=no>_^_4_^_</span></li>\n<li><span translate=no>_^_5_^_</span>\u662f\u65f6\u95f4\u6b65\u957f<span translate=no>_^_6_^_</span>\u6307\u6570<span translate=no>_^_7_^_</span></li>\n<li><span translate=no>_^_8_^_</span>\u662f\u566a\u97f3\uff0c<span translate=no>_^_9_^_</span></li></ul>\n",
"<h3>Sampling Loop</h3>\n<ul><li><span translate=no>_^_0_^_</span> is the shape of the generated images in the form <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> is the conditional embeddings <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> is the noise temperature (random noise gets multiplied by this) </li>\n<li><span translate=no>_^_5_^_</span> is <span translate=no>_^_6_^_</span>. If not provided random noise will be used. </li>\n<li><span translate=no>_^_7_^_</span> is the unconditional guidance scale <span translate=no>_^_8_^_</span>. This is used for <span translate=no>_^_9_^_</span> </li>\n<li><span translate=no>_^_10_^_</span> is the conditional embedding for empty prompt <span translate=no>_^_11_^_</span> </li>\n<li><span translate=no>_^_12_^_</span> is the number of time steps to skip <span translate=no>_^_13_^_</span>. We start sampling from <span translate=no>_^_14_^_</span>. And <span translate=no>_^_15_^_</span> is then <span translate=no>_^_16_^_</span>.</li></ul>\n": "<h3>\u91c7\u6837\u56de\u8def</h3>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u8868\u5355\u4e2d\u751f\u6210\u7684\u56fe\u50cf\u7684\u5f62\u72b6<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_3_^_</span></li>\n<li><span translate=no>_^_4_^_</span>\u662f\u566a\u58f0\u6e29\u5ea6\uff08\u968f\u673a\u566a\u58f0\u4e58\u4ee5\u6b64\u503c\uff09</li>\n<li><span translate=no>_^_5_^_</span>\u662f<span translate=no>_^_6_^_</span>\u3002\u5982\u679c\u672a\u63d0\u4f9b\uff0c\u5c06\u4f7f\u7528\u968f\u673a\u566a\u58f0\u3002</li>\n<li><span translate=no>_^_7_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_8_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_9_^_</span></li>\n<li><span translate=no>_^_10_^_</span>\u662f\u7a7a\u63d0\u793a\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_11_^_</span></li>\n<li><span translate=no>_^_12_^_</span>\u662f\u8981\u8df3\u8fc7\u7684\u65f6\u95f4\u6b65\u6570<span translate=no>_^_13_^_</span>\u3002\u6211\u4eec\u4ece\u5f00\u59cb\u91c7\u6837<span translate=no>_^_14_^_</span>\u3002\u7136\u540e<span translate=no>_^_15_^_</span>\u5c31\u662f\u8fd9\u6837<span translate=no>_^_16_^_</span>\u3002</li></ul>\n",
"<p> </p>\n": "<p></p>\n",
"<p><span translate=no>_^_0_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span></p>\n",
"<p>Calculate <span translate=no>_^_0_^_</span> and predicted <span translate=no>_^_1_^_</span> </p>\n": "<p>\u8ba1\u7b97<span translate=no>_^_0_^_</span>\u548c\u9884\u6d4b<span translate=no>_^_1_^_</span></p>\n",
"<p>Calculate <span translate=no>_^_0_^_</span> to be quadratically distributed across <span translate=no>_^_1_^_</span> </p>\n": "<p>\u8ba1\u7b97<span translate=no>_^_0_^_</span>\u4ee5\u4e8c\u6b21\u5206\u5e03<span translate=no>_^_1_^_</span></p>\n",
"<p>Calculate <span translate=no>_^_0_^_</span> to be uniformly distributed across <span translate=no>_^_1_^_</span> </p>\n": "<p>\u8ba1\u7b97<span translate=no>_^_0_^_</span>\u5f97\u5747\u5300\u5206\u5e03\u5728\u5404\u5904<span translate=no>_^_1_^_</span></p>\n",
"<p>Current prediction for <span translate=no>_^_0_^_</span>, <span translate=no>_^_1_^_</span> </p>\n": "<p>\u76ee\u524d\u7684\u9884\u6d4b<span translate=no>_^_0_^_</span>\uff0c<span translate=no>_^_1_^_</span></p>\n",
"<p>Different noise for each sample </p>\n": "<p>\u6bcf\u4e2a\u6837\u672c\u7684\u566a\u58f0\u4e0d\u540c</p>\n",
"<p>Direction pointing to <span translate=no>_^_0_^_</span> <span translate=no>_^_1_^_</span> </p>\n": "<p>\u6307\u5411\u7684\u65b9\u5411<span translate=no>_^_0_^_</span><span translate=no>_^_1_^_</span></p>\n",
"<p>Get <span translate=no>_^_0_^_</span> </p>\n": "<p>\u83b7\u53d6<span translate=no>_^_0_^_</span></p>\n",
"<p>Get batch size </p>\n": "<p>\u83b7\u53d6\u6279\u6b21\u5927\u5c0f</p>\n",
"<p>Get device and batch size </p>\n": "<p>\u83b7\u53d6\u8bbe\u5907\u548c\u6279\u6b21\u5927\u5c0f</p>\n",
"<p>Get the <span translate=no>_^_0_^_</span> for original image in latent space </p>\n": "<p>\u5728\u6f5c\u5728\u7a7a\u95f4\u4e2d<span translate=no>_^_0_^_</span>\u83b7\u53d6\u539f\u59cb\u56fe\u50cf</p>\n",
"<p>If same noise is used for all samples in the batch </p>\n": "<p>\u5982\u679c\u6279\u6b21\u4e2d\u7684\u6240\u6709\u6837\u54c1\u90fd\u4f7f\u7528\u76f8\u540c\u7684\u566a\u58f0</p>\n",
"<p>Index <span translate=no>_^_0_^_</span> in the list <span translate=no>_^_1_^_</span> </p>\n": "<p>\u5217\u8868<span translate=no>_^_0_^_</span>\u4e2d\u7684\u7d22\u5f15<span translate=no>_^_1_^_</span></p>\n",
"<p>Multiply noise by the temperature </p>\n": "<p>\u5c06\u566a\u58f0\u4e58\u4ee5\u6e29\u5ea6</p>\n",
"<p>No noise is added, when <span translate=no>_^_0_^_</span> </p>\n": "<p>\u5728\u4ee5\u4e0b\u60c5\u51b5\u4e0b\u4e0d\u6dfb\u52a0\u4efb\u4f55\u566a\u97f3<span translate=no>_^_0_^_</span></p>\n",
"<p>Number of steps, <span translate=no>_^_0_^_</span> </p>\n": "<p>\u6b65\u6570\uff0c<span translate=no>_^_0_^_</span></p>\n",
"<p>Random noise, if noise is not specified </p>\n": "<p>\u5982\u679c\u672a\u6307\u5b9a\u566a\u58f0\uff0c\u5219\u4e3a\u968f\u673a\u566a\u58f0</p>\n",
"<p>Replace the masked area </p>\n": "<p>\u66ff\u6362\u88ab\u5c4f\u853d\u7684\u533a\u57df</p>\n",
"<p>Replace the masked area with original image </p>\n": "<p>\u5c06\u8499\u7248\u533a\u57df\u66ff\u6362\u4e3a\u539f\u59cb\u56fe\u50cf</p>\n",
"<p>Return <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8fd4\u56de<span translate=no>_^_0_^_</span></p>\n",
"<p>Sample <span translate=no>_^_0_^_</span> </p>\n": "<p>\u793a\u4f8b<span translate=no>_^_0_^_</span></p>\n",
"<p>Sample from <span translate=no>_^_0_^_</span> </p>\n": "<p>\u6837\u672c\u6765\u81ea<span translate=no>_^_0_^_</span></p>\n",
"<p>Time step <span translate=no>_^_0_^_</span> </p>\n": "<p>\u65f6\u95f4\u6b65\u957f<span translate=no>_^_0_^_</span></p>\n",
"<p>Time steps to sample at <span translate=no>_^_0_^_</span> </p>\n": "<p>\u91c7\u6837\u7684\u65f6\u95f4\u6b65\u957f<span translate=no>_^_0_^_</span></p>\n",
"<span translate=no>_^_0_^_</span><p> </p>\n": "<span translate=no>_^_0_^_</span><p></p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the model to predict noise <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> is the number of DDIM sampling steps, <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> specifies how to extract <span translate=no>_^_5_^_</span> from <span translate=no>_^_6_^_</span>. It can be either <span translate=no>_^_7_^_</span> or <span translate=no>_^_8_^_</span>. </li>\n<li><span translate=no>_^_9_^_</span> is <span translate=no>_^_10_^_</span> used to calculate <span translate=no>_^_11_^_</span>. <span translate=no>_^_12_^_</span> makes the sampling process deterministic.</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u9884\u6d4b\u566a\u58f0\u7684\u6a21\u578b<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f DDIM \u91c7\u6837\u6b65\u9aa4\u7684\u6570\u91cf\uff0c<span translate=no>_^_3_^_</span></li>\n<li><span translate=no>_^_4_^_</span>\u6307\u5b9a\u5982\u4f55<span translate=no>_^_5_^_</span>\u4ece\u4e2d\u63d0\u53d6<span translate=no>_^_6_^_</span>\u3002\u53ef\u4ee5\u662f<span translate=no>_^_7_^_</span>\u6216<span translate=no>_^_8_^_</span>\u3002</li>\n<li><span translate=no>_^_9_^_</span><span translate=no>_^_10_^_</span>\u7528\u4e8e\u8ba1\u7b97<span translate=no>_^_11_^_</span>\u3002<span translate=no>_^_12_^_</span>\u4f7f\u91c7\u6837\u8fc7\u7a0b\u5177\u6709\u786e\u5b9a\u6027\u3002</li></ul>\n",
"Annotated PyTorch implementation/tutorial of Denoising Diffusion Implicit Models (DDIM) Sampling for stable diffusion model.": "\u5e26\u6ce8\u91ca\u7684 PyTorch \u5b9e\u73b0/\u6559\u7a0b\uff0c\u7528\u4e8e\u7a33\u5b9a\u6269\u6563\u6a21\u578b\u7684\u964d\u566a\u6269\u6563\u9690\u5f0f\u6a21\u578b (DDIM) \u91c7\u6837\u3002",
"Denoising Diffusion Implicit Models (DDIM) Sampling": "\u964d\u566a\u6269\u6563\u9690\u542b\u6a21\u578b (DDIM) \u91c7\u6837"
}
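
A minimal sketch of a single DDIM update from x_t to the previous sampling step, following the update rule in the DDIM paper; here alpha denotes the cumulative product (DDPM's alpha-bar), and eta = 0 makes the process deterministic:

import torch

def ddim_step(x: torch.Tensor, eps: torch.Tensor,
              alpha: float, alpha_prev: float, eta: float = 0.0) -> torch.Tensor:
    # Current prediction for x_0 from the sample and the predicted noise
    pred_x0 = (x - (1.0 - alpha) ** 0.5 * eps) / alpha ** 0.5
    # sigma controls how much fresh noise is injected; zero gives deterministic DDIM
    sigma = eta * (((1.0 - alpha_prev) / (1.0 - alpha)) * (1.0 - alpha / alpha_prev)) ** 0.5
    # Direction pointing to x_t
    dir_xt = (1.0 - alpha_prev - sigma ** 2) ** 0.5 * eps
    return alpha_prev ** 0.5 * pred_x0 + dir_xt + sigma * torch.randn_like(x)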

View File

@ -0,0 +1,33 @@
{
"<h1>Denoising Diffusion Probabilistic Models (DDPM) Sampling</h1>\n<p>For a simpler DDPM implementation refer to our <a href=\"../../ddpm/index.html\">DDPM implementation</a>. We use same notations for <span translate=no>_^_0_^_</span>, <span translate=no>_^_1_^_</span> schedules, etc.</p>\n": "<h1>\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM) \u91c7\u6837</h1>\n<p>\u6709\u5173\u66f4\u7b80\u5355\u7684 DDPM \u5b9e\u73b0\uff0c\u8bf7\u53c2\u9605\u6211\u4eec\u7684 <a href=\"../../ddpm/index.html\">DDPM \u5b9e\u73b0</a>\u3002\u6211\u4eec\u5bf9<span translate=no>_^_0_^_</span><span translate=no>_^_1_^_</span>\u65f6\u95f4\u8868\u7b49\u4f7f\u7528\u76f8\u540c\u7684\u7b26\u53f7\u3002</p>\n",
"<h2>DDPM Sampler</h2>\n<p>This extends the <a href=\"index.html\"><span translate=no>_^_0_^_</span> base class</a>.</p>\n<p>DDPM samples images by repeatedly removing noise by sampling step by step from <span translate=no>_^_1_^_</span>,</p>\n<span translate=no>_^_2_^_</span>": "<h2>DDPM \u91c7\u6837\u5668</h2>\n<p>\u8fd9\u6269\u5c55\u4e86<a href=\"index.html\"><span translate=no>_^_0_^_</span>\u57fa\u7c7b</a>\u3002</p>\n<p>DDPM \u901a\u8fc7\u9010\u6b65\u4ece<span translate=no>_^_1_^_</span>\u4e2d\u53cd\u590d\u6d88\u9664\u566a\u70b9\u6765\u5bf9\u56fe\u50cf\u8fdb\u884c\u91c7\u6837</p>\n<span translate=no>_^_2_^_</span>",
"<h3>Sample <span translate=no>_^_0_^_</span> from <span translate=no>_^_1_^_</span></h3>\n<ul><li><span translate=no>_^_2_^_</span> is <span translate=no>_^_3_^_</span> of shape <span translate=no>_^_4_^_</span> </li>\n<li><span translate=no>_^_5_^_</span> is the conditional embeddings <span translate=no>_^_6_^_</span> of shape <span translate=no>_^_7_^_</span> </li>\n<li><span translate=no>_^_8_^_</span> is <span translate=no>_^_9_^_</span> of shape <span translate=no>_^_10_^_</span> </li>\n<li><span translate=no>_^_11_^_</span> is the step <span translate=no>_^_12_^_</span> as an integer :repeat_noise: specified whether the noise should be same for all samples in the batch </li>\n<li><span translate=no>_^_13_^_</span> is the noise temperature (random noise gets multiplied by this) </li>\n<li><span translate=no>_^_14_^_</span> is the unconditional guidance scale <span translate=no>_^_15_^_</span>. This is used for <span translate=no>_^_16_^_</span> </li>\n<li><span translate=no>_^_17_^_</span> is the conditional embedding for empty prompt <span translate=no>_^_18_^_</span></li></ul>\n": "<h3>\u6837\u672c<span translate=no>_^_0_^_</span>\u6765\u81ea<span translate=no>_^_1_^_</span></h3>\n<ul><li><span translate=no>_^_2_^_</span>\u662f\u5f62<span translate=no>_^_3_^_</span>\u72b6\u7684<span translate=no>_^_4_^_</span></li>\n<li><span translate=no>_^_5_^_</span>\u662f\u5f62\u72b6<span translate=no>_^_6_^_</span>\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_7_^_</span></li>\n<li><span translate=no>_^_8_^_</span>\u662f\u5f62<span translate=no>_^_9_^_</span>\u72b6\u7684<span translate=no>_^_10_^_</span></li>\n<li><span translate=no>_^_11_^_</span>\u662f\u6574\u6570\u5f62\u5f0f\u7684\u6b65<span translate=no>_^_12_^_</span>\u957f:repeat_noise: \u6307\u5b9a\u6279\u6b21\u4e2d\u6240\u6709\u6837\u672c\u7684\u566a\u58f0\u662f\u5426\u5e94\u76f8\u540c</li>\n<li><span translate=no>_^_13_^_</span>\u662f\u566a\u58f0\u6e29\u5ea6\uff08\u968f\u673a\u566a\u58f0\u4e58\u4ee5\u6b64\u503c\uff09</li>\n<li><span translate=no>_^_14_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_15_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_16_^_</span></li>\n<li><span translate=no>_^_17_^_</span>\u662f\u7a7a\u63d0\u793a\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_18_^_</span></li></ul>\n",
"<h3>Sample from <span translate=no>_^_0_^_</span></h3>\n<p><span translate=no>_^_1_^_</span></p>\n<ul><li><span translate=no>_^_2_^_</span> is <span translate=no>_^_3_^_</span> of shape <span translate=no>_^_4_^_</span> </li>\n<li><span translate=no>_^_5_^_</span> is the time step <span translate=no>_^_6_^_</span> index </li>\n<li><span translate=no>_^_7_^_</span> is the noise, <span translate=no>_^_8_^_</span></li></ul>\n": "<h3>\u6837\u672c\u6765\u81ea<span translate=no>_^_0_^_</span></h3>\n<p><span translate=no>_^_1_^_</span></p>\n<ul><li><span translate=no>_^_2_^_</span>\u662f\u5f62<span translate=no>_^_3_^_</span>\u72b6\u7684<span translate=no>_^_4_^_</span></li>\n<li><span translate=no>_^_5_^_</span>\u662f\u65f6\u95f4\u6b65\u957f<span translate=no>_^_6_^_</span>\u6307\u6570</li>\n<li><span translate=no>_^_7_^_</span>\u662f\u566a\u97f3\uff0c<span translate=no>_^_8_^_</span></li></ul>\n",
"<h3>Sampling Loop</h3>\n<ul><li><span translate=no>_^_0_^_</span> is the shape of the generated images in the form <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> is the conditional embeddings <span translate=no>_^_3_^_</span> </li>\n<li><span translate=no>_^_4_^_</span> is the noise temperature (random noise gets multiplied by this) </li>\n<li><span translate=no>_^_5_^_</span> is <span translate=no>_^_6_^_</span>. If not provided random noise will be used. </li>\n<li><span translate=no>_^_7_^_</span> is the unconditional guidance scale <span translate=no>_^_8_^_</span>. This is used for <span translate=no>_^_9_^_</span> </li>\n<li><span translate=no>_^_10_^_</span> is the conditional embedding for empty prompt <span translate=no>_^_11_^_</span> </li>\n<li><span translate=no>_^_12_^_</span> is the number of time steps to skip <span translate=no>_^_13_^_</span>. We start sampling from <span translate=no>_^_14_^_</span>. And <span translate=no>_^_15_^_</span> is then <span translate=no>_^_16_^_</span>.</li></ul>\n": "<h3>\u91c7\u6837\u56de\u8def</h3>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u8868\u5355\u4e2d\u751f\u6210\u7684\u56fe\u50cf\u7684\u5f62\u72b6<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_3_^_</span></li>\n<li><span translate=no>_^_4_^_</span>\u662f\u566a\u58f0\u6e29\u5ea6\uff08\u968f\u673a\u566a\u58f0\u4e58\u4ee5\u6b64\u503c\uff09</li>\n<li><span translate=no>_^_5_^_</span>\u662f<span translate=no>_^_6_^_</span>\u3002\u5982\u679c\u672a\u63d0\u4f9b\uff0c\u5c06\u4f7f\u7528\u968f\u673a\u566a\u58f0\u3002</li>\n<li><span translate=no>_^_7_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_8_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_9_^_</span></li>\n<li><span translate=no>_^_10_^_</span>\u662f\u7a7a\u63d0\u793a\u7684\u6761\u4ef6\u5d4c\u5165<span translate=no>_^_11_^_</span></li>\n<li><span translate=no>_^_12_^_</span>\u662f\u8981\u8df3\u8fc7\u7684\u65f6\u95f4\u6b65\u6570<span translate=no>_^_13_^_</span>\u3002\u6211\u4eec\u4ece\u5f00\u59cb\u91c7\u6837<span translate=no>_^_14_^_</span>\u3002\u7136\u540e<span translate=no>_^_15_^_</span>\u5c31\u662f\u8fd9\u6837<span translate=no>_^_16_^_</span>\u3002</li></ul>\n",
"<p> </p>\n": "<p></p>\n",
"<p> <span translate=no>_^_0_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span></p>\n",
"<p><span translate=no>_^_0_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span></p>\n",
"<p><span translate=no>_^_0_^_</span> schedule </p>\n": "<p><span translate=no>_^_0_^_</span>\u65f6\u95f4\u8868</p>\n",
"<p>Calculate <span translate=no>_^_0_^_</span> with current <span translate=no>_^_1_^_</span></p>\n<p><span translate=no>_^_2_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span>\u7528\u7535\u6d41\u8ba1\u7b97<span translate=no>_^_1_^_</span></p>\n<p><span translate=no>_^_2_^_</span></p>\n",
"<p>Calculate <span translate=no>_^_0_^_</span></p>\n<p><span translate=no>_^_1_^_</span> </p>\n": "<p>\u8ba1\u7b97<span translate=no>_^_0_^_</span></p>\n<p><span translate=no>_^_1_^_</span></p>\n",
"<p>Clamped log of <span translate=no>_^_0_^_</span> </p>\n": "<p>\u5c01\u95ed\u4e86\u65e5\u5fd7<span translate=no>_^_0_^_</span></p>\n",
"<p>Different noise for each sample </p>\n": "<p>\u6bcf\u4e2a\u6837\u672c\u7684\u566a\u58f0\u4e0d\u540c</p>\n",
"<p>Do not add noise when <span translate=no>_^_0_^_</span> (final step sampling process). Note that <span translate=no>_^_1_^_</span> is <span translate=no>_^_2_^_</span> when <span translate=no>_^_3_^_</span>) </p>\n": "<p><span translate=no>_^_0_^_</span>\uff08\u6700\u540e\u4e00\u6b65\u91c7\u6837\u8fc7\u7a0b\uff09\u65f6\u4e0d\u8981\u6dfb\u52a0\u566a\u97f3\u3002\u6ce8\u610f\u90a3<span translate=no>_^_1_^_</span>\u662f<span translate=no>_^_2_^_</span>\u65f6\u5019<span translate=no>_^_3_^_</span>\uff09</p>\n",
"<p>Get <span translate=no>_^_0_^_</span> </p>\n": "<p>\u83b7\u53d6<span translate=no>_^_0_^_</span></p>\n",
"<p>Get batch size </p>\n": "<p>\u83b7\u53d6\u6279\u6b21\u5927\u5c0f</p>\n",
"<p>Get device and batch size </p>\n": "<p>\u83b7\u53d6\u8bbe\u5907\u548c\u6279\u6b21\u5927\u5c0f</p>\n",
"<p>If same noise is used for all samples in the batch </p>\n": "<p>\u5982\u679c\u6279\u6b21\u4e2d\u7684\u6240\u6709\u6837\u54c1\u90fd\u4f7f\u7528\u76f8\u540c\u7684\u566a\u58f0</p>\n",
"<p>Multiply noise by the temperature </p>\n": "<p>\u5c06\u566a\u58f0\u4e58\u4ee5\u6e29\u5ea6</p>\n",
"<p>Random noise, if noise is not specified </p>\n": "<p>\u5982\u679c\u672a\u6307\u5b9a\u566a\u58f0\uff0c\u5219\u4e3a\u968f\u673a\u566a\u58f0</p>\n",
"<p>Return <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8fd4\u56de<span translate=no>_^_0_^_</span></p>\n",
"<p>Sample <span translate=no>_^_0_^_</span> </p>\n": "<p>\u793a\u4f8b<span translate=no>_^_0_^_</span></p>\n",
"<p>Sample from <span translate=no>_^_0_^_</span> </p>\n": "<p>\u6837\u672c\u6765\u81ea<span translate=no>_^_0_^_</span></p>\n",
"<p>Sample from,</p>\n<p><span translate=no>_^_0_^_</span> </p>\n": "<p>\u6837\u672c\u6765\u81ea</p>\n<p><span translate=no>_^_0_^_</span></p>\n",
"<p>Sampling loop </p>\n": "<p>\u91c7\u6837\u56de\u8def</p>\n",
"<p>Sampling steps <span translate=no>_^_0_^_</span> </p>\n": "<p>\u91c7\u6837\u6b65\u9aa4<span translate=no>_^_0_^_</span></p>\n",
"<p>Time step <span translate=no>_^_0_^_</span> </p>\n": "<p>\u65f6\u95f4\u6b65\u957f<span translate=no>_^_0_^_</span></p>\n",
"<p>Time steps to sample at <span translate=no>_^_0_^_</span> </p>\n": "<p>\u91c7\u6837\u7684\u65f6\u95f4\u6b65\u957f<span translate=no>_^_0_^_</span></p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the model to predict noise <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u9884\u6d4b\u566a\u58f0\u7684\u6a21\u578b<span translate=no>_^_1_^_</span></li></ul>\n",
"Annotated PyTorch implementation/tutorial of Denoising Diffusion Probabilistic Models (DDPM) Sampling for stable diffusion model.": "\u5e26\u6ce8\u91ca\u7684 PyTorch \u5b9e\u73b0/\u6559\u7a0b\uff1a\u7528\u4e8e\u7a33\u5b9a\u6269\u6563\u6a21\u578b\u7684\u964d\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM) \u91c7\u6837\u3002",
"Denoising Diffusion Probabilistic Models (DDPM) Sampling": "\u53bb\u566a\u6269\u6563\u6982\u7387\u6a21\u578b (DDPM) \u91c7\u6837"
}
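
Note: the entries above describe one DDPM denoising step with classifier-free guidance. Below is a minimal PyTorch sketch of that step, assuming illustrative names (`eps_model` for the noise predictor and 1-D schedule tensors `alpha`, `alpha_bar`, `beta`) rather than the repository's exact attributes:

import torch

@torch.no_grad()
def p_sample(eps_model, x, c, t: int, *, uncond_scale=1., uncond_cond=None,
             temperature=1., repeat_noise=False, alpha=None, alpha_bar=None, beta=None):
    # Time step as a tensor for the whole batch
    time = x.new_full((x.shape[0],), t, dtype=torch.long)
    # Classifier-free guidance: eps = eps_uncond + s * (eps_cond - eps_uncond)
    if uncond_cond is not None and uncond_scale != 1.:
        e_cond = eps_model(x, time, c)
        e_uncond = eps_model(x, time, uncond_cond)
        eps = e_uncond + uncond_scale * (e_cond - e_uncond)
    else:
        eps = eps_model(x, time, c)
    # Mean of p(x_{t-1} | x_t): (x - beta_t / sqrt(1 - alpha_bar_t) * eps) / sqrt(alpha_t)
    mean = (x - beta[t] / (1 - alpha_bar[t]).sqrt() * eps) / alpha[t].sqrt()
    # Final sampling step: no noise is added when t == 0
    if t == 0:
        return mean
    # Same noise for every sample in the batch when repeat_noise is set
    if repeat_noise:
        noise = torch.randn((1, *x.shape[1:]), device=x.device).expand_as(x)
    else:
        noise = torch.randn_like(x)
    # Multiply noise by the temperature; sigma_t^2 = beta_t is one standard choice
    return mean + beta[t].sqrt() * temperature * noise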

View File

@ -0,0 +1,5 @@
{
"<h1>Scripts to show example usages <a href=\"../index.html\">stable diffusion</a></h1>\n<ul><li><a href=\"text_to_image.html\">Prompt to image diffusion</a> </li>\n<li><a href=\"image_to_image.html\">Image to image diffusion</a> </li>\n<li><a href=\"in_paint.html\">In-painting</a></li></ul>\n": "<h1>\u7528\u4e8e\u663e\u793a\u793a\u4f8b\u7528\u6cd5\u548c<a href=\"../index.html\">\u7a33\u5b9a\u6269\u6563</a>\u7684\u811a\u672c</h1>\n<ul><li><a href=\"text_to_image.html\">\u63d0\u793a\u56fe\u50cf\u6269\u6563</a></li>\n<li><a href=\"image_to_image.html\">\u56fe\u50cf\u5230\u56fe\u50cf\u7684\u6269\u6563</a></li>\n<li><a href=\"in_paint.html\">\u5185\u753b</a></li></ul>\n",
"Annotated PyTorch implementation/tutorial of example usages of stable diffusion": "\u5e26\u6ce8\u91ca\u7684 PyTorch \u5b9e\u73b0/\u7a33\u5b9a\u6269\u6563\u793a\u4f8b\u7528\u6cd5\u6559\u7a0b",
"Scripts to show example usages stable diffusion": "\u7528\u4e8e\u663e\u793a\u793a\u4f8b\u7528\u6cd5\u548c\u7a33\u5b9a\u6269\u6563\u7684\u811a\u672c"
}

View File

@ -0,0 +1,24 @@
{
"<h1>Generate images using <a href=\"../index.html\">stable diffusion</a> with a prompt from a given image</h1>\n": "<h1>\u6839\u636e\u7ed9\u5b9a\u56fe\u50cf\u7684\u63d0\u793a\uff0c\u4f7f\u7528<a href=\"../index.html\">\u7a33\u5b9a\u7684\u6269\u6563</a>\u751f\u6210\u56fe\u50cf</h1>\n",
"<h3>CLI</h3>\n": "<h3>CLI</h3>\n",
"<h3>Image to image class</h3>\n": "<h3>\u56fe\u50cf\u5230\u56fe\u50cf\u7c7b\u522b</h3>\n",
"<p> </p>\n": "<p></p>\n",
"<p>AMP auto casting </p>\n": "<p>AMP \u81ea\u52a8\u6295\u5c04</p>\n",
"<p>Add noise to the original image </p>\n": "<p>\u5411\u539f\u59cb\u56fe\u50cf\u6dfb\u52a0\u566a\u70b9</p>\n",
"<p>Decode the image from the <a href=\"../model/autoencoder.html\">autoencoder</a> </p>\n": "<p>\u4ece<a href=\"../model/autoencoder.html\">\u81ea\u52a8\u7f16\u7801\u5668\u89e3\u7801</a>\u56fe\u50cf</p>\n",
"<p>Encode the image in the latent space and make <span translate=no>_^_0_^_</span> copies of it </p>\n": "<p>\u5728\u6f5c\u5728\u7a7a\u95f4\u4e2d\u5bf9\u56fe\u50cf\u8fdb\u884c\u7f16\u7801\u5e76\u5236\u4f5c<span translate=no>_^_0_^_</span>\u526f\u672c</p>\n",
"<p>Get device </p>\n": "<p>\u83b7\u53d6\u8bbe\u5907</p>\n",
"<p>Get the number of steps to diffuse the original </p>\n": "<p>\u83b7\u53d6\u6f2b\u53cd\u5c04\u539f\u7a3f\u7684\u6b65\u6570</p>\n",
"<p>Get the prompt embeddings </p>\n": "<p>\u83b7\u53d6\u63d0\u793a\u5d4c\u5165\u4fe1\u606f</p>\n",
"<p>In unconditional scaling is not <span translate=no>_^_0_^_</span> get the embeddings for empty prompts (no conditioning). </p>\n": "<p>\u5728\u65e0\u6761\u4ef6\u7f29\u653e\u4e2d\uff0c\u65e0\u6cd5<span translate=no>_^_0_^_</span>\u83b7\u53d6\u7a7a\u63d0\u793a\u7684\u5d4c\u5165\u503c\uff08\u65e0\u6761\u4ef6\uff09\u3002</p>\n",
"<p>Initialize <a href=\"../sampler/ddim.html\">DDIM sampler</a> </p>\n": "<p>\u521d\u59cb\u5316 <a href=\"../sampler/ddim.html\">DDIM \u91c7\u6837\u5668</a></p>\n",
"<p>Load <a href=\"../latent_diffusion.html\">latent diffusion model</a> </p>\n": "<p>\u8f7d\u8377<a href=\"../latent_diffusion.html\">\u6f5c\u5728\u6269\u6563\u6a21\u578b</a></p>\n",
"<p>Load image </p>\n": "<p>\u52a0\u8f7d\u56fe\u7247</p>\n",
"<p>Make a batch of prompts </p>\n": "<p>\u505a\u4e00\u6279\u63d0\u793a</p>\n",
"<p>Move the model to device </p>\n": "<p>\u5c06\u6a21\u578b\u79fb\u81f3\u8bbe\u5907</p>\n",
"<p>Reconstruct from the noisy image </p>\n": "<p>\u4ece\u5608\u6742\u7684\u56fe\u50cf\u4e2d\u91cd\u5efa</p>\n",
"<p>Save images </p>\n": "<p>\u4fdd\u5b58\u56fe\u7247</p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the path of the checkpoint </li>\n<li><span translate=no>_^_1_^_</span> is the number of sampling steps </li>\n<li><span translate=no>_^_2_^_</span> is the <a href=\"../sampler/ddim.html\">DDIM sampling</a> <span translate=no>_^_3_^_</span> constant</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u68c0\u67e5\u70b9\u7684\u8def\u5f84</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u91c7\u6837\u6b65\u9aa4\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f <a href=\"../sampler/ddim.html\">DDIM \u91c7\u6837</a><span translate=no>_^_3_^_</span>\u5e38\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the path to store the generated images </li>\n<li><span translate=no>_^_1_^_</span> is the image to transform </li>\n<li><span translate=no>_^_2_^_</span> specifies how much of the original image should not be preserved </li>\n<li><span translate=no>_^_3_^_</span> is the number of images to generate in a batch </li>\n<li><span translate=no>_^_4_^_</span> is the prompt to generate images with </li>\n<li><span translate=no>_^_5_^_</span> is the unconditional guidance scale <span translate=no>_^_6_^_</span>. This is used for <span translate=no>_^_7_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5b58\u50a8\u751f\u6210\u7684\u56fe\u50cf\u7684\u8def\u5f84</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u8981\u8f6c\u6362\u7684\u56fe\u50cf</li>\n<li><span translate=no>_^_2_^_</span>\u6307\u5b9a\u4e0d\u5e94\u4fdd\u7559\u539f\u59cb\u56fe\u50cf\u7684\u591a\u5c11</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u6279\u91cf\u751f\u6210\u7684\u56fe\u50cf\u6570\u91cf</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u4f7f\u7528\u4ee5\u4e0b\u547d\u4ee4\u751f\u6210\u56fe\u50cf\u7684\u63d0\u793a</li>\n<li><span translate=no>_^_5_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_6_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_7_^_</span></li></ul>\n",
"Generate images using stable diffusion with a prompt from a given image": "\u6839\u636e\u7ed9\u5b9a\u56fe\u50cf\u7684\u63d0\u793a\uff0c\u4f7f\u7528\u7a33\u5b9a\u7684\u6269\u6563\u751f\u6210\u56fe\u50cf"
}
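
The image-to-image entries above amount to: encode the image into the latent space, diffuse it for a `strength` fraction of the schedule, then denoise with the prompt. A sketch under assumed names (`load_img`, `model.autoencoder_encode`, `sampler.q_sample`, `sampler.paint` are stand-ins, not the repository's confirmed API):

import torch

@torch.no_grad()
def img2img(model, sampler, image_path, prompt, *, strength=0.75,
            batch_size=4, uncond_scale=5.0, n_steps=50):
    # Encode the image into the latent space and repeat it for the batch
    image = load_img(image_path).to(model.device)               # [1, 3, H, W]
    latent = model.autoencoder_encode(image).repeat(batch_size, 1, 1, 1)
    # Diffuse the original for a `strength` fraction of the schedule
    t_index = int(strength * n_steps)
    x = sampler.q_sample(latent, t_index)
    # Prompt embeddings; empty prompts for classifier-free guidance
    cond = model.get_text_conditioning(batch_size * [prompt])
    uncond = model.get_text_conditioning(batch_size * [""]) if uncond_scale != 1. else None
    # Reconstruct from the noisy latent, then decode back to image space
    x = sampler.paint(x, cond, t_index, uncond_scale=uncond_scale, uncond_cond=uncond)
    return model.autoencoder_decode(x)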

View File

@ -0,0 +1,26 @@
{
"<h1>In-paint images using <a href=\"../index.html\">stable diffusion</a> with a prompt</h1>\n": "<h1>\u4f7f\u7528\u5e26\u6709\u63d0\u793a\u7684<a href=\"../index.html\">\u7a33\u5b9a\u6269\u6563</a>\u529f\u80fd\u586b\u5145\u56fe\u50cf</h1>\n",
"<h3>CLI</h3>\n": "<h3>CLI</h3>\n",
"<h3>Image in-painting class</h3>\n": "<h3>\u56fe\u50cf\u8865\u753b\u8bfe</h3>\n",
"<p> </p>\n": "<p></p>\n",
"<p>AMP auto casting </p>\n": "<p>AMP \u81ea\u52a8\u6295\u5c04</p>\n",
"<p>Add noise to the original image </p>\n": "<p>\u5411\u539f\u59cb\u56fe\u50cf\u6dfb\u52a0\u566a\u70b9</p>\n",
"<p>Decode the image from the <a href=\"../model/autoencoder.html\">autoencoder</a> </p>\n": "<p>\u4ece<a href=\"../model/autoencoder.html\">\u81ea\u52a8\u7f16\u7801\u5668\u89e3\u7801</a>\u56fe\u50cf</p>\n",
"<p>Encode the image in the latent space and make <span translate=no>_^_0_^_</span> copies of it </p>\n": "<p>\u5728\u6f5c\u5728\u7a7a\u95f4\u4e2d\u5bf9\u56fe\u50cf\u8fdb\u884c\u7f16\u7801\u5e76\u5236\u4f5c<span translate=no>_^_0_^_</span>\u526f\u672c</p>\n",
"<p>Get device </p>\n": "<p>\u83b7\u53d6\u8bbe\u5907</p>\n",
"<p>Get the number of steps to diffuse the original </p>\n": "<p>\u83b7\u53d6\u6f2b\u53cd\u5c04\u539f\u7a3f\u7684\u6b65\u6570</p>\n",
"<p>Get the prompt embeddings </p>\n": "<p>\u83b7\u53d6\u63d0\u793a\u5d4c\u5165\u4fe1\u606f</p>\n",
"<p>If <span translate=no>_^_0_^_</span> is not provided, we set a sample mask to preserve the bottom half of the image </p>\n": "<p>\u5982\u679c<span translate=no>_^_0_^_</span>\u672a\u63d0\u4f9b\uff0c\u6211\u4eec\u4f1a\u8bbe\u7f6e\u6837\u672c\u63a9\u7801\u4ee5\u4fdd\u7559\u56fe\u50cf\u7684\u4e0b\u534a\u90e8\u5206</p>\n",
"<p>In unconditional scaling is not <span translate=no>_^_0_^_</span> get the embeddings for empty prompts (no conditioning). </p>\n": "<p>\u5728\u65e0\u6761\u4ef6\u7f29\u653e\u4e2d\uff0c\u65e0\u6cd5<span translate=no>_^_0_^_</span>\u83b7\u53d6\u7a7a\u63d0\u793a\u7684\u5d4c\u5165\u503c\uff08\u65e0\u6761\u4ef6\uff09\u3002</p>\n",
"<p>Initialize <a href=\"../sampler/ddim.html\">DDIM sampler</a> </p>\n": "<p>\u521d\u59cb\u5316 <a href=\"../sampler/ddim.html\">DDIM \u91c7\u6837\u5668</a></p>\n",
"<p>Load <a href=\"../latent_diffusion.html\">latent diffusion model</a> </p>\n": "<p>\u8f7d\u8377<a href=\"../latent_diffusion.html\">\u6f5c\u5728\u6269\u6563\u6a21\u578b</a></p>\n",
"<p>Load image </p>\n": "<p>\u52a0\u8f7d\u56fe\u7247</p>\n",
"<p>Make a batch of prompts </p>\n": "<p>\u505a\u4e00\u6279\u63d0\u793a</p>\n",
"<p>Move the model to device </p>\n": "<p>\u5c06\u6a21\u578b\u79fb\u81f3\u8bbe\u5907</p>\n",
"<p>Noise diffuse the original image </p>\n": "<p>\u566a\u70b9\u4f1a\u6f2b\u53cd\u5c04\u539f\u59cb\u56fe\u50cf</p>\n",
"<p>Reconstruct from the noisy image, while preserving the masked area </p>\n": "<p>\u5728\u4fdd\u7559\u906e\u7f69\u533a\u57df\u7684\u540c\u65f6\uff0c\u4ece\u566a\u58f0\u56fe\u50cf\u4e2d\u91cd\u5efa</p>\n",
"<p>Save images </p>\n": "<p>\u4fdd\u5b58\u56fe\u7247</p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the path of the checkpoint </li>\n<li><span translate=no>_^_1_^_</span> is the number of sampling steps </li>\n<li><span translate=no>_^_2_^_</span> is the <a href=\"../sampler/ddim.html\">DDIM sampling</a> <span translate=no>_^_3_^_</span> constant</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u68c0\u67e5\u70b9\u7684\u8def\u5f84</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u91c7\u6837\u6b65\u9aa4\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f <a href=\"../sampler/ddim.html\">DDIM \u91c7\u6837</a><span translate=no>_^_3_^_</span>\u5e38\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the path to store the generated images </li>\n<li><span translate=no>_^_1_^_</span> is the image to transform </li>\n<li><span translate=no>_^_2_^_</span> specifies how much of the original image should not be preserved </li>\n<li><span translate=no>_^_3_^_</span> is the number of images to generate in a batch </li>\n<li><span translate=no>_^_4_^_</span> is the prompt to generate images with </li>\n<li><span translate=no>_^_5_^_</span> is the unconditional guidance scale <span translate=no>_^_6_^_</span>. This is used for <span translate=no>_^_7_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5b58\u50a8\u751f\u6210\u7684\u56fe\u50cf\u7684\u8def\u5f84</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u8981\u8f6c\u6362\u7684\u56fe\u50cf</li>\n<li><span translate=no>_^_2_^_</span>\u6307\u5b9a\u4e0d\u5e94\u4fdd\u7559\u539f\u59cb\u56fe\u50cf\u7684\u591a\u5c11</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u6279\u91cf\u751f\u6210\u7684\u56fe\u50cf\u6570\u91cf</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u4f7f\u7528\u4ee5\u4e0b\u547d\u4ee4\u751f\u6210\u56fe\u50cf\u7684\u63d0\u793a</li>\n<li><span translate=no>_^_5_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_6_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_7_^_</span></li></ul>\n",
"In-paint images using stable diffusion with a prompt": "\u4f7f\u7528\u5e26\u6709\u63d0\u793a\u7684\u7a33\u5b9a\u6269\u6563\u529f\u80fd\u586b\u5145\u56fe\u50cf"
}
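
In-painting differs from image-to-image in one step: at each denoising iteration the preserved region is overwritten with a freshly-noised copy of the original latent, so only the rest is re-synthesized. A sketch of that blend, with all names assumed and mask == 1 taken to mean "preserve":

import torch

@torch.no_grad()
def inpaint_blend(x, orig_latent, mask, sampler, t_index):
    # Noise the original latent up to the current time step
    orig_noisy = sampler.q_sample(orig_latent, t_index)
    # Keep the original content where the mask is set, the generated sample elsewhere
    return orig_noisy * mask + x * (1 - mask)

When no mask is supplied, the script above defaults to preserving the bottom half of the image, i.e. a mask that is 1 on the lower rows.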

View File

@ -0,0 +1,23 @@
{
"<h1>Generate images using <a href=\"../index.html\">stable diffusion</a> with a prompt</h1>\n": "<h1>\u5728\u63d0\u793a\u4e0b\u4f7f\u7528<a href=\"../index.html\">\u7a33\u5b9a\u7684\u6269\u6563</a>\u751f\u6210\u56fe\u50cf</h1>\n",
"<h3>CLI</h3>\n": "<h3>CLI</h3>\n",
"<h3>Text to image class</h3>\n": "<h3>\u6587\u672c\u8f6c\u56fe\u50cf\u7c7b\u522b</h3>\n",
"<p> </p>\n": "<p></p>\n",
"<p><a href=\"../sampler/index.html\">Sample in the latent space</a>. <span translate=no>_^_0_^_</span> will be of shape <span translate=no>_^_1_^_</span> </p>\n": "<p><a href=\"../sampler/index.html\">\u5728\u6f5c\u5728\u7a7a\u95f4\u4e2d\u53d6\u6837</a>\u3002<span translate=no>_^_0_^_</span>\u4f1a\u53d8\u5f62<span translate=no>_^_1_^_</span></p>\n",
"<p>AMP auto casting </p>\n": "<p>AMP \u81ea\u52a8\u6295\u5c04</p>\n",
"<p>Decode the image from the <a href=\"../model/autoencoder.html\">autoencoder</a> </p>\n": "<p>\u4ece<a href=\"../model/autoencoder.html\">\u81ea\u52a8\u7f16\u7801\u5668\u89e3\u7801</a>\u56fe\u50cf</p>\n",
"<p>Get device </p>\n": "<p>\u83b7\u53d6\u8bbe\u5907</p>\n",
"<p>Get the prompt embeddings </p>\n": "<p>\u83b7\u53d6\u63d0\u793a\u5d4c\u5165\u4fe1\u606f</p>\n",
"<p>Image to latent space resolution reduction </p>\n": "<p>\u964d\u4f4e\u56fe\u50cf\u5230\u6f5c\u5728\u7a7a\u95f4\u7684\u5206\u8fa8\u7387</p>\n",
"<p>In unconditional scaling is not <span translate=no>_^_0_^_</span> get the embeddings for empty prompts (no conditioning). </p>\n": "<p>\u5728\u65e0\u6761\u4ef6\u7f29\u653e\u4e2d\uff0c\u65e0\u6cd5<span translate=no>_^_0_^_</span>\u83b7\u53d6\u7a7a\u63d0\u793a\u7684\u5d4c\u5165\u503c\uff08\u65e0\u6761\u4ef6\uff09\u3002</p>\n",
"<p>Initialize <a href=\"../sampler/index.html\">sampler</a> </p>\n": "<p>\u521d\u59cb\u5316<a href=\"../sampler/index.html\">\u91c7\u6837\u5668</a></p>\n",
"<p>Load <a href=\"../latent_diffusion.html\">latent diffusion model</a> </p>\n": "<p>\u8f7d\u8377<a href=\"../latent_diffusion.html\">\u6f5c\u5728\u6269\u6563\u6a21\u578b</a></p>\n",
"<p>Make a batch of prompts </p>\n": "<p>\u505a\u4e00\u6279\u63d0\u793a</p>\n",
"<p>Move the model to device </p>\n": "<p>\u5c06\u6a21\u578b\u79fb\u81f3\u8bbe\u5907</p>\n",
"<p>Number of channels in the image </p>\n": "<p>\u56fe\u50cf\u4e2d\u7684\u901a\u9053\u6570</p>\n",
"<p>Save images </p>\n": "<p>\u4fdd\u5b58\u56fe\u7247</p>\n",
"<p>Set flash attention </p>\n": "<p>\u8bbe\u7f6e\u95ea\u5149\u706f\u6ce8\u610f\u529b</p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the path of the checkpoint </li>\n<li><span translate=no>_^_1_^_</span> is the name of the <a href=\"../sampler/index.html\">sampler</a> </li>\n<li><span translate=no>_^_2_^_</span> is the number of sampling steps </li>\n<li><span translate=no>_^_3_^_</span> is the <a href=\"../sampler/ddim.html\">DDIM sampling</a> <span translate=no>_^_4_^_</span> constant</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u68c0\u67e5\u70b9\u7684\u8def\u5f84</li>\n<li><span translate=no>_^_1_^_</span>\u662f<a href=\"../sampler/index.html\">\u91c7\u6837</a>\u5668\u7684\u540d\u5b57</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u91c7\u6837\u6b65\u9aa4\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_3_^_</span>\u662f <a href=\"../sampler/ddim.html\">DDIM \u91c7\u6837</a><span translate=no>_^_4_^_</span>\u5e38\u6570</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the path to store the generated images </li>\n<li><span translate=no>_^_1_^_</span> is the number of images to generate in a batch </li>\n<li><span translate=no>_^_2_^_</span> is the prompt to generate images with </li>\n<li><span translate=no>_^_3_^_</span> is the height of the image </li>\n<li><span translate=no>_^_4_^_</span> is the width of the image </li>\n<li><span translate=no>_^_5_^_</span> is the unconditional guidance scale <span translate=no>_^_6_^_</span>. This is used for <span translate=no>_^_7_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5b58\u50a8\u751f\u6210\u7684\u56fe\u50cf\u7684\u8def\u5f84</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u6279\u91cf\u751f\u6210\u7684\u56fe\u50cf\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u4f7f\u7528\u4ee5\u4e0b\u547d\u4ee4\u751f\u6210\u56fe\u50cf\u7684\u63d0\u793a</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u56fe\u50cf\u7684\u9ad8\u5ea6</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u56fe\u50cf\u7684\u5bbd\u5ea6</li>\n<li><span translate=no>_^_5_^_</span>\u662f\u65e0\u6761\u4ef6\u6307\u5bfc\u91cf\u8868<span translate=no>_^_6_^_</span>\u3002\u8fd9\u7528\u4e8e<span translate=no>_^_7_^_</span></li></ul>\n",
"Generate images using stable diffusion with a prompt": "\u5728\u63d0\u793a\u4e0b\u4f7f\u7528\u7a33\u5b9a\u7684\u6269\u6563\u751f\u6210\u56fe\u50cf"
}
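
The text-to-image entries describe sampling directly in the latent space, whose resolution is the image size divided by 8 with 4 latent channels. A sketch, with the `model` and `sampler` methods assumed rather than taken from the repository:

import torch

@torch.no_grad()
def txt2img(model, sampler, prompt, *, batch_size=4, h=512, w=512, uncond_scale=7.5):
    # The autoencoder reduces resolution by a factor of 8, with 4 latent channels
    c, f = 4, 8
    cond = model.get_text_conditioning(batch_size * [prompt])
    uncond = model.get_text_conditioning(batch_size * [""]) if uncond_scale != 1. else None
    # Sample in the latent space: x has shape [batch_size, c, h // f, w // f]
    x = sampler.sample(shape=[batch_size, c, h // f, w // f], cond=cond,
                       uncond_scale=uncond_scale, uncond_cond=uncond)
    # Decode the latents back to image space with the autoencoder
    return model.autoencoder_decode(x)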

View File

@ -0,0 +1,26 @@
{
"<h1>Utility functions for <a href=\"index.html\">stable diffusion</a></h1>\n": "<h1>\u7528\u4e8e<a href=\"index.html\">\u7a33\u5b9a\u6269\u6563</a>\u7684\u5b9e\u7528\u51fd\u6570</h1>\n",
"<h3>Load <a href=\"latent_diffusion.html\"><span translate=no>_^_0_^_</span> model</a></h3>\n": "<h3>\u52a0\u8f7d<a href=\"latent_diffusion.html\"><span translate=no>_^_0_^_</span>\u6a21\u578b</a></h3>\n",
"<h3>Load an image</h3>\n<p>This loads an image from a file and returns a PyTorch tensor.</p>\n<ul><li><span translate=no>_^_0_^_</span> is the path of the image</li></ul>\n": "<h3>\u52a0\u8f7d\u56fe\u7247</h3>\n<p>\u8fd9\u5c06\u4ece\u6587\u4ef6\u52a0\u8f7d\u56fe\u50cf\u5e76\u8fd4\u56de PyTorch \u5f20\u91cf\u3002</p>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u56fe\u50cf\u7684\u8def\u5f84</li></ul>\n",
"<h3>Save a images</h3>\n<ul><li><span translate=no>_^_0_^_</span> is the tensor with images of shape <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> is the folder to save images in </li>\n<li><span translate=no>_^_3_^_</span> is the prefix to add to file names </li>\n<li><span translate=no>_^_4_^_</span> is the image format</li></ul>\n": "<h3>\u4fdd\u5b58\u56fe\u50cf</h3>\n<ul><li><span translate=no>_^_0_^_</span>\u662f\u5e26\u6709\u5f62\u72b6\u56fe\u50cf\u7684\u5f20\u91cf<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u4fdd\u5b58\u56fe\u50cf\u7684\u6587\u4ef6\u5939</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u6dfb\u52a0\u5230\u6587\u4ef6\u540d\u7684\u524d\u7f00</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u56fe\u50cf\u683c\u5f0f</li></ul>\n",
"<h3>Set random seeds</h3>\n": "<h3>\u8bbe\u7f6e\u968f\u673a\u79cd\u5b50</h3>\n",
"<p> </p>\n": "<p></p>\n",
"<p>Convert to numpy and map to <span translate=no>_^_0_^_</span> for <span translate=no>_^_1_^_</span> </p>\n": "<p>\u8f6c\u6362\u4e3a numpy \u5e76\u6620\u5c04\u5230 fo<span translate=no>_^_0_^_</span> r<span translate=no>_^_1_^_</span></p>\n",
"<p>Convert to torch </p>\n": "<p>\u8f6c\u6362\u4e3a torch</p>\n",
"<p>Create the destination folder </p>\n": "<p>\u521b\u5efa\u76ee\u6807\u6587\u4ef6\u5939</p>\n",
"<p>Debugging output </p>\n": "<p>\u8c03\u8bd5\u8f93\u51fa</p>\n",
"<p>Get image size </p>\n": "<p>\u83b7\u53d6\u56fe\u50cf\u5927\u5c0f</p>\n",
"<p>Initialize the CLIP text embedder </p>\n": "<p>\u521d\u59cb\u5316 CLIP \u6587\u672c\u5d4c\u5165\u5668</p>\n",
"<p>Initialize the Latent Diffusion model </p>\n": "<p>\u521d\u59cb\u5316\u6f5c\u5728\u6269\u6563\u6a21\u578b</p>\n",
"<p>Initialize the U-Net </p>\n": "<p>\u521d\u59cb\u5316 U-Net</p>\n",
"<p>Initialize the autoencoder </p>\n": "<p>\u521d\u59cb\u5316\u81ea\u52a8\u7f16\u7801\u5668</p>\n",
"<p>Load the checkpoint </p>\n": "<p>\u52a0\u8f7d\u68c0\u67e5\u70b9</p>\n",
"<p>Map images to <span translate=no>_^_0_^_</span> space and clip </p>\n": "<p>\u5c06\u56fe\u50cf\u6620\u5c04\u5230<span translate=no>_^_0_^_</span>\u7a7a\u95f4\u5e76\u526a\u8f91</p>\n",
"<p>Open Image </p>\n": "<p>\u6253\u5f00\u56fe\u7247</p>\n",
"<p>Resize to a multiple of 32 </p>\n": "<p>\u8c03\u6574\u4e3a 32 \u7684\u500d\u6570</p>\n",
"<p>Save images </p>\n": "<p>\u4fdd\u5b58\u56fe\u7247</p>\n",
"<p>Set model state </p>\n": "<p>\u8bbe\u7f6e\u6a21\u578b\u72b6\u6001</p>\n",
"<p>Transpose to <span translate=no>_^_0_^_</span> and convert to numpy </p>\n": "<p>\u8f6c\u7f6e\u4e3a numpy<span translate=no>_^_0_^_</span> \u5e76\u8f6c\u6362\u4e3a numpy</p>\n",
"<p>Transpose to shape <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8f6c\u7f6e\u6210\u5f62\u72b6<span translate=no>_^_0_^_</span></p>\n",
"Utility functions for stable diffusion": "\u7528\u4e8e\u7a33\u5b9a\u6269\u6563\u7684\u5b9e\u7528\u51fd\u6570"
}
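
The `load_img` utility described above can be sketched as follows; the exact resampling filter and rounding are assumptions:

import numpy as np
import torch
from PIL import Image

def load_img(path: str) -> torch.Tensor:
    # Load an image as a [1, C, H, W] tensor in [-1, 1]
    image = Image.open(path).convert("RGB")
    w, h = image.size
    # Resize so both sides are multiples of 32
    w, h = w - w % 32, h - h % 32
    image = image.resize((w, h), resample=Image.LANCZOS)
    # To numpy float in [0, 1], then transpose to (batch, channels, height, width)
    img = np.array(image).astype(np.float32) / 255.0
    img = img[None].transpose(0, 3, 1, 2)
    # Convert to torch and map to [-1, 1]
    return torch.from_numpy(img) * 2.0 - 1.0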

View File

@ -1,6 +1,6 @@
{
"<h1>Train a Graph Attention Network (GAT) on Cora dataset</h1>\n<p><a href=\"https://app.labml.ai/run/d6c636cadf3511eba2f1e707f612f95d\"><span translate=no>_^_0_^_</span></a></p>\n": "<h1>\u5728 Cora \u6570\u636e\u96c6\u4e0a\u8bad\u7ec3\u56fe\u5f62\u6ce8\u610f\u529b\u7f51\u7edc (GAT)</h1>\n<p><a href=\"https://app.labml.ai/run/d6c636cadf3511eba2f1e707f612f95d\"><span translate=no>_^_0_^_</span></a></p>\n",
"<h2><a href=\"https://linqs.soe.ucsc.edu/data\">Cora Dataset</a></h2>\n<p>Cora dataset is a dataset of research papers. For each paper we are given a binary feature vector that indicates the presence of words. Each paper is classified into one of 7 classes. The dataset also has the citation network.</p>\n<p>The papers are the nodes of the graph and the edges are the citations.</p>\n<p>The task is to classify the edges to the 7 classes with feature vectors and citation network as input.</p>\n": "<h2><a href=\"https://linqs.soe.ucsc.edu/data\">Cora \u6570\u636e\u96c6</a></h2>\n<p>Cora \u6570\u636e\u96c6\u662f\u7814\u7a76\u8bba\u6587\u7684\u6570\u636e\u96c6\u3002\u5bf9\u4e8e\u6bcf\u7bc7\u8bba\u6587\uff0c\u6211\u4eec\u90fd\u4f1a\u5f97\u5230\u4e00\u4e2a\u8868\u793a\u5355\u8bcd\u5b58\u5728\u7684\u4e8c\u8fdb\u5236\u7279\u5f81\u5411\u91cf\u3002\u6bcf\u7bc7\u8bba\u6587\u5206\u4e3a 7 \u7c7b\u4e2d\u7684\u4e00\u7c7b\u3002\u8be5\u6570\u636e\u96c6\u8fd8\u5177\u6709\u5f15\u6587\u7f51\u7edc\u3002</p>\n<p>\u8bba\u6587\u662f\u56fe\u8868\u7684\u8282\u70b9\uff0c\u8fb9\u7f18\u662f\u5f15\u6587\u3002</p>\n<p>\u4efb\u52a1\u662f\u4f7f\u7528\u7279\u5f81\u5411\u91cf\u548c\u5f15\u6587\u7f51\u7edc\u4f5c\u4e3a\u8f93\u5165\u5c06\u8fb9\u5206\u4e3a 7 \u4e2a\u7c7b\u3002</p>\n",
"<h2><a href=\"https://linqs.soe.ucsc.edu/data\">Cora Dataset</a></h2>\n<p>Cora dataset is a dataset of research papers. For each paper we are given a binary feature vector that indicates the presence of words. Each paper is classified into one of 7 classes. The dataset also has the citation network.</p>\n<p>The papers are the nodes of the graph and the edges are the citations.</p>\n<p>The task is to classify the nodes to the 7 classes with feature vectors and citation network as input.</p>\n": "<h2><a href=\"https://linqs.soe.ucsc.edu/data\">Cora \u6570\u636e\u96c6</a></h2>\n<p>Cora \u6570\u636e\u96c6\u662f\u7814\u7a76\u8bba\u6587\u7684\u6570\u636e\u96c6\u3002\u5bf9\u4e8e\u6bcf\u7bc7\u8bba\u6587\uff0c\u6211\u4eec\u90fd\u5f97\u5230\u4e00\u4e2a\u4e8c\u8fdb\u5236\u7279\u5f81\u5411\u91cf\uff0c\u8be5\u5411\u91cf\u8868\u793a\u5355\u8bcd\u7684\u5b58\u5728\u3002\u6bcf\u7bc7\u8bba\u6587\u5206\u4e3a 7 \u4e2a\u7c7b\u522b\u4e4b\u4e00\u3002\u8be5\u6570\u636e\u96c6\u8fd8\u5177\u6709\u5f15\u6587\u7f51\u7edc\u3002</p>\n<p>\u8bba\u6587\u662f\u56fe\u7684\u8282\u70b9\uff0c\u8fb9\u7f18\u662f\u5f15\u6587\u3002</p>\n<p>\u4efb\u52a1\u662f\u4f7f\u7528\u7279\u5f81\u5411\u91cf\u548c\u5f15\u6587\u7f51\u7edc\u4f5c\u4e3a\u8f93\u5165\uff0c\u5c06\u8282\u70b9\u5206\u7c7b\u4e3a 7 \u7c7b\u3002</p>\n",
"<h2>Configurations</h2>\n": "<h2>\u914d\u7f6e</h2>\n",
"<h2>Graph Attention Network (GAT)</h2>\n<p>This graph attention network has two <a href=\"index.html\">graph attention layers</a>.</p>\n": "<h2>Graph \u6ce8\u610f\u529b\u7f51\u7edc (GAT)</h2>\n<p>\u8fd9\u4e2a\u56fe\u5f62\u5173\u6ce8\u7f51\u7edc\u6709\u4e24\u4e2a<a href=\"index.html\">\u56fe\u5f62\u5173\u6ce8\u5c42</a>\u3002</p>\n",
"<h3>Training loop</h3>\n<p>We do full batch training since the dataset is small. If we were to sample and train we will have to sample a set of nodes for each training step along with the edges that span across those selected nodes.</p>\n": "<h3>\u8bad\u7ec3\u5faa\u73af</h3>\n<p>\u7531\u4e8e\u6570\u636e\u96c6\u5f88\u5c0f\uff0c\u6211\u4eec\u8fdb\u884c\u5168\u6279\u91cf\u8bad\u7ec3\u3002\u5982\u679c\u8981\u8fdb\u884c\u91c7\u6837\u548c\u8bad\u7ec3\uff0c\u6211\u4eec\u5c06\u4e0d\u5f97\u4e0d\u4e3a\u6bcf\u4e2a\u8bad\u7ec3\u6b65\u9aa4\u5bf9\u4e00\u7ec4\u8282\u70b9\u4ee5\u53ca\u8de8\u8d8a\u8fd9\u4e9b\u9009\u5b9a\u8282\u70b9\u7684\u8fb9\u8fdb\u884c\u91c7\u6837\u3002</p>\n",

View File

@ -1,6 +1,7 @@
{
"<h1>Evaluate GPT-NeoX using LLM.int8() quantization on test suite</h1>\n<p>This code evaluate <a href=\"../index.html\">GPT-NeoX</a> using, on a suite of tasks.</p>\n": "<h1>\u5728\u6d4b\u8bd5\u5957\u4ef6\u4e0a\u4f7f\u7528 llm.int8 () \u91cf\u5316\u6765\u8bc4\u4f30 GPT-NEOX</h1>\n<p>\u6b64\u4ee3\u7801\u4f7f\u7528\u5728\u4e00\u5957\u4efb\u52a1\u4e0a\u8bc4\u4f30 <a href=\"../index.html\">GPT-NEOX</a>\u3002</p>\n",
"<p> </p>\n": "<p></p>\n",
"<p>Argument parser </p>\n": "<p>\u53c2\u6570\u89e3\u6790\u5668</p>\n",
"<p>Create <span translate=no>_^_0_^_</span> model </p>\n": "<p>\u521b\u5efa<span translate=no>_^_0_^_</span>\u6a21\u578b</p>\n",
"<p>Device </p>\n": "<p>\u8bbe\u5907</p>\n",
"<p>Load layers </p>\n": "<p>\u52a0\u8f7d\u56fe\u5c42</p>\n",

View File

@ -6,7 +6,7 @@
"<h2>Final normalization layer</h2>\n": "<h2>\u6700\u7ec8\u5f52\u4e00\u5316\u5c42</h2>\n",
"<h2>Rotary Positional Embeddings</h2>\n<p>GPT-NeoX uses <a href=\"https://papers.labml.ai/paper/2104.09864\">rotary positional embeddings (RoPE)</a>.</p>\n<p>WE have annotated implementation of RoPE <a href=\"https://nn.labml.ai/transformers/rope/index.html\">here</a> with more notes the theory.</p>\n": "<h2>\u65cb\u8f6c\u4f4d\u7f6e\u5d4c\u5165</h2>\n<p>GPT-NEOX \u4f7f\u7528<a href=\"https://papers.labml.ai/paper/2104.09864\">\u65cb\u8f6c\u4f4d\u7f6e\u5d4c\u5165\uff08RoP\uff09</a>\u3002</p>\n<p>\u6211\u4eec<a href=\"https://nn.labml.ai/transformers/rope/index.html\">\u5728\u8fd9\u91cc</a>\u6ce8\u91ca\u4e86 RoPe \u7684\u5b9e\u73b0\uff0c\u5e76\u9644\u4e0a\u4e86\u66f4\u591a\u5173\u4e8e\u7406\u8bba\u7684\u6ce8\u91ca\u3002</p>\n",
"<h2>Transformer Layer</h2>\n": "<h2>\u53d8\u538b\u5668\u5c42</h2>\n",
"<h3>Generator to create layers</h3>\n<p>The layers are generated in the same order as checkpoints.</p>\n<p>It gives <span translate=no>_^_0_^_</span> when a layer is not available; we use the layer indices as NeoX and there are two transformation layers we don&#x27;t need in our implementation.</p>\n<ul><li><span translate=no>_^_1_^_</span> is the number of tokens in the vocabulary </li>\n<li><span translate=no>_^_2_^_</span> is the number of features in the embeddings </li>\n<li><span translate=no>_^_3_^_</span> is the number of transformer layers </li>\n<li><span translate=no>_^_4_^_</span> is the number of attention heads </li>\n<li><span translate=no>_^_5_^_</span> are the set of layers to be used. All layers will be used if None. This is used to test smaller versions of the model with fewer layers </li>\n<li><span translate=no>_^_6_^_</span> specifies whether to clone the transformer layers (a bit faster) </li>\n<li><span translate=no>_^_7_^_</span> is the data type of the model </li>\n<li><span translate=no>_^_8_^_</span> is the device of the model </li>\n<li><span translate=no>_^_9_^_</span> specifies whether to use int8 quantization </li>\n<li><span translate=no>_^_10_^_</span> is the threshold <span translate=no>_^_11_^_</span> used to separate outlier features</li></ul>\n": "<h3>\u7528\u4e8e\u521b\u5efa\u5c42\u7684\u751f\u6210\u5668</h3>\n<p>\u56fe\u5c42\u7684\u751f\u6210\u987a\u5e8f\u4e0e\u68c0\u67e5\u70b9\u76f8\u540c\u3002</p>\n<p>\u5b83\u7ed9\u51fa\u4e86\u5c42<span translate=no>_^_0_^_</span>\u4f55\u65f6\u4e0d\u53ef\u7528\uff1b\u6211\u4eec\u4f7f\u7528\u5c42\u7d22\u5f15\u4f5c\u4e3a NeoX\uff0c\u5e76\u4e14\u5728\u6211\u4eec\u7684\u5b9e\u73b0\u4e2d\u4e0d\u9700\u8981\u4e24\u4e2a\u53d8\u6362\u5c42\u3002</p>\n<ul><li><span translate=no>_^_1_^_</span>\u662f\u8bcd\u6c47\u8868\u4e2d\u4ee3\u5e01\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u5d4c\u5165\u4e2d\u7684\u8981\u7d20\u6570\u91cf</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u53d8\u538b\u5668\u5c42\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u6ce8\u610f\u5934\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_5_^_</span>\u662f\u8981\u4f7f\u7528\u7684\u56fe\u5c42\u96c6\u3002\u5982\u679c\u4e3a None\uff0c\u5219\u4f7f\u7528\u6240\u6709\u56fe\u5c42\u3002\u8fd9\u7528\u4e8e\u6d4b\u8bd5\u5c42\u6570\u8f83\u5c11\u7684\u6a21\u578b\u7684\u8f83\u5c0f\u7248\u672c</li>\n<li><span translate=no>_^_6_^_</span>\u6307\u5b9a\u662f\u5426\u514b\u9686\u53d8\u538b\u5668\u5c42\uff08\u5feb\u4e00\u70b9\uff09</li>\n<li><span translate=no>_^_7_^_</span>\u662f\u6a21\u578b\u7684\u6570\u636e\u7c7b\u578b</li>\n<li><span translate=no>_^_8_^_</span>\u662f\u8be5\u578b\u53f7\u7684\u8bbe\u5907</li>\n<li><span translate=no>_^_9_^_</span>\u6307\u5b9a\u662f\u5426\u4f7f\u7528 int8 \u91cf\u5316</li>\n<li><span translate=no>_^_10_^_</span>\u662f<span translate=no>_^_11_^_</span>\u7528\u4e8e\u5206\u9694\u5f02\u5e38\u503c\u8981\u7d20\u7684\u9608\u503c</li></ul>\n",
"<h3>Generator to create layers</h3>\n<p>The layers are generated in the same order as checkpoints.</p>\n<p>It gives <span translate=no>_^_0_^_</span> when a layer is not available; we use the layer indices as NeoX and there are two transformation layers we don&#x27;t need in our implementation.</p>\n<ul><li><span translate=no>_^_1_^_</span> is the number of tokens in the vocabulary </li>\n<li><span translate=no>_^_2_^_</span> is the number of features in the embeddings </li>\n<li><span translate=no>_^_3_^_</span> is the number of transformer layers </li>\n<li><span translate=no>_^_4_^_</span> is the number of attention heads </li>\n<li><span translate=no>_^_5_^_</span> are the set of layers to be used. All layers will be used if None. This is used to test smaller versions of the model with fewer layers </li>\n<li><span translate=no>_^_6_^_</span> specifies whether to clone the transformer layers (a bit faster) </li>\n<li><span translate=no>_^_7_^_</span> is the data type of the model </li>\n<li><span translate=no>_^_8_^_</span> is the device of the model </li>\n<li><span translate=no>_^_9_^_</span> specifies whether to use int8 quantization </li>\n<li><span translate=no>_^_10_^_</span> is the threshold <span translate=no>_^_11_^_</span> used to separate outlier features </li>\n<li><span translate=no>_^_12_^_</span> specifies whether to use <a href=\"https://github.com/HazyResearch/flash-attention\">FlashAttention</a></li></ul>\n": "<h3>\u7528\u4e8e\u521b\u5efa\u56fe\u5c42\u7684\u751f\u6210\u5668</h3>\n<p>\u56fe\u5c42\u7684\u751f\u6210\u987a\u5e8f\u4e0e\u68c0\u67e5\u70b9\u7684\u751f\u6210\u987a\u5e8f\u76f8\u540c\u3002</p>\n<p>\u5b83\u5728\u56fe\u5c42\u4e0d\u53ef\u7528<span translate=no>_^_0_^_</span>\u65f6\u7ed9\u51fa\uff1b\u6211\u4eec\u5c06\u56fe\u5c42\u7d22\u5f15\u7528\u4f5c NeoX\uff0c\u5e76\u4e14\u5728\u5b9e\u73b0\u4e2d\u4e0d\u9700\u8981\u4e24\u4e2a\u8f6c\u6362\u5c42\u3002</p>\n<ul><li><span translate=no>_^_1_^_</span>\u662f\u8bcd\u6c47\u8868\u4e2d\u7684\u4ee3\u5e01\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u662f\u5d4c\u5165\u4e2d\u7684\u7279\u5f81\u6570\u91cf</li>\n<li><span translate=no>_^_3_^_</span>\u662f\u53d8\u538b\u5668\u5c42\u6570</li>\n<li><span translate=no>_^_4_^_</span>\u662f\u6ce8\u610f\u529b\u5934\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_5_^_</span>\u662f\u8981\u4f7f\u7528\u7684\u56fe\u5c42\u96c6\u3002\u5982\u679c\u6ca1\u6709\uff0c\u5219\u5c06\u4f7f\u7528\u6240\u6709\u56fe\u5c42\u3002\u8fd9\u7528\u4e8e\u6d4b\u8bd5\u5c42\u6570\u8f83\u5c11\u7684\u6a21\u578b\u7684\u8f83\u5c0f\u7248\u672c</li>\n<li><span translate=no>_^_6_^_</span>\u6307\u5b9a\u662f\u5426\u514b\u9686\u53d8\u538b\u5668\u5c42\uff08\u5feb\u4e00\u70b9\uff09</li>\n<li><span translate=no>_^_7_^_</span>\u662f\u6a21\u578b\u7684\u6570\u636e\u7c7b\u578b</li>\n<li><span translate=no>_^_8_^_</span>\u662f\u6a21\u578b\u7684\u8bbe\u5907</li>\n<li><span translate=no>_^_9_^_</span>\u6307\u5b9a\u662f\u5426\u4f7f\u7528 int8 \u91cf\u5316</li>\n<li><span translate=no>_^_10_^_</span>\u662f<span translate=no>_^_11_^_</span>\u7528\u4e8e\u5206\u79bb\u5f02\u5e38\u7279\u5f81\u7684\u9608\u503c</li>\n<li><span translate=no>_^_12_^_</span>\u6307\u5b9a\u662f\u5426\u4f7f\u7528 <a href=\"https://github.com/HazyResearch/flash-attention\">FlashAttention</a></li></ul>\n",
"<h3>Generator to get layers</h3>\n": "<h3>\u83b7\u53d6\u56fe\u5c42\u7684\u751f\u6210\u5668</h3>\n",
"<h3>Generator to load layers</h3>\n": "<h3>\u7528\u4e8e\u52a0\u8f7d\u5c42\u7684\u751f\u6210\u5668</h3>\n",
"<h3>Returns the total number of layers</h3>\n": "<h3>\u8fd4\u56de\u603b\u5c42\u6570</h3>\n",
@ -19,6 +19,7 @@
"<p> <span translate=no>_^_0_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span></p>\n",
"<p> Code to load the checkpoint</p>\n": "<p>\u52a0\u8f7d\u68c0\u67e5\u70b9\u7684\u4ee3\u7801</p>\n",
"<p> Readout layer</p>\n": "<p>\u8bfb\u51fa\u5c42</p>\n",
"<p><a href=\"https://github.com/HazyResearch/flash-attention\">FlashAttention</a> </p>\n": "<p><a href=\"https://github.com/HazyResearch/flash-attention\">\u95ea\u5149\u6ce8\u610f</a></p>\n",
"<p><span translate=no>_^_0_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span></p>\n",
"<p>Add RoPE embeddings </p>\n": "<p>\u6dfb\u52a0\u7ef3\u7d22\u5d4c\u5165</p>\n",
"<p>Add head dimension </p>\n": "<p>\u6dfb\u52a0\u5934\u90e8\u5c3a\u5bf8</p>\n",
@ -75,6 +76,7 @@
"<p>Number of features per head </p>\n": "<p>\u6bcf\u5934\u7279\u5f81\u6570</p>\n",
"<p>Offset of the current embeddings </p>\n": "<p>\u5f53\u524d\u5d4c\u5165\u7684\u504f\u79fb\u91cf</p>\n",
"<p>Only convert the linear layers in the transformer layers </p>\n": "<p>\u4ec5\u8f6c\u6362\u53d8\u538b\u5668\u5c42\u4e2d\u7684\u7ebf\u6027\u5c42</p>\n",
"<p>Otherwise, use normal attention </p>\n": "<p>\u5426\u5219\uff0c\u8bf7\u6b63\u5e38\u6ce8\u610f</p>\n",
"<p>Query and key lengths </p>\n": "<p>\u67e5\u8be2\u548c\u5bc6\u94a5\u957f\u5ea6</p>\n",
"<p>Readout layer </p>\n": "<p>\u8bfb\u51fa\u5c42</p>\n",
"<p>Reshape from <span translate=no>_^_0_^_</span><a href=\"batch_size, seq_len, n_hidden\">batch_size, seq_len, n_hidden</a>` </p>\n": "<p>\u4ece<span translate=no>_^_0_^_</span> <a href=\"batch_size, seq_len, n_hidden\">batch_size\u3001seq_len\u3001n_hidden \u8fdb\u884c\u91cd\u5851</a> `</p>\n",
@ -88,21 +90,24 @@
"<p>Split into heads by changing the shape to <span translate=no>_^_0_^_</span> </p>\n": "<p>\u901a\u8fc7\u5c06\u5f62\u72b6\u6539\u4e3a\u5206\u6210\u5934\u90e8<span translate=no>_^_0_^_</span></p>\n",
"<p>Split into query, key and value each of shape <span translate=no>_^_0_^_</span> </p>\n": "<p>\u5206\u4e3a\u67e5\u8be2\u3001\u952e\u548c\u503c\u5404\u5f62\u72b6<span translate=no>_^_0_^_</span></p>\n",
"<p>Split the features. We apply RoPE to only <span translate=no>_^_0_^_</span> features </p>\n": "<p>\u62c6\u5206\u8981\u7d20\u3002\u6211\u4eec\u4ec5\u5c06 RoPe \u5e94\u7528\u4e8e\u8981<span translate=no>_^_0_^_</span>\u7d20</p>\n",
"<p>Stack them into shape <span translate=no>_^_0_^_</span> </p>\n": "<p>\u5c06\u5b83\u4eec\u5806\u53e0\u6210\u5f62\u72b6<span translate=no>_^_0_^_</span></p>\n",
"<p>The output is of shape <span translate=no>_^_0_^_</span> </p>\n": "<p>\u8f93\u51fa\u7684\u5f62\u72b6\u662f\u8fd9\u6837\u7684<span translate=no>_^_0_^_</span></p>\n",
"<p>To cache causal mask </p>\n": "<p>\u7f13\u5b58\u56e0\u679c\u63a9\u7801</p>\n",
"<p>To store <span translate=no>_^_0_^_</span> for the features </p>\n": "<p>\u4e3a\u8981\u7d20\u5b58\u50a8<span translate=no>_^_0_^_</span></p>\n",
"<p>Transformer layer </p>\n": "<p>\u53d8\u538b\u5668\u5c42</p>\n",
"<p>Transformer layers </p>\n": "<p>\u53d8\u538b\u5668\u5c42</p>\n",
"<p>Use <span translate=no>_^_0_^_</span> defined in <a href=\"./utils/llm_int8.html\">utilities</a>. </p>\n": "<p>\u4f7f\u7528\u5728<a href=\"./utils/llm_int8.html\">\u5b9e\u7528\u7a0b\u5e8f</a>\u4e2d<span translate=no>_^_0_^_</span>\u5b9a\u4e49\u3002</p>\n",
"<p>Use flash attention </p>\n": "<p>\u4f7f\u7528\u95ea\u5149\u706f\u6ce8\u610f\u529b</p>\n",
"<ul><li><span translate=no>_^_0_^_</span> are the embeddings of shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62\u72b6\u7684\u5d4c\u5165<span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> are the token ids of shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5f62\u72b6\u7684\u4ee4\u724c ID<span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> has shape <span translate=no>_^_1_^_</span> </li>\n<li><span translate=no>_^_2_^_</span> is the starting position of <span translate=no>_^_3_^_</span>. This is <span translate=no>_^_4_^_</span> when we have cached the keys and queries of previous positions</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u6709\u5f62\u72b6<span translate=no>_^_1_^_</span></li>\n<li><span translate=no>_^_2_^_</span>\u662f\u7684\u8d77\u59cb\u4f4d\u7f6e<span translate=no>_^_3_^_</span>\u3002\u8fd9\u662f\u6211\u4eec\u7f13\u5b58\u5148\u524d\u4f4d\u7f6e\u7684\u952e\u548c\u67e5\u8be2<span translate=no>_^_4_^_</span>\u7684\u65f6\u5019</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> has shape <span translate=no>_^_1_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u6709\u5f62\u72b6<span translate=no>_^_1_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the embedding size </li>\n<li><span translate=no>_^_1_^_</span> is the number of heads</li></ul>\n<p><em>Out implementation doesn&#x27;t include dropout</em>.</p>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5d4c\u5165\u7684\u5927\u5c0f</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u4eba\u5934\u7684\u6570\u91cf</li></ul>\nOu@@ <p><em>t \u5b9e\u73b0\u4e0d\u5305\u62ec\u8f8d\u5b66</em>\u3002</p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the embedding size </li>\n<li><span translate=no>_^_1_^_</span> is the number of heads </li>\n<li><span translate=no>_^_2_^_</span> specifies whether to use <a href=\"https://github.com/HazyResearch/flash-attention\">FlashAttention</a></li></ul>\n<p><em>Out implementation doesn&#x27;t include dropout</em>.</p>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5d4c\u5165\u5927\u5c0f</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u5934\u6570</li>\n<li><span translate=no>_^_2_^_</span>\u6307\u5b9a\u662f\u5426\u4f7f\u7528 <a href=\"https://github.com/HazyResearch/flash-attention\">FlashAttention</a></li></ul>\n<p><em>Out \u7684\u5b9e\u73b0\u4e0d\u5305\u62ec\u9000\u51fa</em>\u3002</p>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the embedding size </li>\n<li><span translate=no>_^_1_^_</span> is the size of the vocabulary</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5d4c\u5165\u7684\u5927\u5c0f</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u8bcd\u6c47\u91cf\u7684\u5927\u5c0f</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the embedding size</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u5d4c\u5165\u7684\u5927\u5c0f</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the number of features for RoPE embeddings </li>\n<li><span translate=no>_^_1_^_</span> is the base for <span translate=no>_^_2_^_</span>, which defaults to <span translate=no>_^_3_^_</span></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f RoPe \u5d4c\u5165\u7684\u8981\u7d20\u6570\u91cf</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u7684\u57fa\u7840<span translate=no>_^_2_^_</span>\uff0c\u9ed8\u8ba4\u4e3a<span translate=no>_^_3_^_</span></li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> is the size of the vocabulary </li>\n<li><span translate=no>_^_1_^_</span> is the size of the embeddings</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u662f\u8bcd\u6c47\u91cf\u7684\u5927\u5c0f</li>\n<li><span translate=no>_^_1_^_</span>\u662f\u5d4c\u5165\u7684\u5927\u5c0f</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> the number of features in embeddings </li>\n<li><span translate=no>_^_1_^_</span> the number of attention heads </li>\n<li><span translate=no>_^_2_^_</span> percentage of features to add RoPE embeddings </li>\n<li><span translate=no>_^_3_^_</span> masking fill value for attention matrix</li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u5d4c\u5165\u4e2d\u7684\u8981\u7d20\u6570\u91cf</li>\n<li><span translate=no>_^_1_^_</span>\u6ce8\u610f\u5934\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u8981\u6dfb\u52a0 RoPe \u5d4c\u5165\u7684\u8981\u7d20\u767e\u5206\u6bd4</li>\n<li><span translate=no>_^_3_^_</span>\u906e\u853d\u6ce8\u610f\u529b\u77e9\u9635\u7684\u586b\u5145\u503c</li></ul>\n",
"<ul><li><span translate=no>_^_0_^_</span> the number of features in embeddings </li>\n<li><span translate=no>_^_1_^_</span> the number of attention heads </li>\n<li><span translate=no>_^_2_^_</span> percentage of features to add RoPE embeddings </li>\n<li><span translate=no>_^_3_^_</span> masking fill value for attention matrix </li>\n<li><span translate=no>_^_4_^_</span> specifies whether to use <a href=\"https://github.com/HazyResearch/flash-attention\">FlashAttention</a></li></ul>\n": "<ul><li><span translate=no>_^_0_^_</span>\u5d4c\u5165\u4e2d\u7684\u7279\u5f81\u6570\u91cf</li>\n<li><span translate=no>_^_1_^_</span>\u6ce8\u610f\u529b\u5934\u7684\u6570\u91cf</li>\n<li><span translate=no>_^_2_^_</span>\u6dfb\u52a0 RoPE \u5d4c\u5165\u7684\u529f\u80fd\u767e\u5206\u6bd4</li>\n<li><span translate=no>_^_3_^_</span>\u63a9\u76d6\u6ce8\u610f\u529b\u77e9\u9635\u7684\u586b\u5145\u503c</li>\n<li><span translate=no>_^_4_^_</span>\u6307\u5b9a\u662f\u5426\u4f7f\u7528 <a href=\"https://github.com/HazyResearch/flash-attention\">FlashAttention</a></li></ul>\n",
"GPT-NeoX Model Definition": "GPT-NEOX \u578b\u53f7\u5b9a\u4e49",
"This is the model definition of GPT-NeoX.": "\u8fd9\u662f GPT-NEOX \u7684\u6a21\u578b\u5b9a\u4e49\u3002"
}
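
The RoPE entries above split the per-head features and rotate only a fraction of them (the rest pass through unchanged). A sketch of that partial rotation, using the interleaved-pair convention for illustration; the repository's actual layout details may differ:

import torch

def rope(x: torch.Tensor, d_rope: int, base: float = 10_000.) -> torch.Tensor:
    # x has shape [batch, seq, heads, d_head]; RoPE is applied to the first d_rope features
    x_rope, x_pass = x[..., :d_rope], x[..., d_rope:]
    seq_len = x.shape[1]
    # Frequencies theta_i = base^(-2i / d_rope)
    theta = base ** (-torch.arange(0, d_rope, 2, dtype=torch.float32) / d_rope)
    # Position-dependent angles, shape [seq, d_rope / 2]
    angles = torch.arange(seq_len, dtype=torch.float32)[:, None] * theta[None, :]
    cos = angles.cos()[None, :, None, :]
    sin = angles.sin()[None, :, None, :]
    # Rotate each feature pair by its angle
    x1, x2 = x_rope[..., 0::2], x_rope[..., 1::2]
    rotated = torch.stack([x1 * cos - x2 * sin, x1 * sin + x2 * cos], dim=-1)
    # Re-interleave the pairs and concatenate the untouched features
    return torch.cat([rotated.flatten(-2), x_pass], dim=-1)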

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
{
"<h1>Deep Q Networks (DQN)</h1>\n<p>This is a <a href=\"https://pytorch.org\">PyTorch</a> implementation of paper <a href=\"https://papers.labml.ai/paper/1312.5602\">Playing Atari with Deep Reinforcement Learning</a> along with <a href=\"model.html\">Dueling Network</a>, <a href=\"replay_buffer.html\">Prioritized Replay</a> and Double Q Network.</p>\n<p>Here is the <a href=\"experiment.html\">experiment</a> and <a href=\"model.html\">model</a> implementation.</p>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb\"><span translate=no>_^_0_^_</span></a> <a href=\"https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710\"><span translate=no>_^_1_^_</span></a></p>\n": "<h1>\u6df1\u5ea6\u95ee\u7b54\u7f51\u7edc (DQN)</h1>\n<p>\u8fd9\u662f <a href=\"https://pytorch.org\">PyTorch</a> \u5b9e\u73b0\u7684\u8bba\u6587\u300a<a href=\"https://papers.labml.ai/paper/1312.5602\">\u73a9\u96c5\u8fbe\u5229\u4e0e\u6df1\u5ea6\u5f3a\u5316\u5b66\u4e60</a>\u300b\u4ee5\u53ca\u300a<a href=\"model.html\">\u51b3\u6597\u7f51\u7edc</a>\u300b\u3001\u300a<a href=\"replay_buffer.html\">\u4f18\u5148\u91cd\u64ad</a>\u300b\u548c Double Q Network\u3002</p>\n<p>\u4ee5\u4e0b\u662f<a href=\"experiment.html\">\u5b9e\u9a8c</a>\u548c<a href=\"model.html\">\u6a21\u578b</a>\u5b9e\u73b0\u3002</p>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/rl/dqn/experiment.ipynb\"><span translate=no>_^_0_^_</span></a><a href=\"https://app.labml.ai/run/fe1ad986237511ec86e8b763a2d3f710\"><span translate=no>_^_1_^_</span></a></p>\n",
"<h2>Train the model</h2>\n<p>We want to find optimal action-value function.</p>\n<span translate=no>_^_0_^_</span><h3>Target network \ud83c\udfaf</h3>\n<p>In order to improve stability we use experience replay that randomly sample from previous experience <span translate=no>_^_1_^_</span>. We also use a Q network with a separate set of paramters <span translate=no>_^_2_^_</span> to calculate the target. <span translate=no>_^_3_^_</span> is updated periodically. This is according to paper <a href=\"https://deepmind.com/research/dqn/\">Human Level Control Through Deep Reinforcement Learning</a>.</p>\n<p>So the loss function is, <span translate=no>_^_4_^_</span></p>\n<h3>Double <span translate=no>_^_5_^_</span>-Learning</h3>\n<p>The max operator in the above calculation uses same network for both selecting the best action and for evaluating the value. That is, <span translate=no>_^_6_^_</span> We use <a href=\"https://papers.labml.ai/paper/1509.06461\">double Q-learning</a>, where the <span translate=no>_^_7_^_</span> is taken from <span translate=no>_^_8_^_</span> and the value is taken from <span translate=no>_^_9_^_</span>.</p>\n<p>And the loss function becomes,</p>\n<span translate=no>_^_10_^_</span>": "<h2>\u8bad\u7ec3\u6a21\u578b</h2>\n<p>\u6211\u4eec\u60f3\u627e\u5230\u6700\u4f18\u7684\u52a8\u4f5c\u503c\u51fd\u6570\u3002</p>\n<span translate=no>_^_0_^_</span><h3>\u76ee\u6807\u7f51\u7edc \ud83c\udfaf</h3>\n<p>\u4e3a\u4e86\u63d0\u9ad8\u7a33\u5b9a\u6027\uff0c\u6211\u4eec\u4f7f\u7528\u4ece\u4e4b\u524d\u7684\u4f53\u9a8c\u4e2d\u968f\u673a\u62bd\u6837\u7684\u4f53\u9a8c\u91cd\u64ad<span translate=no>_^_1_^_</span>\u3002\u6211\u4eec\u8fd8\u4f7f\u7528\u5e26\u6709\u5355\u72ec\u53c2\u6570\u96c6\u7684 Q \u7f51\u7edc<span translate=no>_^_2_^_</span>\u6765\u8ba1\u7b97\u76ee\u6807\u3002<span translate=no>_^_3_^_</span>\u4f1a\u5b9a\u671f\u66f4\u65b0\u3002\u8fd9\u662f\u6839\u636e\u8bba\u6587\u300a\u901a\u8fc7\u6df1\u5ea6\u5f3a\u5316\u5b66\u4e60\u63a7\u5236<a href=\"https://deepmind.com/research/dqn/\">\u4eba\u6587\u6c34\u5e73</a>\u300b\u6240\u8bf4\u7684\u3002</p>\n<p>\u6240\u4ee5\u635f\u5931\u51fd\u6570\u662f\uff0c<span translate=no>_^_4_^_</span></p>\n<h3>\u53cc\u91cd<span translate=no>_^_5_^_</span>\u5b66\u4e60</h3>\n<p>\u4e0a\u8ff0\u8ba1\u7b97\u4e2d\u7684\u6700\u5927\u8fd0\u7b97\u7b26\u4f7f\u7528\u76f8\u540c\u7684\u7f51\u7edc\u6765\u9009\u62e9\u6700\u4f73\u64cd\u4f5c\u548c\u8bc4\u4f30\u503c\u3002\u4e5f\u5c31\u662f\u8bf4\uff0c<span translate=no>_^_6_^_</span>\u6211\u4eec\u4f7f\u7528<a href=\"https://papers.labml.ai/paper/1509.06461\">\u53cc Q \u5b66\u4e60</a><span translate=no>_^_7_^_</span>\uff0c\u5176\u4e2d\u53d6\u81ea<span translate=no>_^_8_^_</span>\uff0c\u503c\u53d6\u81ea<span translate=no>_^_9_^_</span>\u3002</p>\n<p>\u7136\u540e\u635f\u5931\u51fd\u6570\u53d8\u6210\uff0c</p>\n<span translate=no>_^_10_^_</span>",
"<h2>Train the model</h2>\n<p>We want to find optimal action-value function.</p>\n<span translate=no>_^_0_^_</span><h3>Target network \ud83c\udfaf</h3>\n<p>In order to improve stability we use experience replay that randomly sample from previous experience <span translate=no>_^_1_^_</span>. We also use a Q network with a separate set of parameters <span translate=no>_^_2_^_</span> to calculate the target. <span translate=no>_^_3_^_</span> is updated periodically. This is according to paper <a href=\"https://deepmind.com/research/dqn/\">Human Level Control Through Deep Reinforcement Learning</a>.</p>\n<p>So the loss function is, <span translate=no>_^_4_^_</span></p>\n<h3>Double <span translate=no>_^_5_^_</span>-Learning</h3>\n<p>The max operator in the above calculation uses same network for both selecting the best action and for evaluating the value. That is, <span translate=no>_^_6_^_</span> We use <a href=\"https://papers.labml.ai/paper/1509.06461\">double Q-learning</a>, where the <span translate=no>_^_7_^_</span> is taken from <span translate=no>_^_8_^_</span> and the value is taken from <span translate=no>_^_9_^_</span>.</p>\n<p>And the loss function becomes,</p>\n<span translate=no>_^_10_^_</span>": "<h2>\u8bad\u7ec3\u6a21\u578b</h2>\n<p>\u6211\u4eec\u60f3\u627e\u5230\u6700\u4f73\u7684\u52a8\u4f5c\u503c\u51fd\u6570\u3002</p>\n<span translate=no>_^_0_^_</span><h3>\u76ee\u6807\u7f51\u7edc \ud83c\udfaf</h3>\n<p>\u4e3a\u4e86\u63d0\u9ad8\u7a33\u5b9a\u6027\uff0c\u6211\u4eec\u4f7f\u7528\u7ecf\u9a8c\u56de\u653e\uff0c\u4ece\u4ee5\u524d\u7684\u7ecf\u9a8c\u4e2d\u968f\u673a\u62bd\u6837<span translate=no>_^_1_^_</span>\u3002\u6211\u4eec\u8fd8\u4f7f\u7528\u5177\u6709\u4e00\u7ec4\u5355\u72ec\u53c2\u6570\u7684 Q \u7f51\u7edc<span translate=no>_^_2_^_</span>\u6765\u8ba1\u7b97\u76ee\u6807\u3002<span translate=no>_^_3_^_</span>\u5b9a\u671f\u66f4\u65b0\u3002\u8fd9\u662f\u6839\u636e\u8bba\u6587\u300a\u901a\u8fc7\u6df1\u5ea6\u5f3a\u5316\u5b66\u4e60\u8fdb\u884c<a href=\"https://deepmind.com/research/dqn/\">\u4eba\u4f53\u6c34\u5e73\u63a7\u5236</a>\u300b\u5f97\u51fa\u7684\u3002</p>\n<p>\u6240\u4ee5\u635f\u5931\u51fd\u6570\u662f\uff0c<span translate=no>_^_4_^_</span></p>\n<h3>\u53cc<span translate=no>_^_5_^_</span>\u91cd\u5b66\u4e60</h3>\n<p>\u4e0a\u8ff0\u8ba1\u7b97\u4e2d\u7684\u6700\u5927\u503c\u8fd0\u7b97\u7b26\u4f7f\u7528\u76f8\u540c\u7684\u7f51\u7edc\u6765\u9009\u62e9\u6700\u4f73\u52a8\u4f5c\u548c\u8bc4\u4f30\u503c\u3002\u4e5f\u5c31\u662f\u8bf4\uff0c<span translate=no>_^_6_^_</span>\u6211\u4eec\u4f7f\u7528<a href=\"https://papers.labml.ai/paper/1509.06461\">\u53cc\u91cdQ-L</a><span translate=no>_^_7_^_</span> earning<span translate=no>_^_8_^_</span>\uff0c\u5176\u4e2d\u53d6\u81ea\u503c\uff0c\u53d6\u81ea\u503c<span translate=no>_^_9_^_</span>\u3002</p>\n<p>\u635f\u5931\u51fd\u6570\u53d8\u6210\uff0c</p>\n<span translate=no>_^_10_^_</span>",
"<p><span translate=no>_^_0_^_</span> </p>\n": "<p><span translate=no>_^_0_^_</span></p>\n",
"<p>Calculate the desired Q value. We multiply by <span translate=no>_^_0_^_</span> to zero out the next state Q values if the game ended.</p>\n<p><span translate=no>_^_1_^_</span> </p>\n": "<p>\u8ba1\u7b97\u6240\u9700\u7684 Q \u503c\u3002\u5982\u679c\u6e38\u620f\u7ed3\u675f\uff0c\u6211\u4eec\u5c06\u4e58<span translate=no>_^_0_^_</span>\u4ee5\u5c06\u4e0b\u4e00\u4e2a\u72b6\u6001 Q \u503c\u5f52\u96f6\u3002</p>\n<p><span translate=no>_^_1_^_</span></p>\n",
"<p>Get the best action at state <span translate=no>_^_0_^_</span> <span translate=no>_^_1_^_</span> </p>\n": "<p>\u5728\u5dde\u5185\u91c7\u53d6\u6700\u4f73\u884c\u52a8<span translate=no>_^_0_^_</span><span translate=no>_^_1_^_</span></p>\n",

View File

@ -1,5 +1,5 @@
{
"<h1>Transformer Auto-Regression Experiment</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a> <a href=\"https://comet.ml/labml/transformer/ea8c108c2d94434ca3c2bc2b21015082\"><span translate=no>_^_1_^_</span></a></p>\n<p>This trains a simple transformer introduced in <a href=\"https://papers.labml.ai/paper/1706.03762\">Attention Is All You Need</a> on an NLP auto-regression task (with Tiny Shakespeare dataset).</p>\n": "<h1>\u53d8\u538b\u5668\u81ea\u52a8\u56de\u5f52\u5b9e\u9a8c</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a><a href=\"https://comet.ml/labml/transformer/ea8c108c2d94434ca3c2bc2b21015082\"><span translate=no>_^_1_^_</span></a></p>\n<p>\u8fd9\u4f1a\u5728 NLP \u81ea\u52a8\u56de\u5f52\u4efb\u52a1\uff08\u4f7f\u7528 Tiny Shakespeare \u6570\u636e\u96c6\uff09\u4e2d\u8bad\u7ec3\u5728 \u201c<a href=\"https://papers.labml.ai/paper/1706.03762\">\u6ce8\u610f\u5c31\u662f\u4f60\u6240\u9700\u8981\u7684\u4e00\u5207</a>\u201d \u4e2d\u5f15\u5165\u7684\u7b80\u5355\u8f6c\u6362\u5668\u3002</p>\n",
"<h1>Transformer Auto-Regression Experiment</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n<p>This trains a simple transformer introduced in <a href=\"https://papers.labml.ai/paper/1706.03762\">Attention Is All You Need</a> on an NLP auto-regression task (with Tiny Shakespeare dataset).</p>\n": "<h1>\u53d8\u538b\u5668\u81ea\u52a8\u56de\u5f52\u5b9e\u9a8c</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n<p>\u8fd9\u5c06\u8bad\u7ec3\u4e00\u4e2a\u5728 NLP \u81ea\u52a8\u56de\u5f52\u4efb\u52a1\uff08\u4f7f\u7528 Tiny Shakespeare \u6570\u636e\u96c6\uff09\u4e2d\u5f15\u5165\u7684 \u201c<a href=\"https://papers.labml.ai/paper/1706.03762\">\u6ce8\u610f\u529b\u5c31\u662f\u4f60\u6240\u9700\u8981</a>\u7684\u201d \u7b80\u5355\u53d8\u538b\u5668\u3002</p>\n",
"<h2>Auto-Regressive model</h2>\n": "<h2>\u81ea\u56de\u5f52\u6a21\u578b</h2>\n",
"<h2>Configurations</h2>\n<p>This inherits from <a href=\"../../experiments/nlp_autoregression.html#NLPAutoRegressionConfigs\"><span translate=no>_^_0_^_</span></a></p>\n": "<h2>\u914d\u7f6e</h2>\n<p>\u8fd9\u7ee7\u627f\u81ea <a href=\"../../experiments/nlp_autoregression.html#NLPAutoRegressionConfigs\"><span translate=no>_^_0_^_</span></a></p>\n",
"<h3>Transformer configurations</h3>\n": "<h3>\u53d8\u538b\u5668\u914d\u7f6e</h3>\n",

View File

@ -1,5 +1,5 @@
{
"<h1>Multi-Headed Attention (MHA)</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a> <a href=\"https://comet.ml/labml/transformer/ea8c108c2d94434ca3c2bc2b21015082\"><span translate=no>_^_1_^_</span></a></p>\n<p>This is a tutorial/implementation of multi-headed attention from paper <a href=\"https://papers.labml.ai/paper/1706.03762\">Attention Is All You Need</a> in <a href=\"https://pytorch.org/\">PyTorch</a>. The implementation is inspired from <a href=\"https://nlp.seas.harvard.edu/2018/04/03/attention.html\">Annotated Transformer</a>.</p>\n<p>Here is the <a href=\"basic/autoregressive_experiment.html\">training code</a> that uses a basic transformer with MHA for NLP auto-regression.</p>\n<p><a href=\"basic/autoregressive_experiment.html\">Here is an experiment implementation</a> that trains a simple transformer.</p>\n": "<h1>\u591a\u5934\u6ce8\u610f (MHA)</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a><a href=\"https://comet.ml/labml/transformer/ea8c108c2d94434ca3c2bc2b21015082\"><span translate=no>_^_1_^_</span></a></p>\n</a><p>\u8fd9\u662f <a href=\"https://pytorch.org/\">PyTorch \u4e2d\u591a\u5934\u6ce8\u610f\u529b\u7684\u6559\u7a0b/\u5b9e\u73b0\uff0c\u6458\u81ea\u8bba\u6587 \u201c<a href=\"https://papers.labml.ai/paper/1706.03762\">\u6ce8\u610f\u5c31\u662f\u4f60\u6240\u9700\u8981</a>\u7684\u201d\u3002\u8be5\u5b9e\u73b0\u7684\u7075\u611f\u6765\u81ea<a href=\"https://nlp.seas.harvard.edu/2018/04/03/attention.html\">\u5e26\u6ce8\u91ca\u7684\u53d8\u538b\u5668</a>\u3002</p>\n<p>\u4ee5\u4e0b\u662f\u4f7f\u7528\u5e26\u6709 MHA \u7684\u57fa\u672c\u8f6c\u6362\u5668\u8fdb\u884c NLP \u81ea\u52a8\u56de\u5f52\u7684<a href=\"basic/autoregressive_experiment.html\">\u8bad\u7ec3\u4ee3\u7801</a>\u3002</p>\n<p><a href=\"basic/autoregressive_experiment.html\">\u8fd9\u662f\u4e00\u4e2a\u8bad\u7ec3\u7b80\u5355\u53d8\u538b\u5668\u7684\u5b9e\u9a8c\u5b9e\u73b0</a>\u3002</p>\n",
"<h1>Multi-Headed Attention (MHA)</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n<p>This is a tutorial/implementation of multi-headed attention from paper <a href=\"https://papers.labml.ai/paper/1706.03762\">Attention Is All You Need</a> in <a href=\"https://pytorch.org/\">PyTorch</a>. The implementation is inspired from <a href=\"https://nlp.seas.harvard.edu/2018/04/03/attention.html\">Annotated Transformer</a>.</p>\n<p>Here is the <a href=\"basic/autoregressive_experiment.html\">training code</a> that uses a basic transformer with MHA for NLP auto-regression.</p>\n<p><a href=\"basic/autoregressive_experiment.html\">Here is an experiment implementation</a> that trains a simple transformer.</p>\n": "<h1>\u591a\u5934\u6ce8\u610f\u529b (MHA)</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n</a><p>\u8fd9\u662f P <a href=\"https://pytorch.org/\">yTorch \u4e2d\u8bba\u6587 \u201c\u6ce8\u610f\u529b<a href=\"https://papers.labml.ai/paper/1706.03762\">\u5c31\u662f\u4f60\u6240\u9700\u8981\u7684\u201d \u591a\u5934\u6ce8\u610f</a>\u529b\u7684\u6559\u7a0b/\u5b9e\u73b0\u3002\u8be5\u5b9e\u73b0\u7684\u7075\u611f\u6765\u81ea<a href=\"https://nlp.seas.harvard.edu/2018/04/03/attention.html\">\u5e26\u6ce8\u91ca\u7684\u53d8\u5f62\u91d1\u521a</a>\u3002</p>\n<p>\u4ee5\u4e0b\u662f\u4f7f\u7528\u5e26\u6709 MHA \u7684\u57fa\u672c\u8f6c\u6362\u5668\u8fdb\u884c NLP \u81ea\u52a8\u56de\u5f52\u7684<a href=\"basic/autoregressive_experiment.html\">\u8bad\u7ec3\u4ee3\u7801</a>\u3002</p>\n<p><a href=\"basic/autoregressive_experiment.html\">\u8fd9\u662f\u4e00\u4e2a\u8bad\u7ec3\u7b80\u5355\u53d8\u538b\u5668\u7684\u5b9e\u9a8c\u5b9e\u73b0</a>\u3002</p>\n",
"<h3>Calculate scores between queries and keys</h3>\n<p>This method can be overridden for other variations like relative attention.</p>\n": "<h3>\u8ba1\u7b97\u67e5\u8be2\u548c\u952e\u4e4b\u95f4\u7684\u5206\u6570</h3>\n<p>\u5bf9\u4e8e\u5176\u4ed6\u53d8\u4f53\uff0c\u4f8b\u5982\u76f8\u5bf9\u6ce8\u610f\u529b\uff0c\u53ef\u4ee5\u8986\u76d6\u6b64\u65b9\u6cd5\u3002</p>\n",
"<p> <a id=\"MHA\"></a></p>\n<h2>Multi-Head Attention Module</h2>\n<p>This computes scaled multi-headed attention for given <span translate=no>_^_0_^_</span>, <span translate=no>_^_1_^_</span> and <span translate=no>_^_2_^_</span> vectors.</p>\n<p><span translate=no>_^_3_^_</span></p>\n<p>In simple terms, it finds keys that matches the query, and gets the values of those keys.</p>\n<p>It uses dot-product of query and key as the indicator of how matching they are. Before taking the <span translate=no>_^_4_^_</span> the dot-products are scaled by <span translate=no>_^_5_^_</span>. This is done to avoid large dot-product values causing softmax to give very small gradients when <span translate=no>_^_6_^_</span> is large.</p>\n<p>Softmax is calculated along the axis of of the sequence (or time).</p>\n": "<p><a id=\"MHA\"></a></p>\n<h2>\u591a\u5934\u6ce8\u610f\u6a21\u5757</h2>\n<p>\u8fd9\u5c06\u8ba1\u7b97\u7ed9\u5b9a<span translate=no>_^_1_^_</span>\u548c<span translate=no>_^_2_^_</span>\u5411\u91cf\u7684\u7f29\u653e\u591a\u5934\u6ce8\u610f<span translate=no>_^_0_^_</span>\u529b\u3002</p>\n<p><span translate=no>_^_3_^_</span></p>\n<p>\u7b80\u5355\u6765\u8bf4\uff0c\u5b83\u4f1a\u627e\u5230\u4e0e\u67e5\u8be2\u5339\u914d\u7684\u952e\uff0c\u5e76\u83b7\u53d6\u8fd9\u4e9b\u952e\u7684\u503c\u3002</p>\n<p>\u5b83\u4f7f\u7528\u67e5\u8be2\u548c\u952e\u7684\u70b9\u79ef\u4f5c\u4e3a\u5b83\u4eec\u5339\u914d\u7a0b\u5ea6\u7684\u6307\u6807\u3002\u5728\u670d\u7528\u70b9\u4ea7\u54c1\u4e4b\u524d<span translate=no>_^_4_^_</span>\uff0c\u5148\u6309\u6bd4\u4f8b\u7f29\u653e<span translate=no>_^_5_^_</span>\u3002\u8fd9\u6837\u505a\u662f\u4e3a\u4e86\u907f\u514d\u8f83\u5927\u7684\u70b9\u79ef\u503c\u5bfc\u81f4 softmax \u5728\u8f83\u5927\u65f6<span translate=no>_^_6_^_</span>\u7ed9\u51fa\u975e\u5e38\u5c0f\u7684\u68af\u5ea6\u3002</p>\n<p>Softmax \u662f\u6cbf\u5e8f\u5217\uff08\u6216\u65f6\u95f4\uff09\u7684\u8f74\u8ba1\u7b97\u7684\u3002</p>\n",
"<p> <a id=\"PrepareMHA\"></a></p>\n<h2>Prepare for multi-head attention</h2>\n<p>This module does a linear transformation and splits the vector into given number of heads for multi-head attention. This is used to transform <strong>key</strong>, <strong>query</strong>, and <strong>value</strong> vectors.</p>\n": "<p><a id=\"PrepareMHA\"></a></p>\n<h2>\u4e3a\u591a\u5934\u6ce8\u610f\u505a\u597d\u51c6\u5907</h2>\n<p>\u8be5\u6a21\u5757\u8fdb\u884c\u7ebf\u6027\u53d8\u6362\uff0c\u5e76\u5c06\u5411\u91cf\u62c6\u5206\u4e3a\u7ed9\u5b9a\u6570\u91cf\u7684\u5934\u90e8\uff0c\u4ee5\u83b7\u5f97\u591a\u5934\u6ce8\u610f\u3002\u8fd9\u7528\u4e8e\u8f6c\u6362<strong>\u952e</strong>\u3001<strong>\u67e5\u8be2</strong>\u548c<strong>\u503c</strong>\u5411\u91cf\u3002</p>\n",


@@ -1,5 +1,5 @@
{
"<h1>Transformer Encoder and Decoder Models</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a> <a href=\"https://comet.ml/labml/transformer/ea8c108c2d94434ca3c2bc2b21015082\"><span translate=no>_^_1_^_</span></a></p>\n": "<h1>\u53d8\u538b\u5668\u7f16\u7801\u5668\u548c\u89e3\u7801\u5668\u578b\u53f7</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a><a href=\"https://comet.ml/labml/transformer/ea8c108c2d94434ca3c2bc2b21015082\"><span translate=no>_^_1_^_</span></a></p>\n",
"<h1>Transformer Encoder and Decoder Models</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n": "<h1>\u53d8\u538b\u5668\u7f16\u7801\u5668\u548c\u89e3\u7801\u5668\u6a21\u578b</h1>\n<p><a href=\"https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/basic/autoregressive_experiment.ipynb\"><span translate=no>_^_0_^_</span></a></p>\n",
"<p> <a id=\"Decoder\"></a></p>\n<h2>Transformer Decoder</h2>\n": "<p><a id=\"Decoder\"></a></p>\n<h2>\u53d8\u538b\u5668\u89e3\u7801\u5668</h2>\n",
"<p> <a id=\"EmbeddingsWithLearnedPositionalEncoding\"></a></p>\n<h2>Embed tokens and add parameterized positional encodings</h2>\n": "<p><a id=\"EmbeddingsWithLearnedPositionalEncoding\"></a></p>\n<h2>\u5d4c\u5165\u4ee4\u724c\u5e76\u6dfb\u52a0\u53c2\u6570\u5316\u7684\u4f4d\u7f6e\u7f16\u7801</h2>\n",
"<p> <a id=\"EmbeddingsWithPositionalEncoding\"></a></p>\n<h2>Embed tokens and add <a href=\"positional_encoding.html\">fixed positional encoding</a></h2>\n": "<p><a id=\"EmbeddingsWithPositionalEncoding\"></a></p>\n<h2>\u5d4c\u5165\u4ee4\u724c\u5e76\u6dfb\u52a0<a href=\"positional_encoding.html\">\u56fa\u5b9a\u4f4d\u7f6e\u7f16\u7801</a></h2>\n",