Mirror of https://github.com/labmlai/annotated_deep_learning_paper_implementations.git (synced 2025-08-15 18:27:20 +08:00)
📊 tracker debug attention
@@ -86,9 +86,6 @@ class TransformerLayer(Module):
         ff = self.feed_forward(z)
         x = x + self.dropout(ff)

-        # guard(x.shape, attn_self.shape, attn_src.shape, ff.shape,
-        #       '_batch_size', '_seq_len', 'd_model')
-
         return x

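The context lines above show the layer's position-wise feed-forward step followed by a dropout-regularized residual connection. Below is a minimal sketch of that pattern; the class name, `d_model`, `d_ff`, and `dropout_prob` are illustrative assumptions, and the normalization implied by the `z` input in the hunk is omitted:

```python
import torch
from torch import nn


class FeedForwardBlock(nn.Module):
    """Illustrative sketch of the feed-forward + residual pattern in the hunk above."""

    def __init__(self, d_model: int = 512, d_ff: int = 2048, dropout_prob: float = 0.1):
        super().__init__()
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Linear(d_ff, d_model),
        )
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        ff = self.feed_forward(x)     # position-wise feed-forward
        x = x + self.dropout(ff)      # residual connection with dropout
        return x
```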
@@ -2,6 +2,7 @@ import math
 from typing import Optional

 import torch
+from labml import tracker
 from torch import nn as nn
 from torch.nn import functional as F

@@ -65,6 +66,7 @@ class MultiHeadAttention(Module):
            assert mask.shape[0] == 1 or mask.shape[0] == mask.shape[1]
            scores = scores.masked_fill(mask == 0, -1e9)
        attn = F.softmax(scores, dim=1)
+       tracker.debug('attn', attn)
        attn = self.dropout(attn)

        x = torch.einsum("ijbh,jbhd->ibhd", attn, value)
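The added `tracker.debug('attn', attn)` call logs the attention matrix each step so it can be inspected through labml's tracker. A minimal sketch of the surrounding computation with the same logging call is below; the score einsum, `query`/`key`/`value` names, and `scale` argument are assumptions inferred from the output einsum in the hunk, not the repository's exact code, and running it requires `labml` to be installed inside an experiment context:

```python
import torch
from torch.nn import functional as F

from labml import tracker  # same tracker used by the added debug call


def attention_with_debug(query, key, value, mask=None, scale=1.0):
    # Attention scores between every query and key position: shape [seq_q, seq_k, batch, heads].
    scores = torch.einsum('ibhd,jbhd->ijbh', query, key) * scale
    if mask is not None:
        # Zero entries in the mask are blocked with a large negative value before the softmax.
        scores = scores.masked_fill(mask == 0, -1e9)
    # Normalize over the key dimension, matching `F.softmax(scores, dim=1)` above.
    attn = F.softmax(scores, dim=1)
    # Log the attention matrix for inspection, as the commit does.
    tracker.debug('attn', attn)
    # Weighted sum of values, matching the einsum in the hunk above.
    return torch.einsum('ijbh,jbhd->ibhd', attn, value)
```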
6  labml_nn/transformers/utils.py  Normal file

@@ -0,0 +1,6 @@
+import torch
+
+
+def subsequent_mask(seq_len):
+    mask = torch.tril(torch.ones(seq_len, seq_len)).to(torch.bool).unsqueeze(-1)
+    return mask
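Since `torch.tril` keeps the lower triangle, `subsequent_mask` builds a causal mask in which position i may only attend to positions j ≤ i, and the trailing `unsqueeze(-1)` adds a broadcast dimension. A short usage sketch, assuming the `labml_nn` package is importable:

```python
import torch

from labml_nn.transformers.utils import subsequent_mask

# Causal mask for a sequence of length 4; shape is [4, 4, 1] because of unsqueeze(-1).
mask = subsequent_mask(4)
print(mask.shape)      # torch.Size([4, 4, 1])
print(mask[:, :, 0])
# tensor([[ True, False, False, False],
#         [ True,  True, False, False],
#         [ True,  True,  True, False],
#         [ True,  True,  True,  True]])

# Positions where the mask is False are the ones blocked before the softmax,
# mirroring the `scores.masked_fill(mask == 0, -1e9)` pattern in the attention hunk above.
```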