mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-16 10:51:23 +08:00
bias fix
@@ -201,7 +201,7 @@ class RotaryPEMultiHeadAttention(MultiHeadAttention):
     """
 
     def __init__(self, heads: int, d_model: int, rope_percentage: float = 0.5, dropout_prob: float = 0.0):
-        super().__init__(heads, d_model, dropout_prob)
+        super().__init__(heads, d_model, dropout_prob, bias=False)
 
         # Rotary positional embedding layers
         d_rope = int(self.d_k * rope_percentage)
@@ -174,7 +174,7 @@ class RotaryValuePEMultiHeadAttention(MultiHeadAttention):
     def __init__(self, heads: int, d_model: int,
                  rope_percentage: float = 0.5, rope_value_percentage: float = 0.5,
                  dropout_prob: float = 0.0):
-        super().__init__(heads, d_model, dropout_prob)
+        super().__init__(heads, d_model, dropout_prob, bias=False)
 
         # Rotary positional embedding layers
         d_rope = int(self.d_k * rope_percentage)
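
For context, the change in both hunks is the same: the constructors now pass bias=False through to the MultiHeadAttention base class, which (as an assumption about this repository) forwards it to the linear layers that project the input into per-head queries and keys, so those projections lose their additive bias term. Below is a minimal standalone sketch of that effect, not the repository's actual class; the name HeadProjection and the tensor shapes are illustrative only.

import torch
import torch.nn as nn

# Minimal sketch (hypothetical HeadProjection, not the repository's class) of what
# bias=False changes: the per-head projection becomes a pure linear map with no
# additive bias, matching the rotary-embedding formulation that rotates W x.
class HeadProjection(nn.Module):
    def __init__(self, d_model: int, heads: int, d_k: int, bias: bool = False):
        super().__init__()
        self.heads = heads
        self.d_k = d_k
        # `bias` is assumed to be forwarded to the projection layer, the way the
        # constructors above forward it to MultiHeadAttention.
        self.linear = nn.Linear(d_model, heads * d_k, bias=bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [seq_len, batch_size, d_model] -> [seq_len, batch_size, heads, d_k]
        head_shape = x.shape[:-1]
        return self.linear(x).view(*head_shape, self.heads, self.d_k)

# With bias=False the projection has no bias parameter at all.
proj = HeadProjection(d_model=512, heads=8, d_k=64, bias=False)
assert proj.linear.bias is None
print(proj(torch.randn(10, 2, 512)).shape)  # torch.Size([10, 2, 8, 64])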