mirror of
https://github.com/labmlai/annotated_deep_learning_paper_implementations.git
synced 2025-08-26 16:50:39 +08:00
monit
This commit is contained in:
@ -520,18 +520,19 @@ class LayerGenerator:
|
|||||||
from labml_nn.neox.utils.llm_int8 import make_llm_int8_linear
|
from labml_nn.neox.utils.llm_int8 import make_llm_int8_linear
|
||||||
|
|
||||||
#
|
#
|
||||||
layer.attention.output = make_llm_int8_linear(layer.attention.output,
|
with monit.section('Covert to int8'):
|
||||||
device=self.device,
|
layer.attention.output = make_llm_int8_linear(layer.attention.output,
|
||||||
threshold=self.llm_int8_threshold)
|
device=self.device,
|
||||||
layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
|
threshold=self.llm_int8_threshold)
|
||||||
device=self.device,
|
layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
|
||||||
threshold=self.llm_int8_threshold)
|
device=self.device,
|
||||||
layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
|
threshold=self.llm_int8_threshold)
|
||||||
device=self.device,
|
layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
|
||||||
threshold=self.llm_int8_threshold)
|
device=self.device,
|
||||||
layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
|
threshold=self.llm_int8_threshold)
|
||||||
device=self.device,
|
layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
|
||||||
threshold=self.llm_int8_threshold)
|
device=self.device,
|
||||||
|
threshold=self.llm_int8_threshold)
|
||||||
#
|
#
|
||||||
return layer
|
return layer
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user