This commit is contained in:
Varuna Jayasiri
2022-08-19 09:54:24 +05:30
parent 1d92b5dc62
commit d7af20a62a

View File

@@ -520,18 +520,19 @@ class LayerGenerator:
 from labml_nn.neox.utils.llm_int8 import make_llm_int8_linear
 #
-layer.attention.output = make_llm_int8_linear(layer.attention.output,
-                                              device=self.device,
-                                              threshold=self.llm_int8_threshold)
-layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
-                                               device=self.device,
-                                               threshold=self.llm_int8_threshold)
-layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
-                                            device=self.device,
-                                            threshold=self.llm_int8_threshold)
-layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
-                                            device=self.device,
-                                            threshold=self.llm_int8_threshold)
+with monit.section('Covert to int8'):
+    layer.attention.output = make_llm_int8_linear(layer.attention.output,
+                                                  device=self.device,
+                                                  threshold=self.llm_int8_threshold)
+    layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
+                                                   device=self.device,
+                                                   threshold=self.llm_int8_threshold)
+    layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
+                                                device=self.device,
+                                                threshold=self.llm_int8_threshold)
+    layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
+                                                device=self.device,
+                                                threshold=self.llm_int8_threshold)
 #
 return layer