From d7af20a62a433f523a9732fdebdda6e84715b6a2 Mon Sep 17 00:00:00 2001
From: Varuna Jayasiri
Date: Fri, 19 Aug 2022 09:54:24 +0530
Subject: [PATCH] monit

---
 labml_nn/neox/model.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/labml_nn/neox/model.py b/labml_nn/neox/model.py
index dd2556d6..66b68b28 100644
--- a/labml_nn/neox/model.py
+++ b/labml_nn/neox/model.py
@@ -520,18 +520,19 @@ class LayerGenerator:
             from labml_nn.neox.utils.llm_int8 import make_llm_int8_linear
 
             #
-            layer.attention.output = make_llm_int8_linear(layer.attention.output,
-                                                          device=self.device,
-                                                          threshold=self.llm_int8_threshold)
-            layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
-                                                           device=self.device,
-                                                           threshold=self.llm_int8_threshold)
-            layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
-                                                        device=self.device,
-                                                        threshold=self.llm_int8_threshold)
-            layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
-                                                        device=self.device,
-                                                        threshold=self.llm_int8_threshold)
+            with monit.section('Convert to int8'):
+                layer.attention.output = make_llm_int8_linear(layer.attention.output,
+                                                              device=self.device,
+                                                              threshold=self.llm_int8_threshold)
+                layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
+                                                               device=self.device,
+                                                               threshold=self.llm_int8_threshold)
+                layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
+                                                            device=self.device,
+                                                            threshold=self.llm_int8_threshold)
+                layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
+                                                            device=self.device,
+                                                            threshold=self.llm_int8_threshold)
 
         #
         return layer
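
Note (not part of the patch): `monit.section` comes from the labml package, presumably already imported in model.py since the patch does not add an import. Wrapping the four make_llm_int8_linear calls in it shows a named, timed progress section while the int8 conversion runs. A minimal sketch of that behaviour, assuming labml is installed; the time.sleep call is a hypothetical stand-in for the conversion work:

    import time

    from labml import monit

    # Prints 'Convert to int8' with the elapsed time once the block finishes,
    # which is the effect the patch adds around the conversion of each layer.
    with monit.section('Convert to int8'):
        time.sleep(0.5)  # stand-in for the four make_llm_int8_linear calls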