monit

2025-08-26 08:41:23 +08:00 · 2022-08-19 09:54:24 +05:30
parent 1d92b5dc62
commit d7af20a62a
1 changed files with 13 additions and 12 deletions
--- a/labml_nn/neox/model.py
+++ b/labml_nn/neox/model.py
@@ -520,18 +520,19 @@ class LayerGenerator:
                from labml_nn.neox.utils.llm_int8 import make_llm_int8_linear
                #
-                layer.attention.output = make_llm_int8_linear(layer.attention.output,
+                with monit.section('Covert to int8'):
-                                                              device=self.device,
+                    layer.attention.output = make_llm_int8_linear(layer.attention.output,
-                                                              threshold=self.llm_int8_threshold)
+                                                                  device=self.device,
-                layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
+                                                                  threshold=self.llm_int8_threshold)
-                                                               device=self.device,
+                    layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
-                                                               threshold=self.llm_int8_threshold)
+                                                                   device=self.device,
-                layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
+                                                                   threshold=self.llm_int8_threshold)
-                                                            device=self.device,
+                    layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
-                                                            threshold=self.llm_int8_threshold)
+                                                                device=self.device,
-                layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
+                                                                threshold=self.llm_int8_threshold)
-                                                            device=self.device,
+                    layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
-                                                            threshold=self.llm_int8_threshold)
+                                                                device=self.device,
+                                                                threshold=self.llm_int8_threshold)
        #
        return layer