From d7af20a62a433f523a9732fdebdda6e84715b6a2 Mon Sep 17 00:00:00 2001
From: Varuna Jayasiri
Date: Fri, 19 Aug 2022 09:54:24 +0530
Subject: [PATCH] monit

---
 labml_nn/neox/model.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/labml_nn/neox/model.py b/labml_nn/neox/model.py
index dd2556d6..66b68b28 100644
--- a/labml_nn/neox/model.py
+++ b/labml_nn/neox/model.py
@@ -520,18 +520,19 @@ class LayerGenerator:
             from labml_nn.neox.utils.llm_int8 import make_llm_int8_linear
 
             #
-            layer.attention.output = make_llm_int8_linear(layer.attention.output,
-                                                          device=self.device,
-                                                          threshold=self.llm_int8_threshold)
-            layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
-                                                           device=self.device,
-                                                           threshold=self.llm_int8_threshold)
-            layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
-                                                        device=self.device,
-                                                        threshold=self.llm_int8_threshold)
-            layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
-                                                        device=self.device,
-                                                        threshold=self.llm_int8_threshold)
+            with monit.section('Convert to int8'):
+                layer.attention.output = make_llm_int8_linear(layer.attention.output,
+                                                              device=self.device,
+                                                              threshold=self.llm_int8_threshold)
+                layer.attention.qkv_lin = make_llm_int8_linear(layer.attention.qkv_lin,
+                                                               device=self.device,
+                                                               threshold=self.llm_int8_threshold)
+                layer.ffn.dense_h_h4 = make_llm_int8_linear(layer.ffn.dense_h_h4,
+                                                            device=self.device,
+                                                            threshold=self.llm_int8_threshold)
+                layer.ffn.dense_h4_h = make_llm_int8_linear(layer.ffn.dense_h4_h,
+                                                            device=self.device,
+                                                            threshold=self.llm_int8_threshold)
 
         #
         return layer
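
Note (not part of the patch): `monit.section` comes from the labml package, presumably already imported in model.py since the patch does not add an import. Wrapping the four make_llm_int8_linear calls in it shows a named, timed progress section while the int8 conversion runs. A minimal sketch of that behaviour, assuming labml is installed; the time.sleep call is a hypothetical stand-in for the conversion work:

    import time

    from labml import monit

    # Prints 'Convert to int8' with the elapsed time once the block finishes,
    # which is the effect the patch adds around the conversion of each layer.
    with monit.section('Convert to int8'):
        time.sleep(0.5)  # stand-in for the four make_llm_int8_linear calls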