96f7b5a8e1 
					 
					
						
						
							
							jax transformer  
						
						
						
						
					 
					
						2025-08-21 12:12:20 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						9262c57f18 
					 
					
						
						
							
							flash attention  
						
						
						
						
					 
					
						2025-08-08 19:57:57 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						3dd36b80b3 
					 
					
						
						
							
							all comments  
						
						
						
						
					 
					
						2025-08-01 14:14:00 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						73b9892be6 
					 
					
						
						
							
							all comments  
						
						
						
						
					 
					
						2025-08-01 13:56:39 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						eb5c004fac 
					 
					
						
						
							
							sm scale log2  
						
						
						
						
					 
					
						2025-08-01 13:26:48 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						5a8182d21b 
					 
					
						
						
							
							backward pass formulas  
						
						
						
						
					 
					
						2025-08-01 13:24:57 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						a9b5c923eb 
					 
					
						
						
							
							backward pass formulas  
						
						
						
						
					 
					
						2025-07-31 17:15:26 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						0ae6e6ae2a 
					 
					
						
						
							
							flash comments  
						
						
						
						
					 
					
						2025-07-31 14:49:37 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						1bc2a69803 
					 
					
						
						
							
							flash comments  
						
						
						
						
					 
					
						2025-07-31 09:53:14 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						d7d63e1f83 
					 
					
						
						
							
							triton flash wip  
						
						
						
						
					 
					
						2025-07-30 14:20:28 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						ebb94842db 
					 
					
						
						
							
							cleanup save checkpoint  
						
						
						
						
					 
					
						2025-07-20 09:13:11 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						5eecda7e28 
					 
					
						
						
							
							cleanup log activations  
						
						
						
						
					 
					
						2025-07-20 09:10:05 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						a713c92b82 
					 
					
						
						
							
							cleanup hook model outputs  
						
						
						
						
					 
					
						2025-07-20 09:02:34 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						5bdedcffec 
					 
					
						
						
							
							remove labml_helpers dep  
						
						
						
						
					 
					
						2025-07-20 08:56:03 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						47d4231a73 
					 
					
						
						
							
							seq length in rope  
						
						
						
						
					 
					
						2025-07-18 10:43:38 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						f6d77c36b2 
					 
					
						
						
							
							cleanup some unused imports  
						
						
						
						
					 
					
						2025-07-18 10:40:22 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						1b702523b9 
					 
					
						
						
							
							remove labml_helpers dependency: replace Module with nn.Module  
						
						
						
						
					 
					
						2025-07-18 10:32:36 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						dc4762161d 
					 
					
						
						
							
							Clean up LoRA  
						
						
						
						
					 
					
						2024-08-02 15:32:02 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						bc32b507ea 
					 
					
						
						
							
							clear notebook outputs  
						
						
						
						
					 
					
						2024-07-31 20:39:46 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						77d00f089b 
					 
					
						
						
							
							Add LoRA to GPT2  
						
						
						
						
					 
					
						2024-07-31 18:29:24 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						0f2a9be6d2 
					 
					
						
						
							
							training loop  
						
						
						
						
					 
					
						2024-07-29 23:01:06 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						23b7e2ee8e 
					 
					
						
						
							
							create experiment notebook and refactoring  
						
						
						
						
					 
					
						2024-07-29 19:41:24 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						c82529ce67 
					 
					
						
						
							
							move LoRA to labml.nn  
						
						
						
						
					 
					
						2024-07-29 11:17:38 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						391fa39167 
					 
					
						
						
							
							cleanup notebooks  
						
						
						
						
					 
					
						2024-06-24 16:17:09 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						09d09379c2 
					 
					
						
						
							
							fix value pe double rotation  
						
						
						
						
					 
					
						2024-06-20 12:53:09 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						2236f6383c 
					 
					
						
						
							
							fix rope test code  
						
						
						
						
					 
					
						2024-06-20 12:49:27 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						cf565bcc1d 
					 
					
						
						
							
							cleanup  
						
						
						
						
					 
					
						2024-06-18 11:09:02 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						5ec0f70855 
					 
					
						
						
							
							Fix formula typo in Relative MHA ( #242 )  
						
						... 
						
						
						
						${(\textcolor{lightgreen}{\mathbf{A + C}})}_{i,j} = Q_i^\top K_j + \textcolor{orange}{v^\top} K_j$ 
						
						
					 
					
						2024-03-02 14:19:06 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						bc5565b84c 
					 
					
						
						
							
							Fix a typo in the formula of RoPE  
						
						
						
						
					 
					
						2023-12-08 15:50:21 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						36a374ed76 
					 
					
						
						
							
							Merge pull request  #226  from MrYxJ/patch-1  
						
						... 
						
						
						
						Fix a typo in the formula of ALiBi. 
						
						
					 
					
						2023-11-17 17:42:17 +00:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						830161b299 
					 
					
						
						
							
							Update __init__.py  
						
						... 
						
						
						
						This formula is wrong, there is one symbol '-' missing in front of the 1, which will affect people's understanding when reading. What is expressed here is that the position of the ith token is increasing from -(i-1) to 0, so it should be -1. 
						
						
					 
					
						2023-11-14 00:30:26 +08:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						4d922e838f 
					 
					
						
						
							
							Add backticks to mask'shape  
						
						
						
						
					 
					
						2023-11-10 19:51:37 +08:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						f42c0e9cf4 
					 
					
						
						
							
							right shift example comment fix  
						
						
						
						
					 
					
						2023-11-07 09:28:22 +00:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						ffafaf1df7 
					 
					
						
						
							
							fix: fix cls_token bug in vit.  
						
						
						
						
					 
					
						2023-11-06 11:28:45 +08:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						9a42ac2697 
					 
					
						
						
							
							arxiv.org links  
						
						
						
						
					 
					
						2023-10-24 14:42:32 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						8db330dd22 
					 
					
						
						
							
							sophia-g docs  
						
						
						
						
					 
					
						2023-07-14 21:25:08 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						7c02294e7c 
					 
					
						
						
							
							sophia exp  
						
						
						
						
					 
					
						2023-07-14 16:44:45 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						2eccd8bec6 
					 
					
						
						
							
							Add the missing negative sign in the formula.  
						
						
						
						
					 
					
						2023-06-28 06:52:45 +08:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						c5685c9ffe 
					 
					
						
						
							
							remove app.labml.ai links  
						
						
						
						
					 
					
						2023-04-02 12:10:18 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						97e53c0f3d 
					 
					
						
						
							
							fix glu variants links  
						
						
						
						
					 
					
						2023-04-02 12:00:23 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						3ec5fa9f3d 
					 
					
						
						
							
							fix typo mha  
						
						
						
						
					 
					
						2022-12-24 12:53:36 +00:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						594b525e9d 
					 
					
						
						
							
							rm comet  
						
						
						
						
					 
					
						2022-09-07 09:31:38 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						0bfb210671 
					 
					
						
						
							
							fix vit pe  
						
						
						
						
					 
					
						2022-08-10 15:56:16 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						72669d0526 
					 
					
						
						
							
							ALiBi ( #134 )  
						
						
						
						
					 
					
						2022-07-17 09:28:32 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						b6bef1d2fe 
					 
					
						
						
							
							cleanup  
						
						
						
						
					 
					
						2022-07-02 14:31:16 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						ab4264cbda 
					 
					
						
						
							
							comet links fix  
						
						
						
						
					 
					
						2022-07-02 14:25:27 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						ee5a34aa59 
					 
					
						
						
							
							experiment links transformer  
						
						
						
						
					 
					
						2022-06-28 19:02:20 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						e09ee89f36 
					 
					
						
						
							
							Transformer experiment logs ( #130 )  
						
						
						
						
					 
					
						2022-06-27 14:11:44 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						0ce65adf9e 
					 
					
						
						
							
							RoPER ( #126 )  
						
						
						
						
					 
					
						2022-06-03 21:29:41 +05:30 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						6a41c82b30 
					 
					
						
						
							
							FTA ( #115 )  
						
						
						
						
					 
					
						2022-05-23 22:26:39 +05:30