@@ -29,7 +29,7 @@ def llama2(token_ids: torch.Tensor, config: LlamaConfig):
 
     # Repeat the llama2_transformer_block computation 32 times (7B) / 80 times (70B)
     for layer_id in range(config.num_hidden_layers):
-        print(f'Naked llama: Computing Layer {layer_id}')
+        print(f'Naked llama: Computing {config.model_name} Layer {layer_id}')
         output = llama2_transformer_block(hidden_states, config, layer_id=layer_id, attention_mask=mask)
         hidden_states = output[0]
 
@@ -87,15 +87,15 @@ def llama2(token_ids: torch.Tensor, config: LlamaConfig):
     config.weights_dir = model_dict[model_name]['weights_dir']
     logits = llama2(token_ids, config)
 
-    print('Naked llama result:')
+    print(f'Naked llama, model: {config.model_name}, result:')
     print(logits)
 
     # check result
     model = LlamaForCausalLM.from_pretrained(model_dict[model_name]['hf_model'])
     model.eval()
     with torch.inference_mode():
         hf_res = model(input_ids=token_ids)
-        print('Hugging face llama result:')
+        print(f'Hugging face, model: {config.model_name}, result:')
         print(hf_res.logits)
     error = torch.abs(hf_res.logits - logits)
     print(f"Compare error sum: {torch.sum(error)}")