xjdr-alt · deepfates · Oct 7, 2024 · Oct 7, 2024 · Oct 7, 2024 · Oct 7, 2024
diff --git a/entropix/prompts.py b/entropix/prompts.py
@@ -169,4 +169,4 @@ def create_prompts_from_csv(csv_path: str) -> List[str]:
 </antThinking>
 You are a principal devops engineer at google. You are an expert at all things cloud and deployment. Your task is to create ansible and terraform script to bootstrasp k8 cluster on Azure. Be clear and concise. Make sure it is production grade. Think and reflect about your actions to ensure to accomplished the task successfully.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 
-"""
+"""
diff --git a/entropix/torch_device.py b/entropix/torch_device.py
@@ -0,0 +1,10 @@
+import torch
+
+def get_device():
+    if torch.backends.mps.is_available():
+        device = torch.device("mps")
+    elif torch.cuda.is_available():
+        device = torch.device("cuda")
+    else:
+        device = torch.device("cpu")
+    return device
diff --git a/entropix/torch_kvcache.py b/entropix/torch_kvcache.py
@@ -1,13 +1,8 @@
 import torch
 import torch.nn as nn
 
-# Device selection, tree is like first apple silicion, then cuda, fallback is cpu.
-if torch.backends.mps.is_available():
-    device = torch.device("mps")
-elif torch.cuda.is_available():
-    device = torch.device("cuda")
-else:
-    device = torch.device("cpu")
+from entropix.torch_device import get_device
+device = get_device()
 
 #print(f"Using device: {device}")
 

diff --git a/entropix/torch_main.py b/entropix/torch_main.py
@@ -17,16 +17,9 @@
 from entropix.torch_sampler import sample
 from entropix.prompts import prompt, bp1
 
-DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+from entropix.torch_device import get_device
 
-
-# Device selection, tree is like first apple silicion, then cuda, fallback is cpu.
-if torch.backends.mps.is_available():
-    device = torch.device("mps")
-elif torch.cuda.is_available():
-    device = torch.device("cuda")
-else:
-    device = torch.device("cpu")
+device = get_device()
 
 print(f"Using device: {device}")
 
@@ -107,7 +100,7 @@ def generate(xfmr_weights, model_params, tokens):
       bsz, seqlen = tokens.shape
       attn_mask = build_attn_mask(seqlen, cur_pos)
       freqs_cis = precompute_freqs_cis(model_params.head_dim, model_params.max_seq_len, model_params.rope_theta, model_params.use_scaled_rope)
-      kvcache = KVCache.new(model_params.n_layers, bsz, model_params.max_seq_len, model_params.n_local_kv_heads, model_params.head_dim).to(DEVICE)
+      kvcache = KVCache.new(model_params.n_layers, bsz, model_params.max_seq_len, model_params.n_local_kv_heads, model_params.head_dim).to(device)
       logits, kvcache, _, _ = xfmr(xfmr_weights, model_params, tokens, cur_pos, freqs_cis[:seqlen], kvcache, attn_mask=attn_mask)
       next_token = torch.argmax(logits[:, -1], dim=-1, keepdim=True).to(torch.int32)
       gen_tokens = next_token
@@ -127,4 +120,4 @@ def generate(xfmr_weights, model_params, tokens):
     generate(xfmr_weights, model_params, raw_tokens1)
 
 if __name__ == '__main__':
-  tyro.cli(main)
+  tyro.cli(main)
diff --git a/entropix/torch_model.py b/entropix/torch_model.py
@@ -10,13 +10,8 @@
 
 DEFAULT_MASK_VALUE = -0.7 * float(torch.finfo(torch.float32).max)
 
-# Device selection, tree is like first apple silicion, then cuda, fallback is cpu.
-if torch.backends.mps.is_available():
-    device = torch.device("mps")
-elif torch.cuda.is_available():
-    device = torch.device("cuda")
-else:
-    device = torch.device("cpu")
+from entropix.torch_device import get_device
+device = get_device()
 
 #print(f"Using device: {device}")
 
@@ -77,4 +72,4 @@ def xfmr(xfmr_weights: XfmrWeights, model_params: ModelParams, tokens: torch.Ten
         h = h + h_attn
         h = h + feed_forward(rms_norm(h, xfmr_weights.layer_weights[i].ffn_norm), xfmr_weights.layer_weights[i])
     logits = F.linear(rms_norm(h, xfmr_weights.norm), xfmr_weights.output)
-    return logits, kvcache, scores, attn_stats
+    return logits, kvcache, scores, attn_stats
diff --git a/entropix/torch_sampler.py b/entropix/torch_sampler.py
@@ -2,13 +2,8 @@
 import torch.nn.functional as F
 from typing import Tuple, Dict
 
-# Device selection, tree is like first apple silicion, then cuda, fallback is cpu.
-if torch.backends.mps.is_available():
-    device = torch.device("mps")
-elif torch.cuda.is_available():
-    device = torch.device("cuda")
-else:
-    device = torch.device("cpu")
+from entropix.torch_device import get_device
+device = get_device()
 
 LN_2 = 0.69314718056  # ln(2) = 1.0 / LOG2_E
 
@@ -168,4 +163,4 @@ def sample(gen_tokens: torch.Tensor, logits: torch.Tensor, attention_scores: tor
             base_top_p=top_p,
             base_top_k=top_k,
             generator=generator
-        )
+        )
diff --git a/entropix/torch_stats.py b/entropix/torch_stats.py
@@ -1,12 +1,7 @@
 import torch
 
-# Device selection, tree is like first apple silicion, then cuda, fallback is cpu.
-if torch.backends.mps.is_available():
-    device = torch.device("mps")
-elif torch.cuda.is_available():
-    device = torch.device("cuda")
-else:
-    device = torch.device("cpu")
+from entropix.torch_device import get_device
+device = get_device()
 
 #print(f"Using device: {device}")
 
@@ -45,4 +40,4 @@ def update(self, scores: torch.Tensor, layer_idx: int):
         self.entropy[:, layer_idx, :] = new_entropy
         self.varentropy[:, layer_idx, :] = new_varentropy
 
-        return self
+        return self
diff --git a/entropix/torch_weights.py b/entropix/torch_weights.py
@@ -10,13 +10,8 @@
 
 from pathlib import Path
 
-# Device selection, tree is like first apple silicion, then cuda, fallback is cpu.
-if torch.backends.mps.is_available():
-    device = torch.device("mps")
-elif torch.cuda.is_available():
-    device = torch.device("cuda")
-else:
-    device = torch.device("cpu")
+from entropix.torch_device import get_device
+device = get_device()
 
 #print(f"Using device: {device}")
 
@@ -80,4 +75,4 @@ def load_weights(ckpt_dir: Path = Path('weights/1B-Instruct'), n_layers: int = 1
       layer_weights=layer_weights
     )
 
-    return xfmr_weights
+    return xfmr_weights
-Original file line number
+Diff line change
@@ Expand Up / @@ -169,4 +169,4 @@ def create_prompts_from_csv(csv_path: str) -> List[str]: @@
     </antThinking>
     You are a principal devops engineer at google. You are an expert at all things cloud and deployment. Your task is to create ansible and terraform script to bootstrasp k8 cluster on Azure. Be clear and concise. Make sure it is production grade. Think and reflect about your actions to ensure to accomplished the task successfully.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-    """
+    """