google-deepmind · DEBADAS001KERNEL · Apr 4, 2025 · Apr 4, 2025
diff --git a/examples/mmlu_ev/MMLU_EV_AUTOMODEL.PY b/examples/mmlu_ev/MMLU_EV_AUTOMODEL.PY
@@ -0,0 +1,45 @@
+from datasets import load_dataset
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+dataset = load_dataset("cais/mmlu", "all")
+test_set = dataset["test"]
+# The "test" split of the "all" configuration loaded above is the evaluation pool.
+# One checkpoint id feeds both the tokenizer loader and the model loader below.
+modelname = "meta-llama/Llama-2-7b-chat-hf"
+tokenizer = AutoTokenizer.from_pretrained(modelname)
+# The model weights are requested in float16 with device_map="auto".
+model = AutoModelForCausalLM.from_pretrained(modelname, torch_dtype=torch.float16, device_map="auto")
+
+
+def ANS_GEN(q, choices):
+    """Ask the model one multiple-choice question and pick the choice its reply mentions.
+
+    q is the question text and choices the list of answer strings; returns the first
+    choice (in list order) that occurs verbatim in the generated reply, or None."""
+    write = f" Question: {q}\nChoices: {', '.join(choices)}"
+    # Follow the model's own device rather than hard-coding "cuda".
+    inputs = tokenizer(write, return_tensors="pt").to(model.device)
+    # max_new_tokens bounds the reply only; max_length=300 also counted prompt tokens.
+    outputs = model.generate(**inputs, max_new_tokens=300)
+    # Decode just the continuation: the prompt lists every choice, so matching on prompt + reply always hit choices[0].
+    reply_ids = outputs[0][inputs["input_ids"].shape[1]:]
+    output = tokenizer.decode(reply_ids, skip_special_tokens=True)
+    return next((choice for choice in choices if choice in output), None)
+
+
+# Score the first 100 test examples: a hit is an exact match between the
+# choice text returned by ANS_GEN and the gold choice text.
+subset = test_set.select(range(100))
+correct = 0
+total = 0
+
+for row in subset:
+    options = row["choices"]
+    gold = options[row["answer"]]
+    predicted = ANS_GEN(row["question"], options)
+    # bool is an int subclass, so a match adds exactly 1 and a miss adds 0.
+    correct += predicted == gold
+    total += 1
+
+accuracy = (correct / total) * 100
+print(f" Model Accuracy  {accuracy:.2f}%")
diff --git a/examples/mmlu_ev/MMLU_EV_pipeline.PY b/examples/mmlu_ev/MMLU_EV_pipeline.PY
@@ -0,0 +1,46 @@
+from datasets import load_dataset
+from transformers import pipeline
+data_set = load_dataset("cais/mmlu", "all")  # "all" configuration of cais/mmlu
+Test_set = data_set["test"]
+
+pipe = pipeline("text-generation", model="google/gemma-3-27b-it")  # 27B instruction-tuned checkpoint
+
+import random
+
+# Sample 100 row indices and select just those rows; list(Test_set) decoded the entire
+# test split into Python dicts only to keep 100 of them. No seed is set, so the subset varies per run.
+sampales = Test_set.select(random.sample(range(len(Test_set)), 100))
+
+'''
+Example record from the dataset:
+
+{
+  "question": "What is the embryological origin of the hyoid bone?",
+  "choices": ["The first pharyngeal arch", "The first and second pharyngeal arches", "The second pharyngeal arch", "The second and third pharyngeal arches"],
+  "answer": 3
+}
+
+"answer" is an integer index into "choices" (3 is the fourth choice, "D");
+the script reads the gold answer as choices[sample["answer"]].
+'''
+# Prompt, generate and score every sample; these steps used to sit after the loop (last sample only).
+correct = 0
+total = 0
+for sample in sampales:
+    q = sample["question"]
+    choices = sample["choices"]
+    original = choices[sample["answer"]]
+    write = f" Question: {q}\nChoices: {', '.join(choices)}"
+    # return_full_text=False drops the echoed prompt; max_new_tokens bounds the reply, not prompt + reply.
+    ans = pipe(write, max_new_tokens=100, do_sample=False, return_full_text=False)[0]["generated_text"]
+    print(f"Q: {q}")
+    print(f"Choices: {choices}")
+    print(f"LLM Response: {ans}")
+    print(f"Correct Answer: {original}")
+    # The reply is free text, so score the first choice it mentions; whole-reply equality never matched.
+    if next((c for c in choices if c in ans), None) == original:
+        correct += 1
+    total += 1
+# Report with two decimals; the old spec " 2f" meant space-sign, width 2, six decimals.
+accuracy = (correct / total) * 100
+print(f"accuracy: {accuracy:.2f}%")