evaluate_model.py
from datetime import timedelta
import argparse
from pathlib import Path

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.transformers.transformers_model import TransformersModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.utils import EnvConfig
from lighteval.utils.imports import is_accelerate_available

if is_accelerate_available():
    from accelerate import Accelerator, InitProcessGroupKwargs

    # Use a generous process-group timeout (3000 s = 50 min) so that slow
    # multi-process evaluation steps do not hit the default distributed timeout.
    accelerator = Accelerator(
        kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))]
    )
else:
    accelerator = None


def parse_args() -> argparse.Namespace:
    """Parse command line arguments.

    Returns:
        argparse.Namespace: Parsed command line arguments.
    """
    parser = argparse.ArgumentParser(description='Evaluate model on math tasks')
    parser.add_argument('--task', type=str, required=True,
                        choices=['gsm8k', 'math', 'math_hard', 'math_500', 'aime24', 'amc23'],
                        help='Task to evaluate')
    parser.add_argument('--model', type=str, required=True,
                        help='Model name or path')
    parser.add_argument('--use_chat_template', action='store_true', default=False,
                        help='Use chat template')
    parser.add_argument('--override_bs', type=int, default=-1,
                        help='Batch size; -1 for automatic batch size')
    return parser.parse_args()


def main() -> None:
    """Main function to run model evaluation."""
    args = parse_args()

    # Results and per-sample details are saved locally; nothing is pushed
    # to the Hugging Face Hub.
    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
        push_to_hub=False,
    )
    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        max_samples=1000,
        custom_tasks_directory="math_verify.tasks",
        env_config=EnvConfig(cache_dir="tmp/"),
        override_batch_size=args.override_bs,
    )
    model_config = TransformersModelConfig(
        pretrained=args.model,
        dtype="bfloat16",
        use_chat_template=args.use_chat_template,
    )
    # Task spec format: suite|task|num_fewshot|truncate_fewshots (0/1).
    pipeline = Pipeline(
        tasks=f"lighteval|{args.task}|4|1",
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )
    pipeline.evaluate()
    pipeline.show_results()
    pipeline.save_and_push_results()


if __name__ == "__main__":
    main()
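
A minimal usage sketch, assuming the script is saved as evaluate_model.py and lighteval (with the math_verify tasks) is installed. The model id is illustrative; any Hugging Face model id or local checkpoint path works. On multi-GPU setups the command would typically go through `accelerate launch` rather than plain `python`.

    # Hypothetical launcher: runs the evaluation script as a subprocess.
    import subprocess

    subprocess.run(
        [
            "python", "evaluate_model.py",
            "--task", "math_500",                        # one of the --task choices above
            "--model", "HuggingFaceH4/zephyr-7b-beta",   # illustrative model id
            "--use_chat_template",
        ],
        check=True,  # raise if the evaluation exits with a non-zero status
    )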