
Commit babd3aa

Add Version and Execution Date info to reports

1 parent b5ac619 · commit babd3aa

File tree

6 files changed: +57 −22 lines changed

    VERSION
    byte_infer_perf/general_perf/core/perf_engine.py
    byte_infer_perf/llm_perf/launch.py
    byte_infer_perf/llm_perf/utils/reporter.py
    byte_micro_perf/backends/GPU/backend_gpu.py
    byte_micro_perf/core/perf_engine.py

VERSION (+3)

@@ -0,0 +1,3 @@
+major=1
+minor=0
+patch=0
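
For reference, the new get_version() helpers introduced below assemble these three key=value lines into a dotted version string. A minimal standalone sketch of that parsing, assuming a local file named VERSION with exactly the contents above:

    # Sketch of the parsing done by the get_version() helpers in this commit.
    with open("VERSION") as f:
        lines = f.read().splitlines()      # ["major=1", "minor=0", "patch=0"]

    version = '.'.join(v.split('=')[1] for v in lines)
    print(version)                         # -> 1.0.0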

byte_infer_perf/general_perf/core/perf_engine.py (+17)

@@ -18,6 +18,7 @@
 import json
 import subprocess
 import time
+import traceback

 from typing import Any, Dict, Tuple
 import virtualenv
@@ -70,6 +71,19 @@ def __init__(self) -> None:
         self.prev_sys_path = list(sys.path)
         self.real_prefix = sys.prefix
         self.compile_only_mode = False
+        self.version = self.get_version()
+
+    def get_version(self):
+        version = ""
+        try:
+            version_file = os.path.join(str(BYTE_MLPERF_ROOT), "../VERSION")
+            with open(version_file) as f:
+                _version = f.read().splitlines()
+            version = '.'.join(v.split('=')[1] for v in _version)
+        except Exception as e:
+            traceback.print_exc()
+            log.warning(f"get bytemlperf version failed, error msg: {e}")
+        return version

     def start_engine(self) -> None:
         '''
@@ -168,6 +182,9 @@ def single_workload_perf(
             base_report.pop("Backend")
             return compile_info["compile_status"], base_report

+        base_report["Version"] = self.version
+        base_report["Execution Date"] = time.strftime("%Y-%m-%d %H:%M:%S")
+
         # load runtime backend
         """
         Start Here
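
For illustration, the two statements added to single_workload_perf stamp every report with the framework version and a wall-clock timestamp. A hedged sketch of the effect (the pre-existing report content here is hypothetical):

    import time

    base_report = {"Model": "resnet50-torch"}    # hypothetical existing report
    base_report["Version"] = "1.0.0"             # value returned by get_version()
    base_report["Execution Date"] = time.strftime("%Y-%m-%d %H:%M:%S")

    print(base_report)
    # e.g. {'Model': 'resnet50-torch', 'Version': '1.0.0',
    #       'Execution Date': '2024-05-01 12:34:56'}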

byte_infer_perf/llm_perf/launch.py (+17)

@@ -21,6 +21,7 @@
 import multiprocessing as mp
 import signal
 from typing import Any, Dict, Iterable, List
+import traceback

 # ${prj_root}/
 BYTE_MLPERF_ROOT = pathlib.Path(__file__).parents[1]
@@ -45,12 +46,26 @@ def __init__(self, hardware, task, host, port) -> None:
         self.result_queue = mp.Queue()
         self.jobs: List[mp.Process] = []
         self.server_process = None
+        self.version = self.get_version()


     def __del__(self):
         self.stop_server()


+    def get_version(self):
+        version = ""
+        try:
+            version_file = os.path.join(str(BYTE_MLPERF_ROOT), "../VERSION")
+            with open(version_file) as f:
+                _version = f.read().splitlines()
+            version = '.'.join(v.split('=')[1] for v in _version)
+        except Exception as e:
+            traceback.print_exc()
+            logger.warning(f"get bytemlperf version failed, error msg: {e}")
+        return version
+
+
     def start_engine(self) -> None:
         # load workload
         workload = load_workload(self.task)
@@ -85,6 +100,8 @@ def start_engine(self) -> None:

             test_perf=test_perf,
             test_accuracy=test_accuracy,
+
+            version=self.version,
         )
         self.reporter.start()
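
Note that this is the third verbatim copy of get_version() in the commit (the other two live in general_perf/core/perf_engine.py and byte_micro_perf/core/perf_engine.py). If the parsing ever changes, a shared helper would keep the copies in sync; a hypothetical consolidation sketch (this module does not exist in the commit):

    # Hypothetical shared module, e.g. a common version.py -- illustration only.
    import os
    import traceback

    def get_version(root: str, warn=print) -> str:
        """Parse <root>/../VERSION ('key=value' lines) into 'major.minor.patch'."""
        version = ""
        try:
            with open(os.path.join(root, "../VERSION")) as f:
                lines = f.read().splitlines()
            version = '.'.join(v.split('=')[1] for v in lines)
        except Exception as e:
            traceback.print_exc()
            warn(f"get bytemlperf version failed, error msg: {e}")
        return version

Note also that on any failure the helpers log a warning and fall back to an empty string, so reports produced from a tree without a readable VERSION file carry "Version": "".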

byte_infer_perf/llm_perf/utils/reporter.py (+4)

@@ -68,6 +68,7 @@ def __init__(
         max_new_tokens: int,
         test_perf: bool,
         test_accuracy: bool,
+        version: str="",
     ) -> None:
         self._running: bool = False
         self.cond: threading.Condition = threading.Condition()
@@ -87,12 +88,15 @@ def __init__(
         self.tp_size = tp_size
         self.batch_size = batch_size
         self.input_tokens = input_tokens
+        self.version = version

         # result template
         self.result: Dict[str, Any] = {
             "Model": self.task,
             "Backend": self.backend,
             "Host Info": get_cpu_name(),
+            "Version": self.version,
+            "Execution Date": time.strftime("%Y-%m-%d %H:%M:%S"),
             "Min New Tokens": min_new_tokens,
             "Max New Tokens": max_new_tokens,
             "Accuracy": {"PPL": [], "Token Diff": {}, "Logits Diff": {}},

byte_micro_perf/backends/GPU/backend_gpu.py (-1)

@@ -35,7 +35,6 @@


 class BackendGPU(Backend):
-
     def get_device_count(self):
         return torch.cuda.device_count()

byte_micro_perf/core/perf_engine.py (+16 −21)

@@ -212,16 +212,26 @@ def __init__(self) -> None:
         self.old_os_path = os.environ["PATH"]
         self.prev_sys_path = list(sys.path)
         self.real_prefix = sys.prefix
+        self.version = self.get_version()
+
+    def get_version(self):
+        version = ""
+        try:
+            version_file = os.path.join(str(BYTE_MLPERF_ROOT), "../VERSION")
+            with open(version_file) as f:
+                _version = f.read().splitlines()
+            version = '.'.join(v.split('=')[1] for v in _version)
+        except Exception as e:
+            traceback.print_exc()
+            log.warning(f"get bytemlperf version failed, error msg: {e}")
+        return version

     def get_cpu_name(self):
         command = "lscpu | grep 'Model name' | awk -F: '{print $2}'"
         cpu_name = subprocess.check_output(command, shell=True)
         return cpu_name.decode().strip()

-
-
     def start_engine(self) -> None:
-
         if self.args.activate_venv:
             self.activate_venv(self.backend_type)

@@ -270,7 +280,6 @@ def start_engine(self) -> None:
             for shape in shape_list:
                 test_list.append(ConfigInstance(dtype, shape, case_index))
                 case_index = case_index + 1
-

         try:
             mp.set_start_method("spawn", force=True)
@@ -287,10 +296,6 @@ def start_engine(self) -> None:
         if self.workload["operator"] in ["device2host", "host2device"]:
             instance_num = 1

-
-
-
-
         input_queues = mp.Queue()
         output_queues = mp.Queue(maxsize=1)

@@ -308,16 +313,13 @@ def start_engine(self) -> None:
         assert "ready" == output_queues.get()
         log.info("all ranks are ready and listening, init done")

-
-
         if group == 1:
             for test_instance in test_list:
                 input_queues.put(test_instance, True)

             for _ in range(instance_num):
                 input_queues.put("end", True)

-
         for process in _subprocesses.processes:
             process.join()

@@ -330,9 +332,6 @@ def start_engine(self) -> None:
         if self.args.activate_venv:
             self.deactivate_venv()

-
-
-
     def perf_func(self, rank: int, *args):
         backend_instance = self.backend_class(self.workload, self.args.vendor_path)
         op_name = self.workload["operator"]
@@ -342,7 +341,6 @@ def perf_func(self, rank: int, *args):
         # set device accroding to local_rank
         set_device_func = getattr(backend_instance, "set_device")
         set_device_func(rank)
-

         if world_size > 1:
             init_ccl_func = getattr(backend_instance, "initialize_ccl")
@@ -354,7 +352,6 @@ def perf_func(self, rank: int, *args):
         else:
             raise ValueError(f"Unknown operation: {op_name.lower()}")

-
         output_queues.put("ready")

         result_list = []
@@ -396,7 +393,6 @@ def perf_func(self, rank: int, *args):

             result_list = sorted(output_result_list, key=lambda x: x.config.index)

-
         elif group_size > 1:
             for i, test_instance in enumerate(test_list):
                 if rank == 0:
@@ -421,7 +417,6 @@ def perf_func(self, rank: int, *args):

                 result_list.append(ResultItem(test_instance, reports))

-
         if rank == 0:
             print(f"{len(result_list)} tasks finished.")

@@ -439,9 +434,11 @@ def perf_func(self, rank: int, *args):
                     "Backend": self.backend_type,
                     "Host Info": self.get_cpu_name(),
                     "Device Info": getattr(self.backend, "get_device_name")(),
+                    "Version": self.version,
+                    "Execution Date": time.strftime("%Y-%m-%d %H:%M:%S"),
                     "Performance": [result.report for result in dtype_results_mapping[dtype]]
                 }
-
+
                 filename = (
                     f"result-{str(dtype)}"
                     + (
@@ -460,8 +457,6 @@ def perf_func(self, rank: int, *args):
                 destroy_group_func()

         return True
-
-

     def activate_venv(self, hardware_type: str) -> bool:
         if os.path.exists("backends/" + hardware_type + "/requirements.txt"):
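
As context for the "Host Info" field that now sits alongside the new "Version" and "Execution Date" entries above, get_cpu_name() shells out to lscpu. A quick way to see what it returns (runnable on a Linux host with lscpu installed; the printed model name is an example):

    import subprocess

    command = "lscpu | grep 'Model name' | awk -F: '{print $2}'"
    cpu_name = subprocess.check_output(command, shell=True)
    print(cpu_name.decode().strip())   # e.g. Intel(R) Xeon(R) Platinum ... CPU @ 2.30GHz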
