Update verify_performance.py | Refactor the code #2073

Open · wants to merge 5 commits into base: master
182 changes: 68 additions & 114 deletions compliance/nvidia/TEST01/verify_performance.py
@@ -1,5 +1,5 @@
#! /usr/bin/env python3
# Copyright 2018-2022 The MLPerf Authors. All Rights Reserved.
# Copyright 2018-2025 The MLPerf Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,140 +13,94 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

import json
import argparse
import os
import sys
import re

sys.path.append(os.getcwd())
sys.path.append(
    os.path.abspath(
        os.path.join(
            os.path.dirname(__file__),
            "..",
            "..",
            "..",
            "tools",
            "submission")))
from log_parser import MLPerfLog # noqa

RESULT_FIELD = {
    "Offline": "result_samples_per_second",
    "SingleStream": "early_stopping_latency_ss",
    "MultiStream": "early_stopping_latency_ms",
    "Server": "result_completed_samples_per_sec",
}


def parse_result_log(file_path):
    score, target_latency = 0, None

    mlperf_log = MLPerfLog(file_path)
    scenario = mlperf_log["effective_scenario"]

    if not (
        "result_validity" in mlperf_log.get_keys()
        and mlperf_log["result_validity"] == "VALID"
    ):
        sys.exit("TEST FAIL: Invalid results in {}".format(file_path))

    if mlperf_log.has_error():
        print("WARNING: ERROR reported in {}".format(file_path))

    score = float(mlperf_log[RESULT_FIELD[scenario]])
    if scenario == "Server":
        target_latency = mlperf_log["effective_target_latency_ns"]

    return scenario, score, target_latency


def main():
# Parse arguments to identify the paths to the performance log_details files
# from the reference and test runs
parser = argparse.ArgumentParser()
parser.add_argument(
"--reference_summary",
"-r",
help="Specifies the path to the summary log for the performance run.",
default="",
)
"--reference_log_details",
help="Path to reference performance log_details file.",
required=True)
parser.add_argument(
"--test_summary",
"-t",
help="Specifies the path to the summary log for this test.",
default="",
)
"--test_log_details",
help="Path to test performance log_details file.",
required=True)
args = parser.parse_args()

print("Verifying performance.")
ref_file = open(args.reference_summary, "r")
test_file = open(args.test_summary, "r")
ref_score = 0
test_score = 0
ref_mode = ""
test_mode = ""

for line in ref_file:
if re.match("Scenario", line):
ref_mode = line.split(": ", 1)[1].strip()
continue

if ref_mode == "SingleStream":
if re.match(
".*Early stopping (90th|90.0th|99.9th) percentile estimate", line):
ref_score = line.split(": ", 1)[1].strip()
continue

if ref_mode == "MultiStream":
if re.match(
".*Early stopping (99th|99.0th) percentile estimate", line):
ref_score = line.split(": ", 1)[1].strip()
continue

if ref_mode == "Server":
if re.match("Completed samples per second", line):
ref_score = line.split(": ", 1)[1].strip()
continue
if re.match("target_latency (ns)", line):
ref_target_latency = line.split(": ", 1)[1].strip()
continue

if ref_mode == "Offline":
if re.match("Samples per second", line):
ref_score = line.split(": ", 1)[1].strip()
continue

if re.match("Result is", line):
valid = line.split(": ", 1)[1].strip()
if valid == "INVALID":
sys.exit("TEST FAIL: Reference results are invalid")

if re.match("\\d+ ERROR", line):
error = line.split(" ", 1)[0].strip()
print("WARNING: " + error + " ERROR reported in reference results")

for line in test_file:
if re.match("Scenario", line):
test_mode = line.split(": ", 1)[1].strip()
continue
if test_mode == "SingleStream":
if re.match(
".*Early stopping (90th|90.0th|99.9th) percentile estimate", line):
test_score = line.split(": ", 1)[1].strip()
continue

if test_mode == "MultiStream":
if re.match(
".*Early stopping (99th|99.0th) percentile estimate", line):
test_score = line.split(": ", 1)[1].strip()
continue

if test_mode == "Server":
if re.match("Completed samples per second", line):
test_score = line.split(": ", 1)[1].strip()
continue
if re.match("target_latency (ns)", line):
test_target_latency = line.split(": ", 1)[1].strip()
if test_target_latency != ref_target_latency:
print("TEST FAIL: Server target latency mismatch")
sys.exit()
continue

if test_mode == "Offline":
if re.match("Samples per second", line):
test_score = line.split(": ", 1)[1].strip()
continue

if re.match("Result is", line):
valid = line.split(": ", 1)[1].strip()
if valid == "INVALID":
sys.exit("TEST FAIL: Test results are invalid")

if re.match("\\d+ ERROR", line):
error = line.split(" ", 1)[0].strip()
print("WARNING: " + error + " ERROR reported in test results")

if test_mode != ref_mode:
sys.exit("Test and reference scenarios do not match!")

print("reference score = {}".format(ref_score))
print("test score = {}".format(test_score))
ref_scenario, ref_score, ref_target_latency = parse_result_log(
args.reference_log_details)
test_scenario, test_score, test_target_latency = parse_result_log(
args.test_log_details)

threshold = 0.10
if test_scenario != ref_scenario:
sys.exit("TEST FAIL: Test and reference scenarios do not match!")

# In single-/multi-stream mode, latencies can be very short on high-performance
# systems, and run-to-run variation due to external disturbances (e.g. the OS)
# can be significant. In that case we relax the pass threshold to 20%.
if (ref_mode == "SingleStream" and float(ref_score) <= 200000) or (
ref_mode == "MultiStream" and float(ref_score) <= 1600000
):
if ref_scenario == "Server" and test_target_latency != ref_target_latency:
sys.exit("TEST FAIL: Server target latency mismatch")

print(f"Reference score = {ref_score}")
print(f"Test score = {test_score}")

threshold = 0.10
if (ref_scenario == "SingleStream" and ref_score <= 200000) or (
ref_scenario == "MultiStream" and ref_score <= 1600000):
threshold = 0.20

if float(test_score) < float(ref_score) * (1 + threshold) and float(
test_score
) > float(ref_score) * (1 - threshold):
if ref_score * (1 - threshold) <= test_score <= ref_score * \
(1 + threshold):
print("TEST PASS")
else:
print("TEST FAIL: Test score invalid")
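The new log_details arguments point at loadgen's mlperf_log_detail.txt files, which MLPerfLog (tools/submission/log_parser.py) parses. As a rough illustration only, and assuming the usual detail-log format of ':::MLLOG {json}' lines carrying "key" and "value" fields, a stand-in reader might look like the sketch below; the real MLPerfLog class (with error counting, duplicate-key handling, etc.) is what the script actually uses.

import json


class DetailLogSketch:
    """Illustrative stand-in for log_parser.MLPerfLog; not part of this PR."""

    def __init__(self, file_path):
        self._entries = {}
        with open(file_path) as f:
            for line in f:
                # Assumed line format: ':::MLLOG {"key": ..., "value": ..., ...}'
                if not line.startswith(":::MLLOG"):
                    continue
                record = json.loads(line.split(maxsplit=1)[1])
                self._entries[record["key"]] = record["value"]

    def __getitem__(self, key):
        return self._entries[key]

    def get_keys(self):
        return self._entries.keys()

With such a reader, DetailLogSketch(path)["effective_scenario"] and ["result_validity"] would return the fields the refactored script checks.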
8 changes: 5 additions & 3 deletions tools/submission/submission_checker.py
@@ -2094,7 +2094,8 @@ def log_result(
if filter_submitter and submitter != filter_submitter:
continue
results_path = os.path.join(division, submitter, "results")
measurements_path = os.path.join(division, submitter, "measurements")
measurements_path = os.path.join(
division, submitter, "measurements")
systems_path = os.path.join(division, submitter, "systems")
if not os.path.exists(results_path):
continue
@@ -2200,7 +2201,8 @@ def log_result(
extra_model_mapping = json.load(fp)

if not config.skip_all_systems_with_results:
measurement_diff = list(set(list_dir(measurements_path)) - set(list_dir(results_path)))
measurement_diff = list(
set(list_dir(measurements_path)) - set(list_dir(results_path)))
systems_diff = list(
set(
[
@@ -3173,7 +3175,7 @@ def main():
args.extra_model_benchmark_map,
ignore_uncommited=args.submission_exceptions,
skip_power_check=args.skip_power_check,
skip_all_systems_with_results = args.skip_all_systems_have_results_check
skip_all_systems_with_results=args.skip_all_systems_have_results_check
)

if args.scenarios_to_skip:
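
The measurement_diff hunk above is a plain set difference between the directory listings of measurements/ and results/. A self-contained sketch of the same idea, with a stand-in list_dir helper and a hypothetical submission layout (neither is the checker's actual implementation):

import os


def list_dir(path):
    # Stand-in helper: immediate subdirectory names, or [] if the path is missing.
    if not os.path.isdir(path):
        return []
    return [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]


def systems_missing_results(measurements_path, results_path):
    # Systems that have a measurements directory but no matching results directory.
    return sorted(set(list_dir(measurements_path)) - set(list_dir(results_path)))


# Hypothetical layout: closed/<submitter>/{measurements,results}/<system>/...
base = os.path.join("closed", "ExampleOrg")
diff = systems_missing_results(
    os.path.join(base, "measurements"), os.path.join(base, "results"))
if diff:
    print("Systems with measurements but no results:", diff)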