Add QPS vs throughput, p90 NTPOT plots and comparisons #44

Open · wants to merge 3 commits into main
6 changes: 6 additions & 0 deletions analysis/README.md
@@ -11,4 +11,10 @@ To analyze the price-perf results, you can include instance pricing to get a chart

```
python analyze.py results/ --instance-price-per-hour 10
```

To compare latency and throughput metrics across different runs, pass each run's results folder:

```
python analyze.py run-1/ run-2/ run-3/
```
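
As an aside for reviewers: the two invocations above compose, since `analyze_and_plot` forwards the price flag while parsing every folder. A minimal illustrative sketch (the run folder names are hypothetical):

```
python analyze.py run-1/ run-2/ --instance-price-per-hour 10
```

With the flag present, the cost-per-million-tokens comparison plot is generated alongside the latency and throughput comparisons.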
263 changes: 160 additions & 103 deletions analysis/analyze.py
@@ -3,35 +3,50 @@
import os
import argparse

# Helper function for plotting
def _create_plot(x_data, y_data, c_data, x_label, y_label, c_label_text, title, output_filename_base, annotation_data=None, annotation_format_string="{val} qps"):
"""Helper function to generate and save a scatter plot with configurable annotations."""
if not x_data or not y_data or not c_data: # Check if any essential list is empty
def _create_line_plot(all_series_data, x_label, y_label, title, output_filename_base):
"""
Helper function to generate and save a line plot with multiple series.

Args:
all_series_data (dict): A dict where key is the series label (e.g., folder name)
and value is a dict {'x': [...], 'y': [...]}.
x_label (str): Label for the x-axis.
y_label (str): Label for the y-axis.
title (str): Title of the plot.
output_filename_base (str): Base name for the output PNG file.
"""
if not all_series_data:
print(f"No valid data for '{title}'. Cannot generate plot.")
return

plt.figure(figsize=(10, 6))
scatter = plt.scatter(x_data, y_data, c=c_data, cmap='viridis', s=100, alpha=0.8)
plt.figure(figsize=(12, 7))

plt.title(title)
plt.xlabel(x_label)
plt.ylabel(y_label)
for series_label, series_data in all_series_data.items():
x_data = series_data.get('x')
y_data = series_data.get('y')

if not x_data or not y_data:
print(f"Warning: Skipping series '{series_label}' for plot '{title}' due to missing data.")
continue

cbar = plt.colorbar(scatter)
cbar.set_label(c_label_text)
# To draw a clean line, sort the points based on the x-axis value.
sorted_points = sorted(zip(x_data, y_data))
if not sorted_points:
continue
x_sorted, y_sorted = zip(*sorted_points)

# Annotate each point. Default to c_data if annotation_data is not provided.
data_for_annotation = annotation_data if annotation_data is not None else c_data
for i, val in enumerate(data_for_annotation):
annotation_text = annotation_format_string.format(val=val)
plt.annotate(annotation_text, (x_data[i], y_data[i]), textcoords="offset points", xytext=(0,10), ha='center')
plt.plot(x_sorted, y_sorted, marker='o', linestyle='-', label=series_label)

plt.title(title)
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.legend()
plt.grid(True)
output_filename = f'{output_filename_base}.png'
plt.savefig(output_filename)
print(f"Chart saved to {output_filename}")
plt.show()
plt.close() # Close the figure to free up memory
plt.close()

def _print_summary(parsed_data_points):
"""Prints a summary of the best performing data points."""
@@ -53,28 +68,21 @@ def _print_summary(parsed_data_points):
print(f" - Avg Per Token Latency: {max_throughput_point['latency']:.2f} ms")
if max_throughput_point.get('normalized_latency') is not None:
print(f" - Avg Normalized Latency: {max_throughput_point['normalized_latency']:.2f} ms")
if max_throughput_point.get('p90_normalized_latency') is not None:
print(f" - P90 Normalized Latency: {max_throughput_point['p90_normalized_latency']:.2f} ms")
if max_throughput_point.get('cost_per_million_tokens') is not None:
print(f" - Cost: ${max_throughput_point['cost_per_million_tokens']:.2f} per million tokens")

print("\n-----------------------\n")

def parse_and_plot(folder_path, instance_price_per_hour=None):
"""
Scans a folder for JSON files, extracts benchmark metrics, and plots
various performance and cost metrics.

Args:
folder_path (str): The path to the folder containing the JSON files.
instance_price_per_hour (float, optional): The instance price per hour for cost calculation.
"""
parsed_data_points = [] # Stores dicts of {'throughput', 'latency', 'normalized_latency', 'request_rate'}

def _parse_folder_data(folder_path, instance_price_per_hour):
"""Parses all JSON benchmark files in a single folder."""
parsed_data_points = []
print(f"Scanning folder: {folder_path}")

# Check if the folder path exists and is a directory
if not os.path.isdir(folder_path):
print(f"Error: The provided path '{folder_path}' is not a valid directory.")
return
return parsed_data_points

for filename in os.listdir(folder_path):
if filename.lower().endswith('.json'):
@@ -86,15 +94,16 @@ def parse_and_plot(folder_path, instance_price_per_hour=None):
metrics = data.get("metrics", {})
throughput = metrics.get("throughput")
latency = metrics.get("avg_per_token_latency_ms")
normalized_latency = metrics.get("avg_normalized_time_per_output_token_ms") # New metric
normalized_latency = metrics.get("avg_normalized_time_per_output_token_ms")
p90_normalized_latency = metrics.get("p90_normalized_time_per_output_token_ms")
request_rate = metrics.get("request_rate")

# Core metrics for any plot are throughput and request_rate
if throughput is not None and request_rate is not None:
point_data = {
"throughput": throughput,
"latency": latency, # Can be None
"normalized_latency": normalized_latency, # Can be None
"latency": latency,
"normalized_latency": normalized_latency,
"p90_normalized_latency": p90_normalized_latency,
"request_rate": request_rate,
"filename": filename
}
@@ -104,14 +113,16 @@ def parse_and_plot(folder_path, instance_price_per_hour=None):
cost_per_million_tokens = (instance_price_per_hour * 1000000) / (throughput * 3600)
point_data["cost_per_million_tokens"] = cost_per_million_tokens
elif instance_price_per_hour is not None and throughput == 0:
point_data["cost_per_million_tokens"] = float('inf') # Or handle as an error/skip
point_data["cost_per_million_tokens"] = float('inf')

parsed_data_points.append(point_data)
print(f"Successfully parsed common metrics from {filename}")
if latency is None:
print(f" - Note: 'avg_per_token_latency_ms' not found in {filename}.")
if normalized_latency is None:
print(f" - Note: 'avg_normalized_time_per_output_token_ms' not found in {filename}.")
if p90_normalized_latency is None:
print(f" - Note: 'p90_normalized_time_per_output_token_ms' not found in {filename}.")
else:
missing_core = []
if throughput is None: missing_core.append("'throughput'")
@@ -123,89 +134,135 @@ def parse_and_plot(folder_path, instance_price_per_hour=None):
except Exception as e:
print(f"An unexpected error occurred with {file_path}: {e}")

if not parsed_data_points:
print("No data points with core metrics (throughput, request_rate) were parsed. Cannot generate any plots.")
return parsed_data_points

def _prepare_plot_data(all_folders_data, x_key, y_key):
"""Prepares data for plotting from the parsed folder data."""
plot_data = {}
for folder_name, points in all_folders_data.items():
valid_points = [p for p in points if p.get(x_key) is not None and p.get(y_key) is not None]
if valid_points:
plot_data[folder_name] = {
'x': [p[x_key] for p in valid_points],
'y': [p[y_key] for p in valid_points]
}
return plot_data

def analyze_and_plot(folder_paths, instance_price_per_hour=None):
"""
Scans one or more folders for JSON files, extracts benchmark metrics,
and plots comparison charts with each folder as a separate line.

Args:
folder_paths (list[str]): A list of paths to folders containing JSON files.
instance_price_per_hour (float, optional): The instance price per hour for cost calculation.
"""
all_folders_data = {}

for folder_path in folder_paths:
parsed_data = _parse_folder_data(folder_path, instance_price_per_hour)
if parsed_data:
folder_name = os.path.basename(os.path.normpath(folder_path))
all_folders_data[folder_name] = parsed_data

if not all_folders_data:
print("No data points were parsed from any folder. Cannot generate plots.")
return

# Print summary before generating plots
_print_summary(parsed_data_points)
# Print summaries for each folder
for folder_name, parsed_data in all_folders_data.items():
print(f"\n--- Summary for: {folder_name} ---")
_print_summary(parsed_data)

# --- Plot 1: Throughput vs. Per Token Latency ---
plot1_throughputs = []
plot1_latencies = []
plot1_request_rates = []
for point in parsed_data_points:
if point["latency"] is not None:
plot1_throughputs.append(point["throughput"])
plot1_latencies.append(point["latency"])
plot1_request_rates.append(point["request_rate"])

_create_plot(
x_data=plot1_latencies,
y_data=plot1_throughputs,
c_data=plot1_request_rates,
x_label='Average Per Token Latency (ms)',
y_label='Throughput (output tokens/sec)',
c_label_text='Request Rate (QPS)',
title='Throughput vs. Per Token Latency',
output_filename_base='throughput_vs_latency'
)
plot1_data = _prepare_plot_data(all_folders_data, x_key='latency', y_key='throughput')
if plot1_data:
_create_line_plot(
all_series_data=plot1_data,
x_label='Average Per Token Latency (ms)',
y_label='Throughput (output tokens/sec)',
title='Throughput vs. Per Token Latency',
output_filename_base='throughput_vs_latency_comparison'
)
else:
print("No data available for 'Throughput vs. Per Token Latency' plot.")

# --- Plot 2: Throughput vs. Normalized Per Token Latency ---
plot2_throughputs = []
plot2_normalized_latencies = []
plot2_request_rates = []
for point in parsed_data_points:
if point["normalized_latency"] is not None:
plot2_throughputs.append(point["throughput"])
plot2_normalized_latencies.append(point["normalized_latency"])
plot2_request_rates.append(point["request_rate"])

_create_plot(
x_data=plot2_normalized_latencies,
y_data=plot2_throughputs,
c_data=plot2_request_rates,
x_label='Average Normalized Time Per Output Token (ms)',
y_label='Throughput (output tokens/sec)',
c_label_text='Request Rate (QPS)',
title='Throughput vs. Normalized Per Token Latency',
output_filename_base='throughput_vs_normalized_latency'
)

# --- Plot 3: Cost per Million Output Tokens vs. Normalized Per Token Latency ---
if instance_price_per_hour is not None:
plot3_normalized_latencies = []
plot3_costs_per_million_tokens = []
plot3_request_rates = []
for point in parsed_data_points:
if point.get("normalized_latency") is not None and point.get("cost_per_million_tokens") is not None:
plot3_normalized_latencies.append(point["normalized_latency"])
plot3_costs_per_million_tokens.append(point["cost_per_million_tokens"])
plot3_request_rates.append(point["request_rate"])

_create_plot(
x_data=plot3_normalized_latencies,
y_data=plot3_costs_per_million_tokens,
c_data=plot3_request_rates,
plot2_data = _prepare_plot_data(all_folders_data, x_key='normalized_latency', y_key='throughput')
if plot2_data:
_create_line_plot(
all_series_data=plot2_data,
x_label='Average Normalized Time Per Output Token (ms)',
y_label='$ per Million Output Tokens',
c_label_text='Request Rate (QPS)',
title='Cost per Million Output Tokens vs. Normalized Latency',
output_filename_base='cost_vs_normalized_latency',
annotation_data=plot3_costs_per_million_tokens,
annotation_format_string="${val:.2f}"
y_label='Throughput (output tokens/sec)',
title='Throughput vs. Normalized Per Token Latency',
output_filename_base='throughput_vs_normalized_latency_comparison'
)
else:
print("No data available for 'Throughput vs. Normalized Per Token Latency' plot.")

# --- Plot 3: Throughput vs. P90 Normalized Per Token Latency ---
plot3_data = _prepare_plot_data(all_folders_data, x_key='p90_normalized_latency', y_key='throughput')
if plot3_data:
_create_line_plot(
all_series_data=plot3_data,
x_label='P90 Normalized Time Per Output Token (ms)',
y_label='Throughput (output tokens/sec)',
title='Throughput vs. P90 Normalized Per Token Latency',
output_filename_base='throughput_vs_p90_normalized_latency_comparison'
)
else:
print("No data available for 'Throughput vs. P90 Normalized Per Token Latency' plot.")

# --- Plot 4: Cost per Million Output Tokens vs. Normalized Per Token Latency ---
if instance_price_per_hour is not None:
plot4_data = _prepare_plot_data(all_folders_data, x_key='normalized_latency', y_key='cost_per_million_tokens')
if plot4_data:
_create_line_plot(
all_series_data=plot4_data,
x_label='Average Normalized Time Per Output Token (ms)',
y_label='$ per Million Output Tokens',
title='Cost per Million Output Tokens vs. Normalized Latency',
output_filename_base='cost_vs_normalized_latency_comparison'
)
else:
print("No data available for 'Cost vs. Normalized Latency' plot.")
else:
print("Skipping cost plot as --instance-price-per-hour was not provided.")

# --- Plot 5: Throughput vs. Request Rate (QPS) ---
plot5_data = _prepare_plot_data(all_folders_data, x_key='request_rate', y_key='throughput')
if plot5_data:
_create_line_plot(
all_series_data=plot5_data,
x_label='Request Rate (QPS)',
y_label='Throughput (output tokens/sec)',
title='Throughput vs. Request Rate (QPS)',
output_filename_base='throughput_vs_qps_comparison'
)
else:
print("No data available for 'Throughput vs. Request Rate (QPS)' plot.")

# --- Plot 6: P90 Normalized Latency vs. Request Rate (QPS) ---
plot6_data = _prepare_plot_data(all_folders_data, x_key='request_rate', y_key='p90_normalized_latency')
if plot6_data:
_create_line_plot(
all_series_data=plot6_data,
x_label='Request Rate (QPS)',
y_label='P90 Normalized Time Per Output Token (ms)',
title='P90 Normalized Latency vs. Request Rate (QPS)',
output_filename_base='p90_latency_vs_qps_comparison'
)
else:
print("No data available for 'P90 Normalized Latency vs. Request Rate (QPS)' plot.")

if __name__ == '__main__':
# Set up an argument parser to get the folder paths from the command line
parser = argparse.ArgumentParser(description="Parse all benchmark JSON files in a folder and generate a plot.")
parser.add_argument("folder_path", type=str, help="The path to the folder containing the JSON files.")
parser = argparse.ArgumentParser(description="Parse all benchmark JSON files in one or more folders and generate comparison plots.")
parser.add_argument("folder_paths", type=str, nargs='+', help="One or more paths to folders containing the JSON files.")
parser.add_argument("--instance-price-per-hour", type=float, default=None,
help="Optional: Instance price per hour (e.g., 2.50 for $2.50/hour) to calculate cost per million tokens.")

args = parser.parse_args()

# Call the function with the folder path provided by the user
parse_and_plot(args.folder_path, args.instance_price_per_hour)
# Call the function with the folder paths provided by the user
analyze_and_plot(args.folder_paths, args.instance_price_per_hour)
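
For anyone trying this change locally, here is a minimal sketch of the per-file input that `_parse_folder_data` expects, inferred only from the `metrics.get(...)` keys above (the benchmark harness may emit more fields; the folder and file names below are hypothetical):

```
import json
import os

# One JSON file per load point; "throughput" and "request_rate" are required
# for a point to be plotted, while the latency fields are optional.
os.makedirs("run-1", exist_ok=True)
sample = {
    "metrics": {
        "throughput": 1250.0,                              # output tokens/sec
        "request_rate": 4.0,                               # QPS
        "avg_per_token_latency_ms": 42.0,
        "avg_normalized_time_per_output_token_ms": 38.5,
        "p90_normalized_time_per_output_token_ms": 55.0,
    }
}
with open("run-1/qps_4.json", "w") as f:
    json.dump(sample, f)

# With --instance-price-per-hour 10, this point would be priced at
# (10 * 1_000_000) / (1250.0 * 3600) ≈ $2.22 per million output tokens,
# matching the cost formula used in _parse_folder_data.
```

Running `python analyze.py run-1/ run-2/ --instance-price-per-hour 10` over such folders should then write the comparison PNGs named by the `output_filename_base` arguments above.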