From 6fe1e123553a6e7b9a34611fca67d6a501f4cb24 Mon Sep 17 00:00:00 2001
From: Ashok Chandrasekar
Date: Mon, 23 Jun 2025 21:05:10 +0000
Subject: [PATCH 1/3] Add comparison between multiple runs

---
 analysis/analyze.py | 216 +++++++++++++++++++++++---------------------
 1 file changed, 114 insertions(+), 102 deletions(-)

diff --git a/analysis/analyze.py b/analysis/analyze.py
index 57bfdee..3f54032 100644
--- a/analysis/analyze.py
+++ b/analysis/analyze.py
@@ -3,35 +3,50 @@
 import os
 import argparse
 
-# Helper function for plotting
-def _create_plot(x_data, y_data, c_data, x_label, y_label, c_label_text, title, output_filename_base, annotation_data=None, annotation_format_string="{val} qps"):
-    """Helper function to generate and save a scatter plot with configurable annotations."""
-    if not x_data or not y_data or not c_data: # Check if any essential list is empty
+def _create_line_plot(all_series_data, x_label, y_label, title, output_filename_base):
+    """
+    Helper function to generate and save a line plot with multiple series.
+
+    Args:
+        all_series_data (dict): A dict where key is the series label (e.g., folder name)
+                                and value is a dict {'x': [...], 'y': [...]}.
+        x_label (str): Label for the x-axis.
+        y_label (str): Label for the y-axis.
+        title (str): Title of the plot.
+        output_filename_base (str): Base name for the output PNG file.
+    """
+    if not all_series_data:
         print(f"No valid data for '{title}'. Cannot generate plot.")
         return
 
-    plt.figure(figsize=(10, 6))
-    scatter = plt.scatter(x_data, y_data, c=c_data, cmap='viridis', s=100, alpha=0.8)
+    plt.figure(figsize=(12, 7))
 
-    plt.title(title)
-    plt.xlabel(x_label)
-    plt.ylabel(y_label)
+    for series_label, series_data in all_series_data.items():
+        x_data = series_data.get('x')
+        y_data = series_data.get('y')
 
-    cbar = plt.colorbar(scatter)
-    cbar.set_label(c_label_text)
+        if not x_data or not y_data:
+            print(f"Warning: Skipping series '{series_label}' for plot '{title}' due to missing data.")
+            continue
 
-    # Annotate each point. Default to c_data if annotation_data is not provided.
-    data_for_annotation = annotation_data if annotation_data is not None else c_data
-    for i, val in enumerate(data_for_annotation):
-        annotation_text = annotation_format_string.format(val=val)
-        plt.annotate(annotation_text, (x_data[i], y_data[i]), textcoords="offset points", xytext=(0,10), ha='center')
+        # To draw a clean line, sort the points based on the x-axis value.
+        sorted_points = sorted(zip(x_data, y_data))
+        if not sorted_points:
+            continue
+        x_sorted, y_sorted = zip(*sorted_points)
+
+        plt.plot(x_sorted, y_sorted, marker='o', linestyle='-', label=series_label)
+
+    plt.title(title)
+    plt.xlabel(x_label)
+    plt.ylabel(y_label)
+    plt.legend()
     plt.grid(True)
 
     output_filename = f'{output_filename_base}.png'
     plt.savefig(output_filename)
     print(f"Chart saved to {output_filename}")
     plt.show()
-    plt.close() # Close the figure to free up memory
+    plt.close()
 
 def _print_summary(parsed_data_points):
     """Prints a summary of the best performing data points."""
@@ -58,23 +73,14 @@ def _print_summary(parsed_data_points):
     print("\n-----------------------\n")
 
-def parse_and_plot(folder_path, instance_price_per_hour=None):
-    """
-    Scans a folder for JSON files, extracts benchmark metrics, and plots
-    various performance and cost metrics.
-
-    Args:
-        folder_path (str): The path to the folder containing the JSON files.
-        instance_price_per_hour (float, optional): The instance price per hour for cost calculation.
- """ - parsed_data_points = [] # Stores dicts of {'throughput', 'latency', 'normalized_latency', 'request_rate'} - +def _parse_folder_data(folder_path, instance_price_per_hour): + """Parses all JSON benchmark files in a single folder.""" + parsed_data_points = [] print(f"Scanning folder: {folder_path}") - # Check if the folder path exists and is a directory if not os.path.isdir(folder_path): print(f"Error: The provided path '{folder_path}' is not a valid directory.") - return + return parsed_data_points for filename in os.listdir(folder_path): if filename.lower().endswith('.json'): @@ -86,15 +92,14 @@ def parse_and_plot(folder_path, instance_price_per_hour=None): metrics = data.get("metrics", {}) throughput = metrics.get("throughput") latency = metrics.get("avg_per_token_latency_ms") - normalized_latency = metrics.get("avg_normalized_time_per_output_token_ms") # New metric + normalized_latency = metrics.get("avg_normalized_time_per_output_token_ms") request_rate = metrics.get("request_rate") - # Core metrics for any plot are throughput and request_rate if throughput is not None and request_rate is not None: point_data = { "throughput": throughput, - "latency": latency, # Can be None - "normalized_latency": normalized_latency, # Can be None + "latency": latency, + "normalized_latency": normalized_latency, "request_rate": request_rate, "filename": filename } @@ -104,7 +109,7 @@ def parse_and_plot(folder_path, instance_price_per_hour=None): cost_per_million_tokens = (instance_price_per_hour * 1000000) / (throughput * 3600) point_data["cost_per_million_tokens"] = cost_per_million_tokens elif instance_price_per_hour is not None and throughput == 0: - point_data["cost_per_million_tokens"] = float('inf') # Or handle as an error/skip + point_data["cost_per_million_tokens"] = float('inf') parsed_data_points.append(point_data) print(f"Successfully parsed common metrics from {filename}") @@ -123,89 +128,96 @@ def parse_and_plot(folder_path, instance_price_per_hour=None): except Exception as e: print(f"An unexpected error occurred with {file_path}: {e}") - if not parsed_data_points: - print("No data points with core metrics (throughput, request_rate) were parsed. Cannot generate any plots.") + return parsed_data_points + +def _prepare_plot_data(all_folders_data, x_key, y_key): + """Prepares data for plotting from the parsed folder data.""" + plot_data = {} + for folder_name, points in all_folders_data.items(): + valid_points = [p for p in points if p.get(x_key) is not None and p.get(y_key) is not None] + if valid_points: + plot_data[folder_name] = { + 'x': [p[x_key] for p in valid_points], + 'y': [p[y_key] for p in valid_points] + } + return plot_data + +def analyze_and_plot(folder_paths, instance_price_per_hour=None): + """ + Scans one or more folders for JSON files, extracts benchmark metrics, + and plots comparison charts with each folder as a separate line. + + Args: + folder_paths (list[str]): A list of paths to folders containing JSON files. + instance_price_per_hour (float, optional): The instance price per hour for cost calculation. + """ + all_folders_data = {} + + for folder_path in folder_paths: + parsed_data = _parse_folder_data(folder_path, instance_price_per_hour) + if parsed_data: + folder_name = os.path.basename(os.path.normpath(folder_path)) + all_folders_data[folder_name] = parsed_data + + if not all_folders_data: + print("No data points were parsed from any folder. 
Cannot generate plots.") return - # Print summary before generating plots - _print_summary(parsed_data_points) + # Print summaries for each folder + for folder_name, parsed_data in all_folders_data.items(): + print(f"\n--- Summary for: {folder_name} ---") + _print_summary(parsed_data) # --- Plot 1: Throughput vs. Per Token Latency --- - plot1_throughputs = [] - plot1_latencies = [] - plot1_request_rates = [] - for point in parsed_data_points: - if point["latency"] is not None: - plot1_throughputs.append(point["throughput"]) - plot1_latencies.append(point["latency"]) - plot1_request_rates.append(point["request_rate"]) - - _create_plot( - x_data=plot1_latencies, - y_data=plot1_throughputs, - c_data=plot1_request_rates, - x_label='Average Per Token Latency (ms)', - y_label='Throughput (output tokens/sec)', - c_label_text='Request Rate (QPS)', - title='Throughput vs. Per Token Latency', - output_filename_base='throughput_vs_latency' - ) + plot1_data = _prepare_plot_data(all_folders_data, x_key='latency', y_key='throughput') + if plot1_data: + _create_line_plot( + all_series_data=plot1_data, + x_label='Average Per Token Latency (ms)', + y_label='Throughput (output tokens/sec)', + title='Throughput vs. Per Token Latency', + output_filename_base='throughput_vs_latency_comparison' + ) + else: + print("No data available for 'Throughput vs. Per Token Latency' plot.") # --- Plot 2: Throughput vs. Normalized Per Token Latency --- - plot2_throughputs = [] - plot2_normalized_latencies = [] - plot2_request_rates = [] - for point in parsed_data_points: - if point["normalized_latency"] is not None: - plot2_throughputs.append(point["throughput"]) - plot2_normalized_latencies.append(point["normalized_latency"]) - plot2_request_rates.append(point["request_rate"]) - - _create_plot( - x_data=plot2_normalized_latencies, - y_data=plot2_throughputs, - c_data=plot2_request_rates, - x_label='Average Normalized Time Per Output Token (ms)', - y_label='Throughput (output tokens/sec)', - c_label_text='Request Rate (QPS)', - title='Throughput vs. Normalized Per Token Latency', - output_filename_base='throughput_vs_normalized_latency' - ) + plot2_data = _prepare_plot_data(all_folders_data, x_key='normalized_latency', y_key='throughput') + if plot2_data: + _create_line_plot( + all_series_data=plot2_data, + x_label='Average Normalized Time Per Output Token (ms)', + y_label='Throughput (output tokens/sec)', + title='Throughput vs. Normalized Per Token Latency', + output_filename_base='throughput_vs_normalized_latency_comparison' + ) + else: + print("No data available for 'Throughput vs. Normalized Per Token Latency' plot.") # --- Plot 3: Cost per Million Output Tokens vs. Normalized Per Token Latency --- if instance_price_per_hour is not None: - plot3_normalized_latencies = [] - plot3_costs_per_million_tokens = [] - plot3_request_rates = [] - for point in parsed_data_points: - if point.get("normalized_latency") is not None and point.get("cost_per_million_tokens") is not None: - plot3_normalized_latencies.append(point["normalized_latency"]) - plot3_costs_per_million_tokens.append(point["cost_per_million_tokens"]) - plot3_request_rates.append(point["request_rate"]) - - _create_plot( - x_data=plot3_normalized_latencies, - y_data=plot3_costs_per_million_tokens, - c_data=plot3_request_rates, - x_label='Average Normalized Time Per Output Token (ms)', - y_label='$ per Million Output Tokens', - c_label_text='Request Rate (QPS)', - title='Cost per Million Output Tokens vs. 
Normalized Latency', - output_filename_base='cost_vs_normalized_latency', - annotation_data=plot3_costs_per_million_tokens, - annotation_format_string="${val:.2f}" - ) + plot3_data = _prepare_plot_data(all_folders_data, x_key='normalized_latency', y_key='cost_per_million_tokens') + if plot3_data: + _create_line_plot( + all_series_data=plot3_data, + x_label='Average Normalized Time Per Output Token (ms)', + y_label='$ per Million Output Tokens', + title='Cost per Million Output Tokens vs. Normalized Latency', + output_filename_base='cost_vs_normalized_latency_comparison' + ) + else: + print("No data available for 'Cost vs. Normalized Latency' plot.") else: print("Skipping cost plot as --instance-price-per-hour was not provided.") if __name__ == '__main__': # Set up an argument parser to get the folder path from the command line - parser = argparse.ArgumentParser(description="Parse all benchmark JSON files in a folder and generate a plot.") - parser.add_argument("folder_path", type=str, help="The path to the folder containing the JSON files.") + parser = argparse.ArgumentParser(description="Parse all benchmark JSON files in one or more folders and generate comparison plots.") + parser.add_argument("folder_paths", type=str, nargs='+', help="One or more paths to folders containing the JSON files.") parser.add_argument("--instance-price-per-hour", type=float, default=None, help="Optional: Instance price per hour (e.g., 2.50 for $2.50/hour) to calculate cost per million tokens.") args = parser.parse_args() - # Call the function with the folder path provided by the user - parse_and_plot(args.folder_path, args.instance_price_per_hour) + # Call the function with the folder paths provided by the user + analyze_and_plot(args.folder_paths, args.instance_price_per_hour) From 6512d354fd613c6d9f4343542de90c96290c2ed9 Mon Sep 17 00:00:00 2001 From: Ashok Chandrasekar Date: Tue, 24 Jun 2025 04:51:03 +0000 Subject: [PATCH 2/3] Add comparisons and additional charts --- analysis/analyze.py | 53 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/analysis/analyze.py b/analysis/analyze.py index 3f54032..e3be51d 100644 --- a/analysis/analyze.py +++ b/analysis/analyze.py @@ -68,6 +68,8 @@ def _print_summary(parsed_data_points): print(f" - Avg Per Token Latency: {max_throughput_point['latency']:.2f} ms") if max_throughput_point.get('normalized_latency') is not None: print(f" - Avg Normalized Latency: {max_throughput_point['normalized_latency']:.2f} ms") + if max_throughput_point.get('p90_normalized_latency') is not None: + print(f" - P90 Normalized Latency: {max_throughput_point['p90_normalized_latency']:.2f} ms") if max_throughput_point.get('cost_per_million_tokens') is not None: print(f" - Cost: ${max_throughput_point['cost_per_million_tokens']:.2f} per million tokens") @@ -93,6 +95,7 @@ def _parse_folder_data(folder_path, instance_price_per_hour): throughput = metrics.get("throughput") latency = metrics.get("avg_per_token_latency_ms") normalized_latency = metrics.get("avg_normalized_time_per_output_token_ms") + p90_normalized_latency = metrics.get("p90_normalized_time_per_output_token_ms") request_rate = metrics.get("request_rate") if throughput is not None and request_rate is not None: @@ -100,6 +103,7 @@ def _parse_folder_data(folder_path, instance_price_per_hour): "throughput": throughput, "latency": latency, "normalized_latency": normalized_latency, + "p90_normalized_latency": p90_normalized_latency, "request_rate": request_rate, "filename": filename } 
@@ -117,6 +121,8 @@ def _parse_folder_data(folder_path, instance_price_per_hour):
                     print(f" - Note: 'avg_per_token_latency_ms' not found in {filename}.")
                 if normalized_latency is None:
                     print(f" - Note: 'avg_normalized_time_per_output_token_ms' not found in {filename}.")
+                if p90_normalized_latency is None:
+                    print(f" - Note: 'p90_normalized_time_per_output_token_ms' not found in {filename}.")
             else:
                 missing_core = []
                 if throughput is None: missing_core.append("'throughput'")
@@ -194,12 +200,25 @@ def analyze_and_plot(folder_paths, instance_price_per_hour=None):
     else:
         print("No data available for 'Throughput vs. Normalized Per Token Latency' plot.")
 
-    # --- Plot 3: Cost per Million Output Tokens vs. Normalized Per Token Latency ---
+    # --- Plot 3: Throughput vs. P90 Normalized Per Token Latency ---
+    plot3_data = _prepare_plot_data(all_folders_data, x_key='p90_normalized_latency', y_key='throughput')
+    if plot3_data:
+        _create_line_plot(
+            all_series_data=plot3_data,
+            x_label='P90 Normalized Time Per Output Token (ms)',
+            y_label='Throughput (output tokens/sec)',
+            title='Throughput vs. P90 Normalized Per Token Latency',
+            output_filename_base='throughput_vs_p90_normalized_latency_comparison'
+        )
+    else:
+        print("No data available for 'Throughput vs. P90 Normalized Per Token Latency' plot.")
+
+    # --- Plot 4: Cost per Million Output Tokens vs. Normalized Per Token Latency ---
     if instance_price_per_hour is not None:
-        plot3_data = _prepare_plot_data(all_folders_data, x_key='normalized_latency', y_key='cost_per_million_tokens')
-        if plot3_data:
+        plot4_data = _prepare_plot_data(all_folders_data, x_key='normalized_latency', y_key='cost_per_million_tokens')
+        if plot4_data:
             _create_line_plot(
-                all_series_data=plot3_data,
+                all_series_data=plot4_data,
                 x_label='Average Normalized Time Per Output Token (ms)',
                 y_label='$ per Million Output Tokens',
                 title='Cost per Million Output Tokens vs. Normalized Latency',
@@ -210,6 +229,32 @@ def analyze_and_plot(folder_paths, instance_price_per_hour=None):
     else:
         print("Skipping cost plot as --instance-price-per-hour was not provided.")
 
+    # --- Plot 5: Throughput vs. Request Rate (QPS) ---
+    plot5_data = _prepare_plot_data(all_folders_data, x_key='request_rate', y_key='throughput')
+    if plot5_data:
+        _create_line_plot(
+            all_series_data=plot5_data,
+            x_label='Request Rate (QPS)',
+            y_label='Throughput (output tokens/sec)',
+            title='Throughput vs. Request Rate (QPS)',
+            output_filename_base='throughput_vs_qps_comparison'
+        )
+    else:
+        print("No data available for 'Throughput vs. Request Rate (QPS)' plot.")
+
+    # --- Plot 6: P90 Normalized Latency vs. Request Rate (QPS) ---
+    plot6_data = _prepare_plot_data(all_folders_data, x_key='request_rate', y_key='p90_normalized_latency')
+    if plot6_data:
+        _create_line_plot(
+            all_series_data=plot6_data,
+            x_label='Request Rate (QPS)',
+            y_label='P90 Normalized Time Per Output Token (ms)',
+            title='P90 Normalized Latency vs. Request Rate (QPS)',
+            output_filename_base='p90_latency_vs_qps_comparison'
+        )
+    else:
+        print("No data available for 'P90 Normalized Latency vs. Request Rate (QPS)' plot.")
+
 if __name__ == '__main__':
     # Set up an argument parser to get the folder path from the command line
     parser = argparse.ArgumentParser(description="Parse all benchmark JSON files in one or more folders and generate comparison plots.")

From 9f4f1e0b6e3e56e109b129ecccf9e8fa53a37d2e Mon Sep 17 00:00:00 2001
From: Ashok Chandrasekar
Date: Tue, 24 Jun 2025 05:18:01 +0000
Subject: [PATCH 3/3] Update documentation

---
 analysis/README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/analysis/README.md b/analysis/README.md
index 435a8c0..21dc284 100644
--- a/analysis/README.md
+++ b/analysis/README.md
@@ -11,4 +11,10 @@ To analyze the price-perf results, you can include instance pricing to get a cha
 
 ```
 python analyze.py results/ --instance-price-per-hour 10
+```
+
+To compare latency and throughput metrics across different runs, pass multiple result folders:
+
+```
+python analyze.py run-1/ run-2/ run-3/
 ```
\ No newline at end of file