Add QPS vs throughput, p90 NTPOT plots and comparisons #44

Open · wants to merge 3 commits into main
6 changes: 6 additions & 0 deletions analysis/README.md
@@ -11,4 +11,10 @@ To analyze the price-perf results, you can include instance pricing to get a chart

```
python analyze.py results/ --instance-price-per-hour 10
```

To compare latency and throughput metrics across different runs, pass each run's results folder:

```
python analyze.py run-1/ run-2/ run-3/
```
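
As an aside for reviewers: the two invocations above compose, since `analyze_and_plot` forwards the price flag while parsing every folder. A minimal illustrative sketch (the run folder names are hypothetical):

```
python analyze.py run-1/ run-2/ --instance-price-per-hour 10
```

With the flag present, the cost-per-million-tokens comparison plot is generated alongside the latency and throughput comparisons.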
263 changes: 160 additions & 103 deletions analysis/analyze.py
@@ -3,35 +3,50 @@
import os
import argparse

# Helper function for plotting
def _create_plot(x_data, y_data, c_data, x_label, y_label, c_label_text, title, output_filename_base, annotation_data=None, annotation_format_string="{val} qps"):
"""Helper function to generate and save a scatter plot with configurable annotations."""
if not x_data or not y_data or not c_data: # Check if any essential list is empty
def _create_line_plot(all_series_data, x_label, y_label, title, output_filename_base):
"""
Helper function to generate and save a line plot with multiple series.

Args:
all_series_data (dict): A dict where key is the series label (e.g., folder name)
and value is a dict {'x': [...], 'y': [...]}.
x_label (str): Label for the x-axis.
y_label (str): Label for the y-axis.
title (str): Title of the plot.
output_filename_base (str): Base name for the output PNG file.
"""
if not all_series_data:
print(f"No valid data for '{title}'. Cannot generate plot.")
return

plt.figure(figsize=(10, 6))
scatter = plt.scatter(x_data, y_data, c=c_data, cmap='viridis', s=100, alpha=0.8)
plt.figure(figsize=(12, 7))

plt.title(title)
plt.xlabel(x_label)
plt.ylabel(y_label)
for series_label, series_data in all_series_data.items():
x_data = series_data.get('x')
y_data = series_data.get('y')

if not x_data or not y_data:
print(f"Warning: Skipping series '{series_label}' for plot '{title}' due to missing data.")
continue

cbar = plt.colorbar(scatter)
cbar.set_label(c_label_text)
# To draw a clean line, sort the points based on the x-axis value.
sorted_points = sorted(zip(x_data, y_data))
if not sorted_points:
continue
x_sorted, y_sorted = zip(*sorted_points)

# Annotate each point. Default to c_data if annotation_data is not provided.
data_for_annotation = annotation_data if annotation_data is not None else c_data
for i, val in enumerate(data_for_annotation):
annotation_text = annotation_format_string.format(val=val)
plt.annotate(annotation_text, (x_data[i], y_data[i]), textcoords="offset points", xytext=(0,10), ha='center')
plt.plot(x_sorted, y_sorted, marker='o', linestyle='-', label=series_label)

plt.title(title)
plt.xlabel(x_label)
plt.ylabel(y_label)
plt.legend()
plt.grid(True)
output_filename = f'{output_filename_base}.png'
plt.savefig(output_filename)
print(f"Chart saved to {output_filename}")
plt.show()
plt.close() # Close the figure to free up memory
plt.close()

def _print_summary(parsed_data_points):
"""Prints a summary of the best performing data points."""
@@ -53,28 +68,21 @@ def _print_summary(parsed_data_points):
print(f" - Avg Per Token Latency: {max_throughput_point['latency']:.2f} ms")
if max_throughput_point.get('normalized_latency') is not None:
print(f" - Avg Normalized Latency: {max_throughput_point['normalized_latency']:.2f} ms")
if max_throughput_point.get('p90_normalized_latency') is not None:
print(f" - P90 Normalized Latency: {max_throughput_point['p90_normalized_latency']:.2f} ms")
if max_throughput_point.get('cost_per_million_tokens') is not None:
print(f" - Cost: ${max_throughput_point['cost_per_million_tokens']:.2f} per million tokens")

print("\n-----------------------\n")

def parse_and_plot(folder_path, instance_price_per_hour=None):
"""
Scans a folder for JSON files, extracts benchmark metrics, and plots
various performance and cost metrics.

Args:
folder_path (str): The path to the folder containing the JSON files.
instance_price_per_hour (float, optional): The instance price per hour for cost calculation.
"""
parsed_data_points = [] # Stores dicts of {'throughput', 'latency', 'normalized_latency', 'request_rate'}

def _parse_folder_data(folder_path, instance_price_per_hour):
"""Parses all JSON benchmark files in a single folder."""
parsed_data_points = []
print(f"Scanning folder: {folder_path}")

# Check if the folder path exists and is a directory
if not os.path.isdir(folder_path):
print(f"Error: The provided path '{folder_path}' is not a valid directory.")
return
return parsed_data_points

for filename in os.listdir(folder_path):
if filename.lower().endswith('.json'):
@@ -86,15 +94,16 @@ def parse_and_plot(folder_path, instance_price_per_hour=None):
metrics = data.get("metrics", {})
throughput = metrics.get("throughput")
latency = metrics.get("avg_per_token_latency_ms")
normalized_latency = metrics.get("avg_normalized_time_per_output_token_ms") # New metric
normalized_latency = metrics.get("avg_normalized_time_per_output_token_ms")
p90_normalized_latency = metrics.get("p90_normalized_time_per_output_token_ms")
request_rate = metrics.get("request_rate")

# Core metrics for any plot are throughput and request_rate
if throughput is not None and request_rate is not None:
point_data = {
"throughput": throughput,
"latency": latency, # Can be None
"normalized_latency": normalized_latency, # Can be None
"latency": latency,
"normalized_latency": normalized_latency,
"p90_normalized_latency": p90_normalized_latency,
"request_rate": request_rate,
"filename": filename
}
@@ -104,14 +113,16 @@ def parse_and_plot(folder_path, instance_price_per_hour=None):
cost_per_million_tokens = (instance_price_per_hour * 1000000) / (throughput * 3600)
point_data["cost_per_million_tokens"] = cost_per_million_tokens
elif instance_price_per_hour is not None and throughput == 0:
point_data["cost_per_million_tokens"] = float('inf') # Or handle as an error/skip
point_data["cost_per_million_tokens"] = float('inf')

parsed_data_points.append(point_data)
print(f"Successfully parsed common metrics from {filename}")
if latency is None:
print(f" - Note: 'avg_per_token_latency_ms' not found in {filename}.")
if normalized_latency is None:
print(f" - Note: 'avg_normalized_time_per_output_token_ms' not found in {filename}.")
if p90_normalized_latency is None:
print(f" - Note: 'p90_normalized_time_per_output_token_ms' not found in {filename}.")
else:
missing_core = []
if throughput is None: missing_core.append("'throughput'")
@@ -123,89 +134,135 @@ def parse_and_plot(folder_path, instance_price_per_hour=None):
except Exception as e:
print(f"An unexpected error occurred with {file_path}: {e}")

if not parsed_data_points:
print("No data points with core metrics (throughput, request_rate) were parsed. Cannot generate any plots.")
return parsed_data_points

def _prepare_plot_data(all_folders_data, x_key, y_key):
"""Prepares data for plotting from the parsed folder data."""
plot_data = {}
for folder_name, points in all_folders_data.items():
valid_points = [p for p in points if p.get(x_key) is not None and p.get(y_key) is not None]
if valid_points:
plot_data[folder_name] = {
'x': [p[x_key] for p in valid_points],
'y': [p[y_key] for p in valid_points]
}
return plot_data

def analyze_and_plot(folder_paths, instance_price_per_hour=None):
"""
Scans one or more folders for JSON files, extracts benchmark metrics,
and plots comparison charts with each folder as a separate line.

Args:
folder_paths (list[str]): A list of paths to folders containing JSON files.
instance_price_per_hour (float, optional): The instance price per hour for cost calculation.
"""
all_folders_data = {}

for folder_path in folder_paths:
parsed_data = _parse_folder_data(folder_path, instance_price_per_hour)
if parsed_data:
folder_name = os.path.basename(os.path.normpath(folder_path))
all_folders_data[folder_name] = parsed_data

if not all_folders_data:
print("No data points were parsed from any folder. Cannot generate plots.")
return

# Print summary before generating plots
_print_summary(parsed_data_points)
# Print summaries for each folder
for folder_name, parsed_data in all_folders_data.items():
print(f"\n--- Summary for: {folder_name} ---")
_print_summary(parsed_data)

# --- Plot 1: Throughput vs. Per Token Latency ---
plot1_throughputs = []
plot1_latencies = []
plot1_request_rates = []
for point in parsed_data_points:
if point["latency"] is not None:
plot1_throughputs.append(point["throughput"])
plot1_latencies.append(point["latency"])
plot1_request_rates.append(point["request_rate"])

_create_plot(
x_data=plot1_latencies,
y_data=plot1_throughputs,
c_data=plot1_request_rates,
x_label='Average Per Token Latency (ms)',
y_label='Throughput (output tokens/sec)',
c_label_text='Request Rate (QPS)',
title='Throughput vs. Per Token Latency',
output_filename_base='throughput_vs_latency'
)
plot1_data = _prepare_plot_data(all_folders_data, x_key='latency', y_key='throughput')
if plot1_data:
_create_line_plot(
all_series_data=plot1_data,
x_label='Average Per Token Latency (ms)',
y_label='Throughput (output tokens/sec)',
title='Throughput vs. Per Token Latency',
output_filename_base='throughput_vs_latency_comparison'
)
else:
print("No data available for 'Throughput vs. Per Token Latency' plot.")

# --- Plot 2: Throughput vs. Normalized Per Token Latency ---
plot2_throughputs = []
plot2_normalized_latencies = []
plot2_request_rates = []
for point in parsed_data_points:
if point["normalized_latency"] is not None:
plot2_throughputs.append(point["throughput"])
plot2_normalized_latencies.append(point["normalized_latency"])
plot2_request_rates.append(point["request_rate"])

_create_plot(
x_data=plot2_normalized_latencies,
y_data=plot2_throughputs,
c_data=plot2_request_rates,
x_label='Average Normalized Time Per Output Token (ms)',
y_label='Throughput (output tokens/sec)',
c_label_text='Request Rate (QPS)',
title='Throughput vs. Normalized Per Token Latency',
output_filename_base='throughput_vs_normalized_latency'
)

# --- Plot 3: Cost per Million Output Tokens vs. Normalized Per Token Latency ---
if instance_price_per_hour is not None:
plot3_normalized_latencies = []
plot3_costs_per_million_tokens = []
plot3_request_rates = []
for point in parsed_data_points:
if point.get("normalized_latency") is not None and point.get("cost_per_million_tokens") is not None:
plot3_normalized_latencies.append(point["normalized_latency"])
plot3_costs_per_million_tokens.append(point["cost_per_million_tokens"])
plot3_request_rates.append(point["request_rate"])

_create_plot(
x_data=plot3_normalized_latencies,
y_data=plot3_costs_per_million_tokens,
c_data=plot3_request_rates,
plot2_data = _prepare_plot_data(all_folders_data, x_key='normalized_latency', y_key='throughput')
if plot2_data:
_create_line_plot(
all_series_data=plot2_data,
x_label='Average Normalized Time Per Output Token (ms)',
y_label='$ per Million Output Tokens',
c_label_text='Request Rate (QPS)',
title='Cost per Million Output Tokens vs. Normalized Latency',
output_filename_base='cost_vs_normalized_latency',
annotation_data=plot3_costs_per_million_tokens,
annotation_format_string="${val:.2f}"
y_label='Throughput (output tokens/sec)',
title='Throughput vs. Normalized Per Token Latency',
output_filename_base='throughput_vs_normalized_latency_comparison'
)
else:
print("No data available for 'Throughput vs. Normalized Per Token Latency' plot.")

# --- Plot 3: Throughput vs. P90 Normalized Per Token Latency ---
plot3_data = _prepare_plot_data(all_folders_data, x_key='p90_normalized_latency', y_key='throughput')
if plot3_data:
_create_line_plot(
all_series_data=plot3_data,
x_label='P90 Normalized Time Per Output Token (ms)',
y_label='Throughput (output tokens/sec)',
title='Throughput vs. P90 Normalized Per Token Latency',
output_filename_base='throughput_vs_p90_normalized_latency_comparison'
)
else:
print("No data available for 'Throughput vs. P90 Normalized Per Token Latency' plot.")

# --- Plot 4: Cost per Million Output Tokens vs. Normalized Per Token Latency ---
if instance_price_per_hour is not None:
plot4_data = _prepare_plot_data(all_folders_data, x_key='normalized_latency', y_key='cost_per_million_tokens')
if plot4_data:
_create_line_plot(
all_series_data=plot4_data,
x_label='Average Normalized Time Per Output Token (ms)',
y_label='$ per Million Output Tokens',
title='Cost per Million Output Tokens vs. Normalized Latency',
output_filename_base='cost_vs_normalized_latency_comparison'
)
else:
print("No data available for 'Cost vs. Normalized Latency' plot.")
else:
print("Skipping cost plot as --instance-price-per-hour was not provided.")

# --- Plot 5: Throughput vs. Request Rate (QPS) ---
plot5_data = _prepare_plot_data(all_folders_data, x_key='request_rate', y_key='throughput')
if plot5_data:
_create_line_plot(
all_series_data=plot5_data,
x_label='Request Rate (QPS)',
y_label='Throughput (output tokens/sec)',
title='Throughput vs. Request Rate (QPS)',
output_filename_base='throughput_vs_qps_comparison'
)
else:
print("No data available for 'Throughput vs. Request Rate (QPS)' plot.")

# --- Plot 6: P90 Normalized Latency vs. Request Rate (QPS) ---
plot6_data = _prepare_plot_data(all_folders_data, x_key='request_rate', y_key='p90_normalized_latency')
if plot6_data:
_create_line_plot(
all_series_data=plot6_data,
x_label='Request Rate (QPS)',
y_label='P90 Normalized Time Per Output Token (ms)',
title='P90 Normalized Latency vs. Request Rate (QPS)',
output_filename_base='p90_latency_vs_qps_comparison'
)
else:
print("No data available for 'P90 Normalized Latency vs. Request Rate (QPS)' plot.")

if __name__ == '__main__':
# Set up an argument parser to get the folder paths from the command line
parser = argparse.ArgumentParser(description="Parse all benchmark JSON files in a folder and generate a plot.")
parser.add_argument("folder_path", type=str, help="The path to the folder containing the JSON files.")
parser = argparse.ArgumentParser(description="Parse all benchmark JSON files in one or more folders and generate comparison plots.")
parser.add_argument("folder_paths", type=str, nargs='+', help="One or more paths to folders containing the JSON files.")
parser.add_argument("--instance-price-per-hour", type=float, default=None,
help="Optional: Instance price per hour (e.g., 2.50 for $2.50/hour) to calculate cost per million tokens.")

args = parser.parse_args()

# Call the function with the folder path provided by the user
parse_and_plot(args.folder_path, args.instance_price_per_hour)
# Call the function with the folder paths provided by the user
analyze_and_plot(args.folder_paths, args.instance_price_per_hour)
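
For anyone trying this change locally, here is a minimal sketch of the per-file input that `_parse_folder_data` expects, inferred only from the `metrics.get(...)` keys above (the benchmark harness may emit more fields; the folder and file names below are hypothetical):

```
import json
import os

# One JSON file per load point; "throughput" and "request_rate" are required
# for a point to be plotted, while the latency fields are optional.
os.makedirs("run-1", exist_ok=True)
sample = {
    "metrics": {
        "throughput": 1250.0,                              # output tokens/sec
        "request_rate": 4.0,                               # QPS
        "avg_per_token_latency_ms": 42.0,
        "avg_normalized_time_per_output_token_ms": 38.5,
        "p90_normalized_time_per_output_token_ms": 55.0,
    }
}
with open("run-1/qps_4.json", "w") as f:
    json.dump(sample, f)

# With --instance-price-per-hour 10, this point would be priced at
# (10 * 1_000_000) / (1250.0 * 3600) ≈ $2.22 per million output tokens,
# matching the cost formula used in _parse_folder_data.
```

Running `python analyze.py run-1/ run-2/ --instance-price-per-hour 10` over such folders should then write the comparison PNGs named by the `output_filename_base` arguments above.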