From eb1ee330f0b480d52873f23da7683a5124c3b523 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Thu, 23 Oct 2025 14:44:22 -0700 Subject: [PATCH 01/10] Add the web frontend (#182) Summary: Pull Request resolved: https://github.com/meta-pytorch/tritonparse/pull/182 Differential Revision: D85376311 --- website/src/App.tsx | 27 +++++++++++- website/src/pages/IRAnalysis.tsx | 75 ++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 website/src/pages/IRAnalysis.tsx diff --git a/website/src/App.tsx b/website/src/App.tsx index c6d8d2c..75530ba 100644 --- a/website/src/App.tsx +++ b/website/src/App.tsx @@ -12,6 +12,7 @@ import CodeView from "./pages/CodeView"; import FileDiffView from "./pages/FileDiffView"; import SingleCodeViewer from "./components/SingleCodeViewer"; import KernelOverview from "./pages/KernelOverview"; +import IRAnalysis from "./pages/IRAnalysis"; import DataSourceSelector from "./components/DataSourceSelector"; import WelcomeScreen from "./components/WelcomeScreen"; import ExternalLink from "./components/ExternalLink"; @@ -409,7 +410,7 @@ function App() { ); } else { - // Show either overview, IR code, or file diff based on active tab + // Show either overview, IR code, IR analysis, or file diff based on active tab if (activeTab === "overview") { return ( ); } + if (activeTab === "ir_analysis") { + return ( + + ); + } if (activeTab === "comparison") { return ( File Diff + diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx new file mode 100644 index 0000000..386bc38 --- /dev/null +++ b/website/src/pages/IRAnalysis.tsx @@ -0,0 +1,75 @@ +import React from "react"; +import { ProcessedKernel } from "../utils/dataLoader"; + +interface IRAnalysisProps { + kernels: ProcessedKernel[]; + selectedKernel: number; +} + +const formatMetadataValue = (value: any): string => { + if (value === null) { + return "null"; + } + if (typeof value === "boolean") { + return value ? "true" : "false"; + } + if (Array.isArray(value)) { + return JSON.stringify(value); + } + if (typeof value === "object") { + return JSON.stringify(value); + } + return String(value); +}; + +interface MetadataItemProps { + label: string; + value: React.ReactNode; +} + +const MetadataItem: React.FC = ({ label, value }) => ( +
+ {label} + {value} +
+); + +const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { + if (kernels.length === 0) { + return ( +
+
No kernel data available
+
+ ); + } + + const kernel = kernels[selectedKernel]; + + return ( +
+

Triton Kernel IR Analysis

+ +
+

+ Kernel: {kernel.name} +

+ +
+

+ The IR analysis provides helpful insights into important kernel properties + that were derived from the IR. +

+
+ +
+

+ IR analysis data will be displayed here when available in the + kernel data structure. +

+
+
+
+ ); +}; + +export default IRAnalysis; From 17fb4009dce52799ac68ff9af2fb45c2101650d3 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Fri, 24 Oct 2025 17:11:23 -0700 Subject: [PATCH 02/10] Updated website, need devmate to improve code --- tritonparse/ir_analysis.py | 11 +++-- tritonparse/trace_processor.py | 11 +++-- website/src/App.tsx | 3 +- website/src/pages/IRAnalysis.tsx | 71 ++++++++++++++++---------------- website/src/utils/dataLoader.ts | 16 +++++++ 5 files changed, 68 insertions(+), 44 deletions(-) diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py index 20addd3..3f5a183 100644 --- a/tritonparse/ir_analysis.py +++ b/tritonparse/ir_analysis.py @@ -40,7 +40,7 @@ def process_amd_gcn_bufferops( ) -> dict[str, int]: ir_content = load_ir_contents(key, file_content, file_path) # TODO: Add atomics - io_keys = ["global_load_", "global_store_", "buffer_load_", "buffer_store_"] + io_keys = ["global_load", "global_store", "buffer_load", "buffer_store"] return process_amd_bufferop(ir_content, io_keys) @@ -64,9 +64,12 @@ def _generate_ir_analysis(entry: str): gcn_bufferops_info = process_amd_gcn_bufferops( amdgcn_key, file_content, file_path ) + io_counts = {} # NDJSON format requires a newline at the end of each line if ttgir_bufferops_info: - ir_analysis["amd_ttgir_bufferops_count"] = ttgir_bufferops_info + io_counts["amd_ttgir_bufferops_count"] = ttgir_bufferops_info if gcn_bufferops_info: - ir_analysis["amd_gcn_bufferops_count"] = gcn_bufferops_info - return {"ir_analysis": ir_analysis} + io_counts["amd_gcn_bufferops_count"] = gcn_bufferops_info + if io_counts: + ir_analysis["io_counts"] = io_counts + return ir_analysis diff --git a/tritonparse/trace_processor.py b/tritonparse/trace_processor.py index b321e6b..e797dae 100644 --- a/tritonparse/trace_processor.py +++ b/tritonparse/trace_processor.py @@ -299,10 +299,15 @@ def parse_single_file( ) if compilation_event: - ir_analysis_event = _generate_ir_analysis(compilation_event) + ir_analysis = _generate_ir_analysis(compilation_event) if ir_analysis_event: - all_output_lines[output_file].append( - json.dumps(ir_analysis_event, separators=(",", ":")) + "\n" + ir_analysis_event = { + "event_type": "ir_analysis", + "hash": _kernel_hash, + "ir_analysis": ir_analysis, + } + all_output_lines[output_file].append( + json.dumps(ir_analysis_event, separators=(",", ":")) + "\n" ) if compilation_event and launches_with_indices: diff --git a/website/src/App.tsx b/website/src/App.tsx index 75530ba..142812f 100644 --- a/website/src/App.tsx +++ b/website/src/App.tsx @@ -559,7 +559,6 @@ function App() { )} - + {dataLoaded && kernels.length > 0 && ( + )} diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx index 386bc38..2412b52 100644 --- a/website/src/pages/IRAnalysis.tsx +++ b/website/src/pages/IRAnalysis.tsx @@ -6,34 +6,6 @@ interface IRAnalysisProps { selectedKernel: number; } -const formatMetadataValue = (value: any): string => { - if (value === null) { - return "null"; - } - if (typeof value === "boolean") { - return value ? "true" : "false"; - } - if (Array.isArray(value)) { - return JSON.stringify(value); - } - if (typeof value === "object") { - return JSON.stringify(value); - } - return String(value); -}; - -interface MetadataItemProps { - label: string; - value: React.ReactNode; -} - -const MetadataItem: React.FC = ({ label, value }) => ( -
- {label} - {value} -
-); - const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { if (kernels.length === 0) { return ( @@ -44,6 +16,16 @@ const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { } const kernel = kernels[selectedKernel]; + if (kernel.ir_analysis === null) { + return ( +
+
No IR Analysis available
+
+ ); + } + const io_counts = kernel.ir_analysis!.io_counts + const ttgir_info = kernel.ir_analysis!.io_counts!["amd_ttgir_bufferops_count"]; + const amdgcn_info = kernel.ir_analysis!.io_counts!["amd_gcn_bufferops_count"]; return (
@@ -54,17 +36,34 @@ const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { Kernel: {kernel.name} -
-

- The IR analysis provides helpful insights into important kernel properties - that were derived from the IR. -

-
+

+ AMD BufferOps Information: +

- IR analysis data will be displayed here when available in the - kernel data structure. + Tiled Buffer Load Count: {ttgir_info["tt.load_count"]} +

+

+ Tiled Buffer Store Count: {ttgir_info["tt.store_count"]} +

+

+ Tiled Global Load Count: {ttgir_info["amdgpu.buffer_load_count"]} +

+

+ Tiled Global Store Count:{ttgir_info["amdgpu.buffer_store_count"]} +

+

+ AMDGCN Buffer Load Instruction Count: {amdgcn_info["global_load_count"]} +

+

+ AMDGCN Buffer Store Instruction Count: {amdgcn_info["global_store_count"]} +

+

+ AMDGCN Global Load Instruction Count: {amdgcn_info["buffer_load_count"]} +

+

+ AMDGCN Global Store Instruction Count: {amdgcn_info["buffer_store_count"]}

diff --git a/website/src/utils/dataLoader.ts b/website/src/utils/dataLoader.ts index 924f990..c064e49 100644 --- a/website/src/utils/dataLoader.ts +++ b/website/src/utils/dataLoader.ts @@ -167,6 +167,11 @@ export interface CompilationMetadata { [key: string]: any; // Allow additional unknown fields } +export interface IRAnalysisData { + // Mapping from IR stage -> count> + io_counts?: Record>; +} + /** * Extracted argument information */ @@ -224,6 +229,7 @@ export interface LogEntry { launch_index_map?: LaunchRange[]; diffs?: LaunchDiffData; sames?: LaunchSamesData; + ir_analysis?: IRAnalysisData; // Stored IR Analysis information. } /** @@ -239,6 +245,7 @@ export interface ProcessedKernel { pythonSourceInfo?: PythonSourceCodeInfo; // Python source code information metadata?: KernelMetadata; // Compilation metadata launchDiff?: LogEntry; // Aggregated launch event differences + ir_analysis?: IRAnalysisData; // Stored IR Analysis information. } /** @@ -503,6 +510,15 @@ export function processKernelData(logEntries: LogEntry[]): ProcessedKernel[] { console.warn(`Could not find matching kernel for launch_diff hash: ${hash}`); } } + if (entry.event_type === "ir_analysis") { + const hash = entry.hash; + if (hash && kernelsByHash.has(hash)) { + const kernel = kernelsByHash.get(hash)!; + kernel.ir_analysis = entry.ir_analysis!; // Attach the ir_analysis + } else { + console.warn(`Could not find matching kernel for ir_analysis hash: ${hash}`); + } + } } const finalKernels = Array.from(kernelsByHash.values()); From 800d176aeabf322ad53929ef989d344808fd4dc2 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Sat, 25 Oct 2025 18:19:40 -0400 Subject: [PATCH 03/10] Cleaned up the code --- website/src/pages/IRAnalysis.tsx | 87 ++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx index 2412b52..9bac4f4 100644 --- a/website/src/pages/IRAnalysis.tsx +++ b/website/src/pages/IRAnalysis.tsx @@ -23,9 +23,10 @@ const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { ); } - const io_counts = kernel.ir_analysis!.io_counts - const ttgir_info = kernel.ir_analysis!.io_counts!["amd_ttgir_bufferops_count"]; - const amdgcn_info = kernel.ir_analysis!.io_counts!["amd_gcn_bufferops_count"]; + + const io_counts = kernel.ir_analysis?.io_counts; + const ttgir_info = io_counts?.["amd_ttgir_bufferops_count"]; + const amdgcn_info = io_counts?.["amd_gcn_bufferops_count"]; return (
@@ -36,36 +37,58 @@ const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { Kernel: {kernel.name} -

- AMD BufferOps Information: -

+ {io_counts && (ttgir_info || amdgcn_info) && ( + <> +

+ AMD BufferOps Information +

-
-

- Tiled Buffer Load Count: {ttgir_info["tt.load_count"]} -

-

- Tiled Buffer Store Count: {ttgir_info["tt.store_count"]} -

-

- Tiled Global Load Count: {ttgir_info["amdgpu.buffer_load_count"]} -

-

- Tiled Global Store Count:{ttgir_info["amdgpu.buffer_store_count"]} -

-

- AMDGCN Buffer Load Instruction Count: {amdgcn_info["global_load_count"]} -

-

- AMDGCN Buffer Store Instruction Count: {amdgcn_info["global_store_count"]} -

-

- AMDGCN Global Load Instruction Count: {amdgcn_info["buffer_load_count"]} -

-

- AMDGCN Global Store Instruction Count: {amdgcn_info["buffer_store_count"]} -

-
+
+
+ {ttgir_info && ( + <> +
+ Tiled Buffer Load Count + {ttgir_info["tt.load_count"] ?? "NaN"} +
+
+ Tiled Buffer Store Count + {ttgir_info["tt.store_count"] ?? "NaN"} +
+
+ Tiled Global Load Count + {ttgir_info["amdgpu.buffer_load_count"] ?? "NaN"} +
+
+ Tiled Global Store Count + {ttgir_info["amdgpu.buffer_store_count"] ?? "NaN"} +
+ + )} + {amdgcn_info && ( + <> +
+ AMDGCN Buffer Load Instruction Count + {amdgcn_info["global_load_count"] ?? "NaN"} +
+
+ AMDGCN Buffer Store Instruction Count + {amdgcn_info["global_store_count"] ?? "NaN"} +
+
+ AMDGCN Global Load Instruction Count + {amdgcn_info["buffer_load_count"] ?? "NaN"} +
+
+ AMDGCN Global Store Instruction Count + {amdgcn_info["buffer_store_count"] ?? "NaN"} +
+ + )} +
+
+ + )}
); From 6f5043da4b44e96f493b5118d9a5c69e8fbe9411 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Sat, 25 Oct 2025 18:34:29 -0400 Subject: [PATCH 04/10] Fixed the formatting issues --- tritonparse/trace_processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tritonparse/trace_processor.py b/tritonparse/trace_processor.py index e797dae..904e052 100644 --- a/tritonparse/trace_processor.py +++ b/tritonparse/trace_processor.py @@ -300,14 +300,14 @@ def parse_single_file( if compilation_event: ir_analysis = _generate_ir_analysis(compilation_event) - if ir_analysis_event: + if ir_analysis: ir_analysis_event = { "event_type": "ir_analysis", "hash": _kernel_hash, "ir_analysis": ir_analysis, } - all_output_lines[output_file].append( - json.dumps(ir_analysis_event, separators=(",", ":")) + "\n" + all_output_lines[output_file].append( + json.dumps(ir_analysis_event, separators=(",", ":")) + "\n" ) if compilation_event and launches_with_indices: From 9074bcfdab16aa88ea8c682c24e82f2ea728f78f Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Mon, 27 Oct 2025 11:39:40 -0400 Subject: [PATCH 05/10] Applied feedback --- website/src/pages/IRAnalysis.tsx | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx index 9bac4f4..39d97e5 100644 --- a/website/src/pages/IRAnalysis.tsx +++ b/website/src/pages/IRAnalysis.tsx @@ -27,6 +27,7 @@ const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { const io_counts = kernel.ir_analysis?.io_counts; const ttgir_info = io_counts?.["amd_ttgir_bufferops_count"]; const amdgcn_info = io_counts?.["amd_gcn_bufferops_count"]; + const getCount = (info: Record | undefined, key: string): string => { return info?.[key]?.toString() ?? "N/A"; }; return (
@@ -49,39 +50,39 @@ const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { <>
Tiled Buffer Load Count - {ttgir_info["tt.load_count"] ?? "NaN"} + {getCount(ttgir_info, "tt.load_count")}
Tiled Buffer Store Count - {ttgir_info["tt.store_count"] ?? "NaN"} + {getCount(ttgir_info, "tt.store_count")}
Tiled Global Load Count - {ttgir_info["amdgpu.buffer_load_count"] ?? "NaN"} + {getCount(ttgir_info, "amdgpu.buffer_load_count")}
Tiled Global Store Count - {ttgir_info["amdgpu.buffer_store_count"] ?? "NaN"} + {getCount(ttgir_info, "amdgpu.buffer_store_count")}
)} {amdgcn_info && ( <>
- AMDGCN Buffer Load Instruction Count - {amdgcn_info["global_load_count"] ?? "NaN"} + AMDGCN Global Load Instruction Count + {getCount(amdgcn_info, "global_load_count")}
- AMDGCN Buffer Store Instruction Count - {amdgcn_info["global_store_count"] ?? "NaN"} + AMDGCN Global Store Instruction Count + {getCount(amdgcn_info, "global_store_count")}
- AMDGCN Global Load Instruction Count - {amdgcn_info["buffer_load_count"] ?? "NaN"} + AMDGCN Buffer Load Instruction Count + {getCount(amdgcn_info, "buffer_load_count")}
- AMDGCN Global Store Instruction Count - {amdgcn_info["buffer_store_count"] ?? "NaN"} + AMDGCN Buffer Store Instruction Count + {getCount(amdgcn_info, "buffer_store_count")}
)} From 4935cc306bcd15df127de441447fbf3731f574d1 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Mon, 27 Oct 2025 13:53:52 -0400 Subject: [PATCH 06/10] Added the logic for displaying software pipelining decisions --- tritonparse/ir_analysis.py | 343 ++++++++++++++++++++++++++++++- website/src/pages/IRAnalysis.tsx | 73 ++++++- website/src/utils/dataLoader.ts | 1 + 3 files changed, 413 insertions(+), 4 deletions(-) diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py index 3f5a183..5219c22 100644 --- a/tritonparse/ir_analysis.py +++ b/tritonparse/ir_analysis.py @@ -44,20 +44,336 @@ def process_amd_gcn_bufferops( return process_amd_bufferop(ir_content, io_keys) +def find_loop_bounds(ir_content: str) -> list[tuple[int, int]]: + """ + Find the bounds of all scf.for loops in the IR content. + These are the only candidates for Software Pipelining (SWP). + + A loop starts with 'scf.for' and ends when its closing brace '}' is found. + Brace counts are tracked to determine when each loop closes. + + Args: + ir_content: The IR content as a string. + + Returns: + A list of tuples (start_line, end_line) for each scf.for loop found. + Line numbers are 0-indexed. + """ + if not ir_content: + return [] + + loop_bounds: list[tuple[int, int]] = [] + lines = ir_content.split("\n") + + # Stack to track loop starts and their brace counts + # Each entry is (start_line, brace_count_at_start) + loop_stack: list[tuple[int, int]] = [] + current_brace_count = 0 + + for line_idx, line in enumerate(lines): + # Check if this line starts a new scf.for loop + if "scf.for" in line: + loop_stack.append((line_idx, current_brace_count)) + + # Count braces on this line + for char in line: + if char == "{": + current_brace_count += 1 + elif char == "}": + current_brace_count -= 1 + + # Check if we've closed any loops + while loop_stack and current_brace_count <= loop_stack[-1][1]: + start_line, _start_brace_count = loop_stack.pop() + # The loop ends at this line + loop_bounds.append((start_line, line_idx)) + + return loop_bounds + + +def find_inner_loop_bounds(ir_content: str) -> list[tuple[int, int]]: + """ + Find the bounds of inner scf.for loops (loops without nested loops inside). + + Inner loops are the primary candidates for Software Pipelining (SWP) as they + represent the innermost computation that can be optimized. + + Args: + ir_content: The IR content as a string. + + Returns: + A list of tuples (start_line, end_line) for each inner scf.for loop found. + Line numbers are 0-indexed. + """ + all_loops = find_loop_bounds(ir_content) + + if not all_loops: + return [] + + # Filter to keep only inner loops (loops that don't contain other loops) + inner_loops: list[tuple[int, int]] = [] + + for i, (start_i, end_i) in enumerate(all_loops): + # Check if any other loop is nested inside this loop + has_nested_loop = False + for j, (start_j, end_j) in enumerate(all_loops): + if i != j: + # Check if loop j is nested inside loop i + if start_i < start_j and end_j < end_i: + has_nested_loop = True + break + + # If no nested loops found, this is an inner loop + if not has_nested_loop: + inner_loops.append((start_i, end_i)) + + return inner_loops + + +def find_loop_pipelining( + ttir_content: str, + ttgir_content: str, + ttir_loop_start: int, + ttir_loop_end: int, + ttir_to_ttgir_mapping: dict[str, dict], + ttgir_to_source_mapping: dict[str, dict], + python_source_content: str | None, + python_source_start_line: int, +) -> dict[str, list[str]]: + """ + Find pipelining information for a specific loop by identifying tt.load and tt.dot operations + in TTIR and mapping them to their corresponding operations in the original Python source code. + + For each tt.load or tt.dot operation found in the TTIR loop, this function uses source + mappings to find the corresponding operations in TTGIR, then maps them back to the original + Python source code. Operations are categorized into three sections: + - prologue: Operations that appear before the loop body + - loop_body: Operations that appear within the loop body + - epilogue: Operations that appear after the loop body + + Operations are merged together (both loads and dots) and sorted in program order + within each section. + + Args: + ttir_content: The TTIR content as a string. + ttgir_content: The TTGIR content as a string. + ttir_loop_start: The starting line number of the loop in TTIR (0-indexed). + ttir_loop_end: The ending line number of the loop in TTIR (0-indexed). + ttir_to_ttgir_mapping: Source mapping from TTIR lines to TTGIR lines. + ttgir_to_source_mapping: Source mapping from TTGIR lines to original Python source. + python_source_content: The original Python source code content. + + Returns: + A dictionary containing: + - "prologue": List of Python source line strings in program order + - "loop_body": List of Python source line strings in program order + - "epilogue": List of Python source line strings in program order + """ + if not ttir_content or not ttgir_content: + return { + "prologue": [], + "loop_body": [], + "epilogue": [], + } + + ttir_lines = ttir_content.split("\n") + ttgir_lines = ttgir_content.split("\n") + python_lines = python_source_content.split("\n") if python_source_content else [] + + # Step 1: Find tt.load and tt.dot operations in TTIR loop + ttir_operations: list[tuple[str, int]] = [] + + for line_idx in range(ttir_loop_start, min(ttir_loop_end + 1, len(ttir_lines))): + line = ttir_lines[line_idx] + if "tt.load" in line: + ttir_operations.append(("tt.load", line_idx)) + if "tt.dot" in line: + ttir_operations.append(("tt.dot", line_idx)) + + # Step 2: Find the corresponding loop in TTGIR using source mappings + # Map the TTIR loop bounds to TTGIR using source mappings + ttgir_inner_loops = find_inner_loop_bounds(ttgir_content) + + if not ttgir_inner_loops: + # No loop found in TTGIR, return empty results + return { + "prologue": [], + "loop_body": [], + "epilogue": [], + } + + # Use the first inner loop as the reference + # TODO: Implement more sophisticated mapping logic to match TTIR loops to TTGIR loops + ttgir_loop_start, ttgir_loop_end = ttgir_inner_loops[0] + + # Step 3: Map TTIR operations to TTGIR operations using source mappings + # and categorize them by their position relative to the TTGIR loop + # Store as (line_number, source_line) to maintain order before extracting just the source + prologue_ops: list[tuple[int, str]] = [] + loop_body_ops: list[tuple[int, str]] = [] + epilogue_ops: list[tuple[int, str]] = [] + + for op_type, ttir_line in ttir_operations: + # Convert 0-indexed line to 1-indexed string key for mapping lookup + ttir_line_key = str(ttir_line + 1) + + # Get the corresponding TTGIR lines from the source mapping + if ttir_line_key in ttir_to_ttgir_mapping: + ttgir_lines_list = ttir_to_ttgir_mapping[ttir_line_key].get( + "ttgir_lines", [] + ) + + # For each mapped TTGIR line, categorize it + for ttgir_line in ttgir_lines_list: + # Convert back to 0-indexed + ttgir_line_idx = ttgir_line - 1 + + # Get the actual TTGIR line content to check if it's relevant + if ttgir_line_idx < len(ttgir_lines): + ttgir_source_line = ttgir_lines[ttgir_line_idx].strip() + + # Only keep lines with specific operations + relevant_ops = [ + "tt.load", + "tt.dot", + "async_copy_global_to_local", + "warp_group_dot", + ] + if any(op in ttgir_source_line for op in relevant_ops): + # Map TTGIR line back to Python source + ttgir_line_key = str(ttgir_line) + python_source_line = ttgir_source_line # Default to TTGIR line + + if ttgir_line_key in ttgir_to_source_mapping: + source_info = ttgir_to_source_mapping[ttgir_line_key] + python_line_num = source_info.get("line") + + if python_line_num and python_lines: + # Account for the offset: the Python source may not start at line 1 + # python_line_num is the absolute line number in the original file + # python_source_start_line is where the extracted code starts + # So we need to subtract the offset to get the index in our python_lines array + python_line_idx = ( + python_line_num - python_source_start_line + ) + if 0 <= python_line_idx < len(python_lines): + python_source_line = python_lines[ + python_line_idx + ].strip() + + if ttgir_line_idx < ttgir_loop_start: + prologue_ops.append((ttgir_line_idx, python_source_line)) + elif ttgir_loop_start <= ttgir_line_idx <= ttgir_loop_end: + loop_body_ops.append((ttgir_line_idx, python_source_line)) + else: + epilogue_ops.append((ttgir_line_idx, python_source_line)) + + # Step 4: Sort each section by line number to maintain program order + prologue_ops.sort(key=lambda x: x[0]) + loop_body_ops.sort(key=lambda x: x[0]) + epilogue_ops.sort(key=lambda x: x[0]) + + # Extract just the source lines (without line numbers) + prologue_lines = [line for _, line in prologue_ops] + loop_body_lines = [line for _, line in loop_body_ops] + epilogue_lines = [line for _, line in epilogue_ops] + + # Log the pipelining results + logger.info( + f"Loop pipelining results (TTIR lines {ttir_loop_start}-{ttir_loop_end}):" + ) + logger.info(f" Prologue ({len(prologue_lines)} ops):") + for line in prologue_lines: + logger.info(f" {line}") + logger.info(f" Loop Body ({len(loop_body_lines)} ops):") + for line in loop_body_lines: + logger.info(f" {line}") + logger.info(f" Epilogue ({len(epilogue_lines)} ops):") + for line in epilogue_lines: + logger.info(f" {line}") + + return { + "prologue": prologue_lines, + "loop_body": loop_body_lines, + "epilogue": epilogue_lines, + } + + +def generate_loop_schedule( + ttir_key: str, + ttgir_key: str, + file_content: dict[str, str], + file_path: dict[str, str], + source_mappings: dict[str, dict], + python_source_content: str | None, + python_source_start_line: int, +) -> list[dict]: + """ + Generate loop schedule information by finding inner scf.for loops in TTIR + and analyzing their pipelining potential using source mappings. + + Only inner loops (loops without nested loops) are considered as they are + the primary candidates for Software Pipelining (SWP). + + Args: + ttir_key: Key for the TTIR file. + ttgir_key: Key for the TTGIR file. + file_content: Dictionary mapping file keys to content. + file_path: Dictionary mapping file keys to file paths. + source_mappings: Dictionary containing source mappings between IR stages. + python_source_content: The original Python source code content. + python_source_start_line: The starting line number of the Python source in the original file. + + Returns: + A list of dictionaries, each containing: + - "loop_bounds": Tuple of (start_line, end_line) for the loop in TTIR + - "pipelining": Dictionary with Python source lines for operations + """ + ttir_content = load_ir_contents(ttir_key, file_content, file_path) + ttgir_content = load_ir_contents(ttgir_key, file_content, file_path) + + # Get the TTIR to TTGIR mapping and TTGIR to source mapping + ttir_to_ttgir_mapping = source_mappings.get("ttir", {}) + ttgir_to_source_mapping = source_mappings.get("ttgir", {}) + + # Find only inner loops (loops without nested loops inside) + inner_loop_bounds = find_inner_loop_bounds(ttir_content) + + # For each inner loop, find pipelining information + loop_schedules = [] + for loop_start, loop_end in inner_loop_bounds: + pipelining_info = find_loop_pipelining( + ttir_content, + ttgir_content, + loop_start, + loop_end, + ttir_to_ttgir_mapping, + ttgir_to_source_mapping, + python_source_content, + python_source_start_line, + ) + loop_schedules.append(pipelining_info) + + return loop_schedules + + def _generate_ir_analysis(entry: str): payload = entry.setdefault("payload", {}) file_content = payload.get("file_content", {}) file_path = payload.get("file_path", {}) + source_mappings = payload.get("source_mappings", {}) # Find the IR file keys + ttir_key = next((k for k in file_content if k.endswith(".ttir")), None) ttgir_key = next((k for k in file_content if k.endswith(".ttgir")), None) amdgcn_key = next((k for k in file_content if k.endswith(".amdgcn")), None) # Skip if no IR files found - if not (ttgir_key or amdgcn_key): - logger.debug("No AMD IR found") + if not (ttir_key or ttgir_key or amdgcn_key): + logger.debug("No IR found") return {} ir_analysis = {} - if amdgcn_key: + if amdgcn_key and ttgir_key: + # Add BufferOps information ttgir_bufferops_info = process_amd_ttgir_bufferops( ttgir_key, file_content, file_path ) @@ -72,4 +388,25 @@ def _generate_ir_analysis(entry: str): io_counts["amd_gcn_bufferops_count"] = gcn_bufferops_info if io_counts: ir_analysis["io_counts"] = io_counts + if ttir_key and ttgir_key: + # Get Python source content and start line if available + python_source_content = None + python_source_start_line = 1 # Default to 1 if not available + python_source_info = payload.get("python_source") + if python_source_info: + python_source_content = python_source_info.get("code") + python_source_start_line = python_source_info.get("start_line", 1) + + # Add loop schedule information + loop_schedule = generate_loop_schedule( + ttir_key, + ttgir_key, + file_content, + file_path, + source_mappings, + python_source_content, + python_source_start_line, + ) + if loop_schedule: + ir_analysis["loop_schedules"] = loop_schedule return ir_analysis diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx index 39d97e5..2f8e0d2 100644 --- a/website/src/pages/IRAnalysis.tsx +++ b/website/src/pages/IRAnalysis.tsx @@ -27,6 +27,7 @@ const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { const io_counts = kernel.ir_analysis?.io_counts; const ttgir_info = io_counts?.["amd_ttgir_bufferops_count"]; const amdgcn_info = io_counts?.["amd_gcn_bufferops_count"]; + const loop_schedule = kernel.ir_analysis?.loop_schedules; const getCount = (info: Record | undefined, key: string): string => { return info?.[key]?.toString() ?? "N/A"; }; return ( @@ -44,7 +45,7 @@ const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => { AMD BufferOps Information -
+
{ttgir_info && ( <> @@ -90,6 +91,76 @@ const IRAnalysis: React.FC = ({ kernels, selectedKernel }) => {
)} + + {loop_schedule && loop_schedule.length > 0 && ( + <> +

+ Software Pipelining Schedule +

+ + {loop_schedule.map((schedule: any, loopIndex: number) => { + const prologue = schedule?.prologue || []; + const loopBody = schedule?.loop_body || []; + const epilogue = schedule?.epilogue || []; + + return ( +
+

+ Software Pipelining for Loop {loopIndex + 1} +

+ + {/* Prologue */} + {prologue.length > 0 && ( +
+
Prologue:
+
+ {prologue.map((line: string, idx: number) => ( +
+ {line} +
+ ))} +
+
+ )} + + {/* Loop Body */} +
+
Loop Body:
+
+
for (...) {'{'}
+
+ {loopBody.length > 0 ? ( + loopBody.map((line: string, idx: number) => ( +
+ {line} +
+ )) + ) : ( +
No operations in loop body
+ )} +
+
{'}'}
+
+
+ + {/* Epilogue */} + {epilogue.length > 0 && ( +
+
Epilogue:
+
+ {epilogue.map((line: string, idx: number) => ( +
+ {line} +
+ ))} +
+
+ )} +
+ ); + })} + + )}
); diff --git a/website/src/utils/dataLoader.ts b/website/src/utils/dataLoader.ts index c064e49..c174364 100644 --- a/website/src/utils/dataLoader.ts +++ b/website/src/utils/dataLoader.ts @@ -170,6 +170,7 @@ export interface CompilationMetadata { export interface IRAnalysisData { // Mapping from IR stage -> count> io_counts?: Record>; + loop_schedules?: [Record]; } /** From 1c193e06f494371fdad66510e14a8e438185c6db Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Wed, 29 Oct 2025 11:56:09 -0400 Subject: [PATCH 07/10] Fix mapping to avoid false positives --- tritonparse/ir_analysis.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py index cd27700..d2abbee 100644 --- a/tritonparse/ir_analysis.py +++ b/tritonparse/ir_analysis.py @@ -182,15 +182,31 @@ def find_loop_pipelining( ttgir_lines = ttgir_content.split("\n") python_lines = python_source_content.split("\n") if python_source_content else [] + def apply_trailing_space(op: str) -> str: + """ + Add a trailing space to all ops to avoid false positives like + warp_group_dot and warp_group_dot_wait. + """ + return op + " " + # Step 1: Find tt.load and tt.dot operations in TTIR loop - ttir_operations: list[tuple[str, int]] = [] + ttir_lines: list[int] = [] + pipeline_tt_ops = ["tt.load", "tt.dot"] + pipeline_tt_ops = [apply_trailing_space(op) for op in pipeline_tt_ops] + pipeline_ttgir_ops = [ + "tt.load", + "tt.dot", + "async_copy_global_to_local", + "warp_group_dot", + ] + pipeline_ttgir_ops = [apply_trailing_space(op) for op in pipeline_ttgir_ops] for line_idx in range(ttir_loop_start, min(ttir_loop_end + 1, len(ttir_lines))): line = ttir_lines[line_idx] - if "tt.load" in line: - ttir_operations.append(("tt.load", line_idx)) - if "tt.dot" in line: - ttir_operations.append(("tt.dot", line_idx)) + for op in pipeline_tt_ops: + if op in line: + ttir_lines.append(line_idx) + break # Step 2: Find the corresponding loop in TTGIR using source mappings # Map the TTIR loop bounds to TTGIR using source mappings @@ -215,7 +231,7 @@ def find_loop_pipelining( loop_body_ops: list[tuple[int, str]] = [] epilogue_ops: list[tuple[int, str]] = [] - for op_type, ttir_line in ttir_operations: + for ttir_line in ttir_lines: # Convert 0-indexed line to 1-indexed string key for mapping lookup ttir_line_key = str(ttir_line + 1) @@ -234,14 +250,8 @@ def find_loop_pipelining( if ttgir_line_idx < len(ttgir_lines): ttgir_source_line = ttgir_lines[ttgir_line_idx].strip() - # Only keep lines with specific operations - relevant_ops = [ - "tt.load", - "tt.dot", - "async_copy_global_to_local", - "warp_group_dot", - ] - if any(op in ttgir_source_line for op in relevant_ops): + # Only keep mappings to the "compute" op. + if any(op in ttgir_source_line for op in pipeline_ttgir_ops): # Map TTGIR line back to Python source ttgir_line_key = str(ttgir_line) python_source_line = ttgir_source_line # Default to TTGIR line From 033feb1ccf5c15ee5577842087154e352cd516a8 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Wed, 29 Oct 2025 12:23:50 -0400 Subject: [PATCH 08/10] Fixed typo --- tritonparse/ir_analysis.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py index d2abbee..4cfd07e 100644 --- a/tritonparse/ir_analysis.py +++ b/tritonparse/ir_analysis.py @@ -190,7 +190,7 @@ def apply_trailing_space(op: str) -> str: return op + " " # Step 1: Find tt.load and tt.dot operations in TTIR loop - ttir_lines: list[int] = [] + ttir_pipeline_lines: list[int] = [] pipeline_tt_ops = ["tt.load", "tt.dot"] pipeline_tt_ops = [apply_trailing_space(op) for op in pipeline_tt_ops] pipeline_ttgir_ops = [ @@ -205,7 +205,7 @@ def apply_trailing_space(op: str) -> str: line = ttir_lines[line_idx] for op in pipeline_tt_ops: if op in line: - ttir_lines.append(line_idx) + ttir_pipeline_lines.append(line_idx) break # Step 2: Find the corresponding loop in TTGIR using source mappings @@ -231,7 +231,7 @@ def apply_trailing_space(op: str) -> str: loop_body_ops: list[tuple[int, str]] = [] epilogue_ops: list[tuple[int, str]] = [] - for ttir_line in ttir_lines: + for ttir_line in ttir_pipeline_lines: # Convert 0-indexed line to 1-indexed string key for mapping lookup ttir_line_key = str(ttir_line + 1) From 5641663b918fb5273fbe4c09de19dd59c45ffc4c Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Wed, 29 Oct 2025 13:56:01 -0400 Subject: [PATCH 09/10] Fix initial bug --- tritonparse/ir_analysis.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py index 4cfd07e..16c632e 100644 --- a/tritonparse/ir_analysis.py +++ b/tritonparse/ir_analysis.py @@ -84,11 +84,11 @@ def find_loop_bounds(ir_content: str) -> list[tuple[int, int]]: elif char == "}": current_brace_count -= 1 - # Check if we've closed any loops - while loop_stack and current_brace_count <= loop_stack[-1][1]: - start_line, _start_brace_count = loop_stack.pop() - # The loop ends at this line - loop_bounds.append((start_line, line_idx)) + # Check if we've closed any loops + while loop_stack and current_brace_count <= loop_stack[-1][1]: + start_line, _start_brace_count = loop_stack.pop() + # The loop ends at this line + loop_bounds.append((start_line, line_idx)) return loop_bounds @@ -137,6 +137,7 @@ def find_loop_pipelining( ttgir_content: str, ttir_loop_start: int, ttir_loop_end: int, + loop_index: int, ttir_to_ttgir_mapping: dict[str, dict], ttgir_to_source_mapping: dict[str, dict], python_source_content: str | None, @@ -222,7 +223,7 @@ def apply_trailing_space(op: str) -> str: # Use the first inner loop as the reference # TODO: Implement more sophisticated mapping logic to match TTIR loops to TTGIR loops - ttgir_loop_start, ttgir_loop_end = ttgir_inner_loops[0] + ttgir_loop_start, ttgir_loop_end = ttgir_inner_loops[loop_index] # Step 3: Map TTIR operations to TTGIR operations using source mappings # and categorize them by their position relative to the TTGIR loop @@ -353,12 +354,13 @@ def generate_loop_schedule( # For each inner loop, find pipelining information loop_schedules = [] - for loop_start, loop_end in inner_loop_bounds: + for i, (loop_start, loop_end) in enumerate(inner_loop_bounds): pipelining_info = find_loop_pipelining( ttir_content, ttgir_content, loop_start, loop_end, + i, ttir_to_ttgir_mapping, ttgir_to_source_mapping, python_source_content, From 84357ef342a8361a01472a767e7bf9dfe7164370 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Wed, 29 Oct 2025 14:07:11 -0400 Subject: [PATCH 10/10] Limit to 1 loop --- tritonparse/ir_analysis.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py index 16c632e..37d3b67 100644 --- a/tritonparse/ir_analysis.py +++ b/tritonparse/ir_analysis.py @@ -201,7 +201,6 @@ def apply_trailing_space(op: str) -> str: "warp_group_dot", ] pipeline_ttgir_ops = [apply_trailing_space(op) for op in pipeline_ttgir_ops] - for line_idx in range(ttir_loop_start, min(ttir_loop_end + 1, len(ttir_lines))): line = ttir_lines[line_idx] for op in pipeline_tt_ops: @@ -351,6 +350,8 @@ def generate_loop_schedule( # Find only inner loops (loops without nested loops inside) inner_loop_bounds = find_inner_loop_bounds(ttir_content) + # TODO: Fix loop mapping with multiple loops. + inner_loop_bounds = inner_loop_bounds[:1] # For each inner loop, find pipelining information loop_schedules = []