From eb1ee330f0b480d52873f23da7683a5124c3b523 Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Thu, 23 Oct 2025 14:44:22 -0700
Subject: [PATCH 01/10] Add the IR Analysis tab to the web frontend (#182)

Summary: Pull Request resolved: https://github.com/meta-pytorch/tritonparse/pull/182

Differential Revision: D85376311
---
 website/src/App.tsx              | 27 +++++++++++-
 website/src/pages/IRAnalysis.tsx | 75 ++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 website/src/pages/IRAnalysis.tsx
diff --git a/website/src/App.tsx b/website/src/App.tsx
index c6d8d2c..75530ba 100644
--- a/website/src/App.tsx
+++ b/website/src/App.tsx
@@ -12,6 +12,7 @@ import CodeView from "./pages/CodeView";
 import FileDiffView from "./pages/FileDiffView";
 import SingleCodeViewer from "./components/SingleCodeViewer";
 import KernelOverview from "./pages/KernelOverview";
+import IRAnalysis from "./pages/IRAnalysis";
 import DataSourceSelector from "./components/DataSourceSelector";
 import WelcomeScreen from "./components/WelcomeScreen";
 import ExternalLink from "./components/ExternalLink";
@@ -409,7 +410,7 @@ function App() {
         </div>
       );
     } else {
-      // Show either overview, IR code, or file diff based on active tab
+      // Show either overview, IR code, IR analysis, or file diff based on active tab
       if (activeTab === "overview") {
         return (
           <KernelOverview
@@ -420,6 +421,14 @@ function App() {
           />
         );
       }
+      if (activeTab === "ir_analysis") {
+        return (
+          <IRAnalysis
+            kernels={kernels}
+            selectedKernel={selectedKernel}
+          />
+        );
+      }
       if (activeTab === "comparison") {
         return (
           <CodeView
@@ -561,6 +570,22 @@ function App() {
               >
                 File Diff
               </button>
+              <button
+                    className={`px-3 py-2 text-sm font-medium rounded-md ${activeTab === "ir_analysis" ? "bg-blue-700 text-white shadow-md" : "bg-blue-100 text-blue-700 hover:bg-blue-200"
+                      }`}
+                    onClick={() => {
+                      if (sess.preview?.active) sess.clearPreview();
+                      setActiveTab("ir_analysis");
+
+                      if (loadedUrl) {
+                        const newUrl = new URL(window.location.href);
+                        newUrl.searchParams.set("view", "ir_analysis");
+                        window.history.replaceState({}, "", newUrl.toString());
+                      }
+                    }}
+                  >
+                    IR Analysis (Beta)
+                  </button>
             </div>
           </div>
         </div>
diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx
new file mode 100644
index 0000000..386bc38
--- /dev/null
+++ b/website/src/pages/IRAnalysis.tsx
@@ -0,0 +1,75 @@
+import React from "react";
+import { ProcessedKernel } from "../utils/dataLoader";
+
+interface IRAnalysisProps {
+  kernels: ProcessedKernel[];
+  selectedKernel: number;
+}
+
+const formatMetadataValue = (value: any): string => {
+  if (value === null) {
+    return "null";
+  }
+  if (typeof value === "boolean") {
+    return value ? "true" : "false";
+  }
+  if (Array.isArray(value)) {
+    return JSON.stringify(value);
+  }
+  if (typeof value === "object") {
+    return JSON.stringify(value);
+  }
+  return String(value);
+};
+
+interface MetadataItemProps {
+  label: string;
+  value: React.ReactNode;
+}
+
+const MetadataItem: React.FC<MetadataItemProps> = ({ label, value }) => (
+  <div className="flex flex-col">
+    <span className="text-sm font-medium text-gray-500">{label}</span>
+    <span className="font-mono text-sm break-words">{value}</span>
+  </div>
+);
+
+const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
+  if (kernels.length === 0) {
+    return (
+      <div className="flex items-center justify-center h-screen">
+        <div className="text-gray-800">No kernel data available</div>
+      </div>
+    );
+  }
+
+  const kernel = kernels[selectedKernel];
+
+  return (
+    <div className="p-6">
+      <h1 className="text-2xl font-bold text-gray-800 mb-6">Triton Kernel IR Analysis</h1>
+
+      <div className="bg-white rounded-lg p-4 mb-4 shadow-sm border border-gray-200">
+        <h2 className="text-xl font-semibold mb-4 text-gray-800">
+          Kernel: {kernel.name}
+        </h2>
+
+        <div className="mb-6">
+          <p className="text-gray-600 mb-4">
+            The IR analysis provides helpful insights into important kernel properties
+            that were derived from the IR.
+          </p>
+        </div>
+
+        <div className="bg-gray-50 p-4 rounded-md border border-gray-200">
+          <p className="text-sm text-gray-700">
+            IR analysis data will be displayed here when available in the
+            kernel data structure.
+          </p>
+        </div>
+      </div>
+    </div>
+  );
+};
+
+export default IRAnalysis;

From 17fb4009dce52799ac68ff9af2fb45c2101650d3 Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Fri, 24 Oct 2025 17:11:23 -0700
Subject: [PATCH 02/10] Emit ir_analysis events and show AMD IO counts on the
 website (WIP: still need devmate to improve the code)

---
 tritonparse/ir_analysis.py       | 11 +++--
 tritonparse/trace_processor.py   | 11 +++--
 website/src/App.tsx              |  3 +-
 website/src/pages/IRAnalysis.tsx | 71 ++++++++++++++++----------------
 website/src/utils/dataLoader.ts  | 16 +++++++
 5 files changed, 68 insertions(+), 44 deletions(-)

diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py
index 20addd3..3f5a183 100644
--- a/tritonparse/ir_analysis.py
+++ b/tritonparse/ir_analysis.py
@@ -40,7 +40,7 @@ def process_amd_gcn_bufferops(
 ) -> dict[str, int]:
     ir_content = load_ir_contents(key, file_content, file_path)
     # TODO: Add atomics
-    io_keys = ["global_load_", "global_store_", "buffer_load_", "buffer_store_"]
+    io_keys = ["global_load", "global_store", "buffer_load", "buffer_store"]
     return process_amd_bufferop(ir_content, io_keys)
 
 
@@ -64,9 +64,12 @@ def _generate_ir_analysis(entry: str):
         gcn_bufferops_info = process_amd_gcn_bufferops(
             amdgcn_key, file_content, file_path
         )
+        io_counts = {}
         # NDJSON format requires a newline at the end of each line
         if ttgir_bufferops_info:
-            ir_analysis["amd_ttgir_bufferops_count"] = ttgir_bufferops_info
+            io_counts["amd_ttgir_bufferops_count"] = ttgir_bufferops_info
         if gcn_bufferops_info:
-            ir_analysis["amd_gcn_bufferops_count"] = gcn_bufferops_info
-    return {"ir_analysis": ir_analysis}
+            io_counts["amd_gcn_bufferops_count"] = gcn_bufferops_info
+        if io_counts:
+            ir_analysis["io_counts"] = io_counts
+    return ir_analysis
diff --git a/tritonparse/trace_processor.py b/tritonparse/trace_processor.py
index b321e6b..e797dae 100644
--- a/tritonparse/trace_processor.py
+++ b/tritonparse/trace_processor.py
@@ -299,10 +299,15 @@ def parse_single_file(
             )
 
         if compilation_event:
-            ir_analysis_event = _generate_ir_analysis(compilation_event)
+            ir_analysis = _generate_ir_analysis(compilation_event)
             if ir_analysis_event:
-                all_output_lines[output_file].append(
-                    json.dumps(ir_analysis_event, separators=(",", ":")) + "\n"
+                ir_analysis_event = {
+                    "event_type": "ir_analysis",
+                    "hash": _kernel_hash,
+                    "ir_analysis": ir_analysis,
+                }
+                    all_output_lines[output_file].append(
+                        json.dumps(ir_analysis_event, separators=(",", ":")) + "\n"
                 )
 
         if compilation_event and launches_with_indices:
diff --git a/website/src/App.tsx b/website/src/App.tsx
index 75530ba..142812f 100644
--- a/website/src/App.tsx
+++ b/website/src/App.tsx
@@ -559,7 +559,6 @@ function App() {
                   </button>
                 </>
               )}
-
               <button
                 className={`px-3 py-2 text-sm font-medium rounded-md ${activeTab === "file_diff" ? "bg-blue-700 text-white shadow-md" : "bg-blue-100 text-blue-700 hover:bg-blue-200"
                   }`}
@@ -570,6 +569,7 @@ function App() {
               >
                 File Diff
               </button>
+              {dataLoaded && kernels.length > 0 && (
               <button
                     className={`px-3 py-2 text-sm font-medium rounded-md ${activeTab === "ir_analysis" ? "bg-blue-700 text-white shadow-md" : "bg-blue-100 text-blue-700 hover:bg-blue-200"
                       }`}
@@ -586,6 +586,7 @@ function App() {
                   >
                     IR Analysis (Beta)
                   </button>
+              )}
             </div>
           </div>
         </div>
diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx
index 386bc38..2412b52 100644
--- a/website/src/pages/IRAnalysis.tsx
+++ b/website/src/pages/IRAnalysis.tsx
@@ -6,34 +6,6 @@ interface IRAnalysisProps {
   selectedKernel: number;
 }
 
-const formatMetadataValue = (value: any): string => {
-  if (value === null) {
-    return "null";
-  }
-  if (typeof value === "boolean") {
-    return value ? "true" : "false";
-  }
-  if (Array.isArray(value)) {
-    return JSON.stringify(value);
-  }
-  if (typeof value === "object") {
-    return JSON.stringify(value);
-  }
-  return String(value);
-};
-
-interface MetadataItemProps {
-  label: string;
-  value: React.ReactNode;
-}
-
-const MetadataItem: React.FC<MetadataItemProps> = ({ label, value }) => (
-  <div className="flex flex-col">
-    <span className="text-sm font-medium text-gray-500">{label}</span>
-    <span className="font-mono text-sm break-words">{value}</span>
-  </div>
-);
-
 const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
   if (kernels.length === 0) {
     return (
@@ -44,6 +16,16 @@ const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
   }
 
   const kernel = kernels[selectedKernel];
+  if (!kernel?.ir_analysis) { // optional field: absent means undefined, which "=== null" never matched
+    return (
+      <div className="flex items-center justify-center h-screen">
+        <div className="text-gray-800">No IR Analysis available</div>
+      </div>
+    );
+  }
+  const io_counts = kernel.ir_analysis!.io_counts
+  const ttgir_info = kernel.ir_analysis!.io_counts!["amd_ttgir_bufferops_count"];
+  const amdgcn_info = kernel.ir_analysis!.io_counts!["amd_gcn_bufferops_count"];
 
   return (
     <div className="p-6">
@@ -54,17 +36,34 @@ const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
           Kernel: {kernel.name}
         </h2>
 
-        <div className="mb-6">
-          <p className="text-gray-600 mb-4">
-            The IR analysis provides helpful insights into important kernel properties
-            that were derived from the IR.
-          </p>
-        </div>
+        <h3 className="text-lg font-medium mb-3 text-gray-800">
+          AMD BufferOps Information:
+        </h3>
 
         <div className="bg-gray-50 p-4 rounded-md border border-gray-200">
           <p className="text-sm text-gray-700">
-            IR analysis data will be displayed here when available in the
-            kernel data structure.
+            Tiled Buffer Load Count: {ttgir_info["tt.load_count"]}
+          </p>
+          <p className="text-sm text-gray-700">
+            Tiled Buffer Store Count: {ttgir_info["tt.store_count"]}
+          </p>
+          <p className="text-sm text-gray-700">
+            Tiled Global Load Count: {ttgir_info["amdgpu.buffer_load_count"]}
+          </p>
+          <p className="text-sm text-gray-700">
+            Tiled Global Store Count:{ttgir_info["amdgpu.buffer_store_count"]}
+          </p>
+          <p className="text-sm text-gray-700">
+            AMDGCN Buffer Load Instruction Count: {amdgcn_info["global_load_count"]}
+          </p>
+          <p className="text-sm text-gray-700">
+            AMDGCN Buffer Store Instruction Count: {amdgcn_info["global_store_count"]}
+          </p>
+          <p className="text-sm text-gray-700">
+            AMDGCN Global Load Instruction Count: {amdgcn_info["buffer_load_count"]}
+          </p>
+          <p className="text-sm text-gray-700">
+            AMDGCN Global Store Instruction Count: {amdgcn_info["buffer_store_count"]}
           </p>
         </div>
       </div>
diff --git a/website/src/utils/dataLoader.ts b/website/src/utils/dataLoader.ts
index 924f990..c064e49 100644
--- a/website/src/utils/dataLoader.ts
+++ b/website/src/utils/dataLoader.ts
@@ -167,6 +167,11 @@ export interface CompilationMetadata {
     [key: string]: any; // Allow additional unknown fields
 }
 
+export interface IRAnalysisData {
+    // Mapping from IR stage -> <IO type -> count>
+    io_counts?: Record<string, Record<string, number>>;
+}
+
 /**
  * Extracted argument information
  */
@@ -224,6 +229,7 @@ export interface LogEntry {
     launch_index_map?: LaunchRange[];
     diffs?: LaunchDiffData;
     sames?: LaunchSamesData;
+    ir_analysis?: IRAnalysisData; // Stored IR Analysis information.
 }
 
 /**
@@ -239,6 +245,7 @@ export interface ProcessedKernel {
     pythonSourceInfo?: PythonSourceCodeInfo; // Python source code information
     metadata?: KernelMetadata; // Compilation metadata
     launchDiff?: LogEntry; // Aggregated launch event differences
+    ir_analysis?: IRAnalysisData; // Stored IR Analysis information.
 }
 
 /**
@@ -503,6 +510,15 @@ export function processKernelData(logEntries: LogEntry[]): ProcessedKernel[] {
                 console.warn(`Could not find matching kernel for launch_diff hash: ${hash}`);
             }
         }
+        if (entry.event_type === "ir_analysis") {
+            const hash = entry.hash;
+            if (hash && kernelsByHash.has(hash)) {
+                const kernel = kernelsByHash.get(hash)!;
+                kernel.ir_analysis = entry.ir_analysis!; // Attach the ir_analysis
+            } else {
+                console.warn(`Could not find matching kernel for ir_analysis hash: ${hash}`);
+            }
+        }
     }
 
     const finalKernels = Array.from(kernelsByHash.values());

From 800d176aeabf322ad53929ef989d344808fd4dc2 Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Sat, 25 Oct 2025 18:19:40 -0400
Subject: [PATCH 03/10] Cleaned up the code

---
 website/src/pages/IRAnalysis.tsx | 87 ++++++++++++++++++++------------
 1 file changed, 55 insertions(+), 32 deletions(-)

diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx
index 2412b52..9bac4f4 100644
--- a/website/src/pages/IRAnalysis.tsx
+++ b/website/src/pages/IRAnalysis.tsx
@@ -23,9 +23,10 @@ const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
       </div>
     );
   }
-  const io_counts = kernel.ir_analysis!.io_counts
-  const ttgir_info = kernel.ir_analysis!.io_counts!["amd_ttgir_bufferops_count"];
-  const amdgcn_info = kernel.ir_analysis!.io_counts!["amd_gcn_bufferops_count"];
+
+  const io_counts = kernel.ir_analysis?.io_counts;
+  const ttgir_info = io_counts?.["amd_ttgir_bufferops_count"];
+  const amdgcn_info = io_counts?.["amd_gcn_bufferops_count"];
 
   return (
     <div className="p-6">
@@ -36,36 +37,58 @@ const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
           Kernel: {kernel.name}
         </h2>
 
-        <h3 className="text-lg font-medium mb-3 text-gray-800">
-          AMD BufferOps Information:
-        </h3>
+        {io_counts && (ttgir_info || amdgcn_info) && (
+          <>
+            <h3 className="text-lg font-medium mb-3 text-gray-800">
+              AMD BufferOps Information
+            </h3>
 
-        <div className="bg-gray-50 p-4 rounded-md border border-gray-200">
-          <p className="text-sm text-gray-700">
-            Tiled Buffer Load Count: {ttgir_info["tt.load_count"]}
-          </p>
-          <p className="text-sm text-gray-700">
-            Tiled Buffer Store Count: {ttgir_info["tt.store_count"]}
-          </p>
-          <p className="text-sm text-gray-700">
-            Tiled Global Load Count: {ttgir_info["amdgpu.buffer_load_count"]}
-          </p>
-          <p className="text-sm text-gray-700">
-            Tiled Global Store Count:{ttgir_info["amdgpu.buffer_store_count"]}
-          </p>
-          <p className="text-sm text-gray-700">
-            AMDGCN Buffer Load Instruction Count: {amdgcn_info["global_load_count"]}
-          </p>
-          <p className="text-sm text-gray-700">
-            AMDGCN Buffer Store Instruction Count: {amdgcn_info["global_store_count"]}
-          </p>
-          <p className="text-sm text-gray-700">
-            AMDGCN Global Load Instruction Count: {amdgcn_info["buffer_load_count"]}
-          </p>
-          <p className="text-sm text-gray-700">
-            AMDGCN Global Store Instruction Count: {amdgcn_info["buffer_store_count"]}
-          </p>
-        </div>
+            <div className="bg-gray-50 p-4 rounded-md border border-gray-200">
+              <div className="grid grid-cols-[repeat(auto-fit,_minmax(180px,_1fr))] gap-3">
+                {ttgir_info && (
+                  <>
+                    <div className="flex flex-col">
+                      <span className="text-sm font-medium text-gray-500">Tiled Buffer Load Count</span>
+                      <span className="font-mono text-sm break-words">{ttgir_info["tt.load_count"] ?? "NaN"}</span>
+                    </div>
+                    <div className="flex flex-col">
+                      <span className="text-sm font-medium text-gray-500">Tiled Buffer Store Count</span>
+                      <span className="font-mono text-sm break-words">{ttgir_info["tt.store_count"] ?? "NaN"}</span>
+                    </div>
+                    <div className="flex flex-col">
+                      <span className="text-sm font-medium text-gray-500">Tiled Global Load Count</span>
+                      <span className="font-mono text-sm break-words">{ttgir_info["amdgpu.buffer_load_count"] ?? "NaN"}</span>
+                    </div>
+                    <div className="flex flex-col">
+                      <span className="text-sm font-medium text-gray-500">Tiled Global Store Count</span>
+                      <span className="font-mono text-sm break-words">{ttgir_info["amdgpu.buffer_store_count"] ?? "NaN"}</span>
+                    </div>
+                  </>
+                )}
+                {amdgcn_info && (
+                  <>
+                    <div className="flex flex-col">
+                      <span className="text-sm font-medium text-gray-500">AMDGCN Buffer Load Instruction Count</span>
+                      <span className="font-mono text-sm break-words">{amdgcn_info["global_load_count"] ?? "NaN"}</span>
+                    </div>
+                    <div className="flex flex-col">
+                      <span className="text-sm font-medium text-gray-500">AMDGCN Buffer Store Instruction Count</span>
+                      <span className="font-mono text-sm break-words">{amdgcn_info["global_store_count"] ?? "NaN"}</span>
+                    </div>
+                    <div className="flex flex-col">
+                      <span className="text-sm font-medium text-gray-500">AMDGCN Global Load Instruction Count</span>
+                      <span className="font-mono text-sm break-words">{amdgcn_info["buffer_load_count"] ?? "NaN"}</span>
+                    </div>
+                    <div className="flex flex-col">
+                      <span className="text-sm font-medium text-gray-500">AMDGCN Global Store Instruction Count</span>
+                      <span className="font-mono text-sm break-words">{amdgcn_info["buffer_store_count"] ?? "NaN"}</span>
+                    </div>
+                  </>
+                )}
+              </div>
+            </div>
+          </>
+        )}
       </div>
     </div>
   );

From 6f5043da4b44e96f493b5118d9a5c69e8fbe9411 Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Sat, 25 Oct 2025 18:34:29 -0400
Subject: [PATCH 04/10] Fixed the stale ir_analysis_event check and indentation
 in trace_processor.py

---
 tritonparse/trace_processor.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tritonparse/trace_processor.py b/tritonparse/trace_processor.py
index e797dae..904e052 100644
--- a/tritonparse/trace_processor.py
+++ b/tritonparse/trace_processor.py
@@ -300,14 +300,14 @@ def parse_single_file(
 
         if compilation_event:
             ir_analysis = _generate_ir_analysis(compilation_event)
-            if ir_analysis_event:
+            if ir_analysis:
                 ir_analysis_event = {
                     "event_type": "ir_analysis",
                     "hash": _kernel_hash,
                     "ir_analysis": ir_analysis,
                 }
-                    all_output_lines[output_file].append(
-                        json.dumps(ir_analysis_event, separators=(",", ":")) + "\n"
+                all_output_lines[output_file].append(
+                    json.dumps(ir_analysis_event, separators=(",", ":")) + "\n"
                 )
 
         if compilation_event and launches_with_indices:

From 9074bcfdab16aa88ea8c682c24e82f2ea728f78f Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Mon, 27 Oct 2025 11:39:40 -0400
Subject: [PATCH 05/10] Applied feedback: add getCount helper with N/A fallback
 and fix swapped AMDGCN labels

---
 website/src/pages/IRAnalysis.tsx | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx
index 9bac4f4..39d97e5 100644
--- a/website/src/pages/IRAnalysis.tsx
+++ b/website/src/pages/IRAnalysis.tsx
@@ -27,6 +27,7 @@ const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
   const io_counts = kernel.ir_analysis?.io_counts;
   const ttgir_info = io_counts?.["amd_ttgir_bufferops_count"];
   const amdgcn_info = io_counts?.["amd_gcn_bufferops_count"];
+  const getCount = (info: Record<string, number> | undefined, key: string): string => { return info?.[key]?.toString() ?? "N/A"; };
 
   return (
     <div className="p-6">
@@ -49,39 +50,39 @@ const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
                   <>
                     <div className="flex flex-col">
                       <span className="text-sm font-medium text-gray-500">Tiled Buffer Load Count</span>
-                      <span className="font-mono text-sm break-words">{ttgir_info["tt.load_count"] ?? "NaN"}</span>
+                      <span className="font-mono text-sm break-words">{getCount(ttgir_info, "tt.load_count")}</span>
                     </div>
                     <div className="flex flex-col">
                       <span className="text-sm font-medium text-gray-500">Tiled Buffer Store Count</span>
-                      <span className="font-mono text-sm break-words">{ttgir_info["tt.store_count"] ?? "NaN"}</span>
+                      <span className="font-mono text-sm break-words">{getCount(ttgir_info, "tt.store_count")}</span>
                     </div>
                     <div className="flex flex-col">
                       <span className="text-sm font-medium text-gray-500">Tiled Global Load Count</span>
-                      <span className="font-mono text-sm break-words">{ttgir_info["amdgpu.buffer_load_count"] ?? "NaN"}</span>
+                      <span className="font-mono text-sm break-words">{getCount(ttgir_info, "amdgpu.buffer_load_count")}</span>
                     </div>
                     <div className="flex flex-col">
                       <span className="text-sm font-medium text-gray-500">Tiled Global Store Count</span>
-                      <span className="font-mono text-sm break-words">{ttgir_info["amdgpu.buffer_store_count"] ?? "NaN"}</span>
+                      <span className="font-mono text-sm break-words">{getCount(ttgir_info, "amdgpu.buffer_store_count")}</span>
                     </div>
                   </>
                 )}
                 {amdgcn_info && (
                   <>
                     <div className="flex flex-col">
-                      <span className="text-sm font-medium text-gray-500">AMDGCN Buffer Load Instruction Count</span>
-                      <span className="font-mono text-sm break-words">{amdgcn_info["global_load_count"] ?? "NaN"}</span>
+                      <span className="text-sm font-medium text-gray-500">AMDGCN Global Load Instruction Count</span>
+                      <span className="font-mono text-sm break-words">{getCount(amdgcn_info, "global_load_count")}</span>
                     </div>
                     <div className="flex flex-col">
-                      <span className="text-sm font-medium text-gray-500">AMDGCN Buffer Store Instruction Count</span>
-                      <span className="font-mono text-sm break-words">{amdgcn_info["global_store_count"] ?? "NaN"}</span>
+                      <span className="text-sm font-medium text-gray-500">AMDGCN Global Store Instruction Count</span>
+                      <span className="font-mono text-sm break-words">{getCount(amdgcn_info, "global_store_count")}</span>
                     </div>
                     <div className="flex flex-col">
-                      <span className="text-sm font-medium text-gray-500">AMDGCN Global Load Instruction Count</span>
-                      <span className="font-mono text-sm break-words">{amdgcn_info["buffer_load_count"] ?? "NaN"}</span>
+                      <span className="text-sm font-medium text-gray-500">AMDGCN Buffer Load Instruction Count</span>
+                      <span className="font-mono text-sm break-words">{getCount(amdgcn_info, "buffer_load_count")}</span>
                     </div>
                     <div className="flex flex-col">
-                      <span className="text-sm font-medium text-gray-500">AMDGCN Global Store Instruction Count</span>
-                      <span className="font-mono text-sm break-words">{amdgcn_info["buffer_store_count"] ?? "NaN"}</span>
+                      <span className="text-sm font-medium text-gray-500">AMDGCN Buffer Store Instruction Count</span>
+                      <span className="font-mono text-sm break-words">{getCount(amdgcn_info, "buffer_store_count")}</span>
                     </div>
                   </>
                 )}

From 4935cc306bcd15df127de441447fbf3731f574d1 Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Mon, 27 Oct 2025 13:53:52 -0400
Subject: [PATCH 06/10] Added the logic for displaying software pipelining
 decisions

---
 tritonparse/ir_analysis.py       | 343 ++++++++++++++++++++++++++++++-
 website/src/pages/IRAnalysis.tsx |  73 ++++++-
 website/src/utils/dataLoader.ts  |   1 +
 3 files changed, 413 insertions(+), 4 deletions(-)

diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py
index 3f5a183..5219c22 100644
--- a/tritonparse/ir_analysis.py
+++ b/tritonparse/ir_analysis.py
@@ -44,20 +44,336 @@ def process_amd_gcn_bufferops(
     return process_amd_bufferop(ir_content, io_keys)
 
 
+def find_loop_bounds(ir_content: str) -> list[tuple[int, int]]:
+    """
+    Find the bounds of all scf.for loops in the IR content.
+    These are the only candidates for Software Pipelining (SWP).
+
+    A loop starts with 'scf.for' and ends when its closing brace '}' is found.
+    Brace counts are tracked to determine when each loop closes.
+
+    Args:
+        ir_content: The IR content as a string.
+
+    Returns:
+        A list of tuples (start_line, end_line) for each scf.for loop found.
+        Line numbers are 0-indexed.
+    """
+    if not ir_content:
+        return []
+
+    loop_bounds: list[tuple[int, int]] = []
+    lines = ir_content.split("\n")
+
+    # Stack to track loop starts and their brace counts
+    # Each entry is (start_line, brace_count_at_start)
+    loop_stack: list[tuple[int, int]] = []
+    current_brace_count = 0
+
+    for line_idx, line in enumerate(lines):
+        # Check if this line starts a new scf.for loop
+        if "scf.for" in line:
+            loop_stack.append((line_idx, current_brace_count))
+
+        # Count braces on this line
+        for char in line:
+            if char == "{":
+                current_brace_count += 1
+            elif char == "}":
+                current_brace_count -= 1
+
+                # Check if we've closed any loops
+                while loop_stack and current_brace_count <= loop_stack[-1][1]:
+                    start_line, _start_brace_count = loop_stack.pop()
+                    # The loop ends at this line
+                    loop_bounds.append((start_line, line_idx))
+
+    return loop_bounds
+
+
+def find_inner_loop_bounds(ir_content: str) -> list[tuple[int, int]]:
+    """
+    Find the bounds of inner scf.for loops (loops without nested loops inside).
+
+    Inner loops are the primary candidates for Software Pipelining (SWP) as they
+    represent the innermost computation that can be optimized.
+
+    Args:
+        ir_content: The IR content as a string.
+
+    Returns:
+        A list of tuples (start_line, end_line) for each inner scf.for loop found.
+        Line numbers are 0-indexed.
+    """
+    all_loops = find_loop_bounds(ir_content)
+
+    if not all_loops:
+        return []
+
+    # Filter to keep only inner loops (loops that don't contain other loops)
+    inner_loops: list[tuple[int, int]] = []
+
+    for i, (start_i, end_i) in enumerate(all_loops):
+        # Check if any other loop is nested inside this loop
+        has_nested_loop = False
+        for j, (start_j, end_j) in enumerate(all_loops):
+            if i != j:
+                # Check if loop j is nested inside loop i
+                if start_i < start_j and end_j < end_i:
+                    has_nested_loop = True
+                    break
+
+        # If no nested loops found, this is an inner loop
+        if not has_nested_loop:
+            inner_loops.append((start_i, end_i))
+
+    return inner_loops
+
+
+def find_loop_pipelining(
+    ttir_content: str,
+    ttgir_content: str,
+    ttir_loop_start: int,
+    ttir_loop_end: int,
+    ttir_to_ttgir_mapping: dict[str, dict],
+    ttgir_to_source_mapping: dict[str, dict],
+    python_source_content: str | None,
+    python_source_start_line: int,
+) -> dict[str, list[str]]:
+    """
+    Find pipelining information for a specific loop by identifying tt.load and tt.dot operations
+    in TTIR and mapping them to their corresponding operations in the original Python source code.
+
+    For each tt.load or tt.dot operation found in the TTIR loop, this function uses source
+    mappings to find the corresponding operations in TTGIR, then maps them back to the original
+    Python source code. Operations are categorized into three sections:
+    - prologue: Operations that appear before the loop body
+    - loop_body: Operations that appear within the loop body
+    - epilogue: Operations that appear after the loop body
+
+    Operations are merged together (both loads and dots) and sorted in program order
+    within each section.
+
+    Args:
+        ttir_content: The TTIR content as a string.
+        ttgir_content: The TTGIR content as a string.
+        ttir_loop_start: The starting line number of the loop in TTIR (0-indexed).
+        ttir_loop_end: The ending line number of the loop in TTIR (0-indexed).
+        ttir_to_ttgir_mapping: Source mapping from TTIR lines to TTGIR lines.
+        ttgir_to_source_mapping: Source mapping from TTGIR lines to original Python source.
+        python_source_content, python_source_start_line: The extracted Python source code and the line number in the original file where it starts.
+
+    Returns:
+        A dictionary containing:
+        - "prologue": List of Python source line strings in program order
+        - "loop_body": List of Python source line strings in program order
+        - "epilogue": List of Python source line strings in program order
+    """
+    if not ttir_content or not ttgir_content:
+        return {
+            "prologue": [],
+            "loop_body": [],
+            "epilogue": [],
+        }
+
+    ttir_lines = ttir_content.split("\n")
+    ttgir_lines = ttgir_content.split("\n")
+    python_lines = python_source_content.split("\n") if python_source_content else []
+
+    # Step 1: Find tt.load and tt.dot operations in TTIR loop
+    ttir_operations: list[tuple[str, int]] = []
+
+    for line_idx in range(ttir_loop_start, min(ttir_loop_end + 1, len(ttir_lines))):
+        line = ttir_lines[line_idx]
+        if "tt.load" in line:
+            ttir_operations.append(("tt.load", line_idx))
+        if "tt.dot" in line:
+            ttir_operations.append(("tt.dot", line_idx))
+
+    # Step 2: Find the corresponding loop in TTGIR using source mappings
+    # Map the TTIR loop bounds to TTGIR using source mappings
+    ttgir_inner_loops = find_inner_loop_bounds(ttgir_content)
+
+    if not ttgir_inner_loops:
+        # No loop found in TTGIR, return empty results
+        return {
+            "prologue": [],
+            "loop_body": [],
+            "epilogue": [],
+        }
+
+    # Use the first inner loop as the reference
+    # TODO: Implement more sophisticated mapping logic to match TTIR loops to TTGIR loops
+    ttgir_loop_start, ttgir_loop_end = ttgir_inner_loops[0]
+
+    # Step 3: Map TTIR operations to TTGIR operations using source mappings
+    # and categorize them by their position relative to the TTGIR loop
+    # Store as (line_number, source_line) to maintain order before extracting just the source
+    prologue_ops: list[tuple[int, str]] = []
+    loop_body_ops: list[tuple[int, str]] = []
+    epilogue_ops: list[tuple[int, str]] = []
+
+    for op_type, ttir_line in ttir_operations:
+        # Convert 0-indexed line to 1-indexed string key for mapping lookup
+        ttir_line_key = str(ttir_line + 1)
+
+        # Get the corresponding TTGIR lines from the source mapping
+        if ttir_line_key in ttir_to_ttgir_mapping:
+            ttgir_lines_list = ttir_to_ttgir_mapping[ttir_line_key].get(
+                "ttgir_lines", []
+            )
+
+            # For each mapped TTGIR line, categorize it
+            for ttgir_line in ttgir_lines_list:
+                # Convert back to 0-indexed
+                ttgir_line_idx = ttgir_line - 1
+
+                # Get the actual TTGIR line content to check if it's relevant
+                if ttgir_line_idx < len(ttgir_lines):
+                    ttgir_source_line = ttgir_lines[ttgir_line_idx].strip()
+
+                    # Only keep lines with specific operations
+                    relevant_ops = [
+                        "tt.load",
+                        "tt.dot",
+                        "async_copy_global_to_local",
+                        "warp_group_dot",
+                    ]
+                    if any(op in ttgir_source_line for op in relevant_ops):
+                        # Map TTGIR line back to Python source
+                        ttgir_line_key = str(ttgir_line)
+                        python_source_line = ttgir_source_line  # Default to TTGIR line
+
+                        if ttgir_line_key in ttgir_to_source_mapping:
+                            source_info = ttgir_to_source_mapping[ttgir_line_key]
+                            python_line_num = source_info.get("line")
+
+                            if python_line_num and python_lines:
+                                # Account for the offset: the Python source may not start at line 1
+                                # python_line_num is the absolute line number in the original file
+                                # python_source_start_line is where the extracted code starts
+                                # So we need to subtract the offset to get the index in our python_lines array
+                                python_line_idx = (
+                                    python_line_num - python_source_start_line
+                                )
+                                if 0 <= python_line_idx < len(python_lines):
+                                    python_source_line = python_lines[
+                                        python_line_idx
+                                    ].strip()
+
+                        if ttgir_line_idx < ttgir_loop_start:
+                            prologue_ops.append((ttgir_line_idx, python_source_line))
+                        elif ttgir_loop_start <= ttgir_line_idx <= ttgir_loop_end:
+                            loop_body_ops.append((ttgir_line_idx, python_source_line))
+                        else:
+                            epilogue_ops.append((ttgir_line_idx, python_source_line))
+
+    # Step 4: Sort each section by line number to maintain program order
+    prologue_ops.sort(key=lambda x: x[0])
+    loop_body_ops.sort(key=lambda x: x[0])
+    epilogue_ops.sort(key=lambda x: x[0])
+
+    # Extract just the source lines (without line numbers)
+    prologue_lines = [line for _, line in prologue_ops]
+    loop_body_lines = [line for _, line in loop_body_ops]
+    epilogue_lines = [line for _, line in epilogue_ops]
+
+    # Log the pipelining results
+    logger.info(
+        f"Loop pipelining results (TTIR lines {ttir_loop_start}-{ttir_loop_end}):"
+    )
+    logger.info(f"  Prologue ({len(prologue_lines)} ops):")
+    for line in prologue_lines:
+        logger.info(f"    {line}")
+    logger.info(f"  Loop Body ({len(loop_body_lines)} ops):")
+    for line in loop_body_lines:
+        logger.info(f"    {line}")
+    logger.info(f"  Epilogue ({len(epilogue_lines)} ops):")
+    for line in epilogue_lines:
+        logger.info(f"    {line}")
+
+    return {
+        "prologue": prologue_lines,
+        "loop_body": loop_body_lines,
+        "epilogue": epilogue_lines,
+    }
+
+
+def generate_loop_schedule(
+    ttir_key: str,
+    ttgir_key: str,
+    file_content: dict[str, str],
+    file_path: dict[str, str],
+    source_mappings: dict[str, dict],
+    python_source_content: str | None,
+    python_source_start_line: int,
+) -> list[dict]:
+    """
+    Generate loop schedule information by finding inner scf.for loops in TTIR
+    and analyzing their pipelining potential using source mappings.
+
+    Only inner loops (loops without nested loops) are considered as they are
+    the primary candidates for Software Pipelining (SWP).
+
+    Args:
+        ttir_key: Key for the TTIR file.
+        ttgir_key: Key for the TTGIR file.
+        file_content: Dictionary mapping file keys to content.
+        file_path: Dictionary mapping file keys to file paths.
+        source_mappings: Dictionary containing source mappings between IR stages.
+        python_source_content: The original Python source code content.
+        python_source_start_line: The starting line number of the Python source in the original file.
+
+    Returns:
+        A list with one dictionary per inner loop analyzed, each containing:
+        - "prologue" / "loop_body" / "epilogue": Lists of source line strings
+          in program order, as returned by find_loop_pipelining.
+    """
+    ttir_content = load_ir_contents(ttir_key, file_content, file_path)
+    ttgir_content = load_ir_contents(ttgir_key, file_content, file_path)
+
+    # Get the TTIR to TTGIR mapping and TTGIR to source mapping
+    ttir_to_ttgir_mapping = source_mappings.get("ttir", {})
+    ttgir_to_source_mapping = source_mappings.get("ttgir", {})
+
+    # Find only inner loops (loops without nested loops inside)
+    inner_loop_bounds = find_inner_loop_bounds(ttir_content)
+
+    # For each inner loop, find pipelining information
+    loop_schedules = []
+    for loop_start, loop_end in inner_loop_bounds:
+        pipelining_info = find_loop_pipelining(
+            ttir_content,
+            ttgir_content,
+            loop_start,
+            loop_end,
+            ttir_to_ttgir_mapping,
+            ttgir_to_source_mapping,
+            python_source_content,
+            python_source_start_line,
+        )
+        loop_schedules.append(pipelining_info)
+
+    return loop_schedules
+
+
 def _generate_ir_analysis(entry: str):
     payload = entry.setdefault("payload", {})
     file_content = payload.get("file_content", {})
     file_path = payload.get("file_path", {})
+    source_mappings = payload.get("source_mappings", {})
 
     # Find the IR file keys
+    ttir_key = next((k for k in file_content if k.endswith(".ttir")), None)
     ttgir_key = next((k for k in file_content if k.endswith(".ttgir")), None)
     amdgcn_key = next((k for k in file_content if k.endswith(".amdgcn")), None)
     # Skip if no IR files found
-    if not (ttgir_key or amdgcn_key):
-        logger.debug("No AMD IR found")
+    if not (ttir_key or ttgir_key or amdgcn_key):
+        logger.debug("No IR found")
         return {}
     ir_analysis = {}
-    if amdgcn_key:
+    if amdgcn_key and ttgir_key:
+        # Add BufferOps information
         ttgir_bufferops_info = process_amd_ttgir_bufferops(
             ttgir_key, file_content, file_path
         )
@@ -72,4 +388,25 @@ def _generate_ir_analysis(entry: str):
             io_counts["amd_gcn_bufferops_count"] = gcn_bufferops_info
         if io_counts:
             ir_analysis["io_counts"] = io_counts
+    if ttir_key and ttgir_key:
+        # Get Python source content and start line if available
+        python_source_content = None
+        python_source_start_line = 1  # Default to 1 if not available
+        python_source_info = payload.get("python_source")
+        if python_source_info:
+            python_source_content = python_source_info.get("code")
+            python_source_start_line = python_source_info.get("start_line", 1)
+
+        # Add loop schedule information
+        loop_schedule = generate_loop_schedule(
+            ttir_key,
+            ttgir_key,
+            file_content,
+            file_path,
+            source_mappings,
+            python_source_content,
+            python_source_start_line,
+        )
+        if loop_schedule:
+            ir_analysis["loop_schedules"] = loop_schedule
     return ir_analysis
diff --git a/website/src/pages/IRAnalysis.tsx b/website/src/pages/IRAnalysis.tsx
index 39d97e5..2f8e0d2 100644
--- a/website/src/pages/IRAnalysis.tsx
+++ b/website/src/pages/IRAnalysis.tsx
@@ -27,6 +27,7 @@ const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
   const io_counts = kernel.ir_analysis?.io_counts;
   const ttgir_info = io_counts?.["amd_ttgir_bufferops_count"];
   const amdgcn_info = io_counts?.["amd_gcn_bufferops_count"];
+  const loop_schedule = kernel.ir_analysis?.loop_schedules;
   const getCount = (info: Record<string, number> | undefined, key: string): string => { return info?.[key]?.toString() ?? "N/A"; };
 
   return (
@@ -44,7 +45,7 @@ const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
               AMD BufferOps Information
             </h3>
 
-            <div className="bg-gray-50 p-4 rounded-md border border-gray-200">
+            <div className="bg-gray-50 p-4 rounded-md border border-gray-200 mb-6">
               <div className="grid grid-cols-[repeat(auto-fit,_minmax(180px,_1fr))] gap-3">
                 {ttgir_info && (
                   <>
@@ -90,6 +91,76 @@ const IRAnalysis: React.FC<IRAnalysisProps> = ({ kernels, selectedKernel }) => {
             </div>
           </>
         )}
+
+        {loop_schedule && loop_schedule.length > 0 && (
+          <>
+            <h3 className="text-lg font-medium mb-3 text-gray-800">
+              Software Pipelining Schedule
+            </h3>
+
+            {loop_schedule.map((schedule: any, loopIndex: number) => {
+              const prologue = schedule?.prologue || [];
+              const loopBody = schedule?.loop_body || [];
+              const epilogue = schedule?.epilogue || [];
+
+              return (
+                <div key={loopIndex} className="bg-gray-50 p-4 rounded-md border border-gray-200 mb-4">
+                  <h4 className="text-md font-semibold mb-2 text-gray-700">
+                    Software Pipelining for Loop {loopIndex + 1}
+                  </h4>
+
+                  {/* Prologue */}
+                  {prologue.length > 0 && (
+                    <div className="mb-3">
+                      <div className="text-sm font-medium text-gray-600 mb-1">Prologue:</div>
+                      <div className="bg-white p-2 rounded border border-gray-200 font-mono text-xs">
+                        {prologue.map((line: string, idx: number) => (
+                          <div key={idx} className="text-gray-700">
+                            {line}
+                          </div>
+                        ))}
+                      </div>
+                    </div>
+                  )}
+
+                  {/* Loop Body */}
+                  <div className="mb-3">
+                    <div className="text-sm font-medium text-gray-600 mb-1">Loop Body:</div>
+                    <div className="bg-white p-2 rounded border border-gray-200">
+                      <div className="font-mono text-xs text-gray-500 mb-1">for (...) {'{'}</div>
+                      <div className="pl-4 font-mono text-xs">
+                        {loopBody.length > 0 ? (
+                          loopBody.map((line: string, idx: number) => (
+                            <div key={idx} className="text-gray-700">
+                              {line}
+                            </div>
+                          ))
+                        ) : (
+                          <div className="text-gray-400 italic">No operations in loop body</div>
+                        )}
+                      </div>
+                      <div className="font-mono text-xs text-gray-500 mt-1">{'}'}</div>
+                    </div>
+                  </div>
+
+                  {/* Epilogue */}
+                  {epilogue.length > 0 && (
+                    <div>
+                      <div className="text-sm font-medium text-gray-600 mb-1">Epilogue:</div>
+                      <div className="bg-white p-2 rounded border border-gray-200 font-mono text-xs">
+                        {epilogue.map((line: string, idx: number) => (
+                          <div key={idx} className="text-gray-700">
+                            {line}
+                          </div>
+                        ))}
+                      </div>
+                    </div>
+                  )}
+                </div>
+              );
+            })}
+          </>
+        )}
       </div>
     </div>
   );
diff --git a/website/src/utils/dataLoader.ts b/website/src/utils/dataLoader.ts
index c064e49..c174364 100644
--- a/website/src/utils/dataLoader.ts
+++ b/website/src/utils/dataLoader.ts
@@ -170,6 +170,7 @@ export interface CompilationMetadata {
 export interface IRAnalysisData {
     // Mapping from IR stage -> <IO type -> count>
     io_counts?: Record<string, Record<string, number>>;
+    loop_schedules?: [Record<string, [string]>];
 }
 
 /**

From 1c193e06f494371fdad66510e14a8e438185c6db Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Wed, 29 Oct 2025 11:56:09 -0400
Subject: [PATCH 07/10] Fix mapping to avoid false positives

---
 tritonparse/ir_analysis.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py
index cd27700..d2abbee 100644
--- a/tritonparse/ir_analysis.py
+++ b/tritonparse/ir_analysis.py
@@ -182,15 +182,31 @@ def find_loop_pipelining(
     ttgir_lines = ttgir_content.split("\n")
     python_lines = python_source_content.split("\n") if python_source_content else []
 
+    def apply_trailing_space(op: str) -> str:
+        """
+        Add a trailing space to all ops to avoid false positives like
+        warp_group_dot and warp_group_dot_wait.
+        """
+        return op + " "
+
     # Step 1: Find tt.load and tt.dot operations in TTIR loop
-    ttir_operations: list[tuple[str, int]] = []
+    ttir_lines: list[int] = []
+    pipeline_tt_ops = ["tt.load", "tt.dot"]
+    pipeline_tt_ops = [apply_trailing_space(op) for op in pipeline_tt_ops]
+    pipeline_ttgir_ops = [
+        "tt.load",
+        "tt.dot",
+        "async_copy_global_to_local",
+        "warp_group_dot",
+    ]
+    pipeline_ttgir_ops = [apply_trailing_space(op) for op in pipeline_ttgir_ops]
 
     for line_idx in range(ttir_loop_start, min(ttir_loop_end + 1, len(ttir_lines))):
         line = ttir_lines[line_idx]
-        if "tt.load" in line:
-            ttir_operations.append(("tt.load", line_idx))
-        if "tt.dot" in line:
-            ttir_operations.append(("tt.dot", line_idx))
+        for op in pipeline_tt_ops:
+            if op in line:
+                ttir_lines.append(line_idx)
+                break
 
     # Step 2: Find the corresponding loop in TTGIR using source mappings
     # Map the TTIR loop bounds to TTGIR using source mappings
@@ -215,7 +231,7 @@ def find_loop_pipelining(
     loop_body_ops: list[tuple[int, str]] = []
     epilogue_ops: list[tuple[int, str]] = []
 
-    for op_type, ttir_line in ttir_operations:
+    for ttir_line in ttir_lines:
         # Convert 0-indexed line to 1-indexed string key for mapping lookup
         ttir_line_key = str(ttir_line + 1)
 
@@ -234,14 +250,8 @@ def find_loop_pipelining(
                 if ttgir_line_idx < len(ttgir_lines):
                     ttgir_source_line = ttgir_lines[ttgir_line_idx].strip()
 
-                    # Only keep lines with specific operations
-                    relevant_ops = [
-                        "tt.load",
-                        "tt.dot",
-                        "async_copy_global_to_local",
-                        "warp_group_dot",
-                    ]
-                    if any(op in ttgir_source_line for op in relevant_ops):
+                    # Only keep mapped TTGIR lines that contain one of the pipeline ops.
+                    if any(op in ttgir_source_line for op in pipeline_ttgir_ops):
                         # Map TTGIR line back to Python source
                         ttgir_line_key = str(ttgir_line)
                         python_source_line = ttgir_source_line  # Default to TTGIR line

From 033feb1ccf5c15ee5577842087154e352cd516a8 Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Wed, 29 Oct 2025 12:23:50 -0400
Subject: [PATCH 08/10] Fixed typo

---
 tritonparse/ir_analysis.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py
index d2abbee..4cfd07e 100644
--- a/tritonparse/ir_analysis.py
+++ b/tritonparse/ir_analysis.py
@@ -190,7 +190,7 @@ def apply_trailing_space(op: str) -> str:
         return op + " "
 
     # Step 1: Find tt.load and tt.dot operations in TTIR loop
-    ttir_lines: list[int] = []
+    ttir_pipeline_lines: list[int] = []
     pipeline_tt_ops = ["tt.load", "tt.dot"]
     pipeline_tt_ops = [apply_trailing_space(op) for op in pipeline_tt_ops]
     pipeline_ttgir_ops = [
@@ -205,7 +205,7 @@ def apply_trailing_space(op: str) -> str:
         line = ttir_lines[line_idx]
         for op in pipeline_tt_ops:
             if op in line:
-                ttir_lines.append(line_idx)
+                ttir_pipeline_lines.append(line_idx)
                 break
 
     # Step 2: Find the corresponding loop in TTGIR using source mappings
@@ -231,7 +231,7 @@ def apply_trailing_space(op: str) -> str:
     loop_body_ops: list[tuple[int, str]] = []
     epilogue_ops: list[tuple[int, str]] = []
 
-    for ttir_line in ttir_lines:
+    for ttir_line in ttir_pipeline_lines:
         # Convert 0-indexed line to 1-indexed string key for mapping lookup
         ttir_line_key = str(ttir_line + 1)
 

From 5641663b918fb5273fbe4c09de19dd59c45ffc4c Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Wed, 29 Oct 2025 13:56:01 -0400
Subject: [PATCH 09/10] Fix initial bug

---
 tritonparse/ir_analysis.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py
index 4cfd07e..16c632e 100644
--- a/tritonparse/ir_analysis.py
+++ b/tritonparse/ir_analysis.py
@@ -84,11 +84,11 @@ def find_loop_bounds(ir_content: str) -> list[tuple[int, int]]:
             elif char == "}":
                 current_brace_count -= 1
 
-                # Check if we've closed any loops
-                while loop_stack and current_brace_count <= loop_stack[-1][1]:
-                    start_line, _start_brace_count = loop_stack.pop()
-                    # The loop ends at this line
-                    loop_bounds.append((start_line, line_idx))
+        # Check if we've closed any loops
+        while loop_stack and current_brace_count <= loop_stack[-1][1]:
+            start_line, _start_brace_count = loop_stack.pop()
+            # The loop ends at this line
+            loop_bounds.append((start_line, line_idx))
 
     return loop_bounds
 
@@ -137,6 +137,7 @@ def find_loop_pipelining(
     ttgir_content: str,
     ttir_loop_start: int,
     ttir_loop_end: int,
+    loop_index: int,
     ttir_to_ttgir_mapping: dict[str, dict],
     ttgir_to_source_mapping: dict[str, dict],
     python_source_content: str | None,
@@ -222,7 +223,7 @@ def apply_trailing_space(op: str) -> str:
 
     # Use the first inner loop as the reference
     # TODO: Implement more sophisticated mapping logic to match TTIR loops to TTGIR loops
-    ttgir_loop_start, ttgir_loop_end = ttgir_inner_loops[0]
+    ttgir_loop_start, ttgir_loop_end = ttgir_inner_loops[loop_index]
 
     # Step 3: Map TTIR operations to TTGIR operations using source mappings
     # and categorize them by their position relative to the TTGIR loop
@@ -353,12 +354,13 @@ def generate_loop_schedule(
 
     # For each inner loop, find pipelining information
     loop_schedules = []
-    for loop_start, loop_end in inner_loop_bounds:
+    for i, (loop_start, loop_end) in enumerate(inner_loop_bounds):
         pipelining_info = find_loop_pipelining(
             ttir_content,
             ttgir_content,
             loop_start,
             loop_end,
+            i,
             ttir_to_ttgir_mapping,
             ttgir_to_source_mapping,
             python_source_content,

From 84357ef342a8361a01472a767e7bf9dfe7164370 Mon Sep 17 00:00:00 2001
From: Nick Riasanovsky <njriasan@meta.com>
Date: Wed, 29 Oct 2025 14:07:11 -0400
Subject: [PATCH 10/10] Limit to 1 loop

---
 tritonparse/ir_analysis.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tritonparse/ir_analysis.py b/tritonparse/ir_analysis.py
index 16c632e..37d3b67 100644
--- a/tritonparse/ir_analysis.py
+++ b/tritonparse/ir_analysis.py
@@ -201,7 +201,6 @@ def apply_trailing_space(op: str) -> str:
         "warp_group_dot",
     ]
     pipeline_ttgir_ops = [apply_trailing_space(op) for op in pipeline_ttgir_ops]
-
     for line_idx in range(ttir_loop_start, min(ttir_loop_end + 1, len(ttir_lines))):
         line = ttir_lines[line_idx]
         for op in pipeline_tt_ops:
@@ -351,6 +350,8 @@ def generate_loop_schedule(
 
     # Find only inner loops (loops without nested loops inside)
     inner_loop_bounds = find_inner_loop_bounds(ttir_content)
+    # TODO: Fix loop mapping with multiple loops.
+    inner_loop_bounds = inner_loop_bounds[:1]
 
     # For each inner loop, find pipelining information
     loop_schedules = []