From dc6e97a0598b01dcd98d96b5e0efdb4982d5b784 Mon Sep 17 00:00:00 2001
From: Mikhail Goncharov <goncharov@google.com>
Date: Thu, 20 Mar 2025 06:16:18 -0700
Subject: [PATCH] [XLA:GPU] move instruction renaming to a separate pass from
 priority fusion

Why:

1. Smaller function.

2. Previously priority fusion pass claimed that it has not modified the HLO while it at least renamed the instructions.

PiperOrigin-RevId: 738775070
---
 xla/service/gpu/BUILD                         |  1 +
 xla/service/gpu/gpu_compiler.cc               |  5 ++
 xla/service/gpu/transforms/BUILD              | 24 ++++++++
 ...dd_tracking_suffix_to_instruction_names.cc | 57 +++++++++++++++++++
 ...add_tracking_suffix_to_instruction_names.h | 51 +++++++++++++++++
 ...acking_suffix_to_instruction_names_test.cc | 52 +++++++++++++++++
 xla/service/gpu/transforms/priority_fusion.cc | 17 ------
 7 files changed, 190 insertions(+), 17 deletions(-)
 create mode 100644 xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.cc
 create mode 100644 xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h
 create mode 100644 xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names_test.cc

diff --git a/xla/service/gpu/BUILD b/xla/service/gpu/BUILD
index 708b713a99d9f..6075462a5b3c2 100644
--- a/xla/service/gpu/BUILD
+++ b/xla/service/gpu/BUILD
@@ -1506,6 +1506,7 @@ cc_library(
         "//xla/service/gpu/transforms/collectives:gpu_all_reduce_combiner",
         "//xla/service/gpu/transforms/collectives:gpu_collective_combiner_utils",
         "//xla/service/gpu/transforms/collectives:gpu_reduce_scatter_combiner",
+        "//xla/service/gpu/transforms:add_tracking_suffix_to_instruction_names",
         "//xla/service/gpu/transforms:algebraic_simplifier",
         "//xla/service/gpu/transforms:algorithm_checker",
         "//xla/service/gpu/transforms:all_gather_dynamic_slice_simplifier",
diff --git a/xla/service/gpu/gpu_compiler.cc b/xla/service/gpu/gpu_compiler.cc
index 0c713c2152536..3d76e2d9abee1 100644
--- a/xla/service/gpu/gpu_compiler.cc
+++ b/xla/service/gpu/gpu_compiler.cc
@@ -179,6 +179,7 @@ limitations under the License.
 #include "xla/service/gpu/reduction_utils.h"
 #include "xla/service/gpu/runtime_intrinsics.h"
 #include "xla/service/gpu/stream_executor_util.h"
+#include "xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h"
 #include "xla/service/gpu/transforms/algebraic_simplifier.h"
 #include "xla/service/gpu/transforms/algorithm_checker.h"
 #include "xla/service/gpu/transforms/all_gather_dynamic_slice_simplifier.h"
@@ -1076,6 +1077,10 @@ absl::Status RunFusionPasses(HloModule* hlo_module,
   const se::DeviceDescription& gpu_device_info =
       gpu_target_config.device_description;
 
+  HloPassPipeline pre_fusion("pre-fusion");
+  pre_fusion.AddPass<AddTrackingSuffixToInstructionNames>();
+  TF_RETURN_IF_ERROR(pre_fusion.Run(hlo_module).status());
+
   TF_RETURN_IF_ERROR(FusionPipeline(hlo_module->config().debug_options(),
                                     shape_size_fn, thread_pool, gpu_device_info)
                          .Run(hlo_module)
diff --git a/xla/service/gpu/transforms/BUILD b/xla/service/gpu/transforms/BUILD
index 07ffabc7d05c6..ab4a54fca6f30 100644
--- a/xla/service/gpu/transforms/BUILD
+++ b/xla/service/gpu/transforms/BUILD
@@ -2461,6 +2461,30 @@ xla_cc_test(
     ],
 )
 
+cc_library(
+    name = "add_tracking_suffix_to_instruction_names",
+    srcs = ["add_tracking_suffix_to_instruction_names.cc"],
+    hdrs = ["add_tracking_suffix_to_instruction_names.h"],
+    deps = [
+        "//xla/hlo/ir:hlo",
+        "//xla/hlo/pass:hlo_pass",
+        "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+xla_cc_test(
+    name = "add_tracking_suffix_to_instruction_names_test",
+    srcs = ["add_tracking_suffix_to_instruction_names_test.cc"],
+    deps = [
+        ":add_tracking_suffix_to_instruction_names",
+        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
+        "@com_google_absl//absl/strings:string_view",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 cc_library(
     name = "reduce_scatter_creator",
     srcs = ["reduce_scatter_creator.cc"],
diff --git a/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.cc b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.cc
new file mode 100644
index 0000000000000..297e2523159b6
--- /dev/null
+++ b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.cc
@@ -0,0 +1,57 @@
+/* Copyright 2025 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h"
+
+#include "absl/container/flat_hash_set.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "xla/hlo/ir/hlo_computation.h"
+#include "xla/hlo/ir/hlo_module.h"
+#include "xla/hlo/ir/hlo_opcode.h"
+
+namespace xla {
+namespace gpu {
+
+absl::StatusOr<bool> AddTrackingSuffixToInstructionNames::Run(
+    HloModule* module,
+    const absl::flat_hash_set<absl::string_view>& execution_threads) {
+  bool changed = false;
+
+  // Only rename instructions in non-fusion computations.
+  for (xla::HloComputation* computation :
+       module->MakeNonfusionComputationsSorted(execution_threads)) {
+    for (auto* instruction : computation->instructions()) {
+      // Skip non-fusible instruction to avoid breaking tests that are not
+      // related to fusion.
+      if (instruction->opcode() == HloOpcode::kParameter ||
+          instruction->opcode() == HloOpcode::kCustomCall ||
+          instruction->opcode() == HloOpcode::kFusion ||
+          !instruction->IsFusible())
+        continue;
+
+      auto new_name = absl::StrCat(instruction->name(), ".0");
+      module->SetAndUniquifyInstrName(instruction, new_name);
+
+      changed = true;
+    }
+  }
+
+  return changed;
+}
+
+}  // namespace gpu
+}  // namespace xla
diff --git a/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h
new file mode 100644
index 0000000000000..741937b79336e
--- /dev/null
+++ b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h
@@ -0,0 +1,51 @@
+/* Copyright 2025 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_SERVICE_GPU_TRANSFORMS_ADD_TRACKING_SUFFIX_TO_INSTRUCTION_NAMES_H_
+#define XLA_SERVICE_GPU_TRANSFORMS_ADD_TRACKING_SUFFIX_TO_INSTRUCTION_NAMES_H_
+
+#include "absl/container/flat_hash_set.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
+#include "xla/hlo/ir/hlo_module.h"
+#include "xla/hlo/pass/hlo_pass_interface.h"
+
+namespace xla::gpu {
+
+// Appends ".0" suffix to instruction names.
+//
+// Priority fusion pass duplicates instructions, and it's hard to match
+// instructions before and after the run as they got renamed.
+// To make debugging easier, we append ".0" suffix to instruction names
+// and priority fusion pass will increment this suffix:
+//
+// Original: broadcast.123
+// After this pass: broadcast.123.0
+// After priority fusion: broadcast.123.1, broadcast.123.2, ...
+//
+// One can match instructions before and after by their original name.
+class AddTrackingSuffixToInstructionNames : public HloModulePass {
+ public:
+  absl::string_view name() const override { return "rename-instructions"; }
+
+  using HloPassInterface::Run;
+  absl::StatusOr<bool> Run(
+      HloModule* module,
+      const absl::flat_hash_set<absl::string_view>& execution_threads) override;
+};
+
+}  // namespace xla::gpu
+
+#endif  // XLA_SERVICE_GPU_TRANSFORMS_ADD_TRACKING_SUFFIX_TO_INSTRUCTION_NAMES_H_
diff --git a/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names_test.cc b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names_test.cc
new file mode 100644
index 0000000000000..30d24a03e6e3e
--- /dev/null
+++ b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names_test.cc
@@ -0,0 +1,52 @@
+/* Copyright 2025 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h"
+
+#include <gtest/gtest.h>
+#include "absl/strings/string_view.h"
+#include "xla/hlo/testlib/hlo_hardware_independent_test_base.h"
+
+namespace xla {
+namespace gpu {
+namespace {
+
+class RenameInstructionsTest : public HloHardwareIndependentTestBase {};
+
+TEST_F(RenameInstructionsTest, BasicCase) {
+  absl::string_view kHlo = R"(
+  HloModule m
+
+  ENTRY main {
+    param_0.315 = f32[] parameter(0)
+    log.482 = f32[] log(param_0.315)
+    param_1.426 = f32[] parameter(1)
+    subtract.22 = f32[] subtract(log.482, param_1.426)
+    ROOT exponential.15 = f32[] exponential(subtract.22)
+  }
+  )";
+  RunAndFilecheckHloRewrite(kHlo, AddTrackingSuffixToInstructionNames(), R"(
+  // CHECK: ENTRY %main {{.*}} {
+  // CHECK:   %param_0.315 = f32[] parameter(0)
+  // CHECK:   %log.482.0 = f32[] log(%param_0.315)
+  // CHECK:   %param_1.426 = f32[] parameter(1)
+  // CHECK:   %subtract.22.0 = f32[] subtract(%log.482.0, %param_1.426)
+  // CHECK:   ROOT %exponential.15.0 = f32[] exponential(%subtract.22.0)
+  // CHECK: })");
+}
+
+}  // namespace
+}  // namespace gpu
+}  // namespace xla
diff --git a/xla/service/gpu/transforms/priority_fusion.cc b/xla/service/gpu/transforms/priority_fusion.cc
index ea507b965ffe7..1850ff499126d 100644
--- a/xla/service/gpu/transforms/priority_fusion.cc
+++ b/xla/service/gpu/transforms/priority_fusion.cc
@@ -954,23 +954,6 @@ absl::StatusOr<bool> PriorityFusion::Run(
   auto fusible_computations =
       GetFusibleComputations(*module, execution_threads);
 
-  // Appends ".0" suffix to all instructions.
-  //
-  // Every time an instruction is duplicated, the last integer suffix is
-  // incremented.
-  // Before: broadcast.123 -> broadcast.124
-  // After: broadcast.123.0 -> broadcast.123.1
-  //
-  // With this modification it will be easier to match instructions before and
-  // after fusion passes, because they will have the same unique prefix. Names
-  // are not used in the pipeline, but it makes debugging much easier.
-  for (auto* computation : fusible_computations) {
-    for (auto* instruction : computation->instructions()) {
-      module->SetAndUniquifyInstrName(instruction,
-                                      absl::StrCat(instruction->name(), ".0"));
-    }
-  }
-
   if (dump_enabled) {
     fusion_process_dump_->set_hlo_module_before_fusion(
         module->ToString(HloPrintOptions::ShortParsable()));