From dc6e97a0598b01dcd98d96b5e0efdb4982d5b784 Mon Sep 17 00:00:00 2001 From: Mikhail Goncharov <goncharov@google.com> Date: Thu, 20 Mar 2025 06:16:18 -0700 Subject: [PATCH] [XLA:GPU] move instruction renaming to a separate pass from priority fusion Why: 1. Smaller function. 2. Previously priority fusion pass claimed that it has not modified the HLO while it at least renamed the instructions. PiperOrigin-RevId: 738775070 --- xla/service/gpu/BUILD | 1 + xla/service/gpu/gpu_compiler.cc | 5 ++ xla/service/gpu/transforms/BUILD | 24 ++++++++ ...dd_tracking_suffix_to_instruction_names.cc | 57 +++++++++++++++++++ ...add_tracking_suffix_to_instruction_names.h | 51 +++++++++++++++++ ...acking_suffix_to_instruction_names_test.cc | 52 +++++++++++++++++ xla/service/gpu/transforms/priority_fusion.cc | 17 ------ 7 files changed, 190 insertions(+), 17 deletions(-) create mode 100644 xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.cc create mode 100644 xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h create mode 100644 xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names_test.cc diff --git a/xla/service/gpu/BUILD b/xla/service/gpu/BUILD index 708b713a99d9f..6075462a5b3c2 100644 --- a/xla/service/gpu/BUILD +++ b/xla/service/gpu/BUILD @@ -1506,6 +1506,7 @@ cc_library( "//xla/service/gpu/transforms/collectives:gpu_all_reduce_combiner", "//xla/service/gpu/transforms/collectives:gpu_collective_combiner_utils", "//xla/service/gpu/transforms/collectives:gpu_reduce_scatter_combiner", + "//xla/service/gpu/transforms:add_tracking_suffix_to_instruction_names", "//xla/service/gpu/transforms:algebraic_simplifier", "//xla/service/gpu/transforms:algorithm_checker", "//xla/service/gpu/transforms:all_gather_dynamic_slice_simplifier", diff --git a/xla/service/gpu/gpu_compiler.cc b/xla/service/gpu/gpu_compiler.cc index 0c713c2152536..3d76e2d9abee1 100644 --- a/xla/service/gpu/gpu_compiler.cc +++ b/xla/service/gpu/gpu_compiler.cc @@ -179,6 +179,7 @@ limitations under the License. #include "xla/service/gpu/reduction_utils.h" #include "xla/service/gpu/runtime_intrinsics.h" #include "xla/service/gpu/stream_executor_util.h" +#include "xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h" #include "xla/service/gpu/transforms/algebraic_simplifier.h" #include "xla/service/gpu/transforms/algorithm_checker.h" #include "xla/service/gpu/transforms/all_gather_dynamic_slice_simplifier.h" @@ -1076,6 +1077,10 @@ absl::Status RunFusionPasses(HloModule* hlo_module, const se::DeviceDescription& gpu_device_info = gpu_target_config.device_description; + HloPassPipeline pre_fusion("pre-fusion"); + pre_fusion.AddPass<AddTrackingSuffixToInstructionNames>(); + TF_RETURN_IF_ERROR(pre_fusion.Run(hlo_module).status()); + TF_RETURN_IF_ERROR(FusionPipeline(hlo_module->config().debug_options(), shape_size_fn, thread_pool, gpu_device_info) .Run(hlo_module) diff --git a/xla/service/gpu/transforms/BUILD b/xla/service/gpu/transforms/BUILD index 07ffabc7d05c6..ab4a54fca6f30 100644 --- a/xla/service/gpu/transforms/BUILD +++ b/xla/service/gpu/transforms/BUILD @@ -2461,6 +2461,30 @@ xla_cc_test( ], ) +cc_library( + name = "add_tracking_suffix_to_instruction_names", + srcs = ["add_tracking_suffix_to_instruction_names.cc"], + hdrs = ["add_tracking_suffix_to_instruction_names.h"], + deps = [ + "//xla/hlo/ir:hlo", + "//xla/hlo/pass:hlo_pass", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + ], +) + +xla_cc_test( + name = "add_tracking_suffix_to_instruction_names_test", + srcs = ["add_tracking_suffix_to_instruction_names_test.cc"], + deps = [ + ":add_tracking_suffix_to_instruction_names", + "//xla/hlo/testlib:hlo_hardware_independent_test_base", + "@com_google_absl//absl/strings:string_view", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "reduce_scatter_creator", srcs = ["reduce_scatter_creator.cc"], diff --git a/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.cc b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.cc new file mode 100644 index 0000000000000..297e2523159b6 --- /dev/null +++ b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.cc @@ -0,0 +1,57 @@ +/* Copyright 2025 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h" + +#include "absl/container/flat_hash_set.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "xla/hlo/ir/hlo_computation.h" +#include "xla/hlo/ir/hlo_module.h" +#include "xla/hlo/ir/hlo_opcode.h" + +namespace xla { +namespace gpu { + +absl::StatusOr<bool> AddTrackingSuffixToInstructionNames::Run( + HloModule* module, + const absl::flat_hash_set<absl::string_view>& execution_threads) { + bool changed = false; + + // Only rename instructions in non-fusion computations. + for (xla::HloComputation* computation : + module->MakeNonfusionComputationsSorted(execution_threads)) { + for (auto* instruction : computation->instructions()) { + // Skip non-fusible instruction to avoid breaking tests that are not + // related to fusion. + if (instruction->opcode() == HloOpcode::kParameter || + instruction->opcode() == HloOpcode::kCustomCall || + instruction->opcode() == HloOpcode::kFusion || + !instruction->IsFusible()) + continue; + + auto new_name = absl::StrCat(instruction->name(), ".0"); + module->SetAndUniquifyInstrName(instruction, new_name); + + changed = true; + } + } + + return changed; +} + +} // namespace gpu +} // namespace xla diff --git a/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h new file mode 100644 index 0000000000000..741937b79336e --- /dev/null +++ b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h @@ -0,0 +1,51 @@ +/* Copyright 2025 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef XLA_SERVICE_GPU_TRANSFORMS_ADD_TRACKING_SUFFIX_TO_INSTRUCTION_NAMES_H_ +#define XLA_SERVICE_GPU_TRANSFORMS_ADD_TRACKING_SUFFIX_TO_INSTRUCTION_NAMES_H_ + +#include "absl/container/flat_hash_set.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "xla/hlo/ir/hlo_module.h" +#include "xla/hlo/pass/hlo_pass_interface.h" + +namespace xla::gpu { + +// Appends ".0" suffix to instruction names. +// +// Priority fusion pass duplicates instructions, and it's hard to match +// instructions before and after the run as they got renamed. +// To make debugging easier, we append ".0" suffix to instruction names +// and priority fusion pass will increment this suffix: +// +// Original: broadcast.123 +// After this pass: broadcast.123.0 +// After priority fusion: broadcast.123.1, broadcast.123.2, ... +// +// One can match instructions before and after by their original name. +class AddTrackingSuffixToInstructionNames : public HloModulePass { + public: + absl::string_view name() const override { return "rename-instructions"; } + + using HloPassInterface::Run; + absl::StatusOr<bool> Run( + HloModule* module, + const absl::flat_hash_set<absl::string_view>& execution_threads) override; +}; + +} // namespace xla::gpu + +#endif // XLA_SERVICE_GPU_TRANSFORMS_ADD_TRACKING_SUFFIX_TO_INSTRUCTION_NAMES_H_ diff --git a/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names_test.cc b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names_test.cc new file mode 100644 index 0000000000000..30d24a03e6e3e --- /dev/null +++ b/xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names_test.cc @@ -0,0 +1,52 @@ +/* Copyright 2025 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "xla/service/gpu/transforms/add_tracking_suffix_to_instruction_names.h" + +#include <gtest/gtest.h> +#include "absl/strings/string_view.h" +#include "xla/hlo/testlib/hlo_hardware_independent_test_base.h" + +namespace xla { +namespace gpu { +namespace { + +class RenameInstructionsTest : public HloHardwareIndependentTestBase {}; + +TEST_F(RenameInstructionsTest, BasicCase) { + absl::string_view kHlo = R"( + HloModule m + + ENTRY main { + param_0.315 = f32[] parameter(0) + log.482 = f32[] log(param_0.315) + param_1.426 = f32[] parameter(1) + subtract.22 = f32[] subtract(log.482, param_1.426) + ROOT exponential.15 = f32[] exponential(subtract.22) + } + )"; + RunAndFilecheckHloRewrite(kHlo, AddTrackingSuffixToInstructionNames(), R"( + // CHECK: ENTRY %main {{.*}} { + // CHECK: %param_0.315 = f32[] parameter(0) + // CHECK: %log.482.0 = f32[] log(%param_0.315) + // CHECK: %param_1.426 = f32[] parameter(1) + // CHECK: %subtract.22.0 = f32[] subtract(%log.482.0, %param_1.426) + // CHECK: ROOT %exponential.15.0 = f32[] exponential(%subtract.22.0) + // CHECK: })"); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/xla/service/gpu/transforms/priority_fusion.cc b/xla/service/gpu/transforms/priority_fusion.cc index ea507b965ffe7..1850ff499126d 100644 --- a/xla/service/gpu/transforms/priority_fusion.cc +++ b/xla/service/gpu/transforms/priority_fusion.cc @@ -954,23 +954,6 @@ absl::StatusOr<bool> PriorityFusion::Run( auto fusible_computations = GetFusibleComputations(*module, execution_threads); - // Appends ".0" suffix to all instructions. - // - // Every time an instruction is duplicated, the last integer suffix is - // incremented. - // Before: broadcast.123 -> broadcast.124 - // After: broadcast.123.0 -> broadcast.123.1 - // - // With this modification it will be easier to match instructions before and - // after fusion passes, because they will have the same unique prefix. Names - // are not used in the pipeline, but it makes debugging much easier. - for (auto* computation : fusible_computations) { - for (auto* instruction : computation->instructions()) { - module->SetAndUniquifyInstrName(instruction, - absl::StrCat(instruction->name(), ".0")); - } - } - if (dump_enabled) { fusion_process_dump_->set_hlo_module_before_fusion( module->ToString(HloPrintOptions::ShortParsable()));