openxla · copybara-service · Mar 20, 2025 · Mar 20, 2025
diff --git a/xla/BUILD b/xla/BUILD
@@ -1218,6 +1218,8 @@ cc_library(
             "@com_google_absl//absl/log:check",
             "@com_google_absl//absl/strings",
             "@com_google_absl//absl/strings:str_format",
+            "@tsl//tsl/platform",
+            "@tsl//tsl/platform:platform_port",
             "@tsl//tsl/platform:protobuf",
         ],
 )

diff --git a/xla/debug_options_flags.cc b/xla/debug_options_flags.cc
@@ -44,10 +44,24 @@ limitations under the License.
 #include "xla/stream_executor/cuda/ptx_compiler_support.h"
 #include "xla/tsl/util/command_line_flags.h"
 #include "xla/xla.pb.h"
+#include "tsl/platform/cpu_info.h"  // NOLINT
+#include "tsl/platform/platform.h"
 #include "tsl/platform/protobuf.h"  // IWYU pragma: keep
 
 namespace xla {
 
+inline std::string DefaultMaxIsa() {
+#ifdef PLATFORM_GOOGLE
+  return "";
+#else  // !PLATFORM_GOOGLE
+  // There are many missing SVE lowerings in LLVM. Limit features to NEON for
+  // now. There shouldn't be significant performance impact as most AArch64
+  // CPUs still use 128-bit registers.
+  // TODO(penporn): Remove this once SVE is fully supported.
+  return tsl::port::IsAarch64CPU() ? "NEON" : "";
+#endif  // PLATFORM_GOOGLE
+}
+
 DebugOptions DefaultDebugOptionsIgnoringFlags() {
   DebugOptions opts;
   opts.set_xla_llvm_enable_alias_scope_metadata(true);
@@ -96,7 +110,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
   opts.set_xla_cpu_copy_insertion_use_region_analysis(false);
   opts.set_xla_cpu_enable_concurrency_optimized_scheduler(true);
   opts.set_xla_cpu_prefer_vector_width(256);
-  opts.set_xla_cpu_max_isa("");
+  opts.set_xla_cpu_max_isa(DefaultMaxIsa());
   opts.set_xla_cpu_generate_unique_c_style_kernel_entry_points(false);
 
   opts.set_xla_cpu_enable_fast_math(false);

diff --git a/xla/service/cpu/tests/BUILD b/xla/service/cpu/tests/BUILD
@@ -337,6 +337,7 @@ xla_cc_test(
         "@llvm-project//llvm:ARMCodeGen",  # fixdeps: keep
         "@llvm-project//llvm:Target",
         "@llvm-project//llvm:X86CodeGen",  # fixdeps: keep
+        "@tsl//tsl/platform",
         "@tsl//tsl/platform:platform_port",
     ],
 )

diff --git a/xla/service/cpu/tests/cpu_vectorization_test.cc b/xla/service/cpu/tests/cpu_vectorization_test.cc
@@ -37,6 +37,7 @@ limitations under the License.
 #include "xla/xla.pb.h"
 #include "xla/xla_data.pb.h"
 #include "tsl/platform/cpu_info.h"
+#include "tsl/platform/platform.h"
 
 namespace xla {
 namespace cpu {
@@ -233,6 +234,20 @@ INSTANTIATE_TEST_SUITE_P(AArch64MaxIsaTestInstantiation, AArch64MaxIsaTest,
                          ::testing::ValuesIn(GetAArch64MaxIsaTestCases()),
                          AArch64MaxIsaTest::Name);
 
+class DefaultMaxIsaTest : public CpuCodegenTest {};
+// On AArch64, expects xla_cpu_max_isa "" under PLATFORM_GOOGLE, else "NEON".
+TEST_F(DefaultMaxIsaTest, NeonForOssAArch64) {
+  if (!tsl::port::IsAarch64CPU()) {
+    GTEST_SKIP() << "This test is for AArch64 CPUs.";
+  }
+  DebugOptions debug_options = HloTestBase::GetDebugOptionsForTest();
+#ifdef PLATFORM_GOOGLE
+  EXPECT_EQ(debug_options.xla_cpu_max_isa(), "");
+#else  // !PLATFORM_GOOGLE
+  EXPECT_EQ(debug_options.xla_cpu_max_isa(), "NEON");
+#endif  // PLATFORM_GOOGLE
+}
+
 struct JitVectorizationTestSpec {
   HloOpcode opcode;
   std::string max_isa;