Xilinx · ljfitz · Jun 20, 2024 · Jun 12, 2024 · Jun 13, 2024 · Jun 14, 2024
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
@@ -99,9 +99,12 @@ struct SqrtReciprocalOptimization : public OpRewritePattern<tosa::PowOp> {
     // An improvement for the future would be to generate a tile operator here instead
     if (inputType != outputType)
       return rewriter.notifyMatchFailure(op, "input type and output type are different, tiling is not supported for this canonicalization");
-
-    rewriter.replaceOpWithNewOp<tosa::RsqrtOp>(user, outputType, op.getInput1());
 
+    auto rsqrtOp = rewriter.create<tosa::RsqrtOp>(
+        rewriter.getFusedLoc({op.getLoc(), user->getLoc()}), outputType,
+        op.getInput1());
+    rewriter.replaceOp(user, rsqrtOp);
+
     return success();
   }
 };
@@ -888,7 +891,7 @@ OpFoldResult CastOp::fold(FoldAdaptor adaptor) {
           llvm::cast<IntegerType>(outETy).getIntOrFloatBitWidth(), unsign);
       auto floatVal = operand.getSplatValue<APFloat>();
       bool exact;
-      floatVal.convertToInteger(intVal, llvm::RoundingMode::TowardZero, &exact);
+      floatVal.convertToInteger(intVal, llvm::RoundingMode::NearestTiesToEven, &exact);
       return SplatElementsAttr::get(outTy, intVal);
     }
 

diff --git a/mlir/test/Dialect/Tosa/canonicalize_with_debuginfo.mlir b/mlir/test/Dialect/Tosa/canonicalize_with_debuginfo.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -mlir-print-debuginfo -canonicalize="test-convergence" %s | FileCheck %s
+// RUN: mlir-opt -split-input-file -mlir-print-debuginfo -canonicalize="test-convergence" %s | FileCheck %s
 
 // CHECK-LABEL: @clamp_twice_is_single_clamp
 func.func @clamp_twice_is_single_clamp(%arg0: tensor<4xi8>) -> tensor<4xi8> {
@@ -11,4 +11,36 @@ func.func @clamp_twice_is_single_clamp(%arg0: tensor<4xi8>) -> tensor<4xi8> {
   return %1 : tensor<4xi8>
 }
 #loc0 = loc("Clamp_A")
-#loc1 = loc("Clamp_B")
+#loc1 = loc("Clamp_B")
+
+// -----
+// pow(x, 0.5) feeding a reciprocal canonicalizes to one tosa.rsqrt whose location fuses the locations of both original ops.
+// CHECK-LABEL: @canonicalize_optimize_sqrt_reciprocal_with_debinfo
+func.func @canonicalize_optimize_sqrt_reciprocal_with_debinfo(%arg0: tensor<1x5x1x1xf32>) -> tensor<1x5x1x1xf32> {
+  // CHECK: %[[RSQRT:.*]] = tosa.rsqrt %arg{{.*}} : (tensor<1x5x1x1xf32>) -> tensor<1x5x1x1xf32> loc([[LOC:.*]])
+  // CHECK-DAG: #[[A:.*]] = loc("Pow_A")
+  // CHECK-DAG: #[[B:.*]] = loc("Reciprocal_B")
+  // CHECK-DAG: [[LOC]] = loc(fused[#[[A]], #[[B]]])
+  %0 = "tosa.const"() <{value = dense<5.000000e-01> : tensor<1x1x1x1xf32>}> : () -> tensor<1x1x1x1xf32>
+  %1 = tosa.pow %arg0, %0 : (tensor<1x5x1x1xf32>, tensor<1x1x1x1xf32>) -> tensor<1x5x1x1xf32> loc(#loc0)
+  %2 = tosa.reciprocal %1 : (tensor<1x5x1x1xf32>) -> tensor<1x5x1x1xf32> loc(#loc1)
+  return %2 : tensor<1x5x1x1xf32>
+}
+#loc0 = loc("Pow_A")
+#loc1 = loc("Reciprocal_B")
+
+// -----
+// Same pow(x, 0.5) + reciprocal -> tosa.rsqrt rewrite with fused locations, exercised on bf16 tensors.
+// CHECK-LABEL: @canonicalize_optimize_sqrt_reciprocal_bf16
+func.func @canonicalize_optimize_sqrt_reciprocal_bf16(%arg0: tensor<1x5x1x1xbf16>) -> tensor<1x5x1x1xbf16> {
+  // CHECK: %[[RSQRT:.*]] = tosa.rsqrt %arg{{.*}} : (tensor<1x5x1x1xbf16>) -> tensor<1x5x1x1xbf16> loc([[LOC:.*]])
+  // CHECK-DAG: #[[A:.*]] = loc("Pow_B")
+  // CHECK-DAG: #[[B:.*]] = loc("Reciprocal_C")
+  // CHECK-DAG: [[LOC]] = loc(fused[#[[A]], #[[B]]])
+  %0 = "tosa.const"() <{value = dense<5.000000e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16>
+  %1 = tosa.pow %arg0, %0 : (tensor<1x5x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x5x1x1xbf16> loc(#loc0)
+  %2 = tosa.reciprocal %1 : (tensor<1x5x1x1xbf16>) -> tensor<1x5x1x1xbf16> loc(#loc1)
+  return %2 : tensor<1x5x1x1xbf16>
+}
+#loc0 = loc("Pow_B")
+#loc1 = loc("Reciprocal_C")
diff --git a/mlir/test/Dialect/Tosa/constant-op-fold.mlir b/mlir/test/Dialect/Tosa/constant-op-fold.mlir
@@ -602,6 +602,18 @@ func.func @cast_float_to_int() -> tensor<i16> {
 
 // -----
 
+// Folding a float-to-int cast rounds to nearest with ties to even: -3.5 becomes -4, whereas truncation toward zero would give -3.
+// CHECK: func.func @cast_float_to_int_round
+func.func @cast_float_to_int_round() -> tensor<i16> {
+  %cst = "tosa.const"() {value = dense<-3.5> : tensor<f32>} : () -> tensor<f32>
+  // CHECK: %[[FOLDED:.+]] = "tosa.const"() <{value = dense<-4> : tensor<i16>}
+  %rounded = tosa.cast %cst : (tensor<f32>) -> tensor<i16>
+  // CHECK: return %[[FOLDED]]
+  return %rounded : tensor<i16>
+}
+
+// -----
+
 // CHECK: func.func @cast_int_to_int_trunc
 func.func @cast_int_to_int_trunc() -> tensor<i16> {
   %splat = "tosa.const"() {value = dense<-1> : tensor<i32>} : () -> tensor<i32>