Merge pull request #201 from Xilinx/release_rai_1_2

ljfitz · web-flow · commit 1c9b6e2961cd · 2024-06-20T10:18:28.000+02:00
Release rai 1 2
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
@@ -99,9 +99,12 @@ struct SqrtReciprocalOptimization : public OpRewritePattern<tosa::PowOp> {
     // An improvement for the future would be to generate a tile operator here instead
     if (inputType != outputType)
       return rewriter.notifyMatchFailure(op, "input type and output type are different, tiling is not supported for this canonicalization");
-      
-    rewriter.replaceOpWithNewOp<tosa::RsqrtOp>(user, outputType, op.getInput1());
 
+    auto rsqrtOp = rewriter.create<tosa::RsqrtOp>(
+        rewriter.getFusedLoc({op.getLoc(), user->getLoc()}), outputType,
+        op.getInput1());
+    rewriter.replaceOp(user, rsqrtOp);
+      
     return success();
   }
 };
diff --git a/mlir/test/Dialect/Tosa/canonicalize_with_debuginfo.mlir b/mlir/test/Dialect/Tosa/canonicalize_with_debuginfo.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -mlir-print-debuginfo -canonicalize="test-convergence" %s | FileCheck %s
+// RUN: mlir-opt -split-input-file -mlir-print-debuginfo -canonicalize="test-convergence" %s | FileCheck %s
 
 // CHECK-LABEL: @clamp_twice_is_single_clamp
 func.func @clamp_twice_is_single_clamp(%arg0: tensor<4xi8>) -> tensor<4xi8> {
@@ -11,4 +11,36 @@ func.func @clamp_twice_is_single_clamp(%arg0: tensor<4xi8>) -> tensor<4xi8> {
   return %1 : tensor<4xi8>
 }
 #loc0 = loc("Clamp_A")
-#loc1 = loc("Clamp_B")
+#loc1 = loc("Clamp_B")
+
+// -----
+// pow(x, 0.5) followed by reciprocal must fold into a single tosa.rsqrt whose location fuses the locations of both original ops.
+// CHECK-LABEL: @canonicalize_optimize_sqrt_reciprocal_with_debinfo
+func.func @canonicalize_optimize_sqrt_reciprocal_with_debinfo(%arg0: tensor<1x5x1x1xf32>) -> tensor<1x5x1x1xf32> {
+  // CHECK: %[[RSQRT:.*]] = tosa.rsqrt %arg{{.*}} : (tensor<1x5x1x1xf32>) -> tensor<1x5x1x1xf32> loc([[LOC:.*]])
+  // CHECK-DAG: #[[A:.*]] = loc("Pow_A")
+  // CHECK-DAG: #[[B:.*]] = loc("Reciprocal_B")
+  // CHECK-DAG: [[LOC]] = loc(fused[#[[A]], #[[B]]])
+  %0 = "tosa.const"() <{value = dense<5.000000e-01> : tensor<1x1x1x1xf32>}> : () -> tensor<1x1x1x1xf32>
+  %1 = tosa.pow %arg0, %0 : (tensor<1x5x1x1xf32>, tensor<1x1x1x1xf32>) -> tensor<1x5x1x1xf32> loc(#loc0)
+  %2 = tosa.reciprocal %1 : (tensor<1x5x1x1xf32>) -> tensor<1x5x1x1xf32> loc(#loc1)
+  return %2 : tensor<1x5x1x1xf32>
+}
+#loc0 = loc("Pow_A")
+#loc1 = loc("Reciprocal_B")
+
+// -----
+// Same pow(x, 0.5) + reciprocal -> tosa.rsqrt fold as the f32 case, exercised on bf16 tensors; the fused location must still be emitted.
+// CHECK-LABEL: @canonicalize_optimize_sqrt_reciprocal_bf16
+func.func @canonicalize_optimize_sqrt_reciprocal_bf16(%arg0: tensor<1x5x1x1xbf16>) -> tensor<1x5x1x1xbf16> {
+  // CHECK: %[[RSQRT:.*]] = tosa.rsqrt %arg{{.*}} : (tensor<1x5x1x1xbf16>) -> tensor<1x5x1x1xbf16> loc([[LOC:.*]])
+  // CHECK-DAG: #[[A:.*]] = loc("Pow_B")
+  // CHECK-DAG: #[[B:.*]] = loc("Reciprocal_C")
+  // CHECK-DAG: [[LOC]] = loc(fused[#[[A]], #[[B]]])
+  %0 = "tosa.const"() <{value = dense<5.000000e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16>
+  %1 = tosa.pow %arg0, %0 : (tensor<1x5x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x5x1x1xbf16> loc(#loc0)
+  %2 = tosa.reciprocal %1 : (tensor<1x5x1x1xbf16>) -> tensor<1x5x1x1xbf16> loc(#loc1)
+  return %2 : tensor<1x5x1x1xbf16>
+}
+#loc0 = loc("Pow_B")
+#loc1 = loc("Reciprocal_C")