From 8874eb14624b614fcdec9f68e84eb8dc4e7dd7f8 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Mon, 30 Jun 2025 12:25:01 +0000 Subject: [PATCH] replace 0 with ub.poison in linalg vectorizer Signed-off-by: Fabian Mora --- .../Linalg/Transforms/Vectorization.cpp | 16 ++-- .../vectorization/extract-with-patterns.mlir | 39 +++++----- .../Dialect/Linalg/vectorization/extract.mlir | 6 +- .../linalg-ops-with-patterns.mlir | 14 ++-- .../Linalg/vectorization/linalg-ops.mlir | 74 +++++++++---------- 5 files changed, 73 insertions(+), 76 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index b467114c72f7d..f8592e2ca2174 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1183,10 +1183,6 @@ vectorizeTensorExtract(RewriterBase &rewriter, VectorizationState &state, auto srcRank = extractOp.getTensor().getType().getRank(); SmallVector inBounds(dstRank, true); - // Get the value to pad transfer reads with 0. - Value padding = - arith::getZeroConstant(rewriter, loc, resultType.getElementType()); - // 2a. Handle scalar broadcast access. if (memAccessKind == VectorMemoryAccessKind::ScalarBroadcast) { MLIRContext *ctx = rewriter.getContext(); @@ -1194,8 +1190,8 @@ vectorizeTensorExtract(RewriterBase &rewriter, VectorizationState &state, auto permutationMap = AffineMap::get(srcRank, 0, exprs, ctx); auto transferReadOp = rewriter.create( - loc, resultType, extractOp.getTensor(), transferReadIdxs, padding, - permutationMap, inBounds); + loc, resultType, extractOp.getTensor(), transferReadIdxs, + /*padding=*/std::nullopt, permutationMap, inBounds); // Mask this broadcasting xfer_read here rather than relying on the generic // path (the generic path assumes identity masking map, which wouldn't be @@ -1231,8 +1227,8 @@ vectorizeTensorExtract(RewriterBase &rewriter, VectorizationState &state, } auto transferReadOp = rewriter.create( - loc, resultType, extractOp.getTensor(), transferReadIdxs, padding, - permutationMap, inBounds); + loc, resultType, extractOp.getTensor(), transferReadIdxs, + /*padding=*/std::nullopt, permutationMap, inBounds); LDBG("Vectorised as contiguous load: " << extractOp); return VectorizationHookResult{VectorizationHookStatus::NewOp, @@ -1444,7 +1440,7 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state, Operation *read = rewriter.create( loc, readType, opOperand->get(), indices, - /*padding=*/arith::getZeroConstant(rewriter, loc, elemType), readMap); + /*padding=*/std::nullopt, readMap); read = state.maskOperation(rewriter, read, linalgOp, indexingMap); Value readValue = read->getResult(0); @@ -2646,7 +2642,7 @@ LogicalResult mlir::linalg::vectorizeCopy(RewriterBase &rewriter, Value readValue = rewriter.create( loc, readType, copyOp.getSource(), indices, - /*padding=*/arith::getZeroConstant(rewriter, loc, srcElementType), + /*padding=*/std::nullopt, rewriter.getMultiDimIdentityMap(srcType.getRank())); if (cast(readValue.getType()).getRank() == 0) { readValue = diff --git a/mlir/test/Dialect/Linalg/vectorization/extract-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/extract-with-patterns.mlir index f62e257f80016..c3ee8929dc3f3 100644 --- a/mlir/test/Dialect/Linalg/vectorization/extract-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/extract-with-patterns.mlir @@ -31,9 +31,9 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic( // CHECK-SAME: %[[ARG1:.*]]: tensor<1x1x3xf32> // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[PV:.+]] = ub.poison : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[CST]] {in_bounds = [true, true, true]} : tensor<3x3x3xf32>, vector<1x1x3xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PV]] {in_bounds = [true, true, true]} : tensor<3x3x3xf32>, vector<1x1x3xf32> // CHECK: vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> // ----- @@ -64,12 +64,12 @@ func.func @vectorize_nd_tensor_extract_transfer_read_complex(%6: tensor<45x80x16 // CHECK-SAME: %[[ARG5:.*]]: tensor<1x4xf32>) -> tensor<1x4xf32> { // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[PV:.*]] = ub.poison : f32 // CHECK-DAG: %[[C79:.*]] = arith.constant 79 : index // CHECK: %[[ADD1:.*]] = arith.addi %[[ARG1]], %[[ARG2]] : index // CHECK: %[[ADD2:.*]] = arith.addi %[[ARG3]], %[[ARG4]] : index -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]]{{\[}}%[[ADD1]], %[[C79]], %[[ADD2]]], %[[CST]] {in_bounds = [true, true]} : tensor<45x80x16xf32>, vector<1x4xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]]{{\[}}%[[ADD1]], %[[C79]], %[[ADD2]]], %[[PV]] {in_bounds = [true, true]} : tensor<45x80x16xf32>, vector<1x4xf32> // CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[ARG5]]{{\[}}%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32> // CHECK: return %[[WRITE]] : tensor<1x4xf32> // CHECK: } @@ -97,11 +97,11 @@ func.func @vectorize_nd_tensor_extract_with_affine_apply_contiguous(%6: tensor<8 // CHECK-SAME: %[[ARG1:.*]]: index, // CHECK-SAME: %[[ARG2:.*]]: tensor<1x4xf32>) -> tensor<1x4xf32> { -// CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[PV:.*]] = ub.poison : f32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C79:.*]] = arith.constant 79 : index -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]]{{\[}}%[[C79]], %[[ARG1]]], %[[CST]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]]{{\[}}%[[C79]], %[[ARG1]]], %[[PV]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32> // CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[ARG2]]{{\[}}%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32> // CHECK: return %[[WRITE]] : tensor<1x4xf32> // CHECK: } @@ -164,9 +164,9 @@ func.func @vectorize_nd_tensor_extract_with_maxsi_contiguous(%arg0: tensor<80x16 // CHECK-SAME: %[[VAL_1:.*]]: tensor<1x4xf32>) -> tensor<1x4xf32> { // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index -// CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[PV:.*]] = ub.poison : f32 -// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[C16]], %[[C0]]], %[[CST]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32> +// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[C16]], %[[C0]]], %[[PV]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32> // CHECK: %[[VAL_9:.*]] = vector.transfer_write %[[VAL_8]], %[[VAL_1]]{{\[}}%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32> // CHECK: return %[[VAL_9]] : tensor<1x4xf32> // CHECK: } @@ -229,12 +229,12 @@ func.func @vectorize_nd_tensor_extract_index_from_tensor(%arg0: tensor<3x3xf32>, // CHECK-SAME: %[[ARG3:.*]]: tensor<4x7x2xf32> // CHECK-SAME: %[[ARG4:.*]]: tensor<4x7x3x2xf32> // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[PV:.*]] = ub.poison : i32 // CHECK-DAG: %[[CST:.*]] = arith.constant dense<3> : vector<7x2x4x3xindex> // CHECK-DAG: %[[CST_1:.*]] = arith.constant dense : vector<4x7x3x2xi1> // CHECK-DAG: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<4x7x3x2xf32> -// CHECK: %[[V0:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], %[[C0_i32]] {in_bounds = [true, true]} : tensor<4x3xi32>, vector<4x3xi32> -// CHECK: %[[V1:.*]] = vector.transfer_read %[[ARG2]][%[[C0]], %[[C0]]], %[[C0_i32]] {in_bounds = [true, true]} : tensor<4x3xi32>, vector<4x3xi32> +// CHECK: %[[V0:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], %[[PV]] {in_bounds = [true, true]} : tensor<4x3xi32>, vector<4x3xi32> +// CHECK: %[[V1:.*]] = vector.transfer_read %[[ARG2]][%[[C0]], %[[C0]]], %[[PV]] {in_bounds = [true, true]} : tensor<4x3xi32>, vector<4x3xi32> // CHECK: %[[CAST:.*]] = arith.index_cast %[[V0]] : vector<4x3xi32> to vector<4x3xindex> // CHECK: %[[B1:.*]] = vector.broadcast %[[CAST]] : vector<4x3xindex> to vector<7x2x4x3xindex> // CHECK: %[[CAST_1:.*]] = arith.index_cast %[[V1]] : vector<4x3xi32> to vector<4x3xindex> @@ -382,7 +382,7 @@ func.func @vectorize_nd_tensor_extract_contiguous_and_gather(%arg0: tensor<6xf32 // CHECK-SAME: %[[VAL_0:.*]]: tensor<6xf32> // CHECK-SAME: %[[VAL_1:.*]]: tensor<5xi32> // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = ub.poison : i32 // CHECK-DAG: %[[VAL_4:.*]] = arith.constant dense<0> : vector<5xindex> // CHECK-DAG: %[[VAL_5:.*]] = arith.constant dense<5> : vector<5xindex> // CHECK-DAG: %[[VAL_6:.*]] = arith.constant dense : vector<5xi1> @@ -480,13 +480,14 @@ func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1: // CHECK-LABEL: func.func @vectorize_nd_tensor_extract_block_arg( // CHECK-SAME: %[[VAL_0:.*]]: tensor<5x6xf32>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<5xindex>) -> tensor<5xf32> { +// CHECK-DAG: %[[PAD:.*]] = ub.poison : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant dense<[0, 1, 2, 3, 4]> : vector<5xindex> // CHECK-DAG: %[[VAL_4:.*]] = arith.constant dense : vector<5xi1> // CHECK-DAG: %[[VAL_5:.*]] = arith.constant dense<0.000000e+00> : vector<5xf32> // CHECK-DAG: %[[VAL_6:.*]] = arith.constant dense<6> : vector<5xindex> // CHECK: %[[VAL_7:.*]] = tensor.empty() : tensor<5xf32> -// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_2]]], %[[VAL_2]] {in_bounds = [true]} : tensor<5xindex>, vector<5xindex> +// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_2]]], %[[PAD]] {in_bounds = [true]} : tensor<5xindex>, vector<5xindex> // CHECK: %[[VAL_9:.*]] = arith.muli %[[VAL_8]], %[[VAL_6]] : vector<5xindex> // CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_3]] : vector<5xindex> // CHECK: %[[VAL_11:.*]] = vector.gather %[[VAL_0]]{{\[}}%[[VAL_2]], %[[VAL_2]]] {{\[}}%[[VAL_10]]], %[[VAL_4]], %[[VAL_5]] : tensor<5x6xf32>, vector<5xindex>, vector<5xi1>, vector<5xf32> into vector<5xf32> @@ -559,7 +560,7 @@ func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, % // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[PAD:.*]] = ub.poison : f32 // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector // CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1x1x3xf32> // CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> @@ -583,7 +584,7 @@ func.func @extract_scalar_from_0d_into_0d(%src: tensor, %init: tensor) // CHECK-LABEL: func.func @extract_scalar_from_0d_into_0d( // CHECK-SAME: %[[SRC:.*]]: tensor, // CHECK-SAME: %[[INIT:.*]]: tensor) -> tensor { -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PAD:.*]] = ub.poison : f32 // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector // CHECK: vector.transfer_write %[[READ]], %[[INIT]][] : vector, tensor @@ -606,7 +607,7 @@ func.func @extract_scalar_from_0d_into_1d(%src: tensor, %init: tensor<1xf32 // CHECK-SAME: %[[SRC:.*]]: tensor, // CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> { // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[PAD:.*]] = ub.poison : f32 // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor, vector // CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<1xf32> // CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32> @@ -654,7 +655,7 @@ func.func @scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32 // CHECK-LABEL: func.func @scalar_read_with_broadcast_from_column_tensor // CHECK-SAME: %[[INIT:.*]]: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> { -// CHECK-DAG: %[[PAD:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[PAD:.*]] = ub.poison : i32 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[SRC:.*]] = arith.constant dense<{{\[\[}}0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32> // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] : tensor<15x1xi32>, vector @@ -688,7 +689,7 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( // CHECK-SAME: %[[SRC:.*]]: tensor<3x3x3xf32>, // CHECK-SAME: %[[INIT:.*]]: tensor<3x1x1xf32>) // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector +// CHECK-DAG: %[[PV:.*]] = ub.poison : f32 +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]], %[[C0]]], %[[PV]] : tensor<3x3x3xf32>, vector // CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector to vector<3x1x1xf32> // CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32> diff --git a/mlir/test/Dialect/Linalg/vectorization/extract.mlir b/mlir/test/Dialect/Linalg/vectorization/extract.mlir index d0d3b58a05704..76ac4b8398069 100644 --- a/mlir/test/Dialect/Linalg/vectorization/extract.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/extract.mlir @@ -299,7 +299,7 @@ func.func @masked_dynamic_vectorize_nd_tensor_extract_with_affine_apply_gather(% // CHECK: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_2]], %[[VAL_6]] : tensor // CHECK: %[[VAL_8:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_9:.*]] = ub.poison : f32 // CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_7]] : vector<1x4xi1> // CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_2]]{{\[}}%[[VAL_8]], %[[VAL_8]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> // CHECK: %[[VAL_12:.*]] = vector.step : vector<4xindex> @@ -356,7 +356,7 @@ func.func @extract_masked_vectorize(%arg0: tensor, %arg1: tensor // CHECK: %[[VAL_8:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_9:.*]] = ub.poison : f32 // CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_7]] : vector<3x3xi1> // CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_8]], %[[VAL_8]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor, vector<3x3xf32> } : vector<3x3xi1> -> vector<3x3xf32> // CHECK: %[[VAL_12:.*]] = arith.constant dense : vector<3x3xi1> @@ -458,7 +458,7 @@ func.func @scalar_broadcast(%init : tensor<1x1x3xi32>, %src: tensor<1x3x2x4xi32> // CHECK: %[[MASK_RES:.*]] = vector.create_mask %[[C1]], %[[C1_2]], %[[C3]] : vector<1x1x4xi1> /// Read and broadcast the scalar -// CHECK: %[[PAD:.*]] = arith.constant 0 : i32 +// CHECK: %[[PAD:.*]] = ub.poison : i32 // CHECK: %[[MASK_READ:.*]] = vector.constant_mask [1] : vector<1xi1> // CHECK: %[[READ:.*]] = vector.mask %[[MASK_READ]] { // CHECK-SAME: vector.transfer_read %[[SRC]]{{\[}}%[[IDX]], %[[IDX]], %[[IDX]], %[[IDX]]], %[[PAD]] diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir index b282c57e3e4cb..4eeae4c064519 100644 --- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir @@ -161,7 +161,7 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[ARG_0:.*]]: tensor, %[[ARG_1:.*]]: tensor, %[[ARG_2:.*]]: tensor) func.func @generic_0d(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PAD:.*]] = ub.poison : f32 // CHECK: %[[READ_0:.*]] = vector.transfer_read %[[ARG_0]][], %[[PAD]] : tensor, vector // CHECK: %[[ARG_0_AS_SCALAR:.*]] = vector.extract %[[READ_0]][] : f32 from vector // CHECK: %[[READ_1:.*]] = vector.transfer_read %[[ARG_1]][], %[[PAD]] : tensor, vector @@ -770,11 +770,11 @@ module attributes {transform.with_named_sequence} { // CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1) -> (d1, 0, d0, 0)> // CHECK: func @generic_vectorize_broadcast_transpose // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[CF:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[V0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CF]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP0]]} : memref<4x4xf32>, vector<4x4x4x4xf32> -// CHECK: %[[V1:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %[[CF]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP1]]} : memref<4xf32>, vector<4x4x4x4xf32> -// CHECK: %[[V2:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %[[CF]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP2]]} : memref<4xf32>, vector<4x4x4x4xf32> -// CHECK: %[[V3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CF]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP3]]} : memref<4x4xf32>, vector<4x4x4x4xf32> +// CHECK-DAG: %[[PV:.*]] = ub.poison : f32 +// CHECK: %[[V0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[PV]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP0]]} : memref<4x4xf32>, vector<4x4x4x4xf32> +// CHECK: %[[V1:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %[[PV]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP1]]} : memref<4xf32>, vector<4x4x4x4xf32> +// CHECK: %[[V2:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %[[PV]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP2]]} : memref<4xf32>, vector<4x4x4x4xf32> +// CHECK: %[[V3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[PV]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP3]]} : memref<4x4xf32>, vector<4x4x4x4xf32> // CHECK: %[[SUB:.*]] = arith.subf %[[V0]], %[[V1]] : vector<4x4x4x4xf32> // CHECK: %[[ADD0:.*]] = arith.addf %[[V2]], %[[SUB]] : vector<4x4x4x4xf32> // CHECK: %[[ADD1:.*]] = arith.addf %[[V3]], %[[ADD0]] : vector<4x4x4x4xf32> @@ -1702,7 +1702,7 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @generic_with_reduction_and_broadcast( // CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x197x197xf32>) -> tensor<1x12x197x1xf32> { -// CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_1:.*]] = ub.poison : f32 // CHECK: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK: %[[VAL_3:.*]] = tensor.empty() : tensor<1x12x197x1xf32> // CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]], %[[VAL_1]] {in_bounds = [true, true, true, true]} : tensor<1x12x197x197xf32>, vector<1x12x197x197xf32> diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir index 6722de817f6bf..9e501affdd2a5 100644 --- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir @@ -211,10 +211,10 @@ func.func @dynamic_generic_with_reduction_and_broadcast(%arg0: tensor, // CHECK: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor // CHECK: %[[VAL_6:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_7:.*]] = ub.poison : f32 // CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]] : vector<4x4xi1> // CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_7]] {in_bounds = [true, true]} : tensor, vector<4x4xf32> } : vector<4x4xi1> -> vector<4x4xf32> -// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_10:.*]] = ub.poison : f32 // CHECK: %[[VAL_11:.*]] = vector.create_mask %[[VAL_3]] : vector<4xi1> // CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_11]] { vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_10]] {in_bounds = [true], permutation_map = #[[$MAP]]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> // CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.multi_reduction , %[[VAL_9]], %[[VAL_12]] [1] : vector<4x4xf32> to vector<4xf32> } : vector<4x4xi1> -> vector<4xf32> @@ -257,7 +257,7 @@ func.func @vectorize_dynamic_2d_transpose(%arg0: tensor, // CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor, vector<4x8xf32> } : vector<8x4xi1> -> vector<4x8xf32> // CHECK: %[[VAL_12:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]] : vector<4x8xi1> // CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> -// CHECK: %[[VAL_14:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_14:.*]] = ub.poison : f32 // CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> // CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_10]], %[[VAL_13]] : vector<4x8xf32> // CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_12]] { vector.transfer_write %[[VAL_16]], %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, tensor } : vector<4x8xi1> -> tensor @@ -372,12 +372,12 @@ func.func @vectorize_dynamic_reduction_2d_scalable(%arg0: tensor, // CHECK: %[[C1_IDX:.*]] = arith.constant 1 : index // CHECK: %[[DIM_A0_1:.*]] = tensor.dim %[[ARG_0]], %[[C1_IDX]] : tensor // CHECK: %[[C0_IDX:.*]] = arith.constant 0 : index -// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PV:.*]] = ub.poison : f32 // CHECK: %[[MASK_2D:.*]] = vector.create_mask %[[DIM_A0_0]], %[[DIM_A0_1]] : vector<4x[8]xi1> -// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2D]] { vector.transfer_read %[[ARG_0]][%[[C0_IDX]], %[[C0_IDX]]], %[[C0_f32]] {in_bounds = [true, true]} : tensor, vector<4x[8]xf32> } : vector<4x[8]xi1> -> vector<4x[8]xf32> -// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2D]] { vector.transfer_read %[[ARG_0]][%[[C0_IDX]], %[[C0_IDX]]], %[[PV]] {in_bounds = [true, true]} : tensor, vector<4x[8]xf32> } : vector<4x[8]xi1> -> vector<4x[8]xf32> +// CHECK: %[[PV:.*]] = ub.poison : f32 // CHECK: %[[MASK_1D:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<4xi1> -// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_1D]] { vector.transfer_read %[[ARG_1]][%[[C0_IDX]]], %[[C0_f32]] {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_1D]] { vector.transfer_read %[[ARG_1]][%[[C0_IDX]]], %[[PV]] {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> // CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK_2D]] { vector.multi_reduction , %[[VEC_RD_0]], %[[VEC_RD_1]] [1] : vector<4x[8]xf32> to vector<4xf32> } : vector<4x[8]xi1> -> vector<4xf32> // CHECK: %[[C0_IDX:.*]] = arith.constant 0 : index // CHECK: %{{.*}} = vector.mask %[[MASK_1D]] { vector.transfer_write %[[REDUCE]], %[[ARG_1]][%[[C0_IDX]]] {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor @@ -408,11 +408,11 @@ func.func @vectorize_dynamic_reduction_scalable_1d(%arg0: tensor, // CHECK: %[[C0_IDX:.*]] = arith.constant 0 : index // CHECK: %[[DIM_A0_0:.*]] = tensor.dim %[[ARG_0]], %[[C0_IDX]] : tensor // CHECK: %[[C0_IDX:.*]] = arith.constant 0 : index -// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PV:.*]] = ub.poison : f32 // CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<[4]xi1> -// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[ARG_0]][%[[C0_IDX]]], %[[C0_f32]] {in_bounds = [true]} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32> -// CHECK: %[[C0_F32:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VEC_RD_1:.*]] = vector.transfer_read %[[ARG_1]][], %[[C0_F32]] : tensor, vector +// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[ARG_0]][%[[C0_IDX]]], %[[PV]] {in_bounds = [true]} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32> +// CHECK: %[[PV:.*]] = ub.poison : f32 +// CHECK: %[[VEC_RD_1:.*]] = vector.transfer_read %[[ARG_1]][], %[[PV]] : tensor, vector // CHECK: %[[ACC_f32:.*]] = vector.extract %[[VEC_RD_1]][] : f32 from vector // CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK]] { vector.multi_reduction , %[[VEC_RD_0]], %[[ACC_f32]] [0] : vector<[4]xf32> to f32 } : vector<[4]xi1> -> f32 // CHECK: %[[VEC_f32:.*]] = vector.broadcast %[[REDUCE]] : f32 to vector @@ -532,13 +532,13 @@ func.func @vectorize_partial_dynamic_identity(%arg0: tensor<8x?xf32>, // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<8x?xf32> // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[VAL_6:.*]] = ub.poison : f32 // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 8 : index // CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_4]] : vector<8x32xi1> // CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_6]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32> -// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_10:.*]] = ub.poison : f32 // CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_1]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_10]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32> -// CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_12:.*]] = ub.poison : f32 // CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_2]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_12]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32> // CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] : vector<8x32xf32> // CHECK: %[[VAL_15:.*]] = arith.constant 0 : index @@ -576,13 +576,13 @@ func.func @vectorize_partial_dynamic_identity_scalable(%arg0: tensor<8x?xf32>, // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<8x?xf32> // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[VAL_6:.*]] = ub.poison : f32 // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 8 : index // CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_4]] : vector<8x[32]xi1> // CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_6]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32> -// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_10:.*]] = ub.poison : f32 // CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_1]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_10]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32> -// CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_12:.*]] = ub.poison : f32 // CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_2]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_12]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32> // CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] : vector<8x[32]xf32> // CHECK: %[[VAL_15:.*]] = arith.constant 0 : index @@ -647,14 +647,14 @@ func.func @vectorize_static_shape_with_mask(%arg0: tensor<8x30xf32>, // CHECK-LABEL: func.func @vectorize_static_shape_with_mask( // CHECK-SAME: %[[VAL_0:.*]]: tensor<8x30xf32>, %[[VAL_1:.*]]: tensor<8x30xf32>, %[[VAL_2:.*]]: tensor<8x30xf32>) -> tensor<8x30xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[VAL_4:.*]] = ub.poison : f32 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 30 : index // CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_6]] : vector<8x32xi1> // CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_0]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_4]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32> -// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_9:.*]] = ub.poison : f32 // CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_1]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32> -// CHECK: %[[VAL_11:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_11:.*]] = ub.poison : f32 // CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_2]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_11]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32> // CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<8x32xf32> // CHECK: %[[VAL_14:.*]] = arith.constant 0 : index @@ -689,14 +689,14 @@ func.func @vectorize_static_shape_with_mask_scalable(%arg0: tensor<8x30xf32>, // CHECK-LABEL: func.func @vectorize_static_shape_with_mask_scalable( // CHECK-SAME: %[[VAL_0:.*]]: tensor<8x30xf32>, %[[VAL_1:.*]]: tensor<8x30xf32>, %[[VAL_2:.*]]: tensor<8x30xf32>) -> tensor<8x30xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[VAL_4:.*]] = ub.poison : f32 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 30 : index // CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_6]] : vector<8x[32]xi1> // CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_0]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_4]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32> -// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_9:.*]] = ub.poison : f32 // CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_1]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32> -// CHECK: %[[VAL_11:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_11:.*]] = ub.poison : f32 // CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_2]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_11]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32> // CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<8x[32]xf32> // CHECK: %[[VAL_14:.*]] = arith.constant 0 : index @@ -733,15 +733,15 @@ func.func @vectorize_dynamic_matvec_trailing_reduction_dim(%arg0: tensor // CHECK: %[[C0_idx:.*]] = arith.constant 0 : index -// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PV:.*]] = ub.poison : f32 // CHECK: %[[MASK_2d:.*]] = vector.create_mask %[[DIM_A0_0]], %[[DIM_A0_1]] : vector<4x[4]xi1> -// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true]} : tensor, vector<4x[4]xf32> } : vector<4x[4]xi1> -> vector<4x[4]xf32> -// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[PV]] {in_bounds = [true, true]} : tensor, vector<4x[4]xf32> } : vector<4x[4]xi1> -> vector<4x[4]xf32> +// CHECK: %[[PV:.*]] = ub.poison : f32 // CHECK: %[[MASK_d1:.*]] = vector.create_mask %[[DIM_A0_1]] : vector<[4]xi1> -// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_d1]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true], permutation_map = #map} : tensor, vector<4x[4]xf32> } : vector<[4]xi1> -> vector<4x[4]xf32> -// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_d1]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[PV]] {in_bounds = [true, true], permutation_map = #map} : tensor, vector<4x[4]xf32> } : vector<[4]xi1> -> vector<4x[4]xf32> +// CHECK: %[[PV:.*]] = ub.poison : f32 // CHECK: %[[MASK_d2:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<4xi1> -// CHECK: %[[VEC_RD_2:.*]] = vector.mask %[[MASK_d2]] { vector.transfer_read %[[ARG_2]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VEC_RD_2:.*]] = vector.mask %[[MASK_d2]] { vector.transfer_read %[[ARG_2]][%[[C0_idx]]], %[[PV]] {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> // CHECK: %[[MUL:.*]] = arith.mulf %[[VEC_RD_0:.*]], %[[VEC_RD_1:.*]] : vector<4x[4]xf32> // CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK_2d]] { vector.multi_reduction , %[[MUL]], %[[VEC_RD_2]] [1] : vector<4x[4]xf32> to vector<4xf32> } : vector<4x[4]xi1> -> vector<4xf32> // CHECK: %[[C0_idx:.*]] = arith.constant 0 : index @@ -776,15 +776,15 @@ func.func @vectorize_dynamic_matvec_trailing_reduction_dim(%arg0: tensor // CHECK: %[[C0_idx:.*]] = arith.constant 0 : index -// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PV:.*]] = ub.poison : f32 // CHECK: %[[MASK_2d:.*]] = vector.create_mask %[[DIM_A0_0]], %[[DIM_A0_1]] : vector<[4]x4xi1> -// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true]} : tensor, vector<[4]x4xf32> } : vector<[4]x4xi1> -> vector<[4]x4xf32> -// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[PV]] {in_bounds = [true, true]} : tensor, vector<[4]x4xf32> } : vector<[4]x4xi1> -> vector<[4]x4xf32> +// CHECK: %[[PV:.*]] = ub.poison : f32 // CHECK: %[[MASK_d1:.*]] = vector.create_mask %[[DIM_A0_1]] : vector<4xi1> -// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_d1]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true], permutation_map = #map} : tensor, vector<[4]x4xf32> } : vector<4xi1> -> vector<[4]x4xf32> -// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_d1]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[PV]] {in_bounds = [true, true], permutation_map = #map} : tensor, vector<[4]x4xf32> } : vector<4xi1> -> vector<[4]x4xf32> +// CHECK: %[[PV:.*]] = ub.poison : f32 // CHECK: %[[MASK_d2:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<[4]xi1> -// CHECK: %[[VEC_RD_2:.*]] = vector.mask %[[MASK_d2]] { vector.transfer_read %[[ARG_2]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true]} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32> +// CHECK: %[[VEC_RD_2:.*]] = vector.mask %[[MASK_d2]] { vector.transfer_read %[[ARG_2]][%[[C0_idx]]], %[[PV]] {in_bounds = [true]} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32> // CHECK: %[[MUL:.*]] = arith.mulf %[[VEC_RD_0:.*]], %[[VEC_RD_1:.*]] : vector<[4]x4xf32> // CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK_2d]] { vector.multi_reduction , %[[MUL]], %[[VEC_RD_2]] [1] : vector<[4]x4xf32> to vector<[4]xf32> } : vector<[4]x4xi1> -> vector<[4]xf32> // CHECK: %[[C0_idx:.*]] = arith.constant 0 : index @@ -822,10 +822,10 @@ func.func @vectorize_linalg_index_scalable(%dest: tensor) -> tensor // CHECK: %[[C0_1:.*]] = arith.constant 0 : index -// CHECK: %[[C0_2:.*]] = arith.constant 0 : index +// CHECK: %[[PV:.*]] = ub.poison : index // CHECK: %[[MASK:.*]] = vector.create_mask %[[D0]] : vector<[4]xi1> // TODO: This xfer_read is not used - avoid creating it. -// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[DEST]]{{\[}}%[[C0_1]]], %[[C0_2]] {in_bounds = [true]} : tensor, vector<[4]xindex> } : vector<[4]xi1> -> vector<[4]xindex> +// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[DEST]]{{\[}}%[[C0_1]]], %[[PV]] {in_bounds = [true]} : tensor, vector<[4]xindex> } : vector<[4]xi1> -> vector<[4]xindex> // CHECK: %[[STEP:.*]] = vector.step : vector<[4]xindex> // CHECK: %[[C0_3:.*]] = arith.constant 0 : index // CHECK: %[[WRITE:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[STEP]], %[[DEST]]{{\[}}%[[C0_3]]] {in_bounds = [true]} : vector<[4]xindex>, tensor } : vector<[4]xi1> -> tensor