Skip to content

Commit a647fd7

Browse files
committed
[AArch64] Add a cost for v2i32 vecreduce.add.
These can lower to an addp. The score does not change with this patch, but this should help keep the scores the same with #146526.
1 parent 9d0ac39 commit a647fd7

File tree

2 files changed

+3
-17
lines changed

2 files changed

+3
-17
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5205,6 +5205,7 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
52055205
{ISD::ADD, MVT::v16i8, 2},
52065206
{ISD::ADD, MVT::v4i16, 2},
52075207
{ISD::ADD, MVT::v8i16, 2},
5208+
{ISD::ADD, MVT::v2i32, 2},
52085209
{ISD::ADD, MVT::v4i32, 2},
52095210
{ISD::ADD, MVT::v2i64, 2},
52105211
{ISD::OR, MVT::v8i8, 15},

llvm/test/Analysis/CostModel/AArch64/reduce-add.ll

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
66
define void @reduce() {
77
; CHECK-LABEL: 'reduce'
88
; CHECK-NEXT: Cost Model: Found costs of 2 for: %V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
9+
; CHECK-NEXT: Cost Model: Found costs of 2 for: %V2i8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
910
; CHECK-NEXT: Cost Model: Found costs of 2 for: %V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef)
1011
; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4i8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
1112
; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8i8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
@@ -24,6 +25,7 @@ define void @reduce() {
2425
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
2526
;
2627
%V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
28+
%V2i8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
2729
%V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef)
2830
%V4i8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
2931
%V8i8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
@@ -41,20 +43,3 @@ define void @reduce() {
4143
%V4i64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
4244
ret void
4345
}
44-
45-
declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>)
46-
declare i8 @llvm.vector.reduce.add.v3i8(<3 x i8>)
47-
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
48-
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
49-
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
50-
declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
51-
declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
52-
declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
53-
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
54-
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
55-
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
56-
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
57-
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
58-
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
59-
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
60-
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)

0 commit comments

Comments
 (0)