Skip to content

Commit ab0360c

Browse files
committed
[VectorUtils] Trivially vectorize ldexp, [l]lround
1 parent fba63e3 commit ab0360c

File tree

2 files changed

+215
-0
lines changed

2 files changed

+215
-0
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
8181
case Intrinsic::exp:
8282
case Intrinsic::exp10:
8383
case Intrinsic::exp2:
84+
case Intrinsic::ldexp:
8485
case Intrinsic::log:
8586
case Intrinsic::log10:
8687
case Intrinsic::log2:
@@ -108,6 +109,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
108109
case Intrinsic::canonicalize:
109110
case Intrinsic::fptosi_sat:
110111
case Intrinsic::fptoui_sat:
112+
case Intrinsic::lround:
113+
case Intrinsic::llround:
111114
case Intrinsic::lrint:
112115
case Intrinsic::llrint:
113116
case Intrinsic::ucmp:
@@ -189,6 +192,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
189192
switch (ID) {
190193
case Intrinsic::fptosi_sat:
191194
case Intrinsic::fptoui_sat:
195+
case Intrinsic::lround:
196+
case Intrinsic::llround:
192197
case Intrinsic::lrint:
193198
case Intrinsic::llrint:
194199
case Intrinsic::vp_lrint:
@@ -203,6 +208,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
203208
case Intrinsic::vp_is_fpclass:
204209
return OpdIdx == 0;
205210
case Intrinsic::powi:
211+
case Intrinsic::ldexp:
206212
return OpdIdx == -1 || OpdIdx == 1;
207213
default:
208214
return OpdIdx == -1;

llvm/test/Transforms/LoopVectorize/intrinsic.ll

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,58 @@ for.end: ; preds = %for.body, %entry
324324

325325
declare double @llvm.exp2.f64(double)
326326

327+
define void @ldexp_f32i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
328+
; CHECK-LABEL: @ldexp_f32i32(
329+
; CHECK: llvm.ldexp.v4f32.v4i32
330+
; CHECK: ret void
331+
;
332+
entry:
333+
%cmp6 = icmp sgt i32 %n, 0
334+
br i1 %cmp6, label %for.body, label %for.end
335+
336+
for.body: ; preds = %entry, %for.body
337+
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
338+
%arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
339+
%0 = load float, ptr %arrayidx, align 4
340+
%call = tail call float @llvm.ldexp.f32.i32(float %0, i32 %exp)
341+
%arrayidx2 = getelementptr inbounds float, ptr %x, i32 %iv
342+
store float %call, ptr %arrayidx2, align 4
343+
%iv.next = add i32 %iv, 1
344+
%exitcond = icmp eq i32 %iv.next, %n
345+
br i1 %exitcond, label %for.end, label %for.body
346+
347+
for.end: ; preds = %for.body, %entry
348+
ret void
349+
}
350+
351+
declare float @llvm.ldexp.f32.i32(float, i32)
352+
353+
define void @ldexp_f64i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
354+
; CHECK-LABEL: @ldexp_f64i32(
355+
; CHECK: llvm.ldexp.v4f64.v4i32
356+
; CHECK: ret void
357+
;
358+
entry:
359+
%cmp6 = icmp sgt i32 %n, 0
360+
br i1 %cmp6, label %for.body, label %for.end
361+
362+
for.body: ; preds = %entry, %for.body
363+
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
364+
%arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
365+
%0 = load double, ptr %arrayidx, align 8
366+
%call = tail call double @llvm.ldexp.f64.i32(double %0, i32 %exp)
367+
%arrayidx2 = getelementptr inbounds double, ptr %x, i32 %iv
368+
store double %call, ptr %arrayidx2, align 8
369+
%iv.next = add i32 %iv, 1
370+
%exitcond = icmp eq i32 %iv.next, %n
371+
br i1 %exitcond, label %for.end, label %for.body
372+
373+
for.end: ; preds = %for.body, %entry
374+
ret void
375+
}
376+
377+
declare double @llvm.ldexp.f64.i32(double, i32)
378+
327379
define void @log_f32(i32 %n, ptr %y, ptr %x) {
328380
; CHECK-LABEL: @log_f32(
329381
; CHECK: llvm.log.v4f32
@@ -976,6 +1028,163 @@ for.end: ; preds = %for.body, %entry
9761028

9771029
declare double @llvm.roundeven.f64(double)
9781030

1031+
1032+
define void @lround_i32f32(i32 %n, ptr %y, ptr %x) {
1033+
; CHECK-LABEL: @lround_i32f32(
1034+
; CHECK: llvm.lround.v4i32.v4f32
1035+
; CHECK: ret void
1036+
;
1037+
entry:
1038+
%cmp6 = icmp sgt i32 %n, 0
1039+
br i1 %cmp6, label %for.body, label %for.end
1040+
1041+
for.body: ; preds = %entry, %for.body
1042+
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
1043+
%arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
1044+
%0 = load float, ptr %arrayidx, align 4
1045+
%call = tail call i32 @llvm.lround.i32.f32(float %0)
1046+
%arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
1047+
store i32 %call, ptr %arrayidx2, align 4
1048+
%iv.next = add i32 %iv, 1
1049+
%exitcond = icmp eq i32 %iv.next, %n
1050+
br i1 %exitcond, label %for.end, label %for.body
1051+
1052+
for.end: ; preds = %for.body, %entry
1053+
ret void
1054+
}
1055+
1056+
declare i32 @llvm.lround.i32.f32(float)
1057+
1058+
define void @lround_i32f64(i32 %n, ptr %y, ptr %x) {
1059+
; CHECK-LABEL: @lround_i32f64(
1060+
; CHECK: llvm.lround.v4i32.v4f64
1061+
; CHECK: ret void
1062+
;
1063+
entry:
1064+
%cmp6 = icmp sgt i32 %n, 0
1065+
br i1 %cmp6, label %for.body, label %for.end
1066+
1067+
for.body: ; preds = %entry, %for.body
1068+
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
1069+
%arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
1070+
%0 = load double, ptr %arrayidx, align 8
1071+
%call = tail call i32 @llvm.lround.i32.f64(double %0)
1072+
%arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
1073+
store i32 %call, ptr %arrayidx2, align 4
1074+
%iv.next = add i32 %iv, 1
1075+
%exitcond = icmp eq i32 %iv.next, %n
1076+
br i1 %exitcond, label %for.end, label %for.body
1077+
1078+
for.end: ; preds = %for.body, %entry
1079+
ret void
1080+
}
1081+
1082+
declare i32 @llvm.lround.i32.f64(double)
1083+
1084+
define void @lround_i64f32(i32 %n, ptr %y, ptr %x) {
1085+
; CHECK-LABEL: @lround_i64f32(
1086+
; CHECK: llvm.lround.v4i64.v4f32
1087+
; CHECK: ret void
1088+
;
1089+
entry:
1090+
%cmp6 = icmp sgt i32 %n, 0
1091+
br i1 %cmp6, label %for.body, label %for.end
1092+
1093+
for.body: ; preds = %entry, %for.body
1094+
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
1095+
%arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
1096+
%0 = load float, ptr %arrayidx, align 4
1097+
%call = tail call i64 @llvm.lround.i64.f32(float %0)
1098+
%arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
1099+
store i64 %call, ptr %arrayidx2, align 4
1100+
%iv.next = add i32 %iv, 1
1101+
%exitcond = icmp eq i32 %iv.next, %n
1102+
br i1 %exitcond, label %for.end, label %for.body
1103+
1104+
for.end: ; preds = %for.body, %entry
1105+
ret void
1106+
}
1107+
1108+
declare i64 @llvm.lround.i64.f32(float)
1109+
1110+
define void @lround_i64f64(i32 %n, ptr %y, ptr %x) {
1111+
; CHECK-LABEL: @lround_i64f64(
1112+
; CHECK: llvm.lround.v4i64.v4f64
1113+
; CHECK: ret void
1114+
;
1115+
entry:
1116+
%cmp6 = icmp sgt i32 %n, 0
1117+
br i1 %cmp6, label %for.body, label %for.end
1118+
1119+
for.body: ; preds = %entry, %for.body
1120+
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
1121+
%arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
1122+
%0 = load double, ptr %arrayidx, align 8
1123+
%call = tail call i64 @llvm.lround.i64.f64(double %0)
1124+
%arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
1125+
store i64 %call, ptr %arrayidx2, align 8
1126+
%iv.next = add i32 %iv, 1
1127+
%exitcond = icmp eq i32 %iv.next, %n
1128+
br i1 %exitcond, label %for.end, label %for.body
1129+
1130+
for.end: ; preds = %for.body, %entry
1131+
ret void
1132+
}
1133+
1134+
declare i64 @llvm.lround.i64.f64(double)
1135+
1136+
define void @llround_i64f32(i32 %n, ptr %y, ptr %x) {
1137+
; CHECK-LABEL: @llround_i64f32(
1138+
; CHECK: llvm.llround.v4i64.v4f32
1139+
; CHECK: ret void
1140+
;
1141+
entry:
1142+
%cmp6 = icmp sgt i32 %n, 0
1143+
br i1 %cmp6, label %for.body, label %for.end
1144+
1145+
for.body: ; preds = %entry, %for.body
1146+
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
1147+
%arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
1148+
%0 = load float, ptr %arrayidx, align 4
1149+
%call = tail call i64 @llvm.llround.i64.f32(float %0)
1150+
%arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
1151+
store i64 %call, ptr %arrayidx2, align 4
1152+
%iv.next = add i32 %iv, 1
1153+
%exitcond = icmp eq i32 %iv.next, %n
1154+
br i1 %exitcond, label %for.end, label %for.body
1155+
1156+
for.end: ; preds = %for.body, %entry
1157+
ret void
1158+
}
1159+
1160+
declare i64 @llvm.llround.i64.f32(float)
1161+
1162+
define void @llround_i64f64(i32 %n, ptr %y, ptr %x) {
1163+
; CHECK-LABEL: @llround_i64f64(
1164+
; CHECK: llvm.llround.v4i64.v4f64
1165+
; CHECK: ret void
1166+
;
1167+
entry:
1168+
%cmp6 = icmp sgt i32 %n, 0
1169+
br i1 %cmp6, label %for.body, label %for.end
1170+
1171+
for.body: ; preds = %entry, %for.body
1172+
%iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
1173+
%arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
1174+
%0 = load double, ptr %arrayidx, align 8
1175+
%call = tail call i64 @llvm.llround.i64.f64(double %0)
1176+
%arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
1177+
store i64 %call, ptr %arrayidx2, align 8
1178+
%iv.next = add i32 %iv, 1
1179+
%exitcond = icmp eq i32 %iv.next, %n
1180+
br i1 %exitcond, label %for.end, label %for.body
1181+
1182+
for.end: ; preds = %for.body, %entry
1183+
ret void
1184+
}
1185+
1186+
declare i64 @llvm.llround.i64.f64(double)
1187+
9791188
define void @fma_f32(i32 %n, ptr %y, ptr %x, ptr %z, ptr %w) {
9801189
; CHECK-LABEL: @fma_f32(
9811190
; CHECK: llvm.fma.v4f32

0 commit comments

Comments
 (0)