Skip to content

Commit d59cd61

Browse files
committed
address pr comments
1 parent e64fff3 commit d59cd61

File tree

2 files changed

+43
-65
lines changed

2 files changed

+43
-65
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 39 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,18 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
544544
return Builder.CreateFMul(X, PiOver180);
545545
}
546546

547+
static Value* createCombinedi32toi64Expansion(IRBuilder<> &Builder, Value *LoBytes, Value *HighBytes) {
548+
// For int64, manually combine two int32s
549+
// First, zero-extend both values to i64
550+
Value *Lo = Builder.CreateZExt(LoBytes, Builder.getInt64Ty());
551+
Value *Hi =
552+
Builder.CreateZExt(HighBytes, Builder.getInt64Ty());
553+
// Shift the high bits left by 32 bits
554+
Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
555+
// OR the high and low bits together
556+
return Builder.CreateOr(Lo, ShiftedHi);
557+
}
558+
547559
static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
548560
IRBuilder<> Builder(Orig);
549561

@@ -579,22 +591,14 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
579591
Value *Result = PoisonValue::get(BufferTy);
580592
for (unsigned I = 0; I < ExtractNum; I += 2) {
581593
Value *Combined = nullptr;
582-
if (IsDouble) {
594+
if (IsDouble)
583595
// For doubles, use dx_asdouble intrinsic
584596
Combined =
585597
Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
586598
{ExtractElements[I], ExtractElements[I + 1]});
587-
} else {
588-
// For int64, manually combine two int32s
589-
// First, zero-extend both values to i64
590-
Value *Lo = Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
591-
Value *Hi =
592-
Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
593-
// Shift the high bits left by 32 bits
594-
Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
595-
// OR the high and low bits together
596-
Combined = Builder.CreateOr(Lo, ShiftedHi);
597-
}
599+
else
600+
Combined =
601+
createCombinedi32toi64Expansion(Builder, ExtractElements[I], ExtractElements[I + 1]);
598602

599603
if (ExtractNum == 4)
600604
Result = Builder.CreateInsertElement(Result, Combined,
@@ -650,60 +654,42 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
650654
Type *Int32Ty = Builder.getInt32Ty();
651655
Type *ResultTy = VectorType::get(Int32Ty, IsVector ? 4 : 2, false);
652656
Value *Val = PoisonValue::get(ResultTy);
653-
657+
658+
// Type of each 32-bit half: i32 for scalars, <2 x i32> for vectors (shared by the double and int64 paths)
659+
Type *SplitElementTy = Int32Ty;
660+
if (IsVector)
661+
SplitElementTy = VectorType::get(SplitElementTy, 2, false);
662+
663+
Value *LowBits = nullptr;
664+
Value *HighBits = nullptr;
654665
// Split the 64-bit values into 32-bit components
655666
if (IsDouble) {
656-
// Handle double type(s)
657-
Type *SplitElementTy = Int32Ty;
658-
if (IsVector)
659-
SplitElementTy = VectorType::get(SplitElementTy, 2, false);
660-
661667
auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
662668
Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
663669
{Orig->getOperand(2)});
664-
Value *LowBits = Builder.CreateExtractValue(Split, 0);
665-
Value *HighBits = Builder.CreateExtractValue(Split, 1);
666-
667-
if (IsVector) {
668-
// For vector doubles, use shuffle to create the final vector
669-
Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
670-
} else {
671-
// For scalar doubles, insert the elements
672-
Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
673-
Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
674-
}
670+
LowBits = Builder.CreateExtractValue(Split, 0);
671+
HighBits = Builder.CreateExtractValue(Split, 1);
675672
} else {
676673
// Handle int64 type(s)
677674
Value *InputVal = Orig->getOperand(2);
675+
Constant *ShiftAmt = Builder.getInt64(32);
676+
if (IsVector)
677+
ShiftAmt = ConstantVector::getSplat(ElementCount::getFixed(2), ShiftAmt);
678678

679-
if (IsVector) {
680-
// Handle vector of int64
681-
for (unsigned I = 0; I < 2; ++I) {
682-
// Extract each int64 element
683-
Value *Int64Val =
684-
Builder.CreateExtractElement(InputVal, Builder.getInt32(I));
685-
686-
// Split into low and high 32-bit parts
687-
Value *LowBits = Builder.CreateTrunc(Int64Val, Int32Ty);
688-
Value *ShiftedVal = Builder.CreateLShr(Int64Val, Builder.getInt64(32));
689-
Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
690-
691-
// Insert into result vector
692-
Val =
693-
Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(I * 2));
694-
Val = Builder.CreateInsertElement(Val, HighBits,
695-
Builder.getInt32(I * 2 + 1));
696-
}
697-
} else {
698-
// Handle scalar int64
699-
Value *LowBits = Builder.CreateTrunc(InputVal, Int32Ty);
700-
Value *ShiftedVal = Builder.CreateLShr(InputVal, Builder.getInt64(32));
701-
Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
679+
// Split into low and high 32-bit parts
680+
LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
681+
Value *ShiftedVal = Builder.CreateLShr(InputVal, ShiftAmt);
682+
HighBits = Builder.CreateTrunc(ShiftedVal, SplitElementTy);
683+
}
702684

685+
if (IsVector) {
686+
// For vectors (double or int64), shuffle to interleave the low and high halves into the final vector
687+
Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
688+
} else {
689+
// For scalars (double or int64), insert the low and high halves as elements 0 and 1
703690
Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
704691
Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
705692
}
706-
}
707693

708694
// Create the final intrinsic call
709695
Builder.CreateIntrinsic(Builder.getVoidTy(),

llvm/test/CodeGen/DirectX/BufferStoreInt64.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,10 @@ define void @storev2i64(<2 x i64> %0) {
2525
; CHECK-LABEL: define void @storev2i64(
2626
; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) {
2727
; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
28-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
29-
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
30-
; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP2]], 32
31-
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
32-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
33-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i32 1
34-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
35-
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
36-
; CHECK-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP8]], 32
37-
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
38-
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP9]], i32 2
39-
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP11]], i32 3
28+
; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i32>
29+
; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP0]], splat (i64 32)
30+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32>
31+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
4032
; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP13]])
4133
; CHECK-NEXT: ret void
4234
;

0 commit comments

Comments
 (0)