-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DirectX] add support for i64 buffer load/stores #145047
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) Changes: fixes #143380. Specifically it fixes the `Cannot create BufferLoad operation: Invalid overload type` error. The change does two things: it adds i64 support for intrinsic expansion for the `dx_resource_load_typedbuffer` and `dx_resource_store_typedbuffer` intrinsics, and it also lets loaded typedbuffers be returned directly, which previously caused a crash because of an unconditional `cast<ExtractValueInst>(U)`. Patch is 21.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145047.diff 5 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index f99e8e7ccdc5d..45d8e497165cf 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -70,15 +71,17 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_fadd:
return true;
- case Intrinsic::dx_resource_load_typedbuffer:
- // We need to handle doubles and vector of doubles.
- return F.getReturnType()
- ->getStructElementType(0)
- ->getScalarType()
- ->isDoubleTy();
- case Intrinsic::dx_resource_store_typedbuffer:
- // We need to handle doubles and vector of doubles.
- return F.getFunctionType()->getParamType(2)->getScalarType()->isDoubleTy();
+ case Intrinsic::dx_resource_load_typedbuffer: {
+ // We need to handle i64, doubles, and vectors of them.
+ Type *ScalarTy =
+ F.getReturnType()->getStructElementType(0)->getScalarType();
+ return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
+ }
+ case Intrinsic::dx_resource_store_typedbuffer: {
+ // We need to handle i64 and doubles and vectors of i64 and doubles.
+ Type *ScalarTy = F.getFunctionType()->getParamType(2)->getScalarType();
+ return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
+ }
}
return false;
}
@@ -545,13 +548,15 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig);
Type *BufferTy = Orig->getType()->getStructElementType(0);
- assert(BufferTy->getScalarType()->isDoubleTy() &&
- "Only expand double or double2");
+ Type *ScalarTy = BufferTy->getScalarType();
+ bool IsDouble = ScalarTy->isDoubleTy();
+  assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
+         "Only expand double or int64 scalars or vectors");
unsigned ExtractNum = 2;
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
assert(VT->getNumElements() == 2 &&
- "TypedBufferLoad double vector has wrong size");
+ "TypedBufferLoad vector must be size 2");
ExtractNum = 4;
}
@@ -570,22 +575,54 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
ExtractElements.push_back(
Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
- // combine into double(s)
+ // combine into double(s) or int64(s)
Value *Result = PoisonValue::get(BufferTy);
for (unsigned I = 0; I < ExtractNum; I += 2) {
- Value *Dbl =
- Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
- {ExtractElements[I], ExtractElements[I + 1]});
+ Value *Combined = nullptr;
+ if (IsDouble) {
+ // For doubles, use dx_asdouble intrinsic
+ Combined =
+ Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble,
+ {ExtractElements[I], ExtractElements[I + 1]});
+ } else {
+ // For int64, manually combine two int32s
+ // First, zero-extend both values to i64
+ Value *Lo = Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
+ Value *Hi =
+ Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
+ // Shift the high bits left by 32 bits
+ Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
+ // OR the high and low bits together
+ Combined = Builder.CreateOr(Lo, ShiftedHi);
+ }
+
if (ExtractNum == 4)
- Result =
- Builder.CreateInsertElement(Result, Dbl, Builder.getInt32(I / 2));
+ Result = Builder.CreateInsertElement(Result, Combined,
+ Builder.getInt32(I / 2));
else
- Result = Dbl;
+ Result = Combined;
}
Value *CheckBit = nullptr;
for (User *U : make_early_inc_range(Orig->users())) {
- auto *EVI = cast<ExtractValueInst>(U);
+ if (auto *Ret = dyn_cast<ReturnInst>(U)) {
+ // For return instructions, we need to handle the case where the function
+ // is directly returning the result of the call
+ Type *RetTy = Ret->getFunction()->getReturnType();
+ Value *StructRet = PoisonValue::get(RetTy);
+ StructRet = Builder.CreateInsertValue(StructRet, Result, {0});
+ Value *CheckBitForRet = Builder.CreateExtractValue(Load, {1});
+ StructRet = Builder.CreateInsertValue(StructRet, CheckBitForRet, {1});
+ Ret->setOperand(0, StructRet);
+ continue;
+ }
+ auto *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI) {
+ // If it's not a ReturnInst or ExtractValueInst, we don't know how to
+ // handle it
+ llvm_unreachable("Unexpected user of typedbufferload");
+ }
+
ArrayRef<unsigned> Indices = EVI->getIndices();
assert(Indices.size() == 1);
@@ -609,38 +646,82 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig);
Type *BufferTy = Orig->getFunctionType()->getParamType(2);
- assert(BufferTy->getScalarType()->isDoubleTy() &&
- "Only expand double or double2");
-
- unsigned ExtractNum = 2;
- if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
- assert(VT->getNumElements() == 2 &&
- "TypedBufferStore double vector has wrong size");
- ExtractNum = 4;
+ Type *ScalarTy = BufferTy->getScalarType();
+ bool IsDouble = ScalarTy->isDoubleTy();
+ assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
+ "Only expand double or int64 scalars or vectors");
+
+ // Determine if we're dealing with a vector or scalar
+ bool IsVector = isa<FixedVectorType>(BufferTy);
+ if (IsVector) {
+ assert(cast<FixedVectorType>(BufferTy)->getNumElements() == 2 &&
+ "TypedBufferStore vector must be size 2");
}
- Type *SplitElementTy = Builder.getInt32Ty();
- if (ExtractNum == 4)
- SplitElementTy = VectorType::get(SplitElementTy, 2, false);
-
- // split our double(s)
- auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
- Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
- Orig->getOperand(2));
- // create our vector
- Value *LowBits = Builder.CreateExtractValue(Split, 0);
- Value *HighBits = Builder.CreateExtractValue(Split, 1);
- Value *Val;
- if (ExtractNum == 2) {
- Val = PoisonValue::get(VectorType::get(SplitElementTy, 2, false));
- Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
- Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
- } else
- Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+ // Create the appropriate vector type for the result
+ Type *Int32Ty = Builder.getInt32Ty();
+ Type *ResultTy = VectorType::get(Int32Ty, IsVector ? 4 : 2, false);
+ Value *Val = PoisonValue::get(ResultTy);
+
+ // Split the 64-bit values into 32-bit components
+ if (IsDouble) {
+ // Handle double type(s)
+ Type *SplitElementTy = Int32Ty;
+ if (IsVector)
+ SplitElementTy = VectorType::get(SplitElementTy, 2, false);
+
+ auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
+ Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
+ {Orig->getOperand(2)});
+ Value *LowBits = Builder.CreateExtractValue(Split, 0);
+ Value *HighBits = Builder.CreateExtractValue(Split, 1);
+
+ if (IsVector) {
+ // For vector doubles, use shuffle to create the final vector
+ Val = Builder.CreateShuffleVector(LowBits, HighBits, {0, 2, 1, 3});
+ } else {
+ // For scalar doubles, insert the elements
+ Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
+ Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
+ }
+ } else {
+ // Handle int64 type(s)
+ Value *InputVal = Orig->getOperand(2);
+
+ if (IsVector) {
+ // Handle vector of int64
+ for (unsigned I = 0; I < 2; ++I) {
+ // Extract each int64 element
+ Value *Int64Val =
+ Builder.CreateExtractElement(InputVal, Builder.getInt32(I));
+
+ // Split into low and high 32-bit parts
+ Value *LowBits = Builder.CreateTrunc(Int64Val, Int32Ty);
+ Value *ShiftedVal = Builder.CreateLShr(Int64Val, Builder.getInt64(32));
+ Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
+
+ // Insert into result vector
+ Val =
+ Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(I * 2));
+ Val = Builder.CreateInsertElement(Val, HighBits,
+ Builder.getInt32(I * 2 + 1));
+ }
+ } else {
+ // Handle scalar int64
+ Value *LowBits = Builder.CreateTrunc(InputVal, Int32Ty);
+ Value *ShiftedVal = Builder.CreateLShr(InputVal, Builder.getInt64(32));
+ Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty);
+
+ Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
+ Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
+ }
+ }
+ // Create the final intrinsic call
Builder.CreateIntrinsic(Builder.getVoidTy(),
Intrinsic::dx_resource_store_typedbuffer,
{Orig->getOperand(0), Orig->getOperand(1), Val});
+
Orig->eraseFromParent();
return true;
}
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
index 80a071a66364b..af3ec9df37967 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoadDouble.ll
@@ -1,6 +1,6 @@
; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
-target triple = "dxil-pc-shadermodel6.6-compute"
+target triple = "dxil-pc-shadermodel6.2-compute"
define void @loadf64() {
; check the handle from binding is unchanged
@@ -88,4 +88,4 @@ define void @loadf64WithCheckBit() {
; CHECK-NOT: extractvalue { double, i1 }
%cb = extractvalue {double, i1} %load0, 1
ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll b/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll
new file mode 100644
index 0000000000000..cea475524945c
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferLoadInt64.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.2-compute"
+
+define { i64, i1 } @loadi64() {
+; CHECK-LABEL: define { i64, i1 } @loadi64() {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 32
+; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { i64, i1 } [[TMP9]], i1 [[TMP10]], 1
+; CHECK-NEXT: ret { i64, i1 } [[TMP11]]
+;
+ %buffer = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ %result = call { i64, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_i64_1_0_0t(
+ target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 0)
+ ret { i64, i1 } %result
+}
+
+define { <2 x i64>, i1 } @loadv2i64() {
+; CHECK-LABEL: define { <2 x i64>, i1 } @loadv2i64() {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2i64_1_0_0t(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 32
+; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 32
+; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP12]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP15]], i32 1
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i64>, i1 } poison, <2 x i64> [[TMP16]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <2 x i64>, i1 } [[TMP17]], i1 [[TMP18]], 1
+; CHECK-NEXT: ret { <2 x i64>, i1 } [[TMP19]]
+;
+ %buffer = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ %result = call { <2 x i64>, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t(
+ target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) %buffer, i32 0)
+ ret { <2 x i64>, i1 } %result
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
index 9c3dab0cc1e46..882948b6dce74 100644
--- a/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStoreDouble.ll
@@ -45,3 +45,46 @@ define void @storev2f64(<2 x double> %0) {
<2 x double> %0)
ret void
}
+
+define { double, i1 } @loadAndReturnf64() {
+; CHECK-LABEL: define { double, i1 } @loadAndReturnf64() {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", double, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v2i32.tdx.TypedBuffer_f64_1_0_0t(target("dx.TypedBuffer", double, 1, 0, 0) [[BUFFER]], i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { double, i1 } poison, double [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { double, i1 } [[TMP6]], i1 [[TMP7]], 1
+; CHECK-NEXT: ret { double, i1 } [[TMP8]]
+;
+ %buffer = tail call target("dx.TypedBuffer", double, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ %result = call { double, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_f64_1_0_0t(
+ target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0)
+ ret { double, i1 } %result
+}
+
+define { <2 x double>, i1 } @loadAndReturnv2f64() {
+; CHECK-LABEL: define { <2 x double>, i1 } @loadAndReturnv2f64() {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i1 } @llvm.dx.resource.load.typedbuffer.v4i32.tdx.TypedBuffer_v2f64_1_0_0t(target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[BUFFER]], i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[TMP3]], i32 [[TMP4]])
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.dx.asdouble.i32(i32 [[TMP5]], i32 [[TMP6]])
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP8]], double [[TMP9]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { <2 x double>, i1 } poison, <2 x double> [[TMP10]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x double>, i1 } [[TMP11]], i1 [[TMP12]], 1
+; CHECK-NEXT: ret { <2 x double>, i1 } [[TMP13]]
+;
+ %buffer = tail call target("dx.TypedBuffer", <2 x double>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2f64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ %result = call { <2 x double>, i1 } @llvm.dx.resource.load.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t(
+ target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0)
+ ret { <2 x double>, i1 } %result
+}
diff --git a/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll
new file mode 100644
index 0000000000000..efb7c0ac104ed
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/BufferStoreInt64.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-intrinsic-expansion %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define void @storei64(i64 %0) {
+; CHECK-LABEL: define void @storei64(
+; CHECK-SAME: i64 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 32
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i32 1
+; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP6]])
+; CHECK-NEXT: ret void
+;
+ %buffer = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+ call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 0,i64 %0)
+ ret void
+}
+
+
+define void @storev2i64(<2 x i64> %0) {
+; CHECK-LABEL: define void @storev2i64(
+; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[BUFFER:%.*]] = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT:...
[truncated]
|
Value *Combined = nullptr; | ||
if (IsDouble) { | ||
// For doubles, use dx_asdouble intrinsic | ||
Combined = | ||
Builder.CreateIntrinsic(Builder.getDoubleTy(), Intrinsic::dx_asdouble, | ||
{ExtractElements[I], ExtractElements[I + 1]}); | ||
} else { | ||
// For int64, manually combine two int32s | ||
// First, zero-extend both values to i64 | ||
Value *Lo = Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty()); | ||
Value *Hi = | ||
Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty()); | ||
// Shift the high bits left by 32 bits | ||
Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32)); | ||
// OR the high and low bits together | ||
Combined = Builder.CreateOr(Lo, ShiftedHi); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be worth pulling this out into a function? createCombined64Bit(IRBuilder, Value *, Value *)
or something like that.
// Handle scalar int64 | ||
Value *LowBits = Builder.CreateTrunc(InputVal, Int32Ty); | ||
Value *ShiftedVal = Builder.CreateLShr(InputVal, Builder.getInt64(32)); | ||
Value *HighBits = Builder.CreateTrunc(ShiftedVal, Int32Ty); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can combine the scalar and vector logic if you avoid scalarizing this by hand. Ie, you could do something like:
Constant *ShiftAmt = Builder.getInt64(32);
if (IsVector)
ShiftAmt = ConstantVector::getSplat(ElementCount::getFixed(2), ShiftAmt);
// Split into low and high 32-bit parts
Value *LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
Value *ShiftedVal = Builder.CreateLShr(InputVal, ShiftAmt);
Value *HighBits = Builder.CreateTrunc(ShiftedVal, SplitElementTy);
Then I think this fits nicely with the logic for doubles above where we're just changing the conversion operation itself and the surrounding IR is unchanged.
This also closes #145729 ? |
f7b3d84
to
d59cd61
Compare
I'm going to convert that into a test ticket for the offload test suite.
✅ With the latest revision this PR passed the C/C++ code formatter. |
d59cd61
to
45dbfb1
Compare
fixes #140321
Specifically it fixes
error: Cannot create BufferLoad operation: Invalid overload type
https://hlsl.godbolt.org/z/dTq4q7o58
but no new DML shaders are building. This change now exposes #144747.
The change does two things: it adds i64 support for intrinsic expansion for the
`dx_resource_load_typedbuffer` and `dx_resource_store_typedbuffer` intrinsics.
It also lets typedbuffer loads fail more gracefully, because
`auto *EVI = cast<ExtractValueInst>(U);` is now a `dyn_cast` followed by an
`llvm_unreachable` for unexpected users.