 #include "ABIInfoImpl.h"
 #include "HLSLBufferLayoutBuilder.h"
 #include "TargetInfo.h"
+#include "clang/Basic/LangOptions.h"
+#include "llvm/IR/DerivedTypes.h"
+
+#include <stdint.h>
+#include <utility>
 
 using namespace clang;
 using namespace clang::CodeGen;
@@ -33,9 +38,43 @@ class SPIRVABIInfo : public CommonSPIRABIInfo {
   void computeInfo(CGFunctionInfo &FI) const override;
 
 private:
+  ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+};
+
+class AMDGCNSPIRVABIInfo : public SPIRVABIInfo {
+  // TODO: this should be unified / shared with AMDGPU; ideally we'd like to
+  // re-use AMDGPUABIInfo eventually rather than duplicate it.
+  static constexpr unsigned MaxNumRegsForArgsRet = 16; // 16 32-bit registers
+  mutable unsigned NumRegsLeft = 0;
+
+  unsigned numRegsForType(QualType Ty) const;
+
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
+    return true;
+  }
+  bool isHomogeneousAggregateSmallEnough(const Type *Base,
+                                         uint64_t Members) const override {
+    uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
+
+    // Homogeneous Aggregates may occupy at most 16 registers.
+    return Members * NumRegs <= MaxNumRegsForArgsRet;
+  }
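+  // Illustrative check, assuming simple base types: a homogeneous aggregate
+  // of four doubles gives NumRegs == 2 per member and Members * NumRegs == 8,
+  // which fits; twenty floats would need 20 registers and fail.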
+
+  // Coerce HIP scalar pointer arguments from generic pointers to global ones.
+  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
+                                       unsigned ToAS) const;
+
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
   ABIArgInfo classifyArgumentType(QualType Ty) const;
+
+public:
+  AMDGCNSPIRVABIInfo(CodeGenTypes &CGT) : SPIRVABIInfo(CGT) {}
+  void computeInfo(CGFunctionInfo &FI) const override;
+
+  llvm::FixedVectorType *
+  getOptimalVectorMemoryType(llvm::FixedVectorType *Ty,
+                             const LangOptions &LangOpt) const override;
 };
 } // end anonymous namespace
 namespace {
@@ -81,7 +120,10 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
 class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
 public:
   SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
-      : CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
+      : CommonSPIRTargetCodeGenInfo(
+            (CGT.getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+                ? std::make_unique<AMDGCNSPIRVABIInfo>(CGT)
+                : std::make_unique<SPIRVABIInfo>(CGT)) {}
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
   LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                   const VarDecl *D) const override;
@@ -130,25 +172,6 @@ void CommonSPIRABIInfo::setCCs() {
   RuntimeCC = llvm::CallingConv::SPIR_FUNC;
 }
 
-ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
-  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
-    return DefaultABIInfo::classifyReturnType(RetTy);
-  if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
-    return DefaultABIInfo::classifyReturnType(RetTy);
-
-  if (const auto *RD = RetTy->getAsRecordDecl();
-      RD && RD->hasFlexibleArrayMember())
-    return DefaultABIInfo::classifyReturnType(RetTy);
-
-  // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
-  // avoid encoding various architecture specific bits here we return everything
-  // as direct to retain type info for things like aggregates, for later perusal
-  // when translating back to LLVM/lowering in the BE. This is also why we
-  // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU.
-  // This will be revisited / optimised in the future.
-  return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false);
-}
-
 ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
   if (getContext().getLangOpts().isTargetDevice()) {
     // Coerce pointer arguments with default address space to CrossWorkGroup
@@ -165,18 +188,6 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
   }
 
   if (isAggregateTypeForABI(Ty)) {
-    if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
-      // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
-      // currently expressible in SPIR-V; SPIR-V passes aggregates byval,
-      // which the AMDGPU kernel ABI does not allow. Passing aggregates as
-      // direct works around this impedance mismatch, as it retains type info
-      // and can be correctly handled, post reverse-translation, by the AMDGPU
-      // BE, which has to support this CC for legacy OpenCL purposes. It can
-      // be brittle and does lead to performance degradation in certain
-      // pathological cases. This will be revisited / optimised in the future,
-      // once a way to deal with the byref/byval impedance mismatch is
-      // identified.
-      return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
     // Force copying aggregate type in kernel arguments by value when
     // compiling CUDA targeting SPIR-V. This is required for the object
     // copied to be valid on the device.
@@ -191,49 +202,238 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
   return classifyArgumentType(Ty);
 }
 
-ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
-  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
-    return DefaultABIInfo::classifyArgumentType(Ty);
-  if (!isAggregateTypeForABI(Ty))
-    return DefaultABIInfo::classifyArgumentType(Ty);
+void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  // The logic is the same as in DefaultABIInfo, except for the handling of
+  // kernel arguments.
+  llvm::CallingConv::ID CC = FI.getCallingConvention();
+
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+
+  for (auto &I : FI.arguments()) {
+    if (CC == llvm::CallingConv::SPIR_KERNEL) {
+      I.info = classifyKernelArgumentType(I.type);
+    } else {
+      I.info = classifyArgumentType(I.type);
+    }
+  }
+}
+
+unsigned AMDGCNSPIRVABIInfo::numRegsForType(QualType Ty) const {
+  // This duplicates the AMDGPUABI computation.
+  unsigned NumRegs = 0;
+
+  if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    // Compute from the number of elements. The reported size is based on the
+    // in-memory size, which includes the padding 4th element for 3-vectors.
+    QualType EltTy = VT->getElementType();
+    unsigned EltSize = getContext().getTypeSize(EltTy);
+
+    // 16-bit element vectors should be passed as packed.
+    if (EltSize == 16)
+      return (VT->getNumElements() + 1) / 2;
+
+    unsigned EltNumRegs = (EltSize + 31) / 32;
+    return EltNumRegs * VT->getNumElements();
+  }
+
+  if (const auto *RD = Ty->getAsRecordDecl()) {
+    assert(!RD->hasFlexibleArrayMember());
+
+    for (const FieldDecl *Field : RD->fields()) {
+      QualType FieldTy = Field->getType();
+      NumRegs += numRegsForType(FieldTy);
+    }
+
+    return NumRegs;
+  }
+
+  return (getContext().getTypeSize(Ty) + 31) / 32;
+}
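+// Worked examples of the computation above: a <4 x half> packs into
+// (4 + 1) / 2 == 2 registers, a <3 x float> takes 3 (one per element,
+// independent of the padded in-memory size), and a struct holding a double
+// and an int sums its fields to 2 + 1 == 3 registers.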
+
+llvm::Type *AMDGCNSPIRVABIInfo::coerceKernelArgumentType(llvm::Type *Ty,
+                                                         unsigned FromAS,
+                                                         unsigned ToAS) const {
+  // Single value types.
+  auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
+  if (PtrTy && PtrTy->getAddressSpace() == FromAS)
+    return llvm::PointerType::get(Ty->getContext(), ToAS);
+  return Ty;
+}
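+// For example, assuming the usual SPIR-V numbering where Generic is
+// addrspace(4) and CrossWorkGroup is addrspace(1): a HIP `float *` kernel
+// argument lowers to `ptr addrspace(4)` and is coerced here to
+// `ptr addrspace(1)`; non-pointer types are returned unchanged.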
+
+ABIArgInfo AMDGCNSPIRVABIInfo::classifyReturnType(QualType RetTy) const {
+  if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
+    return DefaultABIInfo::classifyReturnType(RetTy);
+
+  // Ignore empty structs/unions.
+  if (isEmptyRecord(getContext(), RetTy, true))
+    return ABIArgInfo::getIgnore();
+
+  // Lower single-element structs to just return a regular value.
+  if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
+    return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+  if (const auto *RD = RetTy->getAsRecordDecl();
+      RD && RD->hasFlexibleArrayMember())
+    return DefaultABIInfo::classifyReturnType(RetTy);
+
+  // Pack aggregates <= 4 bytes into single VGPR or pair.
+  uint64_t Size = getContext().getTypeSize(RetTy);
+  if (Size <= 16)
+    return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+  if (Size <= 32)
+    return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+  // TODO: This is an oddity carried over from AMDGPU; we retain it to ensure
+  // consistency, but it might be reasonable to return Int64.
+  if (Size <= 64) {
+    llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+    return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+  }
+
+  if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
+    return ABIArgInfo::getDirect();
+  return DefaultABIInfo::classifyReturnType(RetTy);
+}
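+// For example: a struct of two chars (16 bits) is returned as i16, a struct
+// of two floats (64 bits) as [2 x i32], and an aggregate needing more than
+// 16 registers falls back to the default indirect (sret) classification.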
+
+/// For kernels all parameters are really passed in a special buffer. It
+/// doesn't make sense to pass anything byval, so everything must be direct.
+ABIArgInfo AMDGCNSPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
+  // TODO: Can we omit empty structs?
+
+  if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+    Ty = QualType(SeltTy, 0);
+
+  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
+  llvm::Type *LTy = OrigLTy;
+  if (getContext().getLangOpts().isTargetDevice()) {
+    LTy = coerceKernelArgumentType(
+        OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
+        /*ToAS=*/getContext().getTargetAddressSpace(LangAS::opencl_global));
+  }
+
+  // FIXME: This doesn't apply the optimization of coercing pointers in
+  // structs to the global address space when using byref. This would require
+  // implementing a new kind of coercion of the in-memory type for indirect
+  // arguments.
+  if (LTy == OrigLTy && isAggregateTypeForABI(Ty)) {
+    return ABIArgInfo::getIndirectAliased(
+        getContext().getTypeAlignInChars(Ty),
+        getContext().getTargetAddressSpace(LangAS::opencl_constant),
+        false /*Realign*/, nullptr /*Padding*/);
+  }
+
+  // TODO: inhibiting flattening is an AMDGPU workaround for Clover, which
+  // might be vestigial and should be revisited.
+  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
+}
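+// For example: an aggregate kernel argument whose converted type needs no
+// pointer coercion (LTy == OrigLTy) is passed byref in the constant address
+// space, while a scalar `float *` argument is coerced to a global pointer
+// and passed direct.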
+
+ABIArgInfo AMDGCNSPIRVABIInfo::classifyArgumentType(QualType Ty) const {
+  assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
+
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
+  // TODO: support for variadics.
+
+  if (!isAggregateTypeForABI(Ty)) {
+    ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
+    if (!ArgInfo.isIndirect()) {
+      unsigned NumRegs = numRegsForType(Ty);
+      NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
+    }
+
+    return ArgInfo;
+  }
 
   // Records with non-trivial destructors/copy-constructors should not be
   // passed by value.
   if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
     return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                    RAA == CGCXXABI::RAA_DirectInMemory);
 
+  // Ignore empty structs/unions.
+  if (isEmptyRecord(getContext(), Ty, true))
+    return ABIArgInfo::getIgnore();
+
+  // Lower single-element structs to just pass a regular value. TODO: We
+  // could do reasonable-size multiple-element structs too, using getExpand(),
+  // though watch out for things like bitfields.
+  if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+    return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
   if (const auto *RD = Ty->getAsRecordDecl();
       RD && RD->hasFlexibleArrayMember())
     return DefaultABIInfo::classifyArgumentType(Ty);
 
-  return ABIArgInfo::getDirect(CGT.ConvertType(Ty), 0u, nullptr, false);
+  uint64_t Size = getContext().getTypeSize(Ty);
+  if (Size <= 64) {
+    // Pack aggregates <= 8 bytes into single VGPR or pair.
+    unsigned NumRegs = (Size + 31) / 32;
+    NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+    if (Size <= 16)
+      return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+    if (Size <= 32)
+      return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+    // TODO: This is an AMDGPU oddity and might be vestigial; we retain it to
+    // ensure consistency, but it should be revisited.
+    llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+    return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+  }
+
+  if (NumRegsLeft > 0) {
+    unsigned NumRegs = numRegsForType(Ty);
+    if (NumRegsLeft >= NumRegs) {
+      NumRegsLeft -= NumRegs;
+      return ABIArgInfo::getDirect();
+    }
+  }
+
+  // Use pass-by-reference instead of pass-by-value for struct arguments in
+  // the function ABI.
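+  // For example, with illustrative numbers: a 24-byte struct needs 6
+  // registers, so it is passed direct above while the budget allows; once
+  // earlier arguments have used up all but 5 or fewer registers, it is
+  // passed indirectly here instead.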
+  return ABIArgInfo::getIndirectAliased(
+      getContext().getTypeAlignInChars(Ty),
+      getContext().getTargetAddressSpace(LangAS::opencl_private));
 }
 
-void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  // The logic is same as in DefaultABIInfo with an exception on the kernel
-  // arguments handling.
+void AMDGCNSPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
   llvm::CallingConv::ID CC = FI.getCallingConvention();
 
   if (!getCXXABI().classifyReturnType(FI))
     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
 
+  NumRegsLeft = MaxNumRegsForArgsRet;
   for (auto &I : FI.arguments()) {
-    if (CC == llvm::CallingConv::SPIR_KERNEL) {
+    if (CC == llvm::CallingConv::SPIR_KERNEL)
       I.info = classifyKernelArgumentType(I.type);
-    } else {
+    else
       I.info = classifyArgumentType(I.type);
-    }
   }
 }
 
+llvm::FixedVectorType *AMDGCNSPIRVABIInfo::getOptimalVectorMemoryType(
+    llvm::FixedVectorType *Ty, const LangOptions &LangOpt) const {
+  // AMDGPU has legal 96-bit instructions, so 3x32-bit vectors can be
+  // supported directly.
+  if (Ty->getNumElements() == 3 && getDataLayout().getTypeSizeInBits(Ty) == 96)
+    return Ty;
+  return DefaultABIInfo::getOptimalVectorMemoryType(Ty, LangOpt);
+}
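+// For example, a <3 x float> (96 bits) keeps its 3-element memory type here,
+// whereas the generic implementation may widen 3-element vectors (e.g. to
+// <4 x float>) unless the language options ask to preserve vec3 types.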
+
 namespace clang {
 namespace CodeGen {
 void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
-  if (CGM.getTarget().getTriple().isSPIRV())
-    SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
-  else
+  if (CGM.getTarget().getTriple().isSPIRV()) {
+    if (CGM.getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+      AMDGCNSPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
+    else
+      SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
+  } else {
     CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI);
+  }
 }
 }
 }