Skip to content

[clang][RISCV] Fix crash on VLS calling convention #145489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1329,6 +1329,19 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(Ty);

if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) {
if (Ty->isScalableTy() || Ty->isRISCVVectorTupleTy()) {
// In RISC-V VLS calling convention, struct of fixed vector might be
// lowered using scalable vector, we consider it as a valid load, e.g.
// struct i32x4 {
// __attribute__((vector_size(16))) int i;
// };
// is lowered to
// <vscale x 2 x i32> when ABI_VLEN = 128 bits, please checkout
// clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
// for more information.
Src = Src.withElementType(Ty);
return CGF.Builder.CreateLoad(Src);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CreateCoercedLoad/CreateCoercedStore have two purposes:

  • Try to make the type of the load/store match the type of other operations on the memory, as indicated by SrcTy.
  • Ensure that we don't load/store past the end of the object.

The first is an optimization, but the second is required for correctness: accessing past the end of an object is undefined behavior. You can't skip past the relevant checks just because there's a scalable type involved.

}
Src = EnterStructPointerForCoercedAccess(Src, SrcSTy,
DstSize.getFixedValue(), CGF);
SrcTy = Src.getElementType();
Expand Down Expand Up @@ -1412,6 +1425,21 @@ void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
if (SrcTy != Dst.getElementType()) {
if (llvm::StructType *DstSTy =
dyn_cast<llvm::StructType>(Dst.getElementType())) {
if (SrcTy->isScalableTy() || SrcTy->isRISCVVectorTupleTy()) {
// In RISC-V VLS calling convention, struct of fixed vector might be
// lowered using scalable vector, we consider it as a valid load, e.g.
// struct i32x4 {
// __attribute__((vector_size(16))) int i;
// };
// is lowered to
// <vscale x 2 x i32> when ABI_VLEN = 128 bits, please checkout
// clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
// for more information.
Dst = Dst.withElementType(SrcTy);
auto *I = Builder.CreateStore(Src, Dst, DstIsVolatile);
addInstToCurrentSourceAtom(I, Src);
return;
}
assert(!SrcSize.isScalable());
Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
SrcSize.getFixedValue(), *this);
Expand Down Expand Up @@ -3328,17 +3356,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
}
}

// Struct of fixed-length vectors and struct of array of fixed-length
// vector in VLS calling convention are coerced to vector tuple
// type(represented as TargetExtType) and scalable vector type
// respectively, they're no longer handled as struct.
if (ArgI.isDirect() && isa<llvm::StructType>(ConvertType(Ty)) &&
(isa<llvm::TargetExtType>(ArgI.getCoerceToType()) ||
isa<llvm::ScalableVectorType>(ArgI.getCoerceToType()))) {
ArgVals.push_back(ParamValue::forDirect(AI));
break;
}

llvm::StructType *STy =
dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
Address Alloca =
Expand Down
62 changes: 46 additions & 16 deletions clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,52 +143,82 @@ void __attribute__((riscv_vls_cc)) test_too_large(int32x64_t arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_too_large_256(<vscale x 16 x i32> noundef %arg.coerce)
void __attribute__((riscv_vls_cc(256))) test_too_large_256(int32x64_t arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4(<vscale x 2 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4(<vscale x 2 x i32> %arg.coerce)
void __attribute__((riscv_vls_cc)) test_st_i32x4(struct st_i32x4 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_256(<vscale x 1 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_256(<vscale x 1 x i32> %arg.coerce)
void __attribute__((riscv_vls_cc(256))) test_st_i32x4_256(struct st_i32x4 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr1(<vscale x 2 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr1(<vscale x 2 x i32> %arg.coerce)
void __attribute__((riscv_vls_cc)) test_st_i32x4_arr1(struct st_i32x4_arr1 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr1_256(<vscale x 1 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr1_256(<vscale x 1 x i32> %arg.coerce)
void __attribute__((riscv_vls_cc(256))) test_st_i32x4_arr1_256(struct st_i32x4_arr1 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr4(<vscale x 8 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr4(<vscale x 8 x i32> %arg.coerce)
void __attribute__((riscv_vls_cc)) test_st_i32x4_arr4(struct st_i32x4_arr4 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr4_256(<vscale x 4 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr4_256(<vscale x 4 x i32> %arg.coerce)
void __attribute__((riscv_vls_cc(256))) test_st_i32x4_arr4_256(struct st_i32x4_arr4 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr8(<vscale x 16 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4_arr8(<vscale x 16 x i32> %arg.coerce)
void __attribute__((riscv_vls_cc)) test_st_i32x4_arr8(struct st_i32x4_arr8 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr8_256(<vscale x 8 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4_arr8_256(<vscale x 8 x i32> %arg.coerce)
void __attribute__((riscv_vls_cc(256))) test_st_i32x4_arr8_256(struct st_i32x4_arr8 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg.coerce)
void __attribute__((riscv_vls_cc)) test_st_i32x4x2(struct st_i32x4x2 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x2_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x2_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %arg.coerce)
void __attribute__((riscv_vls_cc(256))) test_st_i32x4x2_256(struct st_i32x4x2 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x8x2(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x8x2(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %arg.coerce)
void __attribute__((riscv_vls_cc)) test_st_i32x8x2(struct st_i32x8x2 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x8x2_256(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x8x2_256(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg.coerce)
void __attribute__((riscv_vls_cc(256))) test_st_i32x8x2_256(struct st_i32x8x2 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x64x2(ptr noundef %arg)
void __attribute__((riscv_vls_cc)) test_st_i32x64x2(struct st_i32x64x2 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x64x2_256(ptr noundef %arg)
void __attribute__((riscv_vls_cc(256))) test_st_i32x64x2_256(struct st_i32x64x2 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x3(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x3(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %arg.coerce)
void __attribute__((riscv_vls_cc)) test_st_i32x4x3(struct st_i32x4x3 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x3_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x3_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %arg.coerce)
void __attribute__((riscv_vls_cc(256))) test_st_i32x4x3_256(struct st_i32x4x3 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg.coerce)
void __attribute__((riscv_vls_cc)) test_st_i32x4x8(struct st_i32x4x8 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x8_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x8_256(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg.coerce)
void __attribute__((riscv_vls_cc(256))) test_st_i32x4x8_256(struct st_i32x4x8 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_i32x4x9(ptr noundef %arg)
void __attribute__((riscv_vls_cc)) test_st_i32x4x9(struct st_i32x4x9 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x9_256(ptr noundef %arg)
void __attribute__((riscv_vls_cc(256))) test_st_i32x4x9_256(struct st_i32x4x9 arg) {}

// CHECK-LLVM-LABEL: define dso_local riscv_vls_cc(128) <vscale x 8 x i32> @test_function_prolog_epilog(<vscale x 8 x i32> %arg.coerce) #0 {
// CHECK-LLVM-NEXT: entry:
// CHECK-LLVM-NEXT: %retval = alloca %struct.st_i32x4_arr4, align 16
// CHECK-LLVM-NEXT: %arg = alloca %struct.st_i32x4_arr4, align 16
// CHECK-LLVM-NEXT: store <vscale x 8 x i32> %arg.coerce, ptr %arg, align 16
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the size of struct.st_i32x4_arr4? If it's not a scalable size we can't do a scalable store to it. The store will write more bytes than exist on hardware with larger vscale.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is calculated by using ABI_VLEN=128 bits=16 bytes and vector size=16x4 bytes in this case, so LMUL=vector size / ABI_VLEN=4 result in <vscale x 8 x i32>

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you manually add a CHECK line for the declaration of struct.st_i32x4_arr4

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My question is specifically how the struct.st_i32x4_arr4 type is declared. If it does not include a vscale then we cannot use a scalable store to an alloca with that type.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type itself does not have vscale but the argument passed in has type <vscale x 8 x i32>, if we cannot use a scalable store, how do we store the input argument?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you can extract the fixed vector from the scalable vector using llvm.vector.extract?

Copy link
Member Author

@4vtomat 4vtomat Jun 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if it's vector tuple type? It needs n llvm.tuple.insert, and if the scalable vector type in the tuple type is not aligned with the one obtained by using getContainerForFixedLengthVector, it would be illegal.
The reason it might happen is ABI_VLEN might be different to MinVLen or MaxELen which are used to calculate scalable vector type for a fixed vector in getContainerForFixedLengthVector

// CHECK-LLVM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 %retval, ptr align 16 %arg, i64 64, i1 false)
// CHECK-LLVM-NEXT: %0 = load <vscale x 8 x i32>, ptr %retval, align 16
// CHECK-LLVM-NEXT: ret <vscale x 8 x i32> %0
// CHECK-LLVM-NEXT: }
struct st_i32x4_arr4 __attribute__((riscv_vls_cc)) test_function_prolog_epilog(struct st_i32x4_arr4 arg) {
return arg;
}

struct st_i32x4 __attribute__((riscv_vls_cc)) dummy(struct st_i32x4);
// CHECK-LLVM-LABEL: define dso_local riscv_vls_cc(128) <vscale x 2 x i32> @test_call(<vscale x 2 x i32> %arg.coerce) #0 {
// CHECK-LLVM-NEXT: entry:
// CHECK-LLVM-NEXT: %retval = alloca %struct.st_i32x4, align 16
// CHECK-LLVM-NEXT: %arg = alloca %struct.st_i32x4, align 16
// CHECK-LLVM-NEXT: store <vscale x 2 x i32> %arg.coerce, ptr %arg, align 16
// CHECK-LLVM-NEXT: %0 = load <vscale x 2 x i32>, ptr %arg, align 16
// CHECK-LLVM-NEXT: %call = call riscv_vls_cc(128) <vscale x 2 x i32> @dummy(<vscale x 2 x i32> %0)
// CHECK-LLVM-NEXT: store <vscale x 2 x i32> %call, ptr %retval, align 16
// CHECK-LLVM-NEXT: %1 = load <vscale x 2 x i32>, ptr %retval, align 16
// CHECK-LLVM-NEXT: ret <vscale x 2 x i32> %1
// CHECK-LLVM-NEXT: }
struct st_i32x4 __attribute__((riscv_vls_cc)) test_call(struct st_i32x4 arg) {
struct st_i32x4 abc = dummy(arg);
return abc;
}
32 changes: 16 additions & 16 deletions clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,49 +123,49 @@ typedef int __attribute__((vector_size(256))) int32x64_t;
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z18test_too_large_256Dv64_i(<vscale x 16 x i32> noundef %arg.coerce)
[[riscv::vls_cc(256)]] void test_too_large_256(int32x64_t arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z13test_st_i32x48st_i32x4(<vscale x 2 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z13test_st_i32x48st_i32x4(<vscale x 2 x i32> %arg.coerce)
[[riscv::vls_cc]] void test_st_i32x4(struct st_i32x4 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z17test_st_i32x4_2568st_i32x4(<vscale x 1 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z17test_st_i32x4_2568st_i32x4(<vscale x 1 x i32> %arg.coerce)
[[riscv::vls_cc(256)]] void test_st_i32x4_256(struct st_i32x4 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr113st_i32x4_arr1(<vscale x 2 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr113st_i32x4_arr1(<vscale x 2 x i32> %arg.coerce)
[[riscv::vls_cc]] void test_st_i32x4_arr1(struct st_i32x4_arr1 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr1_25613st_i32x4_arr1(<vscale x 1 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr1_25613st_i32x4_arr1(<vscale x 1 x i32> %arg.coerce)
[[riscv::vls_cc(256)]] void test_st_i32x4_arr1_256(struct st_i32x4_arr1 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr413st_i32x4_arr4(<vscale x 8 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr413st_i32x4_arr4(<vscale x 8 x i32> %arg.coerce)
[[riscv::vls_cc]] void test_st_i32x4_arr4(struct st_i32x4_arr4 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr4_25613st_i32x4_arr4(<vscale x 4 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr4_25613st_i32x4_arr4(<vscale x 4 x i32> %arg.coerce)
[[riscv::vls_cc(256)]] void test_st_i32x4_arr4_256(struct st_i32x4_arr4 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr813st_i32x4_arr8(<vscale x 16 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z18test_st_i32x4_arr813st_i32x4_arr8(<vscale x 16 x i32> %arg.coerce)
[[riscv::vls_cc]] void test_st_i32x4_arr8(struct st_i32x4_arr8 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr8_25613st_i32x4_arr8(<vscale x 8 x i32> %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z22test_st_i32x4_arr8_25613st_i32x4_arr8(<vscale x 8 x i32> %arg.coerce)
[[riscv::vls_cc(256)]] void test_st_i32x4_arr8_256(struct st_i32x4_arr8 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x210st_i32x4x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x210st_i32x4x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg.coerce)
[[riscv::vls_cc]] void test_st_i32x4x2(struct st_i32x4x2 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x2_25610st_i32x4x2(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x2_25610st_i32x4x2(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %arg.coerce)
[[riscv::vls_cc(256)]] void test_st_i32x4x2_256(struct st_i32x4x2 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x8x210st_i32x8x2(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x8x210st_i32x8x2(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %arg.coerce)
[[riscv::vls_cc]] void test_st_i32x8x2(struct st_i32x8x2 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x8x2_25610st_i32x8x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x8x2_25610st_i32x8x2(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %arg.coerce)
[[riscv::vls_cc(256)]] void test_st_i32x8x2_256(struct st_i32x8x2 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z16test_st_i32x64x211st_i32x64x2(ptr noundef %arg)
[[riscv::vls_cc]] void test_st_i32x64x2(struct st_i32x64x2 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z20test_st_i32x64x2_25611st_i32x64x2(ptr noundef %arg)
[[riscv::vls_cc(256)]] void test_st_i32x64x2_256(struct st_i32x64x2 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x310st_i32x4x3(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x310st_i32x4x3(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %arg.coerce)
[[riscv::vls_cc]] void test_st_i32x4x3(struct st_i32x4x3 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x3_25610st_i32x4x3(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x3_25610st_i32x4x3(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %arg.coerce)
[[riscv::vls_cc(256)]] void test_st_i32x4x3_256(struct st_i32x4x3 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x810st_i32x4x8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x810st_i32x4x8(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %arg.coerce)
[[riscv::vls_cc]] void test_st_i32x4x8(struct st_i32x4x8 arg) {}
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x8_25610st_i32x4x8(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg)
// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x8_25610st_i32x4x8(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %arg.coerce)
[[riscv::vls_cc(256)]] void test_st_i32x4x8_256(struct st_i32x4x8 arg) {}

// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z15test_st_i32x4x910st_i32x4x9(ptr noundef %arg)
Expand Down