From 5bf42d3e2e83344db14fc0e33623840c53cebfc4 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Tue, 11 Feb 2025 08:04:51 -0800 Subject: [PATCH 01/29] [lldb] Fix ubsan violation with plugin loading (#126652) This typedef doesn't match the signature below, specifically the signature takes a `lldb::SBDebugger` vs this was defined as `lldb::SBDebugger&`. ``` lldb/source/API/SBDebugger.cpp:199:13: runtime error: call to function lldb::PluginInitialize(lldb::SBDebugger) through pointer to incorrect function type 'bool (*)(lldb::SBDebugger &)' .../CustomPlugin.cpp:134: note: lldb::PluginInitialize(lldb::SBDebugger) defined here ``` --- lldb/source/API/SBDebugger.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp index 4e6b22492a0d1..bdb8e538b99f8 100644 --- a/lldb/source/API/SBDebugger.cpp +++ b/lldb/source/API/SBDebugger.cpp @@ -185,7 +185,7 @@ lldb::SBError SBDebugger::InitializeWithErrorHandling() { llvm::sys::DynamicLibrary dynlib = llvm::sys::DynamicLibrary::getPermanentLibrary(spec.GetPath().c_str()); if (dynlib.isValid()) { - typedef bool (*LLDBCommandPluginInit)(lldb::SBDebugger & debugger); + typedef bool (*LLDBCommandPluginInit)(lldb::SBDebugger debugger); lldb::SBDebugger debugger_sb(debugger_sp); // This calls the bool lldb::PluginInitialize(lldb::SBDebugger debugger) From b7188f6313fef2d5a248e71ba6028460b2ac0558 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Tue, 11 Feb 2025 16:10:49 +0000 Subject: [PATCH 02/29] [AMDGPU][NFC] Remove an unneeded return value. (#126739) And rename the function to disassociate it from the one where generating loading of the input value may actually fail. 
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 19 ++++++++++--------- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 14 +++++++------- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 6 +++--- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index bb00442342d84..478a4c161fce7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -816,7 +816,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, Register InputReg = MRI.createGenericVirtualRegister(ArgTy); if (IncomingArg) { - LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy); + LI->buildLoadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy); } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) { LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder); } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) { @@ -883,8 +883,9 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, NeedWorkItemIDX) { if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) { InputReg = MRI.createGenericVirtualRegister(S32); - LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX, - std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX)); + LI->buildLoadInputValue(InputReg, MIRBuilder, IncomingArgX, + std::get<1>(WorkitemIDX), + std::get<2>(WorkitemIDX)); } else { InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0); } @@ -893,8 +894,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY && NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) { Register Y = MRI.createGenericVirtualRegister(S32); - LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY), - std::get<2>(WorkitemIDY)); + LI->buildLoadInputValue(Y, MIRBuilder, IncomingArgY, + std::get<1>(WorkitemIDY), 
std::get<2>(WorkitemIDY)); Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0); InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y; @@ -903,8 +904,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ && NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) { Register Z = MRI.createGenericVirtualRegister(S32); - LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ), - std::get<2>(WorkitemIDZ)); + LI->buildLoadInputValue(Z, MIRBuilder, IncomingArgZ, + std::get<1>(WorkitemIDZ), std::get<2>(WorkitemIDZ)); Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0); InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z; @@ -925,8 +926,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, ArgDescriptor IncomingArg = ArgDescriptor::createArg( IncomingArgX ? *IncomingArgX : IncomingArgY ? 
*IncomingArgY : *IncomingArgZ, ~0u); - LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg, - &AMDGPU::VGPR_32RegClass, S32); + LI->buildLoadInputValue(InputReg, MIRBuilder, &IncomingArg, + &AMDGPU::VGPR_32RegClass, S32); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index e9e47eaadd557..908d323c7fec9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4275,10 +4275,11 @@ verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineInstr *&Br, return UseMI; } -bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, - const ArgDescriptor *Arg, - const TargetRegisterClass *ArgRC, - LLT ArgTy) const { +void AMDGPULegalizerInfo::buildLoadInputValue(Register DstReg, + MachineIRBuilder &B, + const ArgDescriptor *Arg, + const TargetRegisterClass *ArgRC, + LLT ArgTy) const { MCRegister SrcReg = Arg->getRegister(); assert(SrcReg.isPhysical() && "Physical register expected"); assert(DstReg.isVirtual() && "Virtual register expected"); @@ -4304,8 +4305,6 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, } else { B.buildCopy(DstReg, LiveIn); } - - return true; } bool AMDGPULegalizerInfo::loadInputValue( @@ -4369,7 +4368,8 @@ bool AMDGPULegalizerInfo::loadInputValue( if (!Arg->isRegister() || !Arg->getRegister().isValid()) return false; // TODO: Handle these - return loadInputValue(DstReg, B, Arg, ArgRC, ArgTy); + buildLoadInputValue(DstReg, B, Arg, ArgRC, ArgTy); + return true; } bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 86c15197805d2..03b7c36fc450f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -111,9 +111,9 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool 
legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; - bool loadInputValue(Register DstReg, MachineIRBuilder &B, - const ArgDescriptor *Arg, - const TargetRegisterClass *ArgRC, LLT ArgTy) const; + void buildLoadInputValue(Register DstReg, MachineIRBuilder &B, + const ArgDescriptor *Arg, + const TargetRegisterClass *ArgRC, LLT ArgTy) const; bool loadInputValue(Register DstReg, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; From 2ad9d5f5f01cd4f29788a0cf7b21790df13fca71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 11 Feb 2025 17:20:23 +0100 Subject: [PATCH 03/29] [llvm] [cmake] Expose `LLVM_BUILD_TELEMETRY` in `LLVMConfig.cmake` (#126710) Add `LLVM_BUILD_TELEMETRY` to the list of flags exposed in `LLVMConfig.cmake`. This fixes telemetry library being misdetected as `OFF` when building LLDB standalone. Fixes bac62ee5b473e70981a6bd9759ec316315fca07d. ------ I guess this also needs a backport to 20.x. --- llvm/cmake/modules/LLVMConfig.cmake.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index c49f10b9343ff..28655ee3ab87d 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -100,6 +100,8 @@ set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@) set(LLVM_BUILD_32_BITS @LLVM_BUILD_32_BITS@) +set(LLVM_BUILD_TELEMETRY @LLVM_BUILD_TELEMETRY@) + if (NOT "@LLVM_PTHREAD_LIB@" STREQUAL "") set(LLVM_PTHREAD_LIB "@LLVM_PTHREAD_LIB@") endif() From 76392421553f3b25552970812868f70721971451 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 11 Feb 2025 08:35:45 -0800 Subject: [PATCH 04/29] [AMDGPU] Create new directive .amdhsa_inst_pref_size (#126622) The field INST_PREF_SIZE is available since gfx11. 
--- llvm/docs/AMDGPUUsage.rst | 3 +++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 12 ++++++++++++ .../AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 8 ++++---- .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 10 ++++++++++ llvm/test/MC/AMDGPU/hsa-diag-v4.s | 10 ++++++++++ llvm/test/MC/AMDGPU/hsa-gfx12-v4.s | 4 +++- llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s | 2 ++ llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s | 8 ++++++-- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s | 11 +++++++---- llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s | 5 +++-- 10 files changed, 60 insertions(+), 13 deletions(-) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 84980d0c31d4f..899b2cf3b4901 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -18213,6 +18213,9 @@ terminated by an ``.end_amdhsa_kernel`` directive. :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`. ``.amdhsa_shared_vgpr_count`` 0 GFX10-GFX11 Controls SHARED_VGPR_COUNT in :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-gfx11-table`. + ``.amdhsa_inst_pref_size`` 0 GFX11-GFX12 Controls INST_PREF_SIZE in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-gfx11-table` or + :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx12-table` ``.amdhsa_exception_fp_ieee_invalid_op`` 0 GFX6-GFX12 Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx12-table`. 
``.amdhsa_exception_fp_denorm_src`` 0 GFX6-GFX12 Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 4b6d02fff4aec..4ff9cff09f31d 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5876,6 +5876,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, ValRange); + } else if (ID == ".amdhsa_inst_pref_size") { + if (IVersion.Major < 11) + return Error(IDRange.Start, "directive requires gfx11+", IDRange); + if (IVersion.Major == 11) { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, + COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal, + ValRange); + } else { + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, + COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal, + ValRange); + } } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 58cdbe6cf373e..02ad08740049d 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2233,15 +2233,15 @@ Expected AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( // Bits [4-11]. 
if (isGFX11()) { - PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE", - COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE); + PRINT_DIRECTIVE(".amdhsa_inst_pref_size", + COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE); PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START", COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START); PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END", COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END); } else if (isGFX12Plus()) { - PRINT_PSEUDO_DIRECTIVE_COMMENT( - "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE); + PRINT_DIRECTIVE(".amdhsa_inst_pref_size", + COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE); } else { CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1, "COMPUTE_PGM_RSRC3", diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index eccd77d6c00f0..059bab5838526 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -579,7 +579,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ".amdhsa_shared_vgpr_count"); } + if (IVersion.Major == 11) { + PrintField(KD.compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, + ".amdhsa_inst_pref_size"); + } if (IVersion.Major >= 12) { + PrintField(KD.compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, + ".amdhsa_inst_pref_size"); PrintField(KD.compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT, amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v4.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s index 3733b162edcfb..9ab177cf2b125 100644 --- a/llvm/test/MC/AMDGPU/hsa-diag-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s @@ -280,6 +280,16 @@ .amdhsa_shared_vgpr_count 15 .end_amdhsa_kernel 
+// GCN-LABEL: warning: test_amdhsa_inst_pref_size_invalid +// PREGFX10: error: directive requires gfx11+ +// NONAMDHSA: error: unknown directive +.warning "test_amdhsa_inst_pref_size_invalid" +.amdhsa_kernel test_amdhsa_inst_pref_size_invalid + .amdhsa_next_free_vgpr 273 + .amdhsa_next_free_sgpr 0 + .amdhsa_inst_pref_size 15 +.end_amdhsa_kernel + // GCN-LABEL: warning: test_next_free_vgpr_invalid // AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions // NONAMDHSA-NOT: error: diff --git a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s index ea649bc76116a..e90a976008229 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s @@ -33,7 +33,7 @@ // complete // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 f00f0000 // OBJDUMP-NEXT: 0070 015021e4 1f0f007f 5e040000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 @@ -120,6 +120,7 @@ disabled_user_sgpr: .amdhsa_workgroup_processor_mode 1 .amdhsa_memory_ordered 1 .amdhsa_forward_progress 1 + .amdhsa_inst_pref_size 255 .amdhsa_round_robin_scheduling 1 .amdhsa_exception_fp_ieee_invalid_op 1 .amdhsa_exception_fp_denorm_src 1 @@ -158,6 +159,7 @@ disabled_user_sgpr: // ASM-NEXT: .amdhsa_workgroup_processor_mode 1 // ASM-NEXT: .amdhsa_memory_ordered 1 // ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_inst_pref_size 255 // ASM-NEXT: .amdhsa_round_robin_scheduling 1 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 // ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s index 85a7ad05b00f4..68cf28f2ac49d 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s @@ -133,6 +133,7 @@ expr_defined: 
// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30 // ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31 // ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_inst_pref_size 0 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24 // ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25 // ASM-NEXT: 
.amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&67108864)>>26 @@ -180,6 +181,7 @@ expr_defined: // ASM-NEXT: .amdhsa_memory_ordered 1 // ASM-NEXT: .amdhsa_forward_progress 1 // ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_inst_pref_size 0 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 // ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 // ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s index 51d0fb30b320c..6f7a9a2605681 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s @@ -9,12 +9,12 @@ // expr_defined_later // OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 // OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 f0020000 // OBJDUMP-NEXT: 0030 05f02fe4 811f007f 000c0000 00000000 // expr_defined // OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 f0020000 // OBJDUMP-NEXT: 0070 05f02fe4 811f007f 000c0000 00000000 .text @@ -53,6 +53,7 @@ expr_defined: .amdhsa_workgroup_processor_mode defined_boolean .amdhsa_memory_ordered defined_boolean .amdhsa_forward_progress defined_boolean + .amdhsa_inst_pref_size defined_value+6 .amdhsa_exception_fp_ieee_invalid_op defined_boolean 
.amdhsa_exception_fp_denorm_src defined_boolean .amdhsa_exception_fp_ieee_div_zero defined_boolean @@ -89,6 +90,7 @@ expr_defined: .amdhsa_workgroup_processor_mode defined_boolean .amdhsa_memory_ordered defined_boolean .amdhsa_forward_progress defined_boolean + .amdhsa_inst_pref_size defined_value+6 .amdhsa_exception_fp_ieee_invalid_op defined_boolean .amdhsa_exception_fp_denorm_src defined_boolean .amdhsa_exception_fp_ieee_div_zero defined_boolean @@ -132,6 +134,7 @@ expr_defined: // ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29 // ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30 // ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31 +// ASM-NEXT: .amdhsa_inst_pref_size (((defined_value+6)<<4)&4080)>>4 // ASM-NEXT: .amdhsa_round_robin_scheduling 
(((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24 // ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25 @@ -177,6 +180,7 @@ expr_defined: // ASM-NEXT: .amdhsa_workgroup_processor_mode 1 // ASM-NEXT: .amdhsa_memory_ordered 1 // ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_inst_pref_size 47 // ASM-NEXT: .amdhsa_round_robin_scheduling 1 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 // ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s index 750809128189f..3cd7a0503e301 100644 --- 
a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s @@ -13,7 +13,7 @@ ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 ; CHECK-NEXT: .amdhsa_kernarg_size 0 ; CHECK-NEXT: ; SHARED_VGPR_COUNT 0 -; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: .amdhsa_inst_pref_size 0 ; CHECK-NEXT: ; TRAP_ON_START 0 ; CHECK-NEXT: ; TRAP_ON_END 0 ; CHECK-NEXT: ; IMAGE_OP 0 @@ -70,7 +70,7 @@ ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 ; CHECK-NEXT: .amdhsa_kernarg_size 0 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 0 -; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: .amdhsa_inst_pref_size 0 ; CHECK-NEXT: ; TRAP_ON_START 0 ; CHECK-NEXT: ; TRAP_ON_END 0 ; CHECK-NEXT: ; IMAGE_OP 0 @@ -114,6 +114,7 @@ .amdhsa_next_free_vgpr 32 .amdhsa_next_free_sgpr 32 .amdhsa_shared_vgpr_count 0 + .amdhsa_inst_pref_size 0 .end_amdhsa_kernel ;--- 3.s @@ -127,7 +128,7 @@ ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 ; CHECK-NEXT: .amdhsa_kernarg_size 0 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 1 -; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: .amdhsa_inst_pref_size 63 ; CHECK-NEXT: ; TRAP_ON_START 0 ; CHECK-NEXT: ; TRAP_ON_END 0 ; CHECK-NEXT: ; IMAGE_OP 0 @@ -171,6 +172,7 @@ .amdhsa_next_free_vgpr 32 .amdhsa_next_free_sgpr 32 .amdhsa_shared_vgpr_count 1 + .amdhsa_inst_pref_size 63 .end_amdhsa_kernel ;--- 4.s @@ -184,7 +186,7 @@ ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 ; CHECK-NEXT: .amdhsa_kernarg_size 0 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 1 -; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: .amdhsa_inst_pref_size 63 ; CHECK-NEXT: ; TRAP_ON_START 0 ; CHECK-NEXT: ; TRAP_ON_END 0 ; CHECK-NEXT: ; IMAGE_OP 0 @@ -228,5 +230,6 @@ .amdhsa_next_free_vgpr 32 .amdhsa_next_free_sgpr 32 .amdhsa_shared_vgpr_count 1 + .amdhsa_inst_pref_size 63 .amdhsa_wavefront_size32 0 .end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s index c644e15efc8d7..ed2b87d9885c6 100644 
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s @@ -12,7 +12,7 @@ ; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 ; CHECK-NEXT: .amdhsa_kernarg_size 0 -; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: .amdhsa_inst_pref_size 0 ; CHECK-NEXT: ; GLG_EN 0 ; CHECK-NEXT: ; IMAGE_OP 0 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32 @@ -66,7 +66,7 @@ ; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0 ; CHECK-NEXT: .amdhsa_kernarg_size 0 -; CHECK-NEXT: ; INST_PREF_SIZE 0 +; CHECK-NEXT: .amdhsa_inst_pref_size 255 ; CHECK-NEXT: ; GLG_EN 0 ; CHECK-NEXT: ; IMAGE_OP 0 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32 @@ -108,4 +108,5 @@ .amdhsa_next_free_vgpr 32 .amdhsa_next_free_sgpr 32 .amdhsa_wavefront_size32 0 + .amdhsa_inst_pref_size 255 .end_amdhsa_kernel From 2f54223247e8f9f0fc006b944de8351f376814af Mon Sep 17 00:00:00 2001 From: sitrin Date: Tue, 11 Feb 2025 11:58:56 -0500 Subject: [PATCH 05/29] [Docs] Fix typo in TypeSanitizer.rst "tale" -> "table" (NFC) (#126721) The word `table` is now in place of the word `tale`. Fixes #126719. Co-authored-by: sitrin --- clang/docs/TypeSanitizer.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/TypeSanitizer.rst b/clang/docs/TypeSanitizer.rst index 4d1dfc23a6c51..3c683a6c24bb4 100644 --- a/clang/docs/TypeSanitizer.rst +++ b/clang/docs/TypeSanitizer.rst @@ -27,7 +27,7 @@ reduce these impacts. The TypeSanitizer Algorithm =========================== For each TBAA type-access descriptor, encoded in LLVM IR using TBAA Metadata, the instrumentation -pass generates descriptor tales. Thus there is a unique pointer to each type (and access descriptor). +pass generates descriptor tables. Thus there is a unique pointer to each type (and access descriptor). 
These tables are comdat (except for anonymous-namespace types), so the pointer values are unique across the program. From b3510a88b3c19645fbde09cb58af6dead68ebd36 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Tue, 11 Feb 2025 09:05:35 -0800 Subject: [PATCH 06/29] [NFC] [clang] simplify isDesignatorAtObjectEnd (#126658) IsLastOrInvalidFieldDecl would always return true if `Invalid=true`, so we know that !IsLastOrInvalidFieldDecl(...) means !Invalid. --- clang/lib/AST/ExprConstant.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 192b679b4c995..5c6ca4c9ee4de 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12536,10 +12536,9 @@ static const Expr *ignorePointerCastsAndParens(const Expr *E) { static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) { assert(!LVal.Designator.Invalid); - auto IsLastOrInvalidFieldDecl = [&Ctx](const FieldDecl *FD, bool &Invalid) { + auto IsLastOrInvalidFieldDecl = [&Ctx](const FieldDecl *FD) { const RecordDecl *Parent = FD->getParent(); - Invalid = Parent->isInvalidDecl(); - if (Invalid || Parent->isUnion()) + if (Parent->isInvalidDecl() || Parent->isUnion()) return true; const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(Parent); return FD->getFieldIndex() + 1 == Layout.getFieldCount(); @@ -12548,14 +12547,12 @@ static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) { auto &Base = LVal.getLValueBase(); if (auto *ME = dyn_cast_or_null(Base.dyn_cast())) { if (auto *FD = dyn_cast(ME->getMemberDecl())) { - bool Invalid; - if (!IsLastOrInvalidFieldDecl(FD, Invalid)) - return Invalid; + if (!IsLastOrInvalidFieldDecl(FD)) + return false; } else if (auto *IFD = dyn_cast(ME->getMemberDecl())) { for (auto *FD : IFD->chain()) { - bool Invalid; - if (!IsLastOrInvalidFieldDecl(cast(FD), Invalid)) - return Invalid; + if 
(!IsLastOrInvalidFieldDecl(cast(FD))) + return false; } } } @@ -12591,9 +12588,8 @@ static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) { return false; BaseType = CT->getElementType(); } else if (auto *FD = getAsField(Entry)) { - bool Invalid; - if (!IsLastOrInvalidFieldDecl(FD, Invalid)) - return Invalid; + if (!IsLastOrInvalidFieldDecl(FD)) + return false; BaseType = FD->getType(); } else { assert(getAsBaseClass(Entry) && "Expecting cast to a base class"); From 8e4e1449318de0e73192edf0b3c6a0d5b6ec7a31 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 11 Feb 2025 09:06:40 -0800 Subject: [PATCH 07/29] [CodeGen] Avoid repeated hash lookups (NFC) (#126672) --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index cafaaa364cb76..b679d63874b3b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1457,14 +1457,13 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { clearLocThreadIdInsertPt(CGF); OpenMPLocThreadIDMap.erase(CGF.CurFn); } - if (FunctionUDRMap.count(CGF.CurFn) > 0) { - for(const auto *D : FunctionUDRMap[CGF.CurFn]) + if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) { + for (const auto *D : I->second) UDRMap.erase(D); - FunctionUDRMap.erase(CGF.CurFn); + FunctionUDRMap.erase(I); } - auto I = FunctionUDMMap.find(CGF.CurFn); - if (I != FunctionUDMMap.end()) { - for(const auto *D : I->second) + if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) { + for (const auto *D : I->second) UDMMap.erase(D); FunctionUDMMap.erase(I); } From c50f924aeae2d2eded772a7a80b20d46e1c9b41e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 11 Feb 2025 09:07:15 -0800 Subject: [PATCH 08/29] [Sema] Avoid repeated hash lookups (NFC) (#126674) --- clang/lib/Sema/SemaOpenMP.cpp | 12 ++++++------ 1 file changed, 
6 insertions(+), 6 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 376995d624e28..39ce65381a98c 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5078,7 +5078,8 @@ static bool checkIfClauses(Sema &S, OpenMPDirectiveKind Kind, // At most one if clause without a directive-name-modifier can appear on // the directive. OpenMPDirectiveKind CurNM = IC->getNameModifier(); - if (FoundNameModifiers[CurNM]) { + auto &FNM = FoundNameModifiers[CurNM]; + if (FNM) { S.Diag(C->getBeginLoc(), diag::err_omp_more_one_clause) << getOpenMPDirectiveName(Kind) << getOpenMPClauseName(OMPC_if) << (CurNM != OMPD_unknown) << getOpenMPDirectiveName(CurNM); @@ -5087,7 +5088,7 @@ static bool checkIfClauses(Sema &S, OpenMPDirectiveKind Kind, NameModifierLoc.push_back(IC->getNameModifierLoc()); ++NamedModifiersNumber; } - FoundNameModifiers[CurNM] = IC; + FNM = IC; if (CurNM == OMPD_unknown) continue; // Check if the specified name modifier is allowed for the current @@ -6759,16 +6760,15 @@ SemaOpenMP::DeclGroupPtrTy SemaOpenMP::ActOnOpenMPDeclareSimdDirective( ->getCanonicalDecl() == CanonPVD) { // OpenMP [2.8.1, simd construct, Restrictions] // A list-item cannot appear in more than one aligned clause. 
- if (AlignedArgs.count(CanonPVD) > 0) { + auto [It, Inserted] = AlignedArgs.try_emplace(CanonPVD, E); + if (!Inserted) { Diag(E->getExprLoc(), diag::err_omp_used_in_clause_twice) << 1 << getOpenMPClauseName(OMPC_aligned) << E->getSourceRange(); - Diag(AlignedArgs[CanonPVD]->getExprLoc(), - diag::note_omp_explicit_dsa) + Diag(It->second->getExprLoc(), diag::note_omp_explicit_dsa) << getOpenMPClauseName(OMPC_aligned); continue; } - AlignedArgs[CanonPVD] = E; QualType QTy = PVD->getType() .getNonReferenceType() .getUnqualifiedType() From 43c82a8e07819ad0553ef0dffedff3c8b8cecf5e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 11 Feb 2025 09:07:46 -0800 Subject: [PATCH 09/29] [clang-installapi] Avoid repeated hash lookups (NFC) (#126677) --- clang/tools/clang-installapi/Options.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 8a2c3463189fa..0dddcfce58ca6 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -263,11 +263,12 @@ bool Options::processInstallAPIXOptions(InputArgList &Args) { } const StringRef ASpelling = NextA->getSpelling(); const auto &AValues = NextA->getValues(); + auto &UniqueArgs = FEOpts.UniqueArgs[Label]; if (AValues.empty()) - FEOpts.UniqueArgs[Label].emplace_back(ASpelling.str()); + UniqueArgs.emplace_back(ASpelling.str()); else for (const StringRef Val : AValues) - FEOpts.UniqueArgs[Label].emplace_back((ASpelling + Val).str()); + UniqueArgs.emplace_back((ASpelling + Val).str()); A->claim(); NextA->claim(); @@ -608,32 +609,37 @@ Options::processAndFilterOutInstallAPIOptions(ArrayRef Args) { ParsedArgs.hasArg(OPT_not_for_dyld_shared_cache); for (const Arg *A : ParsedArgs.filtered(OPT_allowable_client)) { + auto It = ArgToArchMap.find(A); LinkerOpts.AllowableClients[A->getValue()] = - ArgToArchMap.count(A) ? 
ArgToArchMap[A] : ArchitectureSet(); + It != ArgToArchMap.end() ? It->second : ArchitectureSet(); A->claim(); } for (const Arg *A : ParsedArgs.filtered(OPT_reexport_l)) { + auto It = ArgToArchMap.find(A); LinkerOpts.ReexportedLibraries[A->getValue()] = - ArgToArchMap.count(A) ? ArgToArchMap[A] : ArchitectureSet(); + It != ArgToArchMap.end() ? It->second : ArchitectureSet(); A->claim(); } for (const Arg *A : ParsedArgs.filtered(OPT_reexport_library)) { + auto It = ArgToArchMap.find(A); LinkerOpts.ReexportedLibraryPaths[A->getValue()] = - ArgToArchMap.count(A) ? ArgToArchMap[A] : ArchitectureSet(); + It != ArgToArchMap.end() ? It->second : ArchitectureSet(); A->claim(); } for (const Arg *A : ParsedArgs.filtered(OPT_reexport_framework)) { + auto It = ArgToArchMap.find(A); LinkerOpts.ReexportedFrameworks[A->getValue()] = - ArgToArchMap.count(A) ? ArgToArchMap[A] : ArchitectureSet(); + It != ArgToArchMap.end() ? It->second : ArchitectureSet(); A->claim(); } for (const Arg *A : ParsedArgs.filtered(OPT_rpath)) { + auto It = ArgToArchMap.find(A); LinkerOpts.RPaths[A->getValue()] = - ArgToArchMap.count(A) ? ArgToArchMap[A] : ArchitectureSet(); + It != ArgToArchMap.end() ? 
It->second : ArchitectureSet(); A->claim(); } From 497506f6f454272e7c4921b3143768c6927738e4 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 11 Feb 2025 09:08:15 -0800 Subject: [PATCH 10/29] [Analysis] Avoid repeated hash lookups (NFC) (#126678) --- llvm/lib/Analysis/DDG.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp index a0774096c5129..0907a7fb021fc 100644 --- a/llvm/lib/Analysis/DDG.cpp +++ b/llvm/lib/Analysis/DDG.cpp @@ -241,9 +241,10 @@ bool DataDependenceGraph::addNode(DDGNode &N) { } const PiBlockDDGNode *DataDependenceGraph::getPiBlock(const NodeType &N) const { - if (!PiBlockMap.contains(&N)) + auto It = PiBlockMap.find(&N); + if (It == PiBlockMap.end()) return nullptr; - auto *Pi = PiBlockMap.find(&N)->second; + auto *Pi = It->second; assert(!PiBlockMap.contains(Pi) && "Nested pi-blocks detected."); return Pi; } From c9686d6904f24de8c63294dd708c9e0d0b4f7a47 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 11 Feb 2025 09:09:04 -0800 Subject: [PATCH 11/29] [SystemZ] Avoid repeated hash lookups (NFC) (#126679) --- llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index d307c73a87fc9..589dd8b634125 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -688,16 +688,17 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { int64_t SrcDisp = MI->getOperand(5).getImm(); SystemZTargetStreamer *TS = getTargetStreamer(); - MCSymbol *DotSym = nullptr; - MCInst ET = MCInstBuilder(TargetInsOpc).addReg(DestReg) - .addImm(DestDisp).addImm(1).addReg(SrcReg).addImm(SrcDisp); + MCInst ET = MCInstBuilder(TargetInsOpc) + .addReg(DestReg) + .addImm(DestDisp) + .addImm(1) + .addReg(SrcReg) + .addImm(SrcDisp); 
SystemZTargetStreamer::MCInstSTIPair ET_STI(ET, &MF->getSubtarget()); - SystemZTargetStreamer::EXRLT2SymMap::iterator I = - TS->EXRLTargets2Sym.find(ET_STI); - if (I != TS->EXRLTargets2Sym.end()) - DotSym = I->second; - else - TS->EXRLTargets2Sym[ET_STI] = DotSym = OutContext.createTempSymbol(); + auto [It, Inserted] = TS->EXRLTargets2Sym.try_emplace(ET_STI); + if (Inserted) + It->second = OutContext.createTempSymbol(); + MCSymbol *DotSym = It->second; const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext); EmitToStreamer( *OutStreamer, From 042e860a8a3a2e1be384a5de04b90607ce32e294 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 11 Feb 2025 09:09:43 -0800 Subject: [PATCH 12/29] [Vectorize] Avoid repeated hash lookups (NFC) (#126681) --- llvm/lib/Transforms/Vectorize/VPlan.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 3816e1b61576a..fbbc466f2f7f6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3709,18 +3709,16 @@ class VPlan { /// yet) for \p V. VPValue *getOrAddLiveIn(Value *V) { assert(V && "Trying to get or add the VPValue of a null Value"); - if (!Value2VPValue.count(V)) { + auto [It, Inserted] = Value2VPValue.try_emplace(V); + if (Inserted) { VPValue *VPV = new VPValue(V); VPLiveInsToFree.push_back(VPV); assert(VPV->isLiveIn() && "VPV must be a live-in."); - assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); - Value2VPValue[V] = VPV; + It->second = VPV; } - assert(Value2VPValue.count(V) && "Value does not exist in VPlan"); - assert(Value2VPValue[V]->isLiveIn() && - "Only live-ins should be in mapping"); - return Value2VPValue[V]; + assert(It->second->isLiveIn() && "Only live-ins should be in mapping"); + return It->second; } /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise. 
From cf87eb9d9b006ff28296e399dcc73d68bb187e91 Mon Sep 17 00:00:00 2001 From: Andreas Jonson Date: Tue, 11 Feb 2025 18:11:23 +0100 Subject: [PATCH 13/29] [ValueTracking] Handle trunc to i1 as condition in dominating condition. (#126414) proof: https://alive2.llvm.org/ce/z/gALGmv --- llvm/lib/Analysis/ValueTracking.cpp | 24 ++++++++++++- .../test/Transforms/InstCombine/known-bits.ll | 36 +++++++------------ 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index fb744d61aad63..28d7e1ce401e4 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -797,10 +797,28 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond, else Known2 = Known2.intersectWith(Known3); Known = Known.unionWith(Known2); + return; } - if (auto *Cmp = dyn_cast(Cond)) + if (auto *Cmp = dyn_cast(Cond)) { computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert); + return; + } + + if (match(Cond, m_Trunc(m_Specific(V)))) { + KnownBits DstKnown(1); + if (Invert) { + DstKnown.setAllZero(); + } else { + DstKnown.setAllOnes(); + } + if (cast(Cond)->hasNoUnsignedWrap()) { + Known = Known.unionWith(DstKnown.zext(Known.getBitWidth())); + return; + } + Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth())); + return; + } if (Depth < MaxAnalysisRecursionDepth && match(Cond, m_Not(m_Value(A)))) computeKnownBitsFromCond(V, A, Known, Depth + 1, SQ, !Invert); @@ -10280,6 +10298,10 @@ void llvm::findValuesAffectedByCondition( m_Value()))) { // Handle patterns that computeKnownFPClass() support. 
AddAffected(A); + } else if (!IsAssume && match(V, m_Trunc(m_Value(X)))) { + // Assume is checked here as X is already added above for assumes in + // addValueAffectedByCondition + AddAffected(X); } else if (!IsAssume && match(V, m_Not(m_Value(X)))) { // Assume is checked here to avoid issues with ephemeral values Worklist.push_back(X); diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index b729cbd971acc..9a9fec694ff0e 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -2167,11 +2167,9 @@ define i8 @test_trunc_and_1(i8 %a) { ; CHECK-NEXT: [[CAST:%.*]] = trunc i8 [[A:%.*]] to i1 ; CHECK-NEXT: br i1 [[CAST]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[B:%.*]] = and i8 [[A]], 1 -; CHECK-NEXT: ret i8 [[B]] +; CHECK-NEXT: ret i8 1 ; CHECK: if.else: -; CHECK-NEXT: [[C:%.*]] = and i8 [[A]], 1 -; CHECK-NEXT: ret i8 [[C]] +; CHECK-NEXT: ret i8 0 ; entry: %cast = trunc i8 %a to i1 @@ -2192,11 +2190,9 @@ define i8 @test_not_trunc_and_1(i8 %a) { ; CHECK-NEXT: [[CAST:%.*]] = trunc i8 [[A:%.*]] to i1 ; CHECK-NEXT: br i1 [[CAST]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[B:%.*]] = and i8 [[A]], 1 -; CHECK-NEXT: ret i8 [[B]] +; CHECK-NEXT: ret i8 0 ; CHECK: if.else: -; CHECK-NEXT: [[C:%.*]] = and i8 [[A]], 1 -; CHECK-NEXT: ret i8 [[C]] +; CHECK-NEXT: ret i8 1 ; entry: %cast = trunc i8 %a to i1 @@ -2243,11 +2239,9 @@ define i8 @test_trunc_nuw_and_1(i8 %a) { ; CHECK-NEXT: [[CAST:%.*]] = trunc nuw i8 [[A:%.*]] to i1 ; CHECK-NEXT: br i1 [[CAST]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[B:%.*]] = and i8 [[A]], 1 -; CHECK-NEXT: ret i8 [[B]] +; CHECK-NEXT: ret i8 0 ; CHECK: if.else: -; CHECK-NEXT: [[C:%.*]] = and i8 [[A]], 1 -; CHECK-NEXT: ret i8 [[C]] +; CHECK-NEXT: ret i8 1 ; entry: %cast = trunc nuw i8 %a to i1 @@ -2268,11 +2262,9 @@ define i8 
@test_trunc_nuw_or_2(i8 %a) { ; CHECK-NEXT: [[CAST:%.*]] = trunc nuw i8 [[A:%.*]] to i1 ; CHECK-NEXT: br i1 [[CAST]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[B:%.*]] = or i8 [[A]], 2 -; CHECK-NEXT: ret i8 [[B]] +; CHECK-NEXT: ret i8 2 ; CHECK: if.else: -; CHECK-NEXT: [[C:%.*]] = or i8 [[A]], 2 -; CHECK-NEXT: ret i8 [[C]] +; CHECK-NEXT: ret i8 3 ; entry: %cast = trunc nuw i8 %a to i1 @@ -2293,11 +2285,9 @@ define i8 @test_not_trunc_nuw_and_1(i8 %a) { ; CHECK-NEXT: [[CAST:%.*]] = trunc nuw i8 [[A:%.*]] to i1 ; CHECK-NEXT: br i1 [[CAST]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[B:%.*]] = and i8 [[A]], 1 -; CHECK-NEXT: ret i8 [[B]] +; CHECK-NEXT: ret i8 0 ; CHECK: if.else: -; CHECK-NEXT: [[C:%.*]] = and i8 [[A]], 1 -; CHECK-NEXT: ret i8 [[C]] +; CHECK-NEXT: ret i8 1 ; entry: %cast = trunc nuw i8 %a to i1 @@ -2319,8 +2309,7 @@ define i8 @test_trunc_cond_and(i8 %x, i1 %c) { ; CHECK-NEXT: [[COND:%.*]] = and i1 [[C:%.*]], [[CMP]] ; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[EXIT:%.*]] ; CHECK: if: -; CHECK-NEXT: [[OR1:%.*]] = or i8 [[X]], -2 -; CHECK-NEXT: ret i8 [[OR1]] +; CHECK-NEXT: ret i8 -1 ; CHECK: exit: ; CHECK-NEXT: [[OR2:%.*]] = or i8 [[X]], -2 ; CHECK-NEXT: ret i8 [[OR2]] @@ -2345,8 +2334,7 @@ define i8 @test_not_trunc_cond_and(i8 %x, i1 %c) { ; CHECK-NEXT: [[COND:%.*]] = and i1 [[C:%.*]], [[NOT]] ; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[EXIT:%.*]] ; CHECK: if: -; CHECK-NEXT: [[OR1:%.*]] = or i8 [[X]], -2 -; CHECK-NEXT: ret i8 [[OR1]] +; CHECK-NEXT: ret i8 -2 ; CHECK: exit: ; CHECK-NEXT: [[OR2:%.*]] = or i8 [[X]], -2 ; CHECK-NEXT: ret i8 [[OR2]] From f574d8235371c4f28cad8e800d99bcb7ad579b7d Mon Sep 17 00:00:00 2001 From: Sharjeel Khan Date: Tue, 11 Feb 2025 12:11:40 -0500 Subject: [PATCH 14/29] [Clang][Driver][HIP] Fix test for HIP as it was failing (#126585) This PR #125646 added this test and it was failing in Android's compiler and on my machine locally. 
I removed the "Build config" check and it passes now. --- .../test/Driver/dep-file-flag-with-multiple-offload-archs.hip | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/test/Driver/dep-file-flag-with-multiple-offload-archs.hip b/clang/test/Driver/dep-file-flag-with-multiple-offload-archs.hip index 79a52f0bc8981..f17e56acfb7f7 100644 --- a/clang/test/Driver/dep-file-flag-with-multiple-offload-archs.hip +++ b/clang/test/Driver/dep-file-flag-with-multiple-offload-archs.hip @@ -1,6 +1,5 @@ -// RUN: %clang -### -nogpuinc -nogpulib --offload-arch=gfx1030 --offload-arch=gfx1100 --offload-arch=gfx1101 -MD -MF tmp.d %s 2>&1 | FileCheck %s +// RUN: %clang -### -nogpuinc -nogpulib --offload-arch=gfx1030 --offload-arch=gfx1100 --offload-arch=gfx1101 --target=x86_64-linux-gnu -MD -MF tmp.d %s 2>&1 | FileCheck %s -// CHECK: Build config: // CHECK-NOT: {{.*}}clang{{.*}}"-target-cpu" "gfx1030"{{.*}}"-dependency-file" "tmp.d" // CHECK: {{.*}}lld{{.*}}"-plugin-opt=mcpu=gfx1030" // CHECK-NOT: {{.*}}clang{{.*}}"-target-cpu" "gfx1100"{{.*}}"-dependency-file" "tmp.d" From c3d8c625af8ebbf8a9af035c18ec4e1cb1d8d2f3 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Tue, 11 Feb 2025 09:16:59 -0800 Subject: [PATCH 15/29] [flang][acc] Fill-in name for privatized loop iv (#126601) When the loop induction variable implicit private clause was being generated, the name was left empty. The intent is that the data clause operation holds the source language variable name. Thus, add the missing name now. 
--- flang/lib/Lower/OpenACC.cpp | 1 + flang/test/Lower/OpenACC/acc-loop.f90 | 6 +++--- .../test/Lower/OpenACC/acc-private-unwrap-defaultbounds.f90 | 2 +- flang/test/Lower/OpenACC/acc-private.f90 | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index a11a2c824bf9e..3dd35ed9ae481 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -1803,6 +1803,7 @@ static void privatizeIv(Fortran::lower::AbstractConverter &converter, builder, recipeName, loc, ivValue.getType()); std::stringstream asFortran; + asFortran << Fortran::lower::mangle::demangleName(toStringRef(sym.name())); auto op = createDataEntryOp( builder, loc, ivValue, asFortran, {}, true, /*implicit=*/true, mlir::acc::DataClause::acc_private, ivValue.getType(), diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90 index d65ca538bf60c..f77aefcc2c314 100644 --- a/flang/test/Lower/OpenACC/acc-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-loop.f90 @@ -321,8 +321,8 @@ subroutine sub1(i, j, k) ! CHECK: %[[DC_K:.*]] = fir.alloca i32 {bindc_name = "k"} ! CHECK: %[[DC_J:.*]] = fir.alloca i32 {bindc_name = "j"} ! CHECK: %[[DC_I:.*]] = fir.alloca i32 {bindc_name = "i"} -! CHECK: %[[P_I:.*]] = acc.private varPtr(%[[DC_I]] : !fir.ref) -> !fir.ref {implicit = true, name = ""} -! CHECK: %[[P_J:.*]] = acc.private varPtr(%[[DC_J]] : !fir.ref) -> !fir.ref {implicit = true, name = ""} -! CHECK: %[[P_K:.*]] = acc.private varPtr(%[[DC_K]] : !fir.ref) -> !fir.ref {implicit = true, name = ""} +! CHECK: %[[P_I:.*]] = acc.private varPtr(%[[DC_I]] : !fir.ref) -> !fir.ref {implicit = true, name = "i"} +! CHECK: %[[P_J:.*]] = acc.private varPtr(%[[DC_J]] : !fir.ref) -> !fir.ref {implicit = true, name = "j"} +! CHECK: %[[P_K:.*]] = acc.private varPtr(%[[DC_K]] : !fir.ref) -> !fir.ref {implicit = true, name = "k"} ! 
CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[P_I]] : !fir.ref, @privatization_ref_i32 -> %[[P_J]] : !fir.ref, @privatization_ref_i32 -> %[[P_K]] : !fir.ref) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%c10{{.*}}, %c100{{.*}}, %c200{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) ! CHECK: } attributes {inclusiveUpperbound = array} diff --git a/flang/test/Lower/OpenACC/acc-private-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-private-unwrap-defaultbounds.f90 index febb933e98975..d86f82dae8d03 100644 --- a/flang/test/Lower/OpenACC/acc-private-unwrap-defaultbounds.f90 +++ b/flang/test/Lower/OpenACC/acc-private-unwrap-defaultbounds.f90 @@ -396,7 +396,7 @@ subroutine acc_private_use() ! CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFacc_private_useEi"} ! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %0 {uniq_name = "_QFacc_private_useEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: acc.parallel -! CHECK: %[[PRIV_I:.*]] = acc.private varPtr(%[[DECL_I]]#1 : !fir.ref) -> !fir.ref {implicit = true, name = ""} +! CHECK: %[[PRIV_I:.*]] = acc.private varPtr(%[[DECL_I]]#1 : !fir.ref) -> !fir.ref {implicit = true, name = "i"} ! CHECK: %[[DECL_PRIV_I:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFacc_private_useEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: acc.loop {{.*}} private(@privatization_ref_i32 -> %[[PRIV_I]] : !fir.ref) control(%[[IV0:.*]] : i32) = (%c1{{.*}} : i32) to (%c10{{.*}} : i32) step (%c1{{.*}} : i32) ! CHECK: fir.store %[[IV0]] to %[[DECL_PRIV_I]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90 index 99e3b223c8575..c86da8c001b55 100644 --- a/flang/test/Lower/OpenACC/acc-private.f90 +++ b/flang/test/Lower/OpenACC/acc-private.f90 @@ -384,7 +384,7 @@ subroutine acc_private_use() ! 
CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFacc_private_useEi"} ! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %0 {uniq_name = "_QFacc_private_useEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: acc.parallel -! CHECK: %[[PRIV_I:.*]] = acc.private varPtr(%[[DECL_I]]#1 : !fir.ref) -> !fir.ref {implicit = true, name = ""} +! CHECK: %[[PRIV_I:.*]] = acc.private varPtr(%[[DECL_I]]#1 : !fir.ref) -> !fir.ref {implicit = true, name = "i"} ! CHECK: %[[DECL_PRIV_I:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFacc_private_useEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: acc.loop {{.*}} private(@privatization_ref_i32 -> %[[PRIV_I]] : !fir.ref) control(%[[IV0:.*]] : i32) = (%c1{{.*}} : i32) to (%c10{{.*}} : i32) step (%c1{{.*}} : i32) ! CHECK: fir.store %[[IV0]] to %[[DECL_PRIV_I]]#0 : !fir.ref From 75cb5633844deb4e0c6a5c7bdf84013b563818d3 Mon Sep 17 00:00:00 2001 From: Ilia Kuklin Date: Tue, 11 Feb 2025 22:17:52 +0500 Subject: [PATCH 16/29] [clang][Sema] Move computing enum bits into a separate function (#126096) Move the code that computes `NumNegativeBits` and `NumPositiveBits` for an enum to a separate function in `ASTContext.h`. This function needs to be called from LLDB as well (#115005) --- clang/include/clang/AST/ASTContext.h | 41 +++++++++++++++++++++ clang/lib/AST/ASTContext.cpp | 13 +++++++ clang/lib/Sema/SemaDecl.cpp | 54 +++------------------------- 3 files changed, 59 insertions(+), 49 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 65be782c1ba43..a96b9c0a17045 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1733,6 +1733,47 @@ class ASTContext : public RefCountedBase { unsigned NumPositiveBits, QualType &BestType, QualType &BestPromotionType); + /// Determine whether the given integral value is representable within + /// the given type T. 
+ bool isRepresentableIntegerValue(llvm::APSInt &Value, QualType T); + + /// Compute NumNegativeBits and NumPositiveBits for an enum based on + /// the constant values of its enumerators. + template + bool computeEnumBits(RangeT EnumConstants, unsigned &NumNegativeBits, + unsigned &NumPositiveBits) { + NumNegativeBits = 0; + NumPositiveBits = 0; + bool MembersRepresentableByInt = true; + for (auto *Elem : EnumConstants) { + EnumConstantDecl *ECD = cast_or_null(Elem); + if (!ECD) + continue; // Already issued a diagnostic. + + llvm::APSInt InitVal = ECD->getInitVal(); + if (InitVal.isUnsigned() || InitVal.isNonNegative()) { + // If the enumerator is zero that should still be counted as a positive + // bit since we need a bit to store the value zero. + unsigned ActiveBits = InitVal.getActiveBits(); + NumPositiveBits = std::max({NumPositiveBits, ActiveBits, 1u}); + } else { + NumNegativeBits = + std::max(NumNegativeBits, (unsigned)InitVal.getSignificantBits()); + } + + MembersRepresentableByInt &= isRepresentableIntegerValue(InitVal, IntTy); + } + + // If we have an empty set of enumerators we still need one bit. 
+ // From [dcl.enum]p8 + // If the enumerator-list is empty, the values of the enumeration are as if + // the enumeration had a single enumerator with value 0 + if (!NumPositiveBits && !NumNegativeBits) + NumPositiveBits = 1; + + return MembersRepresentableByInt; + } + QualType getUnresolvedUsingType(const UnresolvedUsingTypenameDecl *Decl) const; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index e3b44bdbe3dc5..b1b9d56ccca9f 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -5320,6 +5320,19 @@ bool ASTContext::computeBestEnumTypes(bool IsPacked, unsigned NumNegativeBits, return EnumTooLarge; } +bool ASTContext::isRepresentableIntegerValue(llvm::APSInt &Value, QualType T) { + assert((T->isIntegralType(*this) || T->isEnumeralType()) && + "Integral type required!"); + unsigned BitWidth = getIntWidth(T); + + if (Value.isUnsigned() || Value.isNonNegative()) { + if (T->isSignedIntegerOrEnumerationType()) + --BitWidth; + return Value.getActiveBits() <= BitWidth; + } + return Value.getSignificantBits() <= BitWidth; +} + QualType ASTContext::getUnresolvedUsingType( const UnresolvedUsingTypenameDecl *Decl) const { if (Decl->TypeForDecl) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 74e0fcec2d911..6eedc77ed20a0 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -19633,23 +19633,6 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl, ProcessAPINotes(Record); } -/// Determine whether the given integral value is representable within -/// the given type T. 
-static bool isRepresentableIntegerValue(ASTContext &Context, - llvm::APSInt &Value, - QualType T) { - assert((T->isIntegralType(Context) || T->isEnumeralType()) && - "Integral type required!"); - unsigned BitWidth = Context.getIntWidth(T); - - if (Value.isUnsigned() || Value.isNonNegative()) { - if (T->isSignedIntegerOrEnumerationType()) - --BitWidth; - return Value.getActiveBits() <= BitWidth; - } - return Value.getSignificantBits() <= BitWidth; -} - // Given an integral type, return the next larger integral type // (or a NULL type of no such type exists). static QualType getNextLargerIntegralType(ASTContext &Context, QualType T) { @@ -19723,7 +19706,7 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum, // representable in the underlying type of the enumeration. In C++11, // we perform a non-narrowing conversion as part of converted constant // expression checking. - if (!isRepresentableIntegerValue(Context, EnumVal, EltTy)) { + if (!Context.isRepresentableIntegerValue(EnumVal, EltTy)) { if (Context.getTargetInfo() .getTriple() .isWindowsMSVCEnvironment()) { @@ -19752,7 +19735,7 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum, // representable as an int. // Complain if the value is not representable in an int. - if (!isRepresentableIntegerValue(Context, EnumVal, Context.IntTy)) { + if (!Context.isRepresentableIntegerValue(EnumVal, Context.IntTy)) { Diag(IdLoc, getLangOpts().C23 ? diag::warn_c17_compat_enum_value_not_int : diag::ext_c23_enum_value_not_int) @@ -19844,7 +19827,7 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum, : diag::ext_c23_enum_value_not_int) << 1 << toString(EnumVal, 10) << 1; } else if (!getLangOpts().CPlusPlus && !EltTy->isDependentType() && - !isRepresentableIntegerValue(Context, EnumVal, EltTy)) { + !Context.isRepresentableIntegerValue(EnumVal, EltTy)) { // Enforce C99 6.7.2.2p2 even when we compute the next value. Diag(IdLoc, getLangOpts().C23 ? 
diag::warn_c17_compat_enum_value_not_int : diag::ext_c23_enum_value_not_int) @@ -20171,35 +20154,8 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange, // reverse the list. unsigned NumNegativeBits = 0; unsigned NumPositiveBits = 0; - bool MembersRepresentableByInt = true; - - for (unsigned i = 0, e = Elements.size(); i != e; ++i) { - EnumConstantDecl *ECD = - cast_or_null(Elements[i]); - if (!ECD) continue; // Already issued a diagnostic. - - llvm::APSInt InitVal = ECD->getInitVal(); - - // Keep track of the size of positive and negative values. - if (InitVal.isUnsigned() || InitVal.isNonNegative()) { - // If the enumerator is zero that should still be counted as a positive - // bit since we need a bit to store the value zero. - unsigned ActiveBits = InitVal.getActiveBits(); - NumPositiveBits = std::max({NumPositiveBits, ActiveBits, 1u}); - } else { - NumNegativeBits = - std::max(NumNegativeBits, (unsigned)InitVal.getSignificantBits()); - } - MembersRepresentableByInt &= - isRepresentableIntegerValue(Context, InitVal, Context.IntTy); - } - - // If we have an empty set of enumerators we still need one bit. - // From [dcl.enum]p8 - // If the enumerator-list is empty, the values of the enumeration are as if - // the enumeration had a single enumerator with value 0 - if (!NumPositiveBits && !NumNegativeBits) - NumPositiveBits = 1; + bool MembersRepresentableByInt = + Context.computeEnumBits(Elements, NumNegativeBits, NumPositiveBits); // Figure out the type that should be used for this enum. 
 QualType BestType; From 42538ca3a0db6513a1a01fca88dccd34bd0d9641 Mon Sep 17 00:00:00 2001 From: Aaron Siddhartha Mondal Date: Tue, 11 Feb 2025 18:24:05 +0100 Subject: [PATCH 17/29] [GitHub] Add aaronmondal to Bazel codeowners (#126760) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 484b947bda402..dd4116fa16bc5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -131,7 +131,7 @@ /bolt/ @aaupov @maksfb @rafaelauler @ayermolo @dcci @yota9 # Bazel build system. -/utils/bazel/ @rupprecht @keith +/utils/bazel/ @rupprecht @keith @aaronmondal # InstallAPI and TextAPI /llvm/**/TextAPI/ @cyndyishida From b92bab3c010a8b8d7c2273ebdacfa34aaaaa757d Mon Sep 17 00:00:00 2001 From: "S. Bharadwaj Yadavalli" Date: Tue, 11 Feb 2025 12:29:05 -0500 Subject: [PATCH 18/29] [HLSL] Appropriately set function attribute optnone (#125937) When optimization is disabled, set the `optnone` attribute on all module entry functions. Updated test in accordance with the change. Closes #124796 --- clang/lib/CodeGen/CGHLSLRuntime.cpp | 7 +++++++ clang/test/CodeGenHLSL/inline-functions.hlsl | 16 +++++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 2ce54cc3c52ef..6cccd353cff96 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -345,6 +345,13 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes( WaveSizeAttr->getPreferred()); Fn->addFnAttr(WaveSizeKindStr, WaveSizeStr); } + // HLSL entry functions are materialized for module functions with + // HLSLShaderAttr attribute. SetLLVMFunctionAttributesForDefinition called + // later in the compiler-flow for such module functions is not aware of and + // hence not able to set attributes of the newly materialized entry functions. + // So, set attributes of entry function here, as appropriate.
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0) + Fn->addFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::NoInline); } diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl index e78d04ec9594f..4748eeee7475f 100644 --- a/clang/test/CodeGenHLSL/inline-functions.hlsl +++ b/clang/test/CodeGenHLSL/inline-functions.hlsl @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE,OPT_ATTR +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,OPT_ATTR +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,NOOPT_ATTR // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,OPT_ATTR +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,NOOPT_ATTR // Tests that user functions will always be inlined. 
// This includes exported functions and mangled entry point implementation functions. @@ -71,7 +71,8 @@ RWBuffer Indices; // NOINLINE: ret void // The unmangled version is not inlined, EntryAttr reflects that -// CHECK: Function Attrs: {{.*}}noinline +// OPT_ATTR: Function Attrs: {{.*}}optnone +// NOOPT_ATTR-NOT: Function Attrs: {{.*}}optnone // CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]] // Make sure function calls are inlined when AlwaysInline is run // This only leaves calls to llvm. intrinsics @@ -98,7 +99,8 @@ void main(unsigned int GI : SV_GroupIndex) { // NOINLINE: ret void // The unmangled version is not inlined, EntryAttr reflects that -// CHECK: Function Attrs: {{.*}}noinline +// OPT_ATTR: Function Attrs: {{.*}}optnone +// NOOPT_ATTR-NOT: Function Attrs: {{.*}}optnone // CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]] // Make sure function calls are inlined when AlwaysInline is run // This only leaves calls to llvm. intrinsics From b8ba266820ca51216c98216369a4947a942ec21c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 11 Feb 2025 09:31:01 -0800 Subject: [PATCH 19/29] [libc][test][stdbit] fix -Wimplicit-int-conversion (#126616) When cross compiling the libc-stdbit-tests, the existing tests trigger numerous instances of -Wimplicit-int-conversion. The truncation of these implicit promotions is intentional. 
--- libc/test/src/stdbit/stdc_bit_ceil_uc_test.cpp | 11 +++++++---- libc/test/src/stdbit/stdc_bit_ceil_us_test.cpp | 11 +++++++---- .../src/stdbit/stdc_first_leading_one_uc_test.cpp | 3 ++- .../src/stdbit/stdc_first_leading_one_us_test.cpp | 3 ++- .../src/stdbit/stdc_first_leading_zero_uc_test.cpp | 3 ++- .../src/stdbit/stdc_first_leading_zero_us_test.cpp | 3 ++- .../src/stdbit/stdc_first_trailing_one_uc_test.cpp | 4 +++- .../src/stdbit/stdc_first_trailing_one_us_test.cpp | 4 +++- .../src/stdbit/stdc_first_trailing_zero_uc_test.cpp | 4 +++- .../src/stdbit/stdc_first_trailing_zero_us_test.cpp | 4 +++- libc/test/src/stdbit/stdc_has_single_bit_uc_test.cpp | 4 +++- libc/test/src/stdbit/stdc_has_single_bit_us_test.cpp | 4 +++- libc/test/src/stdbit/stdc_leading_ones_uc_test.cpp | 3 ++- libc/test/src/stdbit/stdc_leading_ones_us_test.cpp | 3 ++- libc/test/src/stdbit/stdc_leading_zeros_uc_test.cpp | 3 ++- libc/test/src/stdbit/stdc_leading_zeros_us_test.cpp | 3 ++- libc/test/src/stdbit/stdc_trailing_ones_uc_test.cpp | 4 +++- libc/test/src/stdbit/stdc_trailing_ones_us_test.cpp | 4 +++- libc/test/src/stdbit/stdc_trailing_zeros_uc_test.cpp | 4 +++- libc/test/src/stdbit/stdc_trailing_zeros_us_test.cpp | 4 +++- 20 files changed, 60 insertions(+), 26 deletions(-) diff --git a/libc/test/src/stdbit/stdc_bit_ceil_uc_test.cpp b/libc/test/src/stdbit/stdc_bit_ceil_uc_test.cpp index 1ef87b0d44de6..6915859b7c669 100644 --- a/libc/test/src/stdbit/stdc_bit_ceil_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_bit_ceil_uc_test.cpp @@ -17,18 +17,21 @@ TEST(LlvmLibcStdcBitceilUcTest, Zero) { TEST(LlvmLibcStdcBitceilUcTest, Ones) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_uc(1U << i), - static_cast(1U << i)); + EXPECT_EQ( + LIBC_NAMESPACE::stdc_bit_ceil_uc(static_cast(1U << i)), + static_cast(1U << i)); } TEST(LlvmLibcStdcBitceilUcTest, OneLessThanPowsTwo) { for (unsigned i = 2U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_uc((1U << 
i) - 1), + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_uc( + static_cast((1U << i) - 1)), static_cast(1U << i)); } TEST(LlvmLibcStdcBitceilUcTest, OneMoreThanPowsTwo) { for (unsigned i = 1U; i != UCHAR_WIDTH - 1; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_uc((1U << i) + 1), + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_uc( + static_cast((1U << i) + 1)), static_cast(1U << (i + 1))); } diff --git a/libc/test/src/stdbit/stdc_bit_ceil_us_test.cpp b/libc/test/src/stdbit/stdc_bit_ceil_us_test.cpp index 56873c51828f1..9a8b46f250f48 100644 --- a/libc/test/src/stdbit/stdc_bit_ceil_us_test.cpp +++ b/libc/test/src/stdbit/stdc_bit_ceil_us_test.cpp @@ -17,18 +17,21 @@ TEST(LlvmLibcStdcBitceilUsTest, Zero) { TEST(LlvmLibcStdcBitceilUsTest, Ones) { for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_us(1U << i), - static_cast(1U << i)); + EXPECT_EQ( + LIBC_NAMESPACE::stdc_bit_ceil_us(static_cast(1U << i)), + static_cast(1U << i)); } TEST(LlvmLibcStdcBitceilUsTest, OneLessThanPowsTwo) { for (unsigned i = 2U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_us((1U << i) - 1), + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_us( + static_cast((1U << i) - 1)), static_cast(1U << i)); } TEST(LlvmLibcStdcBitceilUsTest, OneMoreThanPowsTwo) { for (unsigned i = 1U; i != USHRT_WIDTH - 1; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_us((1U << i) + 1), + EXPECT_EQ(LIBC_NAMESPACE::stdc_bit_ceil_us( + static_cast((1U << i) + 1)), static_cast(1U << (i + 1))); } diff --git a/libc/test/src/stdbit/stdc_first_leading_one_uc_test.cpp b/libc/test/src/stdbit/stdc_first_leading_one_uc_test.cpp index b8c8db587098e..2ab8397015288 100644 --- a/libc/test/src/stdbit/stdc_first_leading_one_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_first_leading_one_uc_test.cpp @@ -16,6 +16,7 @@ TEST(LlvmLibcStdcFirstLeadingOneUcTest, Zero) { TEST(LlvmLibcStdcFirstLeadingOneUcTest, OneHot) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - 
EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_uc(1U << i), + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_uc( + static_cast(1U << i)), UCHAR_WIDTH - i); } diff --git a/libc/test/src/stdbit/stdc_first_leading_one_us_test.cpp b/libc/test/src/stdbit/stdc_first_leading_one_us_test.cpp index e9488335d9b00..de81275205424 100644 --- a/libc/test/src/stdbit/stdc_first_leading_one_us_test.cpp +++ b/libc/test/src/stdbit/stdc_first_leading_one_us_test.cpp @@ -16,6 +16,7 @@ TEST(LlvmLibcStdcFirstLeadingOneUsTest, Zero) { TEST(LlvmLibcStdcFirstLeadingOneUsTest, OneHot) { for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_us(1U << i), + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_one_us( + static_cast(1U << i)), USHRT_WIDTH - i); } diff --git a/libc/test/src/stdbit/stdc_first_leading_zero_uc_test.cpp b/libc/test/src/stdbit/stdc_first_leading_zero_uc_test.cpp index ac7e8c7d9e64b..a19d0ab83a9bd 100644 --- a/libc/test/src/stdbit/stdc_first_leading_zero_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_first_leading_zero_uc_test.cpp @@ -16,6 +16,7 @@ TEST(LlvmLibcStdcFirstLeadingZeroUcTest, ALL) { TEST(LlvmLibcStdcFirstLeadingZeroUcTest, ZeroHot) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_zero_uc(~(1U << i)), + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_zero_uc( + static_cast(~(1U << i))), UCHAR_WIDTH - i); } diff --git a/libc/test/src/stdbit/stdc_first_leading_zero_us_test.cpp b/libc/test/src/stdbit/stdc_first_leading_zero_us_test.cpp index 37f8612675a7a..2971267f82a6f 100644 --- a/libc/test/src/stdbit/stdc_first_leading_zero_us_test.cpp +++ b/libc/test/src/stdbit/stdc_first_leading_zero_us_test.cpp @@ -16,6 +16,7 @@ TEST(LlvmLibcStdcFirstLeadingZeroUsTest, ALL) { TEST(LlvmLibcStdcFirstLeadingZeroUsTest, ZeroHot) { for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_zero_us(~(1U << i)), + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_leading_zero_us( + 
static_cast(~(1U << i))), USHRT_WIDTH - i); } diff --git a/libc/test/src/stdbit/stdc_first_trailing_one_uc_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_one_uc_test.cpp index ed2b4921cdada..5ca4cfca1b479 100644 --- a/libc/test/src/stdbit/stdc_first_trailing_one_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_first_trailing_one_uc_test.cpp @@ -16,5 +16,7 @@ TEST(LlvmLibcStdcFirstTrailingOneUcTest, ALL) { TEST(LlvmLibcStdcFirstTrailingOneUcTest, OneHot) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_uc(1U << i), i + 1); + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_uc( + static_cast(1U << i)), + i + 1); } diff --git a/libc/test/src/stdbit/stdc_first_trailing_one_us_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_one_us_test.cpp index 60021552310be..46c69acfb7f07 100644 --- a/libc/test/src/stdbit/stdc_first_trailing_one_us_test.cpp +++ b/libc/test/src/stdbit/stdc_first_trailing_one_us_test.cpp @@ -16,5 +16,7 @@ TEST(LlvmLibcStdcFirstTrailingOneUsTest, ALL) { TEST(LlvmLibcStdcFirstTrailingOneUsTest, OneHot) { for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_us(1U << i), i + 1); + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_one_us( + static_cast(1U << i)), + i + 1); } diff --git a/libc/test/src/stdbit/stdc_first_trailing_zero_uc_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_zero_uc_test.cpp index 2b17aa6536e66..9535ad9ffa3a2 100644 --- a/libc/test/src/stdbit/stdc_first_trailing_zero_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_first_trailing_zero_uc_test.cpp @@ -16,5 +16,7 @@ TEST(LlvmLibcStdcFirstTrailingZeroUcTest, ALL) { TEST(LlvmLibcStdcFirstTrailingZeroUcTest, ZeroHot) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_uc(~(1U << i)), i + 1); + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_uc( + static_cast(~(1U << i))), + i + 1); } diff --git 
a/libc/test/src/stdbit/stdc_first_trailing_zero_us_test.cpp b/libc/test/src/stdbit/stdc_first_trailing_zero_us_test.cpp index e370379300e4a..e0dc34fd89996 100644 --- a/libc/test/src/stdbit/stdc_first_trailing_zero_us_test.cpp +++ b/libc/test/src/stdbit/stdc_first_trailing_zero_us_test.cpp @@ -16,5 +16,7 @@ TEST(LlvmLibcStdcFirstTrailingZeroUsTest, ALL) { TEST(LlvmLibcStdcFirstTrailingZeroUsTest, ZeroHot) { for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_us(~(1U << i)), i + 1); + EXPECT_EQ(LIBC_NAMESPACE::stdc_first_trailing_zero_us( + static_cast(~(1U << i))), + i + 1); } diff --git a/libc/test/src/stdbit/stdc_has_single_bit_uc_test.cpp b/libc/test/src/stdbit/stdc_has_single_bit_uc_test.cpp index 1bc189cf0b665..9dd2bdc12479d 100644 --- a/libc/test/src/stdbit/stdc_has_single_bit_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_has_single_bit_uc_test.cpp @@ -16,5 +16,7 @@ TEST(LlvmLibcStdcHasSingleBitUcTest, Zero) { TEST(LlvmLibcStdcHasSingleBitUcTest, OneHot) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_has_single_bit_uc(1U << i), true); + EXPECT_EQ(LIBC_NAMESPACE::stdc_has_single_bit_uc( + static_cast(1U << i)), + true); } diff --git a/libc/test/src/stdbit/stdc_has_single_bit_us_test.cpp b/libc/test/src/stdbit/stdc_has_single_bit_us_test.cpp index a038f6fac0123..3ff0b83751ebf 100644 --- a/libc/test/src/stdbit/stdc_has_single_bit_us_test.cpp +++ b/libc/test/src/stdbit/stdc_has_single_bit_us_test.cpp @@ -16,5 +16,7 @@ TEST(LlvmLibcStdcHasSingleBitUsTest, Zero) { TEST(LlvmLibcStdcHasSingleBitUsTest, OneHot) { for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_has_single_bit_us(1U << i), true); + EXPECT_EQ(LIBC_NAMESPACE::stdc_has_single_bit_us( + static_cast(1U << i)), + true); } diff --git a/libc/test/src/stdbit/stdc_leading_ones_uc_test.cpp b/libc/test/src/stdbit/stdc_leading_ones_uc_test.cpp index 5d32d92e327a3..4ba240fdafad4 100644 --- 
a/libc/test/src/stdbit/stdc_leading_ones_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_leading_ones_uc_test.cpp @@ -17,6 +17,7 @@ TEST(LlvmLibcStdcLeadingOnesUcTest, All) { TEST(LlvmLibcStdcLeadingOnesUcTest, ZeroHot) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_leading_ones_uc(~(1U << i)), + EXPECT_EQ(LIBC_NAMESPACE::stdc_leading_ones_uc( + static_cast(~(1U << i))), UCHAR_WIDTH - i - 1U); } diff --git a/libc/test/src/stdbit/stdc_leading_ones_us_test.cpp b/libc/test/src/stdbit/stdc_leading_ones_us_test.cpp index 91a125370ec15..0f93eed9e10b8 100644 --- a/libc/test/src/stdbit/stdc_leading_ones_us_test.cpp +++ b/libc/test/src/stdbit/stdc_leading_ones_us_test.cpp @@ -17,6 +17,7 @@ TEST(LlvmLibcStdcLeadingOnesUsTest, All) { TEST(LlvmLibcStdcLeadingOnesUsTest, ZeroHot) { for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_leading_ones_us(~(1U << i)), + EXPECT_EQ(LIBC_NAMESPACE::stdc_leading_ones_us( + static_cast(~(1U << i))), USHRT_WIDTH - i - 1U); } diff --git a/libc/test/src/stdbit/stdc_leading_zeros_uc_test.cpp b/libc/test/src/stdbit/stdc_leading_zeros_uc_test.cpp index 3d555072927ac..42f78c296fe09 100644 --- a/libc/test/src/stdbit/stdc_leading_zeros_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_leading_zeros_uc_test.cpp @@ -17,6 +17,7 @@ TEST(LlvmLibcStdcLeadingZerosUcTest, Zero) { TEST(LlvmLibcStdcLeadingZerosUcTest, OneHot) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_leading_zeros_uc(1U << i), + EXPECT_EQ(LIBC_NAMESPACE::stdc_leading_zeros_uc( + static_cast(1U << i)), UCHAR_WIDTH - i - 1U); } diff --git a/libc/test/src/stdbit/stdc_leading_zeros_us_test.cpp b/libc/test/src/stdbit/stdc_leading_zeros_us_test.cpp index afb418a24ad54..967ceb13ff1d7 100644 --- a/libc/test/src/stdbit/stdc_leading_zeros_us_test.cpp +++ b/libc/test/src/stdbit/stdc_leading_zeros_us_test.cpp @@ -17,6 +17,7 @@ TEST(LlvmLibcStdcLeadingZerosUsTest, Zero) { TEST(LlvmLibcStdcLeadingZerosUsTest, OneHot) { 
for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_leading_zeros_us(1U << i), + EXPECT_EQ(LIBC_NAMESPACE::stdc_leading_zeros_us( + static_cast(1U << i)), USHRT_WIDTH - i - 1U); } diff --git a/libc/test/src/stdbit/stdc_trailing_ones_uc_test.cpp b/libc/test/src/stdbit/stdc_trailing_ones_uc_test.cpp index 79d4e5b8b8032..0036408513388 100644 --- a/libc/test/src/stdbit/stdc_trailing_ones_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_trailing_ones_uc_test.cpp @@ -17,5 +17,7 @@ TEST(LlvmLibcStdcTrailingOnesUcTest, ALL) { TEST(LlvmLibcStdcTrailingOnesUcTest, ZeroHot) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_trailing_ones_uc(~(1U << i)), i); + EXPECT_EQ(LIBC_NAMESPACE::stdc_trailing_ones_uc( + static_cast(~(1U << i))), + i); } diff --git a/libc/test/src/stdbit/stdc_trailing_ones_us_test.cpp b/libc/test/src/stdbit/stdc_trailing_ones_us_test.cpp index 7ab15743ed1e0..5ebacc829c543 100644 --- a/libc/test/src/stdbit/stdc_trailing_ones_us_test.cpp +++ b/libc/test/src/stdbit/stdc_trailing_ones_us_test.cpp @@ -17,5 +17,7 @@ TEST(LlvmLibcStdcTrailingOnesUsTest, ALL) { TEST(LlvmLibcStdcTrailingOnesUsTest, ZeroHot) { for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_trailing_ones_us(~(1U << i)), i); + EXPECT_EQ(LIBC_NAMESPACE::stdc_trailing_ones_us( + static_cast(~(1U << i))), + i); } diff --git a/libc/test/src/stdbit/stdc_trailing_zeros_uc_test.cpp b/libc/test/src/stdbit/stdc_trailing_zeros_uc_test.cpp index c02b518865d9f..129ab38c45ea8 100644 --- a/libc/test/src/stdbit/stdc_trailing_zeros_uc_test.cpp +++ b/libc/test/src/stdbit/stdc_trailing_zeros_uc_test.cpp @@ -17,5 +17,7 @@ TEST(LlvmLibcStdcTrailingZerosUcTest, Zero) { TEST(LlvmLibcStdcTrailingZerosUcTest, OneHot) { for (unsigned i = 0U; i != UCHAR_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_trailing_zeros_uc(1U << i), i); + EXPECT_EQ(LIBC_NAMESPACE::stdc_trailing_zeros_uc( + static_cast(1U << i)), + i); } diff --git 
a/libc/test/src/stdbit/stdc_trailing_zeros_us_test.cpp b/libc/test/src/stdbit/stdc_trailing_zeros_us_test.cpp index a9f8327dfd914..e1171f24ccfda 100644 --- a/libc/test/src/stdbit/stdc_trailing_zeros_us_test.cpp +++ b/libc/test/src/stdbit/stdc_trailing_zeros_us_test.cpp @@ -17,5 +17,7 @@ TEST(LlvmLibcStdcTrailingZerosUsTest, Zero) { TEST(LlvmLibcStdcTrailingZerosUsTest, OneHot) { for (unsigned i = 0U; i != USHRT_WIDTH; ++i) - EXPECT_EQ(LIBC_NAMESPACE::stdc_trailing_zeros_us(1U << i), i); + EXPECT_EQ(LIBC_NAMESPACE::stdc_trailing_zeros_us( + static_cast(1U << i)), + i); } From 6d58dd4dd9fa24d66bdab64f0f905926081a9944 Mon Sep 17 00:00:00 2001 From: Wael Yehia Date: Tue, 11 Feb 2025 17:48:36 +0000 Subject: [PATCH 20/29] [Release Notes] Mention -fprofile-continuous in release notes --- clang/docs/ReleaseNotes.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 369d9e9de7d16..6344c4b36e357 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -104,6 +104,10 @@ Non-comprehensive list of changes in this release New Compiler Flags ------------------ +- New option ``-fprofile-continuous`` added to enable continuous profile syncing to file (#GH124353, `docs `_). + The feature has `existed `_ + for a while and this is just a user-facing option.
+ Deprecated Compiler Flags ------------------------- From 1337b0fe3c153ee8b7c8e19d694b9840a64ccf3a Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Tue, 11 Feb 2025 18:41:49 +0100 Subject: [PATCH 21/29] [analyzer][docs] Document how to use perf and uftrace to debug performance issues (#126724) --- .../PerformanceInvestigation.rst | 96 +++++++++++++++++- clang/docs/analyzer/images/flamegraph.png | Bin 0 -> 74351 bytes .../docs/analyzer/images/uftrace_detailed.png | Bin 0 -> 60862 bytes 3 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 clang/docs/analyzer/images/flamegraph.png create mode 100644 clang/docs/analyzer/images/uftrace_detailed.png diff --git a/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst b/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst index 3ee6e117a8465..ca3a56828209b 100644 --- a/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst +++ b/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst @@ -5,6 +5,9 @@ Performance Investigation Multiple factors contribute to the time it takes to analyze a file with Clang Static Analyzer. A translation unit contains multiple entry points, each of which take multiple steps to analyze. +Performance analysis using ``-ftime-trace`` +=========================================== + You can add the ``-ftime-trace=file.json`` option to break down the analysis time into individual entry points and steps within each entry point. You can explore the generated JSON file in a Chromium browser using the ``chrome://tracing`` URL, or using `speedscope `_. @@ -19,9 +22,8 @@ Here is an example of a time trace produced with .. code-block:: bash :caption: Clang Static Analyzer invocation to generate a time trace of string.c analysis. 
- clang -cc1 -nostdsysteminc -analyze -analyzer-constraints=range \ - -setup-static-analyzer -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \ - -verify ./clang/test/Analysis/string.c \ + clang -cc1 -analyze -verify clang/test/Analysis/string.c \ + -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \ -ftime-trace=trace.json -ftime-trace-granularity=1 .. image:: ../images/speedscope.png @@ -45,3 +47,91 @@ Note: Both Chrome-tracing and speedscope tools might struggle with time traces a Luckily, in most cases the default max-steps boundary of 225 000 produces the traces of approximately that size for a single entry point. You can use ``-analyze-function=get_global_options`` together with ``-ftime-trace`` to narrow down analysis to a specific entry point. + + +Performance analysis using ``perf`` +=================================== + +`Perf `_ is a tool for conducting sampling-based profiling. +It's easy to start profiling, you only have 2 prerequisites. +Build with ``-fno-omit-frame-pointer`` and debug info (``-g``). +You can use release builds, but probably the easiest is to set the ``CMAKE_BUILD_TYPE=RelWithDebInfo`` +along with ``CMAKE_CXX_FLAGS="-fno-omit-frame-pointer"`` when configuring ``llvm``. +Here is how to `get started `_ if you are in trouble. + +.. code-block:: bash + :caption: Running the Clang Static Analyzer through ``perf`` to gather samples of the execution. + + # -F: Sampling frequency, use `-F max` for maximal frequency + # -g: Enable call-graph recording for both kernel and user space + perf record -F 99 -g -- clang -cc1 -analyze -verify clang/test/Analysis/string.c \ + -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection + +Once you have the profile data, you can use it to produce a Flame graph. +A Flame graph is a visual representation of the stack frames of the samples. +Common stack frame prefixes are squashed together, making up a wider bar. 
+The wider the bar, the more time was spent under that particular stack frame, +giving a sense of how the overall execution time was spent. + +Clone the `FlameGraph `_ git repository, +as we will use some scripts from there to convert the ``perf`` samples into a Flame graph. +It's also useful to check out Brendan Gregg's (the author of FlameGraph) +`homepage `_. + + +.. code-block:: bash + :caption: Converting the ``perf`` profile into a Flame graph, then opening it in Firefox. + + perf script | /path/to/FlameGraph/stackcollapse-perf.pl > perf.folded + /path/to/FlameGraph/flamegraph.pl perf.folded > perf.svg + firefox perf.svg + +.. image:: ../images/flamegraph.png + + +Performance analysis using ``uftrace`` +====================================== + +`uftrace `_ is a great tool to generate rich profile data +that you can use to focus and drill down into the timeline of your application. +We will use it to generate Chromium trace JSON. +In contrast to ``perf``, this approach statically instruments every function, so it should be more precise and thorough than the sampling-based approaches like ``perf``. +In contrast to using ``-ftime-trace``, functions don't need to opt-in to be profiled using ``llvm::TimeTraceScope``. +All functions are profiled due to automatic static instrumentation. + +There is only one prerequisite to use this tool. +You need to build the binary you are about to instrument using ``-pg`` or ``-finstrument-functions``. +This will make it run substantially slower but allows rich instrumentation. +It will also consume many gigabytes of storage for a single trace unless filter flags are used during recording. + +.. code-block:: bash + :caption: Recording with ``uftrace``, then dumping the result as a Chrome trace JSON.
+ + uftrace record clang -cc1 -analyze -verify clang/test/Analysis/string.c \ + -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection + uftrace dump --filter=".*::AnalysisConsumer::HandleTranslationUnit" --time-filter=300 --chrome > trace.json + +.. image:: ../images/uftrace_detailed.png + +In this picture, you can see the functions below the Static Analyzer's entry point, which take at least 300 nanoseconds to run, visualized by Chrome's ``about:tracing`` page. +You can also see how deep function calls we may have due to AST visitors. + +Using different filters can reduce the number of functions to record. +For the common options, refer to the ``uftrace`` `documentation `_. + +Similar filters can be applied for dumping too. That way you can reuse the same (detailed) +recording to selectively focus on some special part using a refinement of the filter flags. +Remember, the trace JSON needs to fit into Chrome's ``about:tracing`` or `speedscope `_, +thus it needs to be of a limited size. +If you do not apply filters on recording, you will collect a large trace and every dump operation +would need to sieve through the much larger recording which may be annoying if done repeatedly. + +If the trace JSON is still too large to load, have a look at the dump as plain text and look for frequent entries that refer to non-interesting parts. +Once you have some of those, add them as ``--hide`` flags to the ``uftrace dump`` call. +To see what functions appear frequently in the trace, use this command: + +.. code-block:: bash + + cat trace.json | grep -Po '"name":"(.+)"' | sort | uniq -c | sort -nr | head -n 50 + +``uftrace`` can also dump the report as a Flame graph using ``uftrace dump --flame-graph``.
diff --git a/clang/docs/analyzer/images/flamegraph.png b/clang/docs/analyzer/images/flamegraph.png new file mode 100644 index 0000000000000000000000000000000000000000..b16ec90b9e600dba946d5c9a7a08f5b2c2976f68 GIT binary patch literal 74351 zcmbrmc|4T;_djl5#+GHW(`Fl4vSnXG#x}Ty>?sOavWLiC5wZ-~2Vsn@$f%JmT5J)9 zkR&Fish?~v;_skqLnyTbC+TZ@MWsmcG(pSAN(?i_3t9sb&)?5+M*>~{5P zc*Xu=+aYc4FYw>WUln#5bTEpO`J|#LZ!F>P+qrAt9p^FFi`c`1rp-Ht8gu~akgEJ( zZ<;71v#P}mQ&PgoKg5!qzY_e=WW!UCYu6A@pFWMC!$c__0tabm@}kxfY@hlJl^D|j zlYfD$KsKveK!Tku)4+aQI(hyg za89(EBmUc#R$S4D4tb-;ifnlKv8q{hs4UF>K zW!iC(*yh8pKq4!nJ2a8=zTFF1cKd@xEiO$<>RjkgRG-KW?L9B&EUV@hulii})Dh8o z+tY{T=vODcX)$6hq+bQ6vZ#fEm)Z5l5`}rT%vDcs+j%{Vf-GVWGWrJu$xXC`o~L&E z2NmD;;%*a%XB$$ab3@O`cssAguY*2(RR zm+;$F%yQ!chXvX&6=7jV+-V(@!A%}KQbNOt4tu*q!br`Gg{z~YaxBSf(y9yHdU`p>mhJB7u$Ncw|K|{c(qXhhc*-h zE!1IVG{kqQ<8fDw?s4RS(JfRcpde}b%U8s-qhhOJKP$7xPh_inh>aPB8<^!P&v~c` z4Z~3IZaLpB)Q)`omg#_UdP(c>9T%{30y*C{bj-=0(cii{r?{BLy6We6q;ZI+RKS6G z7LF{o)nEn@%|Ut@Mng=06`ehCFmT7!TW*x&@Gd0r`+$~Fi@%6R&h~j7`Cl3D#VZc= zdLNQvQLaN;hr5)d_g+QEf9y{aBb`s$lLkD5kaVh=c(1nSAy?;5@bmLe+Tb7>F|Xge zd9zcd@|q~^<>lq9zFGB-esuif|HmKAA?Y8-o`7~4RQ0`d9vtAvaFvwkpy2SaiAUQ+ z=V<&lpoXtrJu={@=P=;Vpeq00@7=3Op6-4R3O2u1wPdm1SttmK%gwfbz`i;G4&qHq z7`s_{z#h}a1nv_VRxID|m;HVh`pVIJ{(8-Cap<7`_39rf+9WZp7i)x%?pW)MO-xD$ z=Z>4Ondy8@dxskoa+2a{z}LkW_P=-Jux{Hx(p%+ia59sWSjWP?MYrcFIiQ?>P4GXH zV}|{ugTG`tn*RTk>9>adxw;pw(^w_`;*EYc_)h0`*mSVTuM2C?tWX#5Rjl_4IJL}E ziA8sQG+(}YRZV33s>YFnkfFrNq?>T(n_Rzob&rVt+8p_}R{z_Se=GfuLI`gsX#cBD zcz`w@z#+5m%^TcQ>pdaD`O(rVSNEjW0=m$bQjb)m(fZ!(`l45{@QMYfJdV!(??nO)=das^nP8DANA~+iM9@=5hSmski!?lay5=XRsaL#g zA5Z#Y$s2UYeSyMxk#JBj0dmCUr3!~a>Ua6nd#84i*HY^bPhvD96^XBRJRX@G<`=wn zB_HBUZ;T8)ftI|v{w`bgfbT#ZvA@MwRk=n-IPx)|%Wy0JL+#|V#2v%6W0cq z_i5rIxf^e54R5C|d8+BQ`F>R|7XJNE5hK=)5wp$q0W*G5L(2;lfb7m(m7B2f+9XJ^_?7*e|r)cudIsI<#=EU@Y;;mQu z-JlxTp7zvbho@#J$$(K@#IzsDm?402ift75IFE!hWXU6H?=%By_c> zE-sSl%K4Cd(vi)ynJ!%?zKx5N6Ov4bM6sobeJ(RZO6%XYRfA|iI~+l=c21(nQLgy% 
z>SRP}pM}VkuAp8Va!<5XKSCb~6UYZv`U(xuzx*r$cy##iAe@}1A&8bJDch=Pj z+{vKuDi!N0W{t^P$dV=hMNuvihL~Mc-*hAIdBpZ>m&vVNqD4fF#&yL0a`CiEO!lGo zV^8rY>`L~0jqx_7I$`Hz@pP~Tc9TbjW&iT~SlzZZ>r~?A4cZnEirE1Du>8}4zuI!` z3@97aS%55)!}2@KN4mLzDnMhAtVX%}E#2EpM@ANd-JwKt5Jmp1^O2MMj{zU)>(BAu z%T6Cn{Q3)x{retb1p&Km(!vvT2E?nkD^h-JH_PN^OAvrz}B z#sxtCaTNbi2Y)PaR=W&biq?oT80o`n2*=2>ewV*={M)Mjv)TUHYyVk=zjx!_6*R}4 zu>gP(fs5+-_$c=1p*^Y{_Ap?2d&o{JAV~;?(5ezwA(Z0PUy6^r{I*5zrOPXFGK=3_ zi7S~#+nAoc`pi&?+8-sG<8m@6{~T`6wcq6hQ6<9tkj#KUK^6%Ca0yz31LDUER%}wL z^6qXlyw?w<6Z8P|jUOYM1~={~HlC>+a@_E596g-3Z}QA5cAlp$sf;Lg&Mmc@y8LA@ zRZj~ib6I%XpT6QuPVIA|K5o=b2uPtG9?MvkfX7Any=m~E$3$H~&DBnOqHd3E4C+;Z zV{38%!khT&y5C`i1`Ib1+m?(WZt7_Vy`aPTI`aeBzk%c5p6S0R>o1c%a%aCc=^xMi zTfe`Logu(iJ#Et3jaaw<1O~49_Mkr_%jDY_;ux0*H~Mj6W67LvTltX|RU5^BrYEHz zE%KF>O%ZNk@vVBkX6Y@uQx1E?YSAy%;pFE!5Lu-T$ch_Q1x!s%o`%dQa`s;e%jrY9RX+3$dzk0oNX) zb}IZuY+6*$&(-G0T%FGwG zs4Q?XeZ~D6>89U3Zr4`BSLI`wX;P%{)6mrJww{0C3WLMrmG~B?qAV_vVR{Or>Rqdh zJH4!*-^e`?|0%QPz}+a*sKJb#elCNQZbc=ZO?i&VcQMVqWRHv&cd5ZQ?ns%lO_|vI zGBq|7HP)Wb+!BmUa;@a%=Fc$sh4RY6@kznbGu-IwTz{h4aN#pHedsG{of3Bb!A?FY zzhn^}ugEyN6$lXIhzM}_e%^|#I!(0GO;8`kIrYo53Py6_q7!%QsbP`L4+r7eW!>h? 
z*cP4dfl^R{=DU`CW(;--c27QvcqceS z-Z02xHzte@YbR|uzfyk>VfwJ43~o2X zr$u9jw)^M|-rbFc|7t;7%Cru>m&Y6^BM7ZD80p|B?12YbjuD_cTJ<+*^5$PI_wRl2 zXQTY%$A5=7|9GaqLZ1J4vRGp3s-c>8P^b?gLXwgo5>51%#JH}pg1-fUJ?oI0Yj0Q) zjuTIuyeFxPvgu=qb!(T8HYq_Y+H)miIh*@;R3f&0S~(}#ToSH?;}RJ&9;7lM;%)#0 zxPEs{Qg12@-ulTR^S13U1z_OaAU+P9!LZuu%x};l*J8k+^s_H2iDg+rrL{g%x`+~h zKE=Ttj9w4g+~}@ptmRp{*=>>guwj%G$j&|SndC9iVOVV0Iu;o9B|BxdF{wldg);qo z2Y)anC*yqm1`pu84!2g><|6bB{T!$iZ51Qd`aZr~ z5cGygv8l%!6_Ntpv0W^|Ti<2leiEG%EEI+H|Dg(*elwFtGK|7*qIR-%NUvAzYl4p1 zN_!T5kkvZ9T`Dd!nAF7KOQ2`lW2{Lerw#cm07^W^$g>aeh04 z<_>eacAtGm)GyTuEe)1fTM)T}bHge>u&bQh-rKR6qI!uTcnXl8debAfWD(<7_Dl0& zKs@_~8T}-x*Y^$j<8>D3`|z@&Gs7#dxZXg3brPx3KXj(*NB46Nru#B7w5W)uyF4Q) z)TrY5jAagSC^V&XNnH0=owWxAX3kDB#qOz$IlTPP4Q+Wt2@H44sZowJ$XN|EUR?la ztU(X?qdD@>*nApnn*?pgkzXn3Ej{wJ|5p>@0q%%le$;|FpALzzCXR4Chy(|f$4H<5 z}4G9Cd!(bFa?Q}@i&(|siOQD3aIlh}M@b+TQs*}{UTHQ7Axl$=i~3|ep36>= zuT>RVe^;$#{vhoc+_0m?`;gBkK!GpbU|$z`gJ(0oR-~!7_R$=bXV1=!TYMUjq+DzG z6*dvJI{aW;heKoIAe+`Bx>s`UJmmAQs~sOARG(YfRTcB9>9xfy-1gsgolRuk58Rhw z+<%bVS(l&m0czob*k3@a%I5g|Xq8dYL2g~xuU5f^U3Q9;_bGE%ozFh;wL#DZ_;sLN z_<|c3vUm{@TCjI6_lA>#;NIl?`S5MDvuJl<68XjFk3*AfZVzf03v#Vj`yW3XOjN-h z61vLlBt(b(lq91PUa?T^+jm;ZoQ3~lvhYxPHWjizlGI-+#xJG<)Bh={O@JAdpMOtx zL}loZ{Lol0TBJNBa?jj%ZPcR)>)j3Ua>!!|A<9EB*by(|cndY6+H1CY&+hOd)(V}6745ioc~RJfLJbq=HQ zIovg78Ioe}UcO_ylhkKCPwnO;tpLVSQ+2L)CGd`;ifP!h&~WYhLWBdW8*fz=dF3oJ z!t^&EQz_Tnv%^>BUdtijujOW!)75e+n{$8szzEHv@pX)S!@c>K{-Hyr_XmB^8k_9R zJKwI?=+zaeKe^702|$jNVtbRRbUpZ@?5oC1jl>>VR$jEN?%c-Q%#a~=lTPG_Or>cF z0^E6b-NUt1t(}P1wmA zG@^C>NWtT&;$q6O@hVq+pQUMdF#bAEZgIkNhtOIQ6SDE7fu*XYooy4cn7wTz_lXQ# z<4}RGs`6FoqvHqF@xsA1$La||EP7g*y3(sGPQNrd6)sc4Xt28H6jE`zWBIw20_^1b zy-Xj!>n`0@+q8*`%(Lt&Y1LTnDxLxB{e(b>PxV5yvJqnpNMY7GvRQ#A+AQj50 zVQhWKty$XAnqN8wvSWcvVzKvhF$44x*l);$44z{wt1P_6Q#;0?ot1D>+^|+0bO9L8Vc++Q~85mOLYo4tr2bdYvRK9GnSdLR@#lTR5rV?Y^7-UfHu-dTQTeY zA^w~;qH<3gd5!_tsfg(b6~=h=)z?J#T>Bk++)>AP{uQ8*P+)2W;_Aavto%``_%^yn zl^F;x`c=uZL4}M$%e|+?lKs&2sZ^-mwYEWl 
z0(Z<#-OH7?sbaUeFh9Q2&=IT=IoGLBxD^$wE~BXi8#XO>gv_F3?5@cIOcs`It~!b+ zOt>^g0=f;XFzI3mYJx~Caf6Bwt>2w+7XFzA)jA&g*(S5vF>xo^*;W#cv!B=RP6aZn zYZKEaN2c5?{0kD(S(-7zs5SCIiEB`cxNTckiEG0%aa-fWbd6#jsBLDoyP0YD9gF(! zi@TktlhSFqMGoD9muxWi`2zVPS{A+Dy+q%$j3`P$I8LsgV#xHQaJrnqw2FtG$i^VQ zv|YS#DX`d{gs=6;wtj8h;l-l3276*>W#s0>aT3F1Q3*&(x~&)^uSVV9Jkc)Pa?$(I z3t6fld+E!=5Kq}>g{o(DhXdnEpUWyvY#MWF0&^df5vlOgE>$r?NUS@lBBTrMcJ+MI z#%@~u^}C6@=P;TJu*U=~767XMVFg;|lS>OBjD@%Bpog{v(OCN++FuG&4$*LSoNf1Q zt*TSZ?bfCYjuw85X5jz1Q}nc6+t=|BTjkhejn8)vYS$NT)Qh; zrQ1k(r?Hm_DUex;9S@r7(bak&E9m^F2c}K_acp%J7Ew|zSpPY~iZnG0-_AU@+WHa0 zI&ss!HE%kY9lQ37{dJmRF*PgyHPf1!H$AR+wv+Bs%n`3!IdIsJrFmAV50;H|0-XKh z36Fj`wyFAg)55x38l?J6)Eum|gaFi5EHO&o>6zz_LDSRzwccvyk*?75(K;lOrG2)ofRFaA4B7@8wMRs(aIA2W&CYA42xHyn0YI9s!*8OaO zrRL-k$}+*nX5$ke-t(Sq2xZBK7ww`=)Iv%JalGfuo#~LUe=$fwjlV)nPH=-IM^(F= zDuzMHu}|GwVv^D(!l@=DJ&Nd=%jFEA6N#!(%(0v`DOIAbBHom~jY%|>*46IQY zXW*D{(2ESq5{UKn4j}3}3+@~}%w;BRf)Q&%w@wwb13||mQ^{>~U2O69tNonmtminQ z#R%NeskDLR+0^Yu5p_X!11jIHI66IEaALdnIHC$AQ=I~9Ub}C+oNdP!F(N_hy0Q`m zCZwxLa(P0ZlUFZ@ zI2C&Vj^{_RTqbj$Xjqsl#?p4>Cw}EkPPpkH<9_#*tn1SCs#g|-gfcI-ed5xJ+bcX( zYjQrEDI&j?V04lEw&Xo%z|!sw9q6O% zuls2HrD*IgkqhM~)vv0qE7@_*5G)^$oO^A*l_(Q}(W~1Q@EV%nFL`sTd4KZ%zIL}HcUB0qcfm(Z{MXu)w)z`8%dk7?5%t%XmU5N>*UPd`sI%s z^rY=Thv=RRG^BQ;Z8pydHbd|pbB^v2(wM3g4BZhge6YcMsd@rCqCO^@JSWBc1jPWm zxq4wZL~X4(cc$AFzm`-!OOLNVol{7Ubc|D!-}R?|F3vL*e)@^bD4W%}mVo1kZplGd zPUK8J3lJ_I-QC4RSod1M1W%EMQz)RlFMl1Ndo&8sk;gF)OcCRfh@TulKQln3t+>R2 z0ceonU^!9bi01flJl7FzDS}a@fFZ>YQ+2V}ATG=UPVKTtMFdh)82HMCaQsujIC2I5 zo9*}~2>)H9`X_q$Pa^v75MPFxARZxC@Yge~wT8&E#Xo~@GA83KbOZblj4UxdpQ&J^ zTL{wuR_q&RJ6$U+dq$Qq^1dCSpj3QZ<*~T!`8W{T1JE!@cx_!`dPrb$Y+|~7o_@EJ zb*4ZjR3!XT<|jV};O%jd8)f?4oXw}x>?||A)}`U12tnIS2k!pd7tP`?oN)KFo{5Kw zVWefiI%3jt7*@+4kMx)3g$tt!O>!&qa8hS2nvbDO9=^twKJ2!0#vcnRz`uO{v2h4= zDNj{b;%XJRv`aI+=9J2UM$-Ii9 zEmx19MVjwe`sh!MKwWI4N#uA8?~~D9udo`VJm=jHLm4>C>?wpkU77EknWtRlWtBIO++%@Q_4_|Nv9TAM_+ZkTa_lQ#A@ZjzL*x8=DZ63 zH18!RGohKoZ5nyC$Drcl=ce!Xkk6)W^zXvKd;uj&!hB}Jz7#@w3}Be1MiejW>z{rt 
zx~h#zVj7Z>s|nJ86)$5hhuE$z1j*O2Wp3chC0;KEO4;TN(kM&O-O{|~*kp3qwX8h7 zLu7>G)P81zojx_r@k|)7WMTX5?#_d43`IQ(ADArt43CVCta8kS#(6o8xkPJowy?&| zS&e4YRCUTJ>eS!L-h4+?fyo<1AI6Y2o6@;uHt6+S$gA{R>> zONb=jy&D-MGe}*^o1mx$5Uw#fGVs6G4s>j{w`JP*I?bR&!XVV;g~=W&O~<&SSo0jB zHo|?_wk+uKXDFXr>y#>no#H~~@}p2S-iM~na0pvdeS&Ugq3a5ARMqUc(^=wEJC3 zhqVnMk?knYOHV$cDQ}WmSb5@d>E^qb32!bBVsy^pPj6RYWF?7wd4-;_=8xF+x8D!S zyPk#kOjjgCZg7ays(t-7$hO0*rzP^OE1~Iy;*+Wk*2T^JWW6@$8@LKbSkGOtVVT<$ zs9j|t!ppRcIMyh56K(#ppg!q3v0>!to98o@t@S>>Kw+zDCCg$J2Qr2mcbLliI~=}= zIGjL=F>G+~kF$t%rV(e<<7p=8I=EUbEI{}SwmBk42QsKiXrqk1B(bA zH#%yE%VR7=>&2br>*DphQVQhG;yw4_;PGhvadC>&8mv?Q3O&AfwpY+ZkMS~vb{U;4 zbodvW&)ZS~)J&D>H0k9;*|Ik^Q~)xAiEcly|ETtAo(h8Cnay&^?nD6agu%4t&Mmop z^^pB%Pl9jBIYP6!r3SodT7NupRh=g7G3gLqjzn92y9(C!FTAym z$Ff41G0cxt9jKAF7%;hv7zHZitYu-$?<9$+fG}kM?C7w)yRq=;6^jBI?E4P1e@F~W z9ReKCVcJXmc>($;xZCd-sfY%4_sD1cP4EBLkmx^L1Yw#It2#M;mX^?*6^(}9We41{ zVtUN98xI7Kt=?4FFhOmUJxjD_qouOiunVo^_8H8#EyEayO$ZMLIEMAmQA5S6Rn{5oX%k!&13!^n%8zs=qKtWK;NAo z_)%-YfDKVEo>-!BY|m#FSP(4?r44#^LQ@nrQXKQpT$l=0y)ngv?VyjwMxMbCo3)XL zXAm9Bhc~p#P_ftpFrozOg~f)@@?v4s;3_>TfDhB>1A+P4Gxq<~ju%mQW1WhP?qyDv zLVIRveH)*qfokR)ta__e)pO+5QNh-@kCM{;O0*oBdIjX514!|sxb0g|ZMVDA86~dQ zk(mw^C3B}Rt`>4;|mB4g~P0SC}*_-*aSptscIcP`gxEyi{NikRJu(O1Azu zK||QhqDP!Vq++Lczq=-APJ2?t7^U`v-u9{rgTNYGN~;NFUn^yM6sWGN7yc-uMEi@G zo5J8(3~OwAIu|Ed(61~xtLv+8{v zX`RVjQMUZk!$PjW@lvMx;}TcAw1vecSSlDi?&LaF!$kV3(R*V-yKIdT1xBF!(IGS# z;+}S6F380nk*zR6>B#b*t?}qSVnj5cbkzY<_y?c3&$#nRb_UHuJVnM&d z>1puhr3q|P2?ubkn-#5OQZMJo6ER2s&UABGouJ5#74_$R1V37tDe?2zX41u}>5=@ZH zlGA=7j)w2sjg0mOO!m?l9nvbERa)Jt;xaq!?zNz1QQLcxm%_}h{xrUXSBPiXqF7T< zHr?P>Vf2|N1Sz^a`bog5L!{kX`=a*l*fHRV!Z#|B;}&GrU<#Zt^xGF1sSgQ+yb)!! 
z$GPPSd@M;{;%;X=IIX7va~zg-*SgI3h1f2`V_#avm>gJb5~wlHOzO2nc&J7fr`H@u z{*3y#`Rz+11?dG0fALZq3s4cc+pL3BVGm-&)U)N{d|M%~eB=fH<#&QbH33dvc0mt< zifJo5hlxCi;S(%SXrG3XRrd7S+K>5k3M%8qNU8-Cuk>dasarOXOBJ&!35Bs!3=Z30 zh8^fd988+aC12#pz8GW+K`(@Q^6>|$I_=R`d$+5sU~xjX*1fOPE-JS9HXwqSKL|`E zdzO?_kN&#WP@;mHty;^=Q~IJuRtB?ToM*>ERdLm$1a8feN(0A0$EKBKaeb58OZ2b0 zBgMW=dOM$I9+vZJnNdVmRO-%s{65HHb&iko*!)Nwq~nGZ;I}8#lJbG(l>?tt=l6mv zNQb-8Qiv@Z6_c)3&8Q{IrJ9=ixW%h@7&n=bAzlL+Ic2b4U0h`S;Qh29S;Y8aA3Ocs zNX47C8gpy-u=wL--U{##rPo$R4V&Lbd)B5e?_@Pc7HI$I_!5LAYcx%>wGwnEIitOu z=hByDFJJaO=2<#>#Y~}PMNDVD!>G`_+{Pa*x1u;r(#R>5+v{9s;RUx{N==B9?*``z z`CxW;?Xl)sxH zipJIq>1Nw)eRB;ng$C;*BTDDF#%ISXKuBFo&se_0fnmL+ev*))4+PxLMQ>0 zBI3Rj+HyAvyZg1O2!hQJHWvpmNeBSzN;)L4BB-9%Z}2zG7q6q3(thplqaYo{Tx3TD z-x1{po+EJ6A-Vig{!NnO>L6J1aYW7yH2xDSHcSFTTb>XfVU#RSs^RSSkiRgKoQo!Zqt^r(N3dIgA1Kh%nFou! z-jc=L+N4}+)S)sMpeA$VR^4Va6#|;<;qFTQKo2F_4mJ3I&A5Cl`qN_Nvz%YjJLI@ zPlJk>gHx%d_jtu9*>+m=WefP%mn?$J+ZJR62m<6pHY{u`x2XY(^9_9n*Er>EA@84Otc{ey9GGoWiKk8P^+we0 zsQ3e5sX7q{XL|(evS>6~s;arqm#|az&0E;aGjsEJv8S-1E!L|5SK`acX` z@Ycgjc+&b@4pQ~Ju^ksY#&wZSkHvfJ{IBvczWrakVF}#n@4qWo=e+syHdSX(R-Z{} z3Ni;%V>$oQ->cN+B&3A`rxaxxKgjk?UnV`N5UC)#=SQVY#I>&|u`&_uQ|kj;n8Sb2L*McV^M9<~y+%bho_VR{46kYV-r z!&4)SZ)biq2Y5WQ_ioQLM%P;*UPMQuEhEkc4mC%9G-9!6lJxAoG%t+iq->14?G_z2 z&$@R3#vk&L+U{WsUG2$V(Z!#gcI5-qV#n4N&gWgdYBXloi$X8XFHDb zu5a8NlQA`2H!kPaE*tggw-P-4ttEzS-a95yIt1E0`f|=yH5SVZgm3`UyU|!A1Z&+J zyouf%)h0|+5ZD;8U&Imd6xe1XN+2T$p+W<5j_J8kf%e}%g&|8KG^jx42TK=)ZGi$c zr!X)^%wGXJ!BKaQ^TX;tl+b(C)d^~QDCn#aZ(4V1qGV?w!@mAPYIFPF%xcR zY@0AJY)*~b=Z=L3^fiEuH-6}eMz9Z%@l1QB7$v#>0o9?j@2RItKSUYVjq}U%=(*NI)hoNo%R;^8xXT;39XBNz!4ft zynq-lqQiD)Q=tZaXKtU_u=#zxW(bXWZ2~eRbMW~Jdgy7Kful_EI2CL!5W(WgX!@bl z{}mPH!2*V`#@h^cuPzW#=nepMSOK%zxHmFcgFiOJi+}2)dmvkV1I;TTaK?Ti#P@5-RbDvSCw$01|Vh zapq^q9Bz#Fcw)L+^>FZ2s`bA|@weS27zjBoS1637W=Gi#Y9g9+#ULYHnx1?i`AFMr}Gu`h?g3HPH*XO=_$@9e0wCCmHAvsm(?n-&Nt>{^EaaAA5M4y~% zIt%)SiUKafJv2QpzeIiwMps)_3L#HzS$=?KhJL6n=}WRslaq-#ju_+ePTGdQR{!2C 
zv*KHo_BB%|{7Ij2*`OeTfnmONRLP^Fuh)lH8l=w@5J=10Ufw64ZE}9Od~xRR<;_Bz zYWzJVNkq<rzw~plDEh{6Al~Oh>}>_$4Yj5O&N12= zBJrJ;#n|^@nqK-K-BGi{nH5)%o_ybH%&SjCnr5~X2(^2rde9q{(aYa zu$rJU!OikG1GhLSLCZg`qV5V!ovm@qsB!5^_uRwcPKPXR(K_E-+nu=+tOZ877x2ye z8@T&gfm1$pV=H8s{$*3eP;;7=sgo28VLHm2wRJ6#!dK36Jts0-#iChBm=n?fl~qH7 z0CF%Q)MJnCTZPNylj)Oi7W(9r6{`FR*2JpGjJ+1ROtj^#k4WtLg1TdoM z*&y>KEQ;Hv^ssyyotko7x?2CS@|oz`*v`c@z9{$@>esp3cfNR#ZjOY#5Z>V@DS$Of z?HaTOtG9VKyAloJLzZ(`OLQNcHqqa9Y=2cze(nC6x>J0}{eCQ=StD5fj9Ijz*pJY! z({p|L)9+gF0ecbW%dE6!M+Uuv?`L}#2ZFtVS#4sJyXaYR0{sP#S!4V!Guc%#i({OX zIjI0Qv8140H?K(URB&F%Wq^&{6t4SZhR*S{@+veg1=;E(;R$aFWEE-^UpHpm9ay^e zwMp2X0jtzX|Ftt?IVXR=d7a$RN&vgJd*#Pb*H{6{K+XX_>H!-j(^x4q7Skviw+%Kj zOGfvA4W>EG+Vi22lt2m%62!Php=L(RH=$`dOqdE{Sp_klzDxtNpg|f|N5g0k8k(PL zE;h0OHaF0Gjbl3VRG42|(LEn2VJq$kkvb}Vuz<1m0&KVA21r-6=LHyGc{*T61af-A zf-p@FyPeW=a1n7|5G|(!o}tL@g>`KP4<%*<@Lkf*7TGS?2F|#A5hr$ z*Vx^LXJ$FWtl9s$WOGY>#z-^s8x6Jjl6`loY~b!T_(juiw$hmjpg&5FKv+yf6C(vM z4-^qKaCbX9$?aZkP+Pea*mY4%zMs37MDf#bA8XwPMl6!CpE3(I7$}&#_7tHzD`>4d zj6!o1oWf91pjvEohe3BQB~k2=UlC`pe*sb8plfwU3-c=ijSXPz&u)19E8NDSUXr*W z2^ZCX1(05Vt@KO}+6y0uN!6{y7uQspq7u{f1c|UADe^%oG5x z6fa-1@Yk@(>;u21=##gXtOlG+2*BpmDf<4SRxc&sf<}{7mL`7vT|7E*u@hsYH0G_Bk%g zW6h23nf9agBJL2zo#*8^IjfnjL8C6VgNL$0P6I*dBgVGzo5AG>`)*y&$IyG)O;3$@ zF(wWS5T)fOL3}WF(Jjx9EMKCK3jJ`LAY1YvZpFd_wHw`2ozDvxwY_37$MuikqI)hZ z&3@;+(Ib@?2HXd1<}V_0n6V9Z#Nza^0!;D7J8gv%T@#``rO=nxCM7Zy6FBuoa@~rH z5GO~C!tal6^9hD;D|pcEUzYtQtLvd;c4Yu8ibXf%GDxVNQE?K%_K*P9#*yF5>_Tz;&+@+>jZYTPM#79~V=8`ax zDwr-<>WlL^PQJ`2{B;2;SflMkhoE7m*l;{2jZf~cQQfv-j}8f*U~<8j&Ce3lhvyoW zGB;-0%UGAmGtY(@`PkSF2n0f@Oq^u+_0pe@zcTUqOgb{c)Cr!E=V?(IKgY{m$v566 z7&&c8&=Lkbl;3kXm4FQ_M4uS!^tsorzcl78fPx4aG34YL!+@{W{95feGrk~UB6!l0 zU>-r{4U4`r%-W`A_A!RhtoQqBAoZJ>3s!<`X6kp;3msk*e)d-~lQb37Qqd&@_RKTY zp>C=uc+sF{$2^16^*VRmX}o>I<@Oa2_ti0tU-LH#7=C`Vc{y6eEAscd(cA{ zPy%EfbXa1Ey8$|Ph9YZEUx+GCiTyhRjP#y_sRQwMtT(J|m2H}-ohJH`k) zGdRqM)*V59-wkotdGJ??^O+ImIy@k@Y+=M3|5*Hm>LfecIt>!~64O=6mul?r`wL9) zR#hRuD8k=bNJs 
z504%;Pw`aRV{9F%hL?nz-{(4)Z0P;f)(9RUhPi3m;D@{Lt?JvN+TY_quqH`Fu`moQPCgtDV-I z{ml_#rx^ezRzSyyF*!q~_3^Td@>Jb4x;dh31mpjYFDn~1`! zl!(G`aQF?z>L;IZM^1TQSW#b9_*RXJX%}gDrS?pYSJR5 z-|b?T`Rj>wrh^37oD}J5;lHN0jR^)>JP6ny4_@X+tAaVyd;M-7 zRz?eDCaIoPeMul-sHca1kjvo?NbNI@0yCPVZ@US7#>aRJU|T~yy)U1cokKpFmy>A( z@#7h_tCN?qxJnMU2r4vfZMn-ZLgWjZ(z#w&r|p`y<`(D>uQOHc5`n^lTsONb3&D@kh=FH0BFWHQa2H3ui zv%&tg_u~SNpSn*o^JNZqkvCPzAq^cvu)iNz;IfQ)nw{heltYbFXJqomN3zHl;mf4! zgpZRoCW%{~GAi3@@|fJb?m%{*wb(bY82W??R21B*<+jb|i{*J((TrtJ&79iuL5Qls z+X3zQkQrTbikRV$vl$oHoOa7A6MY7tK?L4+TixbH$MA(R;T4mSM(JH(lQudADPB;_ z*1-|gIivEw$2Eqj8a_S<1<{83Z*sBzb=Zz=O`w7U1dplsnE+-e6E2r+>Hgm$(rL=veky;!&6*USFfg<)F~9* z^uw>Xh3v<*WSI=q78rcQDYmWc#o9MgV6|i5)jQ{(TZ2sEj!Spogt1xq;T41;E*2gk zjEPalglU6Z+$)n|V492JDHtCUDBY9#mt5F?l|cnV`gHz<(4 z=OUJ;PGgP_FH@lvPJWCz7p8OC=Y{E%Mo1zG}~D?>rG^YpwfYK+%l7C^?bq1{s*DARky?v3uM^y!HS8-@XQV#FN7d8!aojxY?$o~ zPqKg$xrLTn1#t0wFF17OgDT&H$CYffAx|R^neSOGGJn5R)*qB}j!p)6T4%G&4t!8` z6Lyvh;2^GT!;uS}3qWN4F7OXdC#rWeJQ0R@8=erDgHBal*64B+{*OHrH4Z%J{@n!;p7%>LaE-X*CYjuF-=K^h4M#<68mL-j^>8 ze^)}=fDBT$PM7FLMq`FkD}ZR5ifiJ39@xfjG8>RR_e*%Yu9$+}f>43KAmO(LH;CGz z#Fed1NrwK#uWb|^VF!1xa1`&`%AaQ_KQ4wsA9)Wp79k5ZGb86d?K%?$#)p#WQetIc zKl0kroxV4z8K_U0la{;sh^qMIT<#hc`!NX_PZc_Lm()}~4GM=U7&o64c(JiPj0uU$ zVqx-O(d!_r2r_V>HCjq8fH8+z<+m3FxK(PEeehbM#0MIz8TCTf>#5hciAg9jv2ik;N=eC(k*3t1ZW#IR=HJeXOz#m{! 
zueUE>tcSL(hU9Ic$%16%-AV{qWpYcIao5Z|L*MRxlJ|H>OT#8ee-~&0p4)dx4!UU@ z!jEd9^Ht@VXEIRlNAUBo$aGrIc*yPNH%Hy~w^}|t;}Eor%PZW8wZIZ6e(_GO%!f>gdxs6gv_LmkB=jhmlW-bOL;J|SW)aeT~m zL$!Ou-sz1~X-0g+#Xtz$Y>U83hUiYRmP1X2y2HE7n(YR2PBu7k8zJB}^mvRI7V~4= zJ%*-9>`bBz^g&BozMA3Y1tO1g=BJumE?M?y@iHagP>9+2<`5!N2TZI&L%~f#xl1I^ zgj9BOs~m0`m_i~& z_mRhY>4gj9le~QEF;C^Cg@tc%7juht%Ohi$^L{P*ho+B@cXj4{!)HLhYH8_vX>Q!( zCSj0G(2$qhnzv%anUaDYTB+H6lk8FDwbjRs_q%5n z#e>g@;9*W@1`J0wV|uZli#$v`SW|xGl88*&9O8}*_Z_b2e-uj>=~~zGz$;Z{-UptH z=URnRm0kC7J}C8kAujI%@9-I>g*GRs@OzS(J|P~^`&5}%YrbjX zG(ij;L-Qz5_!b8@Bc*()rLH8&>gQ{3B9sZ>H?82ZB6@ha-CF|qg(0%u01E>I7MRg- zC$IU!$J!x||HhK+#K8ODVmkPywI7!1T6-6dtpCU2)sle@O`~1E;uO72!^QZtF5D(= z4j|?Gaf(I{i9!EbX?Y1$TS6%OeBB2=atHLh4|QA$Dc}GtC?rxWMn*upv@&la>8qSJ zs6YpQ#w~f^4VX43xFv0ROXm=(4fsH&3=w*fTXGkP2GSAaV|Ex|tgy_zEyk(I5{GaD z!}`^y@gIoLof-pkslbs6$fsK&YSAFC`7R}M>Q0ubnehHCk}$P>k7=&UHsL5@ouK}5 zF3h{_&^|1fA)o~N;UazjE|^0lv&^tg5R~j@vr*q9vttHu&n$iqa*(uqt{PobJhK}N zh_X?Kw~x>Fep%Hy8D%bz<699VP{1|2_~dZ)0A!aWHO~qw>laOO`W?MlrY^D`Tk-MZ z%=b@ZzBib6JCA++Om~jb1*k(TT6*pxSAwnE%pF7boZyK#`<-k)OerW;V5T%(Tm%p3 zJXt5>UIUdul0D&w$$rT;RE;+`NBo?|QgE2pEmPn%v6vJjU6hZD%h4KU{w{lif>*--Ei`U2`Wc(*e|?Gyh~QVy_p&H^UlXJZZdLlgdMA2lSuFfE35MA zE3@g+dO*Nc8|--OSNH9RVb-V^#9a0)h!L1lJ{wd)v6gNpT&OC|5Tt+JI?@x+er|C- zS$c*l{P5fnV00c$pXf#%*Q7#O%O7V14{RIqC+UleVDa#S%T{cZGJ>Oi=J`NN*kVAc z0`ByBvx-{jq&=*fDQ}2}I#|fici}|fvZsuijp271)JZkZn4v1?jR_mCn-Q`G@85-h z+OmZh^h@9K#)OvNOO5VpO*Rh{mGP?<$MAiIKcWYXM@YO=XpW%VXu(Bsw=Fho5wMeC8ZmvFX&T5(0_H9SQVKPc z*Zm+_9TcU259jG*v{y?bZbfS=frc92b3Otw){QXjnczIN$?!1_l4}rTl_wr>Uj35? 
z<^1$!$1pco3ADJND5H+uNjy3d0^cOPGp9WXM?GD`+n+4;iHSD_X^?bwG2Ao_o}paz zw0UL#$P))ZB42YM5WB zwlh*lFKy_=WQpg)AQ%iU!)-{Q`efB~{l=WIzm#$qx-mfTIkvAW$oyfo2!A7fToww-o&eyT#ax#l8i*T>*X<&Te zrhzK4qSTnNq2$DoTN3=ZRZIXmJ|;W?+;|O^buW2DaK>Gwj)#ptIRi}J)tw#NlpR%$4Oh(MQsH>gGTMaL$&H9=WHTh~FiNAqD@ zfAMa+$a;H4GsWAU&;hfVvDiUp%+`VFT+m5_OW?lr_$Ts72l=^7ahv^(II^_PP?fP+ zt-lA(KS;tkmr4)Qj^H&Gq(=5%bYC7gwJ>P&hv!lIk@HKP6TX3dZB*F*`c(jNb@it* zr9I;2&n8Nc!+%%OQzEy<)x>!msqtosKLlPLB2#}q@Xilf{;XR1h&W>#3vD}OrP6=O z^UzcF?CxUh!^58!({z42SV3hn=?-m^+U!>g9lOpSnFr3CDfQpEs>Bcf?%BMCAasDX z92*WHLPg)D@w)vxd$T&};VRA1I{^x)m4!tjbi0cT$_e92GeQ9Yl_tWK^{4p*3Krd3Ku&!Hlm~VZ(A0 zxTl7|+|TY#mdkK4qcs}HzV5u1omW>S;8}V?cq5_6F4;5@c_TJzm#jbV0AaI;#m5%qrw(dWi5z;Ox%(L1P&EXcV4m|F=w1xDCJp%aME zPer~Ijf@|L{IDTg+3Z9niBjsGN|Kdo*Xfs2HBOI8H-7LZU&CPCK{a@ui{*FL z|7q+uk6?>mC&Ji7R(SO)od?di(VDLny?!q_$X}}?h*w3m$do5DNdBTFGr4NG@*SuL zM0HEY3Xd)ggqy0?M$ZPq2iW$1se;e0b@CqgH6XOBEATd}$Iuh_V+a_+x*g&680 z23?kW-VgpcK$Qn%;cdpGZ#UID$SKTm~00&KC_`ya!6!Q?9OYVyTI8 zUASof$t@XyjjjvamG(upoqSbMm#4;-y^Z;Z$eYXzz>sCgHZJ01>gdaxJ}bYdC)A?LeB5}8iRiMUAp!x{it zk=QmINEKv$l$&#=2s=wR=HLI(mKiL@2%I6Htf;@eRSoF}wHy^NHuCvC4z*PXYu5d#Q=C+dN#A?^_MNx=c_d0omB>?51~_j* z#8{J5NTkBTBq2YO-`&W#!FwU(l9vMhCv1~`6!Wv8$(W9c6r)eO#YH@7Di|5JLB<{` zlj7x1#~yP>RsxMnULydD3>G@Nk<+UlFD7q)coXryy775oR zmmu6Hylzu&G`9$B<`2fnyR8tTrHpaowKb^KPYHw_&xYRN`0<|3_QNUTBbqP6V9Nmq zJQO#W`tdOyetWx6j`#?Qv8So~p>qu`hAJ9Lh86gVxHr!ND8MevsjSPL7vW|4a-cB^ zU;cy-RbLu9di-m&@raxHpSgz@qu(BM#{hr?U`;ZS`#85r`h6BSpLEt?@ z&V|pAgB^9$0r_D!Nrf>Zq}cVz*i<+$!dmN;(k-9gu?r79Y0TE;1*qJR(s!|%YTJ+$ zbK67sRt3gOE>0f_<-@0j|8%NBj;j?qFJ?(7_K-3qY{9Fl;|W5#mlD0IOs~sbXiD;?iD2 zr}*>>%7=rY%Uf7DfAu#p!7QIe2%jNc#E}!VCk4yJ0{M!d3irc{m%n_m zP2J&x6^r0`vquwKmIvd0L$4)1ub2oUTVI!ZnR_8B`Le1anwTA&M)mO>cf<=p1g(Gr z)eC;2qW47KzhDKzhKau@4oq%XxtBKXS41cM02i}Ixl;Cf4MA|rBr1Y#jxx7=O{nESW{fyfL<+f}rM(x* zJVHMkmE0fBUeHGq|E{6e?rYuLB{|_|8DO(b8xA8z|e3` zTl)3K;<LCtO6M@Z zuXbe=pz@$l4#Pmr`E!$Y2T`$L9G zN`_lnA%dCO>Byu+(3^*8lkL$G{F~S6M;t#vNyia9NKj)6+Q4kS0x{2P#8(Jd*gyM% 
zCpy|5b0MQez06X2YeUMW<^R8o^tbDiA&uT9Lia{fjAxQ#rC%}xUTv}|BA!de&NrWW zB@fI;wjM-`|9#DP3@S@^I`c@7%uN;X>>=*hs3zl*<$jj!Eu2}JRCwT&(5L|$i6BK~ zdkNa)T!|8}e6dKpXQxh5$GPkZ1Ofs@mOd_~TO>bzJ@S#rqw=*M8LHj;25PU3&v)7M z{w6xe&+bQ!{Vzeen+!gt0Pwol$P?)zT@kTFyyMx}=j07j;5Y-rJ11nD0wKJ%2oJQ$ z0Awg&f$|S;?)Z^ZV~63XAfe)5Y2}5|IkFOdrNdbuclXZ{Ri2TNMt8cgfrXAs0iXx% z1K1TYNP@pu4j=w(%n%^Os47C?tBi%E#50hxGB$(+0cd~r8@Q~3Tk=MeNN?5`K4@D9 zDt?Wuzl?P4L&{UaZiFbb{486(@y`@eR1~sw z?}y6Opj@vSwr?YR&(8d0yr;FP$3TM6_!u`n0$T3=>$$F0)%N4ggAqMb$qr@9=Z#b_ z%ta)Wi`V}HALGrdITFO*pP$E)(#v*?fmjH3fbSKaDy3?EO~c*a=U`Z$N6Xz}qA{TP z1Kw{q&C+JQmx48<6v)rHpU((S=b{(RtpVim09Ib-GS6 z|8O;~1PAL4u=rVr+ON-znE+2P*BCuKA3wep;vSiTDG&4_ZaF4T(>=Zz0!lMwtN zN4}8c?5Orj;|>jAx`OG!M8r_R^67CjN2H{}yp9FH?7kx;8R$?f9ZnuEBnNzd^`)tN zeKx4gtV6l6r&^O21&KF+`_Fi=qlX)aQy6pNIknGyF+3Dym48zG15x&dbfBZhnKk6i zf;3QJQRmVRslG09PWLg(2`(n9xF61adh+tl3wQxz108(z>%VTV+J=*Xes1zpMI_Xv z2p?pKjgFHpGRhPRfU^5l)B}G9WW!HBJu2aBfrSt+GJZ`qPojuJWb z%ji|ueD~eBB;GtOY)~`ko>f)jOu(Lh^X-honWUhGfl0496Nif+^wiStg| zmA0;8g56@9840q<(vvV|{3u=4vT~*u!XFO07do!Yh0bh0q-GeOD(Bl>ZT|EfRI#i* z!=tkK>R~nSKQo=KR;$xTc}c@ex8Fu>8Mt6!RDCipQdnNuRmxvT`EhhB_UhDQ@?^2* zxZwVsgG;k%(hpb3J;^hH4OF291&P)3n=9FKrI{G7h4c4qOWyov66gBsbyTf9aS?J# zpsCzM=g-e93B=DDSjWysLdGXZbsA{B=vWCCnQH7@srQRV>IYZKQhq zNwNF=d*`3MSi$%kt~E{>_H3&vfu^O;GFY;->@C`?NbVeL0d5ieD_AH!wwQ4lR{eX~2T}69|T5?p21_Y#sDqc9U>v z!AKoi-C29hGE9PXt&k%@fJwA>l zfeQsU4FiD+M<@z7xdp_rkOYKd(}so2`-lfiK~Lyke9K8-12VVhfceOTFzoy&cq+I< zHdg3sV>mhLz{UUvL+L+pGer&B5MXw9CyLyx0aze zR`*o5Xk-!9tCVlMAZa;bc;{zqD>NI~+0m2Qgy)B=)W7#9$RpjvQta~{>3J0vvV8gC zQ@HOQ+S1#xmI5d3 zzMY0%SO1x-zaJN>{8SEx2gc-UonDvFn{J-&gn6fB$vq~0tKZ(8_AB|lHvH|E)9a3e zKT|9pX?hi>V&?v+!UnYZDd6x)H7%pAq%Ni`g4q9!TwHlk6l#=Igz=Hu5-(4Bf|*vo zfqb5wg-Zi|`V89c#P?=?f~XRJ@S+q=zHT&4IixpiOsp~nkKB)RqBJ;sa+Hdtd>^SR zX{A2xAg@))NOiXR+QWwBYeLhVKeBHx6Bi?BIL^7B+i0ElzsV9SDVi%K)q0AM`~EAf zWoi|WId=_(UQao2!<(IuL(|(%c9PsZs<_AK+m2O;h4^(Ucq~a!%a^y*4bG60jxt>F z6oRc1eZiVFDWn=yu(hzIZc3SI_H~`VEki<8yK^&rP|C#zI4}PGXKM+Ja*ZYKVnlUi 
zY#b89!Jwn+M@m;*Fg*#FofTOyTr3Jy*x${9vOmh98|R864?}SlKR~IafpYY6+-;=i zMl1L4=&`aCt@%+0?~;Jb-G4(M{9Ru-BC^Uczk#Jp^=mUSvQiLBRYt@{^FE|~-Jd0%R^YZkp6{2Z88vJtmo=udj9!*D&QTG@u_RMKi(s)f>>zeJtOW&09pPB_p;~jQtpzLzmQRp0}CJ>{I3#?heW!6qh zDmSJRcq*Q*0z_OUsFE08bt7{gtsdgN%;!YFUU`P7WO?qIbGfhMHRDrFc!u3=M?1CI zyq~*hZr(rl(wnk54Z>!5aQ#d8IvswURJ-v?`9D_i?FkFw`eEUg@Y{=yn^Xd28B)vA z`OmKz%M8NB?>073(l0-0RT&fj!Oi=PHfE|1#bJ)Men0~15(H_kl)D5dvs^I$xVw++ z@RQui)9i7#xgi#b08m1y~!-Z+E+cqT=CtP{ls)a3-}isnaIiU<{pEt#tN(x@!qp;fB+W+Q-@%Vdp# zLQg&*`l|W04Fna3s-1)ij0v7p*hh0EFYBnV!39s;bJ;us#=v(YgU5Y|c=C z>NLizF%Wf_f39hsDcERYp84M1(C5_S-C&7>1Q1IO6i5i${Z2z_fH15<>-9HdzN7hO zfmCugmEDU9TJA%{f?P&8z}-c>0q_GL;MT7*Duj_45Q`wcJ}>Z`astTUio)fJI4WcK zJ`@xlJDlvk3)opd(=5$o2Y>J@rb&x)b>Vy+{6QM{!68Gzhm?t8e%DRr@@&4hxLG-L z{Kby}>MyRVy3_e9^w($0oZu+3OrXjMFk0T`%cY-v;R9d+!#X8dWyRm&=*bcbF43&P zj>N3tWB|tSDyiZ7@leXnP$#`8U0w``(xn9Oit?vG^3O4_K8H+!FphOR_6J%^% z4NC)}__N3*eh8j2D?Tn?Y6W$$Zu)?PEmNZy>?4#ueiB@%ghd;{iGkdU)Z$`ft0$tj z<@i;%Sz+p5AbbfQBf0~8lbk4JoitZtz02JV)F+N0V699^na5xUS8f{#Y*a0fcJ+T zguSPyr>V2l@_-iD?pZq7=)o78am`f2pm4Qbc;d0g{eb7Yw=Evjf!-Txwm)FJDn+Uo z-+pzcyUnrLu~K(jG3&Lh#}|6@v>}nBOs;;)wF(%c6Kk?~JF1&tZ75gFqMwo36AF z|ClznUTMYYuY14~s~i^2Dmgzxu^d-eahC8Fe|*Yc6~l-dzaPB%&AT(cti4OLr`<=D z7gJhKGVi6pWil7ktRygW86QxRtG_w9xoucEB61(S8FRo#15`0y<~!NI_8npZN9#z3 zYX|uVllx}vN6ri%m0Aw*`>YJg4Z>~So9hpzZ)dZ{SAUPRK6qe>m9?I>(e`Ml!}H30 z7}3biZNlb^aiOnVT5>9FnGh2U6?AarNn@-4Di8Z{tKT?g0lM?y7hCQ>r}f&oDls0* zf!$LD`@0d{Q`q1X))ocrVrqoJ1KI+7`EAPlwh-SbsmuqWFjUZ8K+@CRCSK;VK2h!R z3ZxMUrYYa!_Sn13kx%{9nQ#df>IxILYQ&7_?ku%OFomR!@LGIOtKA%O2zhcf9?z|o zc7nZ1cm!sZ@oKLvbPsm#p5M2zY0t?26SJ!auZ0lbq^`%Wn}m)ZyDf*as01Jg|NgGA z+l_M3pL;64NbL3m9ExU+gt}+xigYYMTHJSiRl5zaG1_UoiR%zY*;LmzKLhXJ6N2|> zgn60e^8#+^Fs*&UQ>wIDT~cfm*lx<(LemHOTBEd);>2bni9uKPzD z-$~+szEpon)KZ;CfF@Fva%_UR(^CnXHz`TQ3DY9;8y6YqM5|Xd(NA`uJXH z{?5!-23E%W)7;h}c!YV{u&7$xZ+WYUI}b^eO`Iit&Uh>k+tef}wtW+97^|#>2Pde5 zFGs6nud$!^@~ZCid{fvqr2Kp&cOXVYffOGUbeGFuPj8+GnRoEUBYf z$l#GZaW_@%xH&BAP`n4(_OPhbhVz-KU(Bb(vzGc{T 
zcMolGO-AOi0*LE~x+0#X#%W+=lC-2bE9@$9X?hF=N_b`20Q#?R{B<1rwS~0bP{OYa zUHY}@UeQbkhg)BC>u4kS6iq2>{8X{8HZl@z!$BRIr#RP&9h2)W&L${s2_mDg*>2 z0XXV=;Aa^2H@h6#vIw%enqt2^7AO|efzW1Wk0-8!v$hQ(2~t8>SrIUz6dPR38Ip== zy^)IPSesk&xuv1F{G(?=fVPthc#i>b>M#uOBh~Q*yC=_ep*ILWMjX9YPlF!c%f3_321WqvAGkgd8QlSM*%@KU*OD3zkLCFSF2dT!nqO%dH^%5-;pmSRg@qBB8q?sqs23gf!;zO(dY$b5iYt`@U(=T zlf%v(;C7gWsDzT?zdnn{m6a_qq0Qe9bf8pCNy>D8eFhD7FaV6_9?o$=5-ACz|iB&45! zQ|a~r53~!H|1_H3RVQ9sYoKF=Hr2seuoC0|MBC{lkcNG?WdR-wlxu#HTdsiR2@fdD zuK407BJWuYcJSp6-%m2?)yQ<1(Z$O5=%un91Joy^^kBpU+s{TMzP4xTUo)*n`E6h@URO;&C{IWUJ8+`GF)|Y$HUK~{S==lA&9~GvdOhU?Qiq4xef9QhC zbd*4`ylad%Vs*HW^f-g1(d&lYoW+VXJ!-;?kJwo1>u{-p-W9_ijlT=x!l51CS64!p2HU1hJ8$!c38MB4?%(Ax2Zvg@OE-*<2@>5+t8O{t zk&{O~_s3i?*(=#&+9ReYTQ^MUm=eu}&wPJ>$1O|r#%T86j~9PD9@D$~U_6FH_Mll} zz_D?R8Co{5U1dhI92Oko64KP^Nid96?7n4?5w=v#*_>lMtc5Z+0r$ z7Zv0oOVTm!Kfm?cDA6f61wl9I&phTe8l$l8AG6!T`TMc;q>$n=c}zlwOE^Ivder}` zxL8kmagnEr0Q6~sNCe3taj{LZs>;WB=^7lcB=+$Q6XFHpD`~s((!1~G%IZTND^g4~ z^V=UgYc|!eeh{gO9NDbELPt^Cx4kRh&sW~~A9`A`*L-`My0%?dT%!7cs%Y^>HxO6^QYln{ zRXUnRK)?K8O~&<4hC-Fk2U7FK*S)U&oqi@g@@f`vvP&48iPJ-0jW! zSL8&nSBkU04@#Ft$<|-@%WY_grolP@<#ew4zG~Qp$j;74rCCqqCc0Rt+ZDF+_FLLV-jOnVn zS4-m~;)w+SQ9fQ~djCqMIM?yZ_XT<-oo@7}FiI{MAu=BQ-FT1l@>iGKbn z#xw*-QvxDZ6(eiY4#Qpz__4BlKcvRaBH!y9C1seIZhtXsx}B*jfNFo_G5?a*fPaJD3F5!KjMRA z?qOaQl0CrCgrRSRp^`>UC;r{C5!urlBk2jBj=gtXR8ZwuSuP=XyvoYE6b8#%tru?; z*-yf4A9TH_GQRrDldrpK>d$7}u+8!uCqJO&ZS9;r>F@~Rr6(*EeSGEuTMg*sK)HBV z?JJSN{lG-drQ8(hYbrM ze~i6uw*@L({&Xr6J@y`?W!+oB2z&a6?yG47;i<+Jckeq!!G|*!?$!GRb4DMQ&YXz7 z^1qsDg~IC!1y-otMcj1TI=@CkO|AzxRrrrN&#ks1|2X`Qnc|HE(Mt6Y%YjNVxJr7n zu2_PN5g`&*YZj~&HzVWS9BDf{`=FmB6!`ewT;D5u3{+$IeeUsQe3c4SOCZwF@I;@! 
zE03G&ikiVm&WFOP0j)_$K1vL;H&WxFe~;EpBpCh?dts@H8NA|+1i(}Wje!?z2q}5a zv;V3Wpbk%_D$~Wre0-%`aWi*}9$4f+rf+^cLp^wv$s181XYdrCWfeO%Pb~u%ZcMUN zj<5t?aSo7!^jV{m+kH8`*-(|1kf;LqM+X-CeLihnSmNLNbmjn?m>8_$-D{=&_MZ(Y z8ie_-3(o40vD3us3KS5lbT}!jw;{e;u`DV{h<3%z)xzVApc$dmGCs3(;aVZd|3*SH zEUU*&f+YRftTV@hOsXyC+juZFOyK;!~dR>-=&c657c#l$LPp*^KIYZ$EW7Zpv!8YdI$dG zq+t^hMqA2@0}?Nf)So;uCh|3HnC|364-{=^h$^rfByLWSFo-`-rc^qfXIYb0C+_@% zh1uR>m6_N}nZx$RQohR`>A<*k5fZ zLDc_7lNl%9Ao#rHdfx9(z|}g@z`d+*ocBaR{$!B3a28xTG)!R7Q~C7zNJkJu<>Dk zzz+*?1VM^-W1%`~w++MHVXS8kZ8he=9YS$6YHaQUvi>``tbh;{ayqRQNz7V*ZCX`z z?FAdT80yH39t14&jHve`yi+P-XSb8wlFp=eUd#nPu*!Tk%-N2sIWhWx3AG1orNKx4 zS{nDvxf(ncIaZ{Bo4z5+`|}GK62`<%fG%pcQ4V>p8P=+oZ8XT}eN zV>M3dr~y89x0}(#E=w!Y6tF)Kc~XT_;4@XSF_EyXa4plFb-m-g-#*IcCk_i%P9OAN zJYw~Mt9(s$cV&wj+yXw17b3~&?jI7f*p9KVBC)j9A#fs~n*0u|!_lJS-Yv|CBu-Ah zkMs$D$Pc}^q~oTqO|}PoW==ra`3~6XR$2tW={<#WSzaBla&IN{=E_2@EBH5oD^nc;SkN_@uSZUdV z)95y#HkdA)Eq#{;+ng3CY5}PZ31Pi`7_i#!VEz7dprfEfOGDfCmMc9>64hu7!AqoU zrAyZ^3MPh>Jw&CeAxgC3#O;q9ce(q5QPa&QKi&3(258=SN>nD^vz&oDn4r~K>Yd2q=;}dM$C!Mfdt2c?zje9a&(N+~9y#>8 z45F|5Z4_XCyCg4%hiQ$K(Z$63htMe-^s43c_lEFIq(u3SHHxk*aVmM)d~5S1Ea@@U zjxM=;Hui)ygX-31dw-;fz;A1E>CcyKorUw;mNucQQ*U<`ESnJIYk#Y@HKiv#Z)PB} z9w`Sss!g{ilkuJ36>8R*NtpvkID2~nm4?Kz{iolZS=ct#{Ux@Mkqn(wPNq5j0MP2n zabCpvJCE*QFf(?*Yw0sH)@)Iw7Bz4-9`9b4(H_yZpQtL{dE`T|7#{3R@J8?Cdr@h6 z$)&wm55-w*iLKuBClnK!)X-!C10`LaNc~665YSN$p^k5?BZ( zkLf*H?a6SrnP0UPl|5Dz9!}b2e9!SjwALf;n~(AW?%1m@q?@PL^=|(8d(mZrrPo?f zJ9FoLvJk|o^K311%c4hRP{?CfVs!cPu60wpNN4kthZAh8ih-+s-+^6#okDuw$;#7= z7vaz2fvuTkp&8$Od&M5r2598c3x$O+&5lVsX&~n7nQGkA4%}964shOMF8xX`54-;Y zNSyHReXGEmnM(te=kb2Yy`DC!5^c{g_e}S{T0fT!iRO-3w6Y7S-9$s3DQ+NNedqp? 
zg^6IDnT4##sc3T<95Rg1x#g_##V@tBpPcR06kRp8M_{1`oFjJ{*bVBA$p&~(eZW3q zCR7q2*0CaYHXuJ0mEx@BP%K0sJg`)_Uu|<_((m-(f*XO{QlGbERyO3Dm}@G7pOzU5 zUUO&2=KeP*QA;N_Bafqyrgy*7Kaf&5%6&CU@YxXkaz0F-o2*HigR!&wJkaB1wbgQQfSzV*ULZ36Rgt~C+TLtQOo0XXSSP9_`PQ0 zo-i}y;ze5Yi}HP}CizpZM=?Wq9;8y`-;Z=gnN01jDOa$cPkj4R*M{3O%3kBGb35?s zA!>t-)JslHj)ZI-JN`!7*s|9L!uA-~BIctx_NDDS`-Vx+#yz9=New;fUejKe_&&~9 z@DuX2fNKp%(>2$pS{#gE>c4G@XkY-`=7Z9N+tL=}zSeHZA5gubXugZ^bk%$nd9Q@e{Uf71Qb%hu<`2Ti|)jJDu1MC(o z2p$=>tlA_8lk!UL$IREGYS={tSFvjSt|T?;{$ebVS`vFWqH8W68Mxf($2n`r8k)Ce z;RNJA=sJ;hEF-T)ea}4OKWehzep{99NfKjHYat>OPox{r{ireH3l;Aje~{#Df{Q^i zsjO@eI<;%^68C(-J?P`CiWGV*oBdESZaNf z8jKrZ$M^Db;nZgU&M!(rG>5RiVV2Hq5>mA@el`aP{6BS7+Ak6R z+kI?AdOf4M_+g{v7s_4&zL&k`0Ki%1V+&6#F8S4?^=#mXcec9@cnvcH9 zf5H+#dbN}BMf8Yxt$n9n1akf=l33<_QKYN3zA5d3l&pqJApQpN`G7C;n&=03^iGsC zn~AqxD2YW9i5e~$ntX;U6YB`Q77^JHcgaAZ&5v>sj0E*{<6t5FCP#x_f4Tw5fv^yt zkT+Dw$p~kd-IDB=h5$xKWZ$s=?^o}V?kS7iFbrMW^LKJVJ$kQGCRR`Gjs4!U9GWAm z+gI~9bWva-6K#+L!&aFjU_$(#GYM#$Mx$K$)w<<^UGE~8ZdS*9#4_{2!i=B1iBJy; zSZTmgktsF~$UG5xnk7eM-cWU-mx}!x2AySo9E1vxW`T!Ehn%-W5qBr543*Z85`a$o z!EqG|@m^w7w)cKKc0i_IdlHw4vSq${kj;U@KbHF7l9)z2G17q7W|+LIF?Z3{C{%dr zbs)k|swu}a@V@ul_6-iLrso}jZUHy4@1?TSZ|ZCwi_~iYk~RGBmq+*vH>7+jJP-}B z4PyBHLDPv}oG7|20^xxfH5jQ2!$mw;D%Nh{h6a9z3!lJ&W!YOVNF{mOHN^mAPO!sP z0+f`Gn>zf(_+fQYo`PwE3_d0YC+j#gGU?zt#x%OsYtVkIDu~$Lye+AFQ=IVqq_3t< z?$#FqoE!2Jew^XfC-dBIM=a*ZeBAkCy0)G~Ch|XJq3(5Fc-bqCUi+=HM*cGLaUBzB zPpj&g;61?sAFtexY=dn>N|FBKRK@!R(O(Aggk&ayM!|bavWLs!z*TtO>=Pp^qM-2O>sxP7-tAe1zQ3#+5`qR@qvj7|5N3@70qz?cG?JsnP z&dMW{6zrVC3k|9O_=&~x|B6W_+zLW;V0o(|u+N2Bw(0r?v9(AEbafR0R&H#moecHe zM=SSV%=D7dqg>ubQzN*?5*R587QTr-x{h|zH%I{=>*POP zeX8{4btAn_W6=#_xv=p>mgzx04a>6^Ysnx~Y}qH^WeK`K!=j!CoF~)r9|o|z&&JJq z;e8P?=mwX8PZJ(Ec%7vgsV)DDSDhnOfko#BI!{_oVEqn;23tG#0ya=ajdRt0kBqSi zV7sv4qC}L#_BFskAYd0eABEWoIfO^V(M1IDi6%@@!-d%)r125k_BHZ=8dLZ+7Pi4K7>O&ioWyko+FR~TXqq3!|J^>)$ z;$27v#$&{L%~`d=9Q+%#w!~=XzOar>fVaUczC&ONDY9 zme9-_;Tp9H>ZB~JBaG%w0O1_B0GISSvX&$;_EB;MNNzgP1&s4r*M;(;uR!ERE^pla 
zrga0BHE)HfP}g^O3beh^mv9J?OX5Mmdtu!;Jkwu6KVvQo4-@~^M?o7)Jo_;9({hYr zDBY4$g7neYsaw5o-Z55;$*4xEKJdOOP~RZp_DbKl!#rj&pZ3*T7b^e0CUprMH=xgW-X={krSKAEdY<&v0wprXGzUFHoyRLya2>(k5Tp4C)t z%OguVww#5`&18PbFoAW3t3O(NPd`W@la1qDJ%s`B&+uXk31DBTfP%EW4W++pJy4Ub z9cq=DEarCjxwQt~Tk38>Ns`dC`24+193@=_W18GiYxrAJk#oI)8E@rG=Fi~!D>dYm znkvuMkEvoEYyW4zr094y+aovcSLX|S1vlSO+FB?`YwAyQAbbv~>@w=Hy7-r=2^#V&=hDtjZ&L$o1y> zx{pcP$8)u1>7dPA=cw)YO2lgEd?_s0PA>n8-!fI-5xSm%YvAFwjQ?W_Mpjyc`oBDw zc8 z2;QWQv?U5FW$R2xnn@S6<3W*)#=TjwhcY`|`c;f`_v7gt{L+0$BRmVuCp>!WHTE>D2o zoA{O0-b-+7>cjAWH^f>7tO--^_LRJfk4OM^s|Hx0Q?&@507};E-3@r}J5VWV#~d27 z=t=qM`zkg5gni$=XYW_Hf{}YSpR7g9o4YJM=^Wh!CD93Z5lSW8rz|vP-<{{aF`HWb zrWV^`&%2>~C3~I?yOKc|4t51%@adU_f}0F0>&oi{3M7-i&qPT$=X+WAjxuf#PG)k~ zkcd)QHQy?NeYFf0&nlx5KoV5tu%N=J9Ko=T+EWVN(G0;<_q4ToS>*XS!isLlN zM9k0K@zLVwFkAJQZ}m6dir(4i84=uB>yMW4BHpgh8j(cdVb4&o8upz(gLXNm?jpKZ zY(Z}78WGG!Ts72B-*x8zus&Q!_xkC5Dmqf2D5r-BUB0=u)*Z}95wgB8ReDR&4Fkj? z8noxVk_$h<_Msnh4Xp>B@x_nF!j_h~dOnLl*jZbn5;!y!&_K-q88Zx zi@d!%W>zyQXq$8gE)Ym=4^y!CcA=a-fS43SPtn4Wl^Io@Ad!4a{c@tb?YeoeNtB1_ zJ(?B}TC<{+)S|}a^BDP>H$d%k>kVtykyl=W{L=2~YroP<1N`!c=KEN>B4+j; zI)DJ3yUk1;lGU2r*s=}qI=DZxX8Yvr%79M84Ph+9hm2$vH1Htj6p*oEURi%9_GCi- zbhXW^fZ|*0g^C-!$CuIwR(T5E+Bwp*-&3d*>IsB%3Flg=hZMw8ZMtL2-^vsqyp$U@ zUD!}c;9(MtqQd(ZH%Gicr9S2!&g4BJsp7fC0EUD&%LFkvR3T~rO2i*b=6I37%}^9Wx#x`s%ym6hIxe?)>MDSO?NH-UPOtbcJ)}gPrbdvlW@m^?HFkYU zs8oq-jT2mp$N%E(AN< zTrGKg`;%s#VXv0&xl0IDM?|#W2$&MBl@1JkzXvvzb^~5Dn8|2&(?55M$`Uf_n~)b( zAs;kESwcpZ)6b8YSFg&D<6CRRxQM4K^g|!Qt)1tLA*ArQ%a%myaM9zt5l@$*uyu_X zie04bgEjD6MTCKr@nH`M`iSK*m-=zC4~M|?)7;60mSIohNPM$aWo5OxqnW$P8_jEO zG1ogOQ+-Ln<2n?g;7`;L%*)=r zVPH*dn}6|OyWs}ZlKrG}pI*rO!r1*E+dCLYEcxU|>ly6$AV|`B!p;3(-^=BmJEjf4& zZ&JXW&;{Nw&q3p*EFLuO628(G9S-!O84W6NK*OsTS=LMgk^f%o^r6DifkrZq`jZl1 z&+xAhXnKUr{*i|Fda7T+^=5OZ*b{x?f%PvD)X*#3^?Rw^AU=>0eFjJSYGuUZ1#E+P zfuS;sg1rap>83&s^J88`>1}US6;+r&LC(l2dt&-#;+S}>@`#qS0r1K{wG`}PJxmsv z(tbRv!NwHqRAA&QdG>Qyg9(s?_zg_2F5syw<&j4i9maiPI&sr{i0(NI_)!f7yJ-U# 
z4c`gYpBC;bIB?IaOX=4F*w!^!b+SM3=;HFE<0J6wEr^VL!ct%D4l3m4tSqV9Tcq1_ zMp4+cn|-l{*#Y=bF7*~nu8UHj+dIxA@`*A{S%`l+N|b~10?o3F#oodRmFwacZu+=Q zZceR30#`<$pxueWOh)NAe)FZYQ#l>>NT(6og+d`uTHuK9Wdl($&^e|!YzFE!HXH8> zh4}9RfxH3`VBljaRN?~cc^^kQ_wR6FLRaWE<)y0j)onpq%}1%1l|EMlu>RVm6>Jc9 zf$Feet0?*80J>IxAQq8}!bD0W2F2(j^@7RkjYDdm=@Pw|l3Mxl{h}>#5Z;L~o3iHF?GRX$bu0lc8?9Vnnq&1g|2O={=L<)s^!*p&EhlWI9f=r(=&0t>%;l$Yc;rm}(@yv0}n_0YoUr zOA`gcc?G$-QvxRB3EB-fj}D_KpMi4D=~gcS=MHa%Pf6l+=RTOew%-j+I`|!04B1`x z+_gd1reDTGysc@n_C^nzXkqsQmf!K0hhM=*Vi}j2Kge`sg=Mg{ZT1O{e>!zlA7USoYtbq6Swax46!ffUem!0HE|Kp0R}h)%t<$a%pC5UA*am?@ zp<6Y@Hq`(l#xb+w_dE;aYfE%ce_?!l611im-C(-n)3>sC38l8t&p%6qP~Cy8awDq$0HH;U7Ba8c)PAm%O~(dsE9;yA-`SeKm|*?0`Hx_;eSzN4**z z+?4}Z-fjLxI=4m|RZaubW-l^b?n5C4PNzPKL%0^S9sISshv7cQKj zXJtjID?^MeX`w$UA;#EpMOLlnshs5UJJ0?zs5Rq5=UCv!*O7>{NI-1YEa|u<^X}cQ zZpho`z~e^>*4vP zQWgQC5`M>+bBF`+{pa+bqOO4mI}11`Hq${3QK803oxn%?S9yB|6p#!mQaGhj3F5 zoEuQS%JhoaAqx&eyxuHSfK10M-_-oQWT%PFSL%6E79OD(+tGUcGQ)$KJdYvK+vYg0Drp(+t(bSM66-DZxIzzTyCvo4SHiRZHCm4)@YFpo@H<j zykC0#+ilCbmCFk=hHP4>nu24mezCJ7ap5Yr6k(!puf($AMo_l~V_ea`n3XG~L3JIc zu)9h}bK(Dw$&zr=S~MNIUmAzf%q~uLYZ`gKs#JKFQ z3!1ueRmFmTty$tDK$2{xUdPxwV2mk6B}`#*yez5Y*{y+^dt{UtE~o%#eSItLK=nMdT*6VBC#MR|h;)(F zGE91*;U|_-X-NqU=s)=m5aSYovwJJoQt^La(^6reY{deJR5VIvgX;-L=8G(+4gFWv z6_r7dLycxz(J10#s>!I;A{GnB$x5~<@ifw;Q6mr^&MR6zl{L7~ak|?EMhl`NJL#cw z^l%SvSl%q&2W_Q%3V3f%`fm;SSX)x^av00JD~~WjYYc* zl`#E>W*hwYAy&V@TLM_DhUI0H)#L3wDHlx45s+7WB0>E~Q`z{qbJ{|}J~=lH#BJCv zS*h>}EAr9c@IK)@b^jOm=kJW0$oK5_Lp{v+o2xS*n34u4^v5*0^J8C&#|Lhd=vV5~F5Lj{^;Do|TNmwlY1`B`-z zonypj#nXoT?BaKni=pyfS%Mrp>~OOcJ9zyCA`{@!Mw_eDVm(c*l`cB6&%H_H_Dr#S z?k4UQnDsRKL9;^RAEw(;~w0h zqo~ZJm+^}7QvW)j-|6!DI_}?$?AaN{SETn?MldQ02aHVCk^i4#$KZcxmS+(6U^lY4 z%?lmEgkP4CNe7%K+j~Gn)`1BZ;XNP~Cg;Dhe~*OWrD7QT*1^d7QKomr0R%>beH#U~!IE}zS{5(bYU4v?Wgi6%Go1W;={Q0uZ34fIF`fOg#%X%gcA zVw28p4wyG8^wfVRX9 zxXqh6$%ZNOWFqJ8(ZTldU!rKgl_-xWGy=LWdns$G7tK|EzzEww$vaynbRGol*B&tg zcC4DuM-f~Sr{~HeF7Jr4Y`M^Ys4EX`Y)TBTn+~xgnud5iKnJE|53b2PzA^x*`z`ak 
zS++zCy&cMTJS*J#TSV1HT)5rAqu&taS~}mosXQ_oq$`w!I)(6h zk}NwD|3SEZwLx{LTh-#V8n-9$8f7Ys69>-ow2CzohhN}rnjx6Yuw3-}$~p(&@syNq zSJZxtd+BmlB;`w&U5*jwF6ItWrwj($*A~E_sp^fXdhzvE?4pNDy^AhJOgM<-P>G}j z z|C<1G>!P{zRzd5nzKee~b^na9p{WT$5}2>xJPOgLDE@@Xx^8;}ApR7>J&Zs3rPaKq zX)l~`*bfG{E?APFyUJ{LzE$J+g*cf0HmxtxfIo?aT=P|FqJj9;896)Tpv5KRlQ*LU z9w1%~LWOb>$NnGz7DSp6FKDVcgP^w)Yz)&7+GMkEQU&MdJ+zWOBzEaG0g=p`Slz{u z^XlgHq+l7+XL{)pFx$fx&Z`BhzQdgQ)Ya_Trb=v&)Al}b)7ME>IU&;Jfev*OoasJ=X_QkseaEG%D7V9m3r%K|!YjBA*zu=!B!1Y{65})4(Dy zY_21!JKuaxWbgQZn;gtkT3>NO`?2iYBjg>1XRwnwfAT|-LnEMJSv$41S2X%OckFk^ z;7H&#o%K|=)&8`Po3H)gH_u8K2<&Jfoj&0yqeTGH*WoP>UJ8Ly&mj?nCScyPbcwn^ z$ApRU*4j{j{O8PwPxS}Q`P7czIXV?mA2mHO>dLgofT*m@Dn;%73X7g^{3+u{MSQ|{cq&D(YT96heI%3jMKZAMow}Nt zOJ^=XbU6=<<*Kt4Z9jvKQzJQ{o7@-(;Ima`3K}cdc@+0#yDZRksR`g}+9dr#Mcg`2 z-{-8aT((yU7zPVx`%xJn*gl0mn4VeJXm5eSoH z#~JHSOLLU31QT1o@L6o>N@@s4OZ?h*#~gTU0&nzlNR2L`xm5m-Ctz+R zn)9b$jm(IRd$Ug)cRQ}JUJv8R9Q^`k679Ljs3zkVJlyXLr;h5l`ReL)?XVj_=f9e72wxHn`4Md zkesy5Ct)2fBg)D_ z;=9olFQ+ze-h$lMnv_QY>+GxdL$qs$gl{>v8W29G(??aKBCtx#o-{9SR@?WY64Yb% z6WgUibwku)HeZa{a_$Y;S!_}e)T&Q40IqnCIC?O(5QtpZ z;2@0C*)rr8Q7ACNdZ`{TGzwpnL71brVP=L6Q6iAdQ_}{s(^?^iiP|R&2tcV}cfOp$ zG!RjL3thk_g_NWkbeWdC@BaU|OAVQMUl_o}4exDCuyc?)OYhy){l!iaBJiqgj!`Ci z+i(Tp-ZUeszBIb*p$epBIs8Fy%GSIYk;|s2&=;*>e0tpZhL?gp;ARsBP5c-RNk zE6H0y>JSN%4X)>lUvJyoxG};hJ%?ib3Au{B_t4_ znhetTTWoQf&A64BTdCZ#(wO@sQbXPUkUvK~zp2#D2z_h?7G-KjFU(x9X9_!Ct;J+a z;cjqt?q#nGlr1~^{5t}D{oUH-?k9H4YtH{A*Dz;=EptiD^T1p#_bM#ohIO6mtByu! 
z;tN%6m<7HG0&KDdgzdc4${<|jD_r)ge_;N~*q`aU0uOsep zp4Fk#!wcsCh@rvqJKaoB&;5@~x9|2&96TG>*Gldfz{=XlKAlL9z>SNuv53)Emv9pt zM6HFw1(6Iw8m{->mmh$}C_0|$IE6ItxIz{A0p`*{lr>H88m8wj!O$3;=ny6cdAu%Y zKPN^(+A+aZP!RKT@;fzI)3^6vk)d&9jidC1=o0Xjw_tW*PIC6tf+=qGpDFz6Ex>3G z$?OE))6IQfx5`*o2xzLdmn4kXY3MdI|GyZuI4a+4k@*L;(gP?bX-y55+9f8WBflt* zxb>LoHXLO`S%95k>rgN~Xm~CfxM$P1fd{?=+dv>U%KsVZTujdHZ#Lx@!q2OYrQ$Q6 zkiK563dkIY`a2BgbCgFWO?18Cy9w#;7e9b3EKpclV!ue=*Ih*1$^#Iah2N6K#e5ew zfcq{R5Y#0C8U+MkOJct$-3CNQO1tItd>5srdSjyyAabf+s#-4m9kM%kx)7%E^0VT9 zttT|01qQfS$vFOHF)+88!!q#-&sp{i;0P|jv)6*>qxe+<%gl4iCay*p02zw9;R6*Z zQ9uy;q2&9cqBAIV5_5$ds3k43MzsR5K_XCEkw$=C&2QaJk{`MOy9KbQXRZ?oAbv08 zbS&Rxw7l{|1gol2U@>D>vO+)``E8l-PcS>F42U&NUVoEz_x)!@V}VBOGV@GAq275{ zm?QFon}P^4I{Qy@Gm)lV#&WtUDC4bVGHl@5l(wL=gH9MyG;-lj%PrBTRx|wX-jXajZ7N-)8TVSQgR#P~6}KMl(~ z;mh{7nFC?aja&p~wcFYC^i_Hp z+S-qt&nnAzUq7HFq`?+_t{v~}(eb-u6$Lp?K6R$Z>4qph(BSNvzli=FW-}nW%;dl@ zE2#6wee9mWBxU_=w7g5j{dBDx^dDt6KATtQA0NccLd%uP&&|B8jyGICbm4Jj?@z29 zzIXX&JfJTOoNn=Y{Uoy{Q|-<6L6d?O%MkPZ6S`C!@~9}%vg;FmkRPJ%=WSq3Kekr ze0g+MZ9rE*Y z0n~co$XAN7D|X2J9_X12-{iaJkk2na-Bh)rWocGaPn>ZTL~x>Z{qzk6&cZrlhYwr2 z$iS8FUGZ_ww;sdjkSdU5#tVqMw1kyt@jfBN2}V{ScGkrrOfMCk;`SBdP8;&qRDLHj z+9k+Ww~xHQBqTLd$wA2c8*G zp{t7W_5hRyE4uH4ya+u3@Hr?MUz~&7|#Vd-j4|#M|EW&l1{<(gc(PP43Vo zzmH8v{MFG<&$Wm2UeIfhmE`bSbs5ir8+e<^FUFwWB<{LkRGi}`1oJKIj4Dr?VZBTG-isdFnd6M+ptxo~8 zj!+_a_Jzc)913XP=0c+A{MP@~R=uq4BDN(KSOf({H^Vwfi*lc;AO8@xpKm5R z9YP%K^!FLlezG9^Nfu(Q;-m%(GUB_NeS5(99el`xO|8q$h!kM7$^eTn^5lF+ri8d%6Jt~{AZSxl(c8(Kax2x>;JW6dNJr2$O$A)+{& zn0R^yqEOu)@==v|AxxJ6r9C_JP&1MFk{Q^{0sAlfKuOzuZ22m64nixtm_nDVjUs*$ zmhbcuG^&H--fBHgJ6!~#UI#lYcU!W499 z_zZg7g$lt}BQTRmd|1Df>zFh=ThS%GzcRJv31zb}lM%uQm0DJZGu!V5LaxTZ&6?om zOjs2AXL%8`NC3Kb#{#4MUR$KH@$aKY&9*!RKO-sRKdA*mQL%?Qgqz^;my@_zAlcFvt!7 zEz9Z=^PaBmK2>?ix{ZXPtL#YXaG~5i|inb}5S1`X|H7dVx7KQ7bT6&d2xh z-SqmCSwhE6^-UP`6q`GFl=W;mwRz^t~Yj zsq_SOgkQlUtoRy#|H$8wqfclPi(Xg0H9Yfi%Cz>mRF&d*J-JoJlVIQeAu_r zXnqy!p<)Xe8{X#;O#8+dch?(KLE5*W3Kz&NT=DfNML8^kzv#vJEuU1eZgEHDwj!s# 
z_&ouHpW*`y%OF9^qN|ZhouG|56Tl*EqF@_eY^GpafQA5^8hAm_djP@%Y}0}@oEQgK z;N-Xnj~ijg$X?#9+rKYhiXs$5I%!@VJv0>qG~j#kmR&fXW8gNPn7^bRc--X2&Z@5| zs%zz1ccfPq) zCx3uIV2$Yf7vQeS1T2SuN$I*oh@We}f&0QnBbO=-xT0Qb)^xYT z_7;-B)zqVxn){*@c+oAvI?TFFZ+B3hM5aX!ij8Gp`U9wx%eLWTfeLh|JU@&l^4eRx z+QJRWuKdmA(J)I9l~|MUy|W33`U%Lka$*uHe=ZzX9qrurYg{A+e7r*(a6Pu~8Cp}t z-2p0Tj1+42h1mK#cs93%@65)zPe1XQv%1-ET&wbWRhR3%#$wX_fNlnW8G#@#vgEYz zSyjNAICXr{*6BA3r@An20kPf*(wqjNHt+KC&k7z=4K5N*+v zn9{FbHo`;X;4YT9XJwv#>SPHKJ{R&~D^TUEJlZ6%ER*Y#?HycJ)JdeaF4Rb>w<;?a-CD9s`=|LY7QbO!*dX)~ zs&3UR7x?V!cKh=&{F=VAA4T>g7-6bAU%8YC-ZT>re=ScZe?#q{_YD}Q`UUc}d&mx- zg;kO^4#1WNKIv29?~W5gy{~H{4Ez74?6y7j4$w#MH7%vSMl9VGEx3u^J-s-3U0Y1p zp^E>D13O3q7Ti^I5+q;z^L9^vKbV0A?Dvr0Vr@@Dvidc+9sg)|>ivZ{QGUw&T{o!` z(?7|#k_ofU_bTv1UIVJN$>}LN8JSg)`78M4X$=a<3FW)rtevET?Vkc$nwI`QHS2S^ z=?9sppA8+^M=lAV*G?;XoL?M(LWh^Zr4ly8oPACaoqA3ag~PjK`^m2Txc#H%&11=l z_=dK=(Bs+6D`4Epwxw;M(7qo?s|$YdcOQJx{TuY8mHX8_H(0**DJ|$1(u(?)vCY6^ zp-9$3t8kcr9JyG-C*6Nj&+t7x)LrdIcxbzsoR+oK11OkalP%omY=`K0tjFE5*<9-3 z@5LT`QA;dW^1k70@J^r_ zhzSV=8owbk?&J(uz?kG{kk_pOGV2LZ+BgE*$D*eL$Rw6-AIv=twUM}{u#4-59>D-uffM|6XGw$oroDX zHY$$cfj^hUG}j#wcs;F7fPB&OS^UOH0J7L>+>NJl#FJJ9nsGw67dIo2a5t>kJr*RY zVS)Ne!wT~F6P7nwDxOf2f=%Iv@jM0tvjK(QVR{0Ec!M@M;k2YkS&QXwn8$lKiI<~c zA;ZsWsVD5#-@$R<0<7Qhy0h%)IWG9~b6!dVYE zNZ{MThb{&#LfTd_|LT1y3VHIPj=X3VDrgxlJ?ycNR9FVW9l70FIw%x2>}T+9PNV0O zC=A~RBbg4EnY-BHRDRIXPQUs^|E#j}$gPYwP(?Ru32p>)2j6O8i3w!E*328Vd>$^=9Eeqj7_AVh3?q^4<}1~(@OMjy6f zAx%YO`BVXh$T*1_6KL;QGNR1DhMPqdhn+>$`Gesghe2Ky?M?N z`{fno3^o?>1Pe6E1AH*!LANlV>j$cSm@!Zy=t5>iIAKJ%8>5@4w~WL_nwpyWVPS%KMr-;47*i=&4+u%<3a^$+~nxUugjCplTFe zM3fqbf*R!=bL%N{6wirp8C{U^ERG}*dtDv7O3JtFPljtSKl z5NiVJne-9D3^x1wJ4Ol_V?b4QXXu!zR7p=E(qBL(=xR8Tu7BysAp>%v8#(y?DUBN~*&Ge31yZ(EFTlwI;A zcUG&qrQe(ox#f$yiW}C1tKd`XE-@G>KP~d8M<}!Ic*c#Kc+IDFPr1~{kyyj!04R7y zc62f1vW>?S5jki;p2Bjoe=Na5DXSzTmReiWDv7e7_#HZ_H8Qx9K^tKFw zBT8LMeun`CVTPNm2kci+VR%GWaFxbIDo}5}GD-BIzPzu2p~4oUse-;r{@m|>`^*1U zNb}s4M-V^C&@KQsw97ky+PB&%WvT03tt3J`U4D269Jjt)mD+WD;*uZ+)o#o*UsJG4 
zeF1oS0|NOIVUDGAPXfTikWsoVr0v`FnQM_s>+n6mhUq@MSRBdDjTRY%W_|`FStnsn zE5?}*Lg z7|lpw!OhJ$HL{VQBe@$u#@Gq*k5-K1CkedT#q|AnI>j%>&k2kmz)wzol(7U>Q^3Db z%7DD;A^jSmwTxLclJA>GlZ0b0f}rg`%1D*dI(cEU_(4$N%vYE8Fj4?e+s>HvFJ8=7!m$Zxd%Ps>W~i1DdU0a@u3Rt%|8l* zpzxlYm@pNM{a#l2#`+Rf5f<)WGy7d7>v1;DGb+#S?4v$C^OIl%$h+oMd_5OBLwFjGwNZc@}nGjy)`A zK#X;>;<_$hM}4U=_{o6Oj8TGDjGAq?y}J@4ZZE^njg^krdV52siHYgwZK}xp0KWD8 z?$8a?@^-GewAo!MgXQO0LDaM-c{|_uJYf`n1F;vFh$)o6DYCKM>jy(gvD2@`o)T4d z?-3unH|d(K`B*%qmo@7+83*Sz>mBI_Xq0C!moiI6+2;n?G%8Y06dY&q@L}rpu441) z2Xy|A(rIf~tc1Sm6~DRCh|I7`DNgPrK)biy@z%@!kJZm$b7yCvjLDmjc4#D`op(4U z(colfr@T6Vf8?I}DfFOO?aeyx>kJRMN2UCjrwx(j`x*dD`6%jB?=0}j+r0LT$W#58 z(;8F!Vo?jAk{P+fC$qTWag?w;Hoka)r%vCwni%MN7JW3_OI5QzC>DR4;#vFl@No1d z>em+?Lap=us|ABx~g8M5OvBm=F%s5EuTW4ADwO zGXOC8a+DolOisaWneAz^FvsOK+CjQG&o_@_fgVHl(86I|Gws~hPF?TjzoIoh%WDvs z$T{oGTW;LZCIucMIin9T+q3i0kv6h&Q7;mABy->yN@uAz5r-t;onuVob^##s%Q(P= zA8%}jmi`NtM@rNGHzNN@MbBc)jK-5*i+6{4`$ru5)*BI6l;{&!ZBT^w{IaC&iO{~3 zjL`8S?HQW+oobrLmjB&Bcvpm`zWL|~ag+++|J#*9KL1qwZMk6}o|5fNi9-93N|k78 z)^0X%MEsS&shR6DgB;;BzbQesZVQmw|NWv!P=HfAUM-QS>7yN|^Fyjn2Q@SwCX|9s zarN?#XzO_$zeh%Nsx0Ys<+AA?j@AZ}D}e_T%OAX2;pOke!-d1UlDACr{$@Lvf2Q=M zhSxmxbiF*%&J0|-y}+^z{GRqU`njOw#eAC*Cu1_Wo$max2dV$sxAE)XpApU1r47P~ zcyiSVH9%u>Z!Zeu`;Ei*n_^*ulhs7Wtf`k%j~KN*30C zro>eD6n2%PSbU+)q(eH4lYE9M_|g*84Ug%GDd>Ko*2CM#5ScK|&R}8Gy5%{c`wtJk ztKgRMp7X&AlAL*0IJPqe&dOK@W8*~Q$>J2&dMgo;XiXJ_4t5Op56XE`c=b|VR_{0d#r`J5E4m_c$x$ndu;O>vD@3%g^SgW8V9h$Dv*dV&h# zNXCGB=hnZSSYINtL@36`6inE~;a(n%%X3S9UkEgm&LS!+Wcy5#mCr6`_oEjaFwo`=mAFh@uPX7!}$9dHm77ray_<&)%P%eh2v z>;6v<<}gFd#qjM9ruDtg$86ZBL{7O~ceOmGPKq|l*X=)K(Fy2=u(6SBA9>f*gnjhN z#44i*_0RPN5ea^L@vWF#Z_xi}4c*&=uZUo?XDvjJUMLX?FX`8QlYJvt5c`t zlIdVfv4rlP{tZ=YWxTQ8e=0*LE6V_8Y4%1nEY&V(?+E77Lj#pM1LlUQNmQr@5szf% zkIzI*-wegO2||8ufH|BJ#R`u(59n zDk|tNEu>ko^)Umy^A5>T{%_*J6O}@O@_Xf8nZ0ZfGyL2>4h1xqMpHN%Vl{*J#+b0i zZ}KmlR&se9@dPuq?n}v-><)%Vm!^5YxP@xB{DT&aCQP_D(N8~(rpcT}x8&&z@rH4U z>Pd2Ik6vq*e%>5f=pR4k*E%N|*9K|{g!T;3SEkGeqZ9{r_)m5?TYT8>)%IguU90-0 
zlJ(%&u$YG{9&~t9wP{3xtdpe82lb6|kb1KaUVOE_euZ)c9-g zo+}xCjz$xA8DyYsu*q>IRp=hK8xAB~pl#>op-Pw{pvl&x{0<&x85SK3+~qN z&hvN@NvGNt3t17Jb@N(=WaUfF(V>*La0teT#6Xs=W(Z=Sa60(O2u< zmQ@5VggiCReeymrEqvEJL5^zfHkl+^7!VmU^zKWE8h_~G%9ks_#K>UYasu)ObmJ3W#+`fU+@ z@U97@tBHw%Gv}v)rIrldYl-Tek8R*H@g`q)HPt0(aAgF2wrxFDmEn@U#5VQ*o8i|FRI7$qbS4Eh}vR2DXdSO6m7R#6gaBT9FN4%P&23g%X zpL_eP8)mKqTkAHgE9-jiotJK-lyc{gvp2r*{lrDn43i;ezfa>+KNbe6yPJMeNl&hW zz$ASa`?faK`*29kFq8hUblXWXx9zoK;w%3a`5Vh7SD+EhR)5m|B_O1ND;NK<(!yUb zxnDOx=5&%l=Vim|W70cB&mS3|rMY{HA4ug98S~6P47}>c`=#5Hk>eWdTm4I1CKCsr z#x=0h)p#y``J2>c2X0dMKdjN%nnCGKO};3k>`(~tKgeDKOEpIz+eKLz6{XF7F)RBa z@T_Tk%$NBBZ3ZzX#J`=rncQq*u`GOwmz!fqcP?QFoJ0GpqghjlQ$@!*Z*}LSC7(&{ z1Y%}`%ia-LWe1*hCy#54?U8*!@&RvMX>UW>(T6P2vdYzgMl`qVfx%{Q4 z3p%SaYOBweeto6QTCCrJ%cj6Xw0h&NJy##cvu1!Q=EV9(g6YhJuUGi`P15pVCfme(_^3i{S!2l?atx1fosGNK za_5>2hK~Nb<}qRO|L#NI%Dm%~E#i|*SUY7zTgv)=6gN*EWrn>v2ef353)YorYDF>* zcylu5o-#8Ij(?oAOqyNg`vrTxs_-{6F%-ACAgH>oz0;}yy51?vc#_HaQn%)93hISw(xuKR0TJswSS?efu3F#lFHxi5{;bl z{UHut|0Z~ZBRW^iCz?O;i(_Rh?5R62ZCYoA_IC51!hY4w*h_tJ$DUgZnJ#{5*J5X> zY}H*2ReEzYvU2e##)dbc!JIlt7@pdU%Y%2aub`Z-w-P9O56kbc!B@|k%Ra>aI&KPo zKKRe|S#G}6mTnc=H>up z$TISq915}%72%8uI+2F2P0K`rW4jA4^cl!fqFvquAZyVmq8E!TFc5Gw11~J-nK1+t z6TT}}c7tMI7L`zle?Ts0QyyMw88~Wqvz~FqG3aUGuHNPDjc_leFQ0Gz;;zbIDbG&4 z{ASiIVPu6)U}mdR!Nh_ex$=(}oktCiX?$E`daSX9IHti`Vf#f(GA7UwP$QDf3hxC0 z>ZB-;I1$1zh4aZXZ2poLNtDK$!g?J2BK_QT*>L!qJ&ZPl1YU810$LeMWu)ugyxf(M zM?Q+e^7@n^kgGnFpxKh_Ll#KAR1~^KFwM#g86)P+bTqNfKVpUH#m|iCrSH?#`aKBb zw$zSVvxb*t*;2km7MzX_IozXU8B6@k`e?0M+bo;-^SLu&0PXKLpFf1Mp?%=FDK=Fk z=GfFd@;TFlyc6v)aA4a1;(cIjvxa5TH=JQ){&tQMIu(ro3WGyuO}r_U*qUFML?7lH zy3gv1M}N{Ti!l6+yzS-H3%vTK zZ?Y)9oy4|o9~g~#))Xy7k`v=&3R{2a*TZH3JqiXg`bicQad*a6y>$>tj2SX*8=C16 z7&klo?$_Uk$Nj9_llW8y?*EZ=)nQG&Z=dc^8U&OWF;YUhy9aC_sUVG{*8$ZJm;MIx#JU?T^OkJsC55!I%r-~IO^OAv1n_q zhPzH_(Q?Et0>?zw7krjMZHE5(;!X9BOJu;6>;YR=+|MiZIpoLd*YnD!?Wd=Wgv@AY zgDAqD^%U+4ecTT(-!o!%+7NI8kx3;u1pX`y{# z|3WrS!JxLck7BeE-|=<%m6e&!w#CF#Y?th)+Pxndeq!_=XrOq|yx+{!owA6?M9$tW 
zGsInZVWzC=2!4FQqw~BX!p=sgd!w4SW{j49gCqTzkVOSVs9Autd-K(o3^i@SOqs}L z0VkL5wJ#vNW0Y0$XJvYW8Uh&-*kW<1#e4f?_ke#3u0PUdPH0?tu?Vi`lLprz$Ecno zKRgLzs>W}=ZgnICUv}dptY85-tK3QJ?n1SEiTe*uhl=@ForxF^w3s|7HV7#X68F_Z zF#LWah}a-?fS3T73dnjmDuFZ5@YcCR0pvuhchYiDqs>X=&PW6s(?g3g2R_^c1ViBT z#HcQOC<-Gzxtv4lKz|RLSk9~ni<-#>MdZB@)HA89LxN0+7)P_0-(kI2;wZv^b+{PF zhcU3b!$=?$rLmW0=##%6)NsjIzA7Qh5|64*z1YBd-pCT?e@PZs40cQA=2sejAw?kU%y9UVd(?d*K#|%rx?AQ!$ z9?ay^pFlK_`Ok<(d`)}a_78Q`rAmOGVfp7qq>3!ATS2&W;jA^h{boMaB7qT6#BgSh zF{|89x#L-lPBkt){=7Dj>rY!5bdPLpULFxM>?*7FSf;q9>^cTY=R`#53VBol+HCQf z3ABPLY#GLuybBA_8_8b;LVQCUl=f(D%oj}|78v0BRS#(wXMFiXzo|jbJJRf$u&(*q z3B%_F!Oy70)V(B!KT}ZdSKmbew%rxk+S>&mxJexjSUkS8m<0#%Y24E?F6#pnj|&Y940 z$@Q&9j;v0mobc~~9}|SusJ|YXm85k2EH>@<@_lFg3~47!xpDgLw4P@>cQ4<)NK8yO z_%zWxsp&EpAvs%;nov@EQ%P1&3W)@$!eYLEsaOGHyR$WFXwC77e|vyXzzGeM1}g9; zAJ`TO7AOzD6$2&^U<*acTIN}1WP2*YbM2*Ko#Z0zT|INUoCzYruInl6_yQy%g9kgl zCMyWqP;(=P@@GzXwW$GGGdL*PWsCrt#*Lx~4%u^yf(otLOWLI`fO3)-p-!2l;&cu# zhZNpylSDqAEuoH=l^hs2S~U^Ht7#_#?&8(F1m>Ii#s~Trx2sRk$d{`JCQI|hLMnrK z--~SAYPf!i0=h4NWs3`ts{)eZx`k>mDo`{(EvU_dF><|rUMsj|qPWPzB8@RJbX7a9 zXw%<4NXAK>C2D@&>hZOV8-c}z-`36C}{D6xNOfjkV^Rg+`PqnS^mGMIXFp;OFt zJ)GC`i~)TWo6<3@G8pSxzKnjK{)0*@&moI;T=*+z86c>lVLZ_8S2J6DwoGhm*-liS z-`~UIy_cEsQ!K-OkB*u_Z@9hQG`~!=M0?oat!7h{gO-+->8Vf&GoZX!f5|Lx)Kq^< z)O5B8v31<@ZzuY*-8nK0Yn$#g#~W~NdbwS)60<9XzE@zPbRJj#JaIa6TJ!-HBDtD; z-OG)dO*N%@nl2=;wk1>VzF#lRcfR4jV`GVcQ8Ep+J4Bbi`LPkWO^y~cS;yt4COwm= zy2vzz{86E7+PSX5hE>dW{Mf~=X)n|LaV^xN+AlC@5)#uXiw!PyP}9;Vln7$&blsz` zY&^-AWRPco61bZz#KHmxx{64fg5<)Q9cNmdnrf<9qSScYi!Z6;<&}ooinbrC^2>qmFTchx#4sEh{Hv+DdPDAgSfiSo>`~v{tM$Xti$fQ>wqUx;o z{h9H1kgE=-dMSSMzL@_ktZNR-x8U)f(|t-sFfowM>fR0<)9f|vqh2+sm+G|*c>1nm z;QuqeE4@utHVk_d>SStp;e1gBq`dN0$NQ+Rzrrs5_2u{V1X_vd zni0U9t;d4>vrtHvw7vkC!DFG||B5oeL#^(=f_eY)3F77*2~-9fe1=rr)K>||L-xJ# zV)O%*HTDDAf|B-nM>8HjN_OOYD`ZxDSeutzBp&{l*1m#=cG&6lNOWEjRy}X1_m71k z7~&G$_l*N_pyZ}b+R`(TT|@KHnh+EfN%-Qt61PFg4rgHM)VfkJ)36yKzjard6}dO0 zfe_jn(9;H3?c&2`6ZiQLnmasPkgWb~K+#ZCo;nvZ-Ax*G6p3gJqJg$Hk|ByPg%gaK 
z>D~zX(;c9+bjP2ixHy6tLmwLgp4Fq`h{e z(D99!OQtZOm7rGp(};XKWePK+xw9rs@U&I*U?C$Vcbt2)`Hxl_?65@`y432HKr?eo z0?~M$!2s3N%KA0zRe$+}z3?%)}}w>9KFfm|jX!KsY+VRm4Hb zs&wllid|ze$LOH z4mJ^-_^arq&0PiO8a(O*63H+5nADm6LP$vLj=JGpp0qo+GZciy`WJQXH zTO$5IyX0aQg`I!2sOh91S3}zP!BNB^dmqV|eL?>IY@YC~DQ=um=fDGKC?Cg@Rc5AFOmm)zpsxzgB zGe_H?j1as+($+m#5%3lhvC%PU#v>ou`%T4qz5{nQM;?UZ4;zmN#u!T~Zec+bEA&!X zv&^&NmjqQ5zOH4)2GcO`?7SQIv!ri>E=99sSV78%*}A%Lp_SSV+2h8eU$YCa!3oDE zDgcF6Yn%0R*-wYihs;nS5f2iiIHQ&`{aYMc#>B!w!)_u^TSm$^vkKmwF6$f8v+}k^ zXV6fh961c??9+vgOQf=C$k5wE3}`v-tW?PUJP5IoD<%y#33Wnll|GFkUt*ER8I57_ z^1GCeyjJpq;_zaFMf@<3e)JK+1AWs*QRMHXkhyqBgZqL95lAfsWAzP4SEx_SEJTyA zDn4<7d~&TE<3xSeSFYQieBpy6@>@CQv*_;<@xn}Fn6i4@Dn{It`+JsM=5t0n$VZ^B ziKQ~&juZx@m$ak2=yQ7bWV?dRZAOC&-w@slp z3wCd@T8TbnidYzVZ<(OU#rL#VQ|kv0!Mm?xxf3ZdywCc<-n5`;i9?d(NKorv+b}A_ zA&nW0WZ52=SX8E?qo;PRlTLy5&D~UE?oFTcm}v%`5Gil|b}@ zi~4&4WQ@QY^B{pYRa$*hA8f<+dl+X2U2{&9BI2`ok^BcI5{#@t%6i`m{riy6!AHrA zi6^(7>T=D6x-zE+U;N7LsGa93wtNb(Q2QB2`r}()vjFepUq0ao4$SfM!u9yh)Aiz)ppYaed2C|3&c{8mBKNt)eK``LD;(}*x4$~(X zg%3qvtqOM(X%0KdV>Vo5*?XFTs@aLwg90C5JirK>V_oIg^{!MoiH_h%-B1n6P~nB^<7{W2KMcRw$tZ{j2W{REYHR#k08afOT zQPbA`xz?jDYti{s?{Sz1W;A(1L!;Q4mVCz*n zQslV}8Y#{i&kFww@%iNASg^?7;p5mUB?=Tn7Kt%yAf4g~awXM7CrLiQv_%jxk%-mh zJWu+wm+YUFIMMHeZgED)rmt3Qz8MiF==53V{o2DL3QJn2y*A#Zs#lK14mV{_$$#$! 
z%5pcIiCi~kp6DGdPTN>3O(zcM_eQWWa<6;AdGBV*lAIczXSmAFTh@0tiXW11WIXZx z2=(Gu4L!KbZayhxAg)yFO(j7t_l7BKO{LDcW1$!Zy*l^R5+Vj6^Qn8ZAu+g9y}=(x zZr!I&ZCXuqlC{;y0{2_d9|V<~Jp%anWK_OSvt}{RrtHOW=4qD|Dkkoq>T^ynJv3}I zFJ0Wnw(9R*;^j7wyvKxw=XCph!EIIbpn~G-zL#pgzRQ`$>%FrV+S;taorRz5jNkn# z)T_O=HnsRIC2%gp4JR;vJzk&pdCv-aCFhr4z`FZ*c@X&jWf9HO@ zdlJkfoFFmAQ=>CP+eA5iblndv^#(fa_ZPp7mJ3u#z>=Kdh>yhY_1pG+pCe1vq41tMAl2!(rR;`d3xvk3w~q+KsWoth-?&vxLD$$zV|JkUoA*# zC$5V5Lu2oQFu{7^s4EYI({czJ7ok|7yHZY>&cI_K%4Q&p3V8pb0UeBo^l3?x2nVMc z*xmAOB@`*NPf?QE2w`M;byxedqnzsmZveta#)m{GuaYF?Gg^`K- z6M*mG^w7Ui3M~5VXa1`81rub`eYxQb(A$n_Md<=NC*xR7qaka>a83No_}Q2)Qm87g zn_-1YAM^%#eR=dL?6d$6m1l5}wTub%R~0w_S;c;87|!E0-n6rAv*--KM80GJ&-VCJ zR&&1is%yH9>zOrRN6Vl``P)n;lwHsCg@TsGr26a&nQ5bNuxRf)*CZALh)e!6aP}sh(yl6(4v&E$XE=36gwC{NjPIM5jj6SwHn}(h}=b(u2IWfbY zw>VCNHT@%^c$2Z$#KBtvLM=}3!Nw(86IrZ#RXSgIC`1J`NbyFmSmMr>TCR+xEEpFTP|3rB9?QKW?7B3aI?Ix_NvPt! zO#!rz2%|I&(-cGoTMFe#gRxQC1C@9vdM9e11Ex{FhgW!DCX-oPOQ)aC%AvVkaEg+? z8RgiYIvlD0WY*gUTN(opD1Rt-L-v;&&i31Xo-3q>(jpUCzQm~U6C??;59aN-r|$^y z7%>7wtwFHM+;bSNH4&sl6jBnY1`E|plRz;bN1PsXAsz)dq^R^UeT9Qapx~?^jj&9z zdeBFt=)M?EDjv!UELG?&d{xTc8|KVDO+7lk_b@f#)?-Bs;m`7D4ieP$ZN}a6z}lGad^=nDl#>oEQ!(1ikpm}V1YXTd^Ce-C@}~M zC&Y6;~}mMac5ggdUX# z!lTjaYpg(5b zwiuH681u5E@3B?q1du|~_ zYefUe91Y%P6MtJe%DRKjZ&TFGQS(_>{9!FPDFJIWQ-7`a6j075= zP9-||Isght9&5U&IJKC;%!^{MD=%!tH)iX_`q-~f|I*2z4vpuB<=H_dOWR>MWp_DV zD+e&9qs7U1(|^B$q0JAbEbc)QiTmpP$oh!gaGS};h6MudGw(5xNyRDSXf$Ae_*P$0SPsyE(JpsJEkc8-D@y#OI&KGC$yfs5;#F>O22v`+-WI z#&uVZ@o9V`rI)#rkN|U(V~T+Dh_ zd2-C?(-Fn@4)2EVM5C)wFc7?f$3t?nd%!I3!2HCY@1fOs>-F~oQmM?y^3Ho z*t9#uggtt$*`H`^g*no&3*r`k zX9>$}ox-Mb-T&KaZy_ipJ|J-UQZuuBS|W@9y`zRB0I^Tq*=6=pTRPy z*;_yr;1~)fY9YhKyBK&B?X3F0csg*vQOvU(5 z!u<;`veE<+#0im@KrGG|_pGeECqiamX`HN`AU(NNaKRdusVIZ3gtVV-B32MgC9SLffXZndC_v0Ti{7{O$cUVRVkpsVn*aIxO&TlWpqP(kM%3838=+o?fXTJ!gXs2m=&2{HbGKzQim zOHbuu`bfX5AMs3Qw)x+6utFzTLUU}EH^rg`eNzk)gy6Sx2LsPfjhepLjfOdqLwB!( zYDb2N6=usN1LuzReErNeVRzTR6{tIik-9HcWv8l0e^r|N1GpX2Xr{<>*{9Gu+E6E) 
zNLoj!OtZR6SH%7(Egv>&%uxol>H)dn$DO?u4TJVyAj5(nE#45^yUqYC=n4Z89?47{ z3g(xm7FB4d2nEaH%sz}IgNKENxt-?{OqN1WzCaycfCn>4i?eIx%K8#fwE2w+r1lNk z!R0SYi1h%Nb)CuP9hR>4g#_Lvy*hdMJTj6F`26WH{bM?|+_OrptuCXe zXrFd#ms)4O3|nqeQyJ-BIWlE=P7QIaq+;{O)v&?#*T+kpz)-3F<#r?hVyc+>Yn4S? z$NAFD0&_qN4xOts2`FqJ#uR@;`jQI%b}|%Dy2XM$Df8#f?Kgn(1<}mQ<38u=I&4@> z4tR`|ZlY};f(QS51idGOT;Hq_?99r0YbT;G!Em;36dZnsm)>5<{!9tsRVH&tEs_==#83|(`26OoN5i0`KH<1>6QcO`-)AwK5lRY<03`p;(euCFO$G*!b z<(8qz%rRMCWi3}9cX@3!x=3CdAQ0E~SbmmVmvj25p06I4ymx0n5AbZ7Wl?l?SQ_qP zn#PN^>UC5F^njX%QQmqV0N@O;mb*JvwHU^Jue-Zw?QRNi@g8{57Az{yE1X{TIJ}=s zQz$9W`A5Zr;&aJ(KI)Gh>zTU6|7kLE`*W7-=pyaXQ@r#tg^TQ9=u>>KCvqz7X|tIZ zYPM{m3B|a=c;Ij$iqQO=MuOraW#5sH44Ox5U^3TOK39*>U@DEvGL{Br!?C5KD)joB zk^C{R!U<%qnOp3GoTA{+)C~>IEb~O{qb;xh;@eo5ChcDrw95)Tbh~d$(U>DviSR-I z6IPVwNfDw+#63d~DQTa^fW<7C3XJ4jhwnwudlqw9P9s=zCE(feN}Xns-??-~skjKj zECQc;C!ljxY6*=PMR-a{_eT+Yqk!hoq)qV!|HzG>sC+(uHE*d|ot=U=8FWHZP*R|L z(!cQC;}#SZ7|M+sU0zqva6F-$GAw6HO4-YHq&gA((b2_evcR(Dg=0$3mcqQHbpRF;>TcX&nG%&ND583y!_*6 z4Fp76#D?US41}fXz5&t7qtE~YC+{fOI(!KUco~K|afylbWW34JPVxA(kD%bN=+%iE z?ya1bozaJ6&Fi!fiFEr@ zq@)1h-KL^gv-NYd$nj>Rwor)8gme>e*$1od32OZ-uLf^v-J%iZ(jsOL5WSunU>N zh7dy-IYt)_RB4YOol8xfA?XKCeB~SCJDe+$i9{jC(T{Fw0ZJs~(KRg+fz$$`Ki%`n zG)F(^mO3k*mi@@QH3+H=9sDd-$3=0h*tBIMsaor;rbV%Tk!63Eb$(_SBMR(HWeSnR z0GxJ5Rqn;8{xmtd*b@qj1Krz1_0Zph9}51Q_&dyn*4iC%hJmR!sGXn%*eFVw>4LgC zgeD)b!v@$xL^U1Yz#+GQytnaEGDY@aBuHdD6kvSCtUiS{4yTAZmmS`dAjkIfWsFn? 
z0ptMC5)H7`|7tc}obB`bRV$ajr@sE#?~1jF&j9NP0JO6Cu1;6Ss15!;jeX= zkU;106B+PXEq}7Ve+eCR+}x4z!D-N(=eot)Q^A~{+;5hPinrJtMy?VD+Z|Y*bm4aM z3^*$Q!~D{1xdZ!$~FFYOTR8%!;y>M&7J>hU;p$U66kqGx?2_k)}#UF)&B+Y|yZRe!O{Hko4(86L(Be$(-hBQ#_u9J=uW$Og}oCd4i zt3j_<3j=y86w+ym3|~{NbtE3x3bs!PLDk$0&i9W9GecwAIDg!+Ax!N7cr`dn%cFGE zwJEbyn^U6Dj?zajxI<7L5QSUzm* z_QF;J5qCY2T# zCiflg=$pRLI4nh|Y|!d(qVz|yC~bU+D!wz+e#`FjOs=U-R9qD91xPO%kSxVbi55fN z&)@i=h2R8(9W%BX^Zo`F<8p1wWR@6|YwN zSC(G@Z?Z2Dr3bD0l+cc9VB~49mk3VafkfKNuJCdOTuRYx2{p-pt4soDpk80&$ABpk zB~%&1Fm2hwd@o|AOiD}1Q$!e2;j(^tWBIAvJu<|;E0kng;S4Ba7lu($;{sOL$ z8JJ)x;F(;6`m-32V7PL9P&MzWV`lsH;4wGS5rcc9TC0tE*LO>`+W);D5gh120i=wl zXD>UxZP9&NHJR}q47eRXhRYC#E!KYAfrSQ|h2jGPuaYN#YexM{tMlA3w(*c;+P$yG1X7`WV(;;E%& zmN~R2hl3hZ3v)`{R~za>6s^bKm)#YA%sIwHDNlVQhfIddjQh=Eqkp_s_rHIsr-2rN zW0FjU2Y&TzWmVU(?W0(U%El7zWyeZi$RIu07$|bA9jdh)sDvKpKKd#<^v`J=$N4=A zQW8icXFTHEnUCZ;%OlPs<|f0(YyI&dIs*8!n_S3&29{%dM2d(qF9n%Sk{R~`A8UU! zmS{*!bB|cCFElJ8W`RA8&4bdf9r8AoyPe+=OIl?>^NVaWjk}-6TW!fOg_4{%;q?*U zbNb|j1$v$QqtRrVD;c-^Z#~{#e(P@KD+fxG4jL##?~@oBX;7sGii2Z0g9EW4p&)~B zP1!F3n;7Ec++RXI_7zW66DNmv6yj@OJ7~KQI^ht5#IHo`yiTv!sN@{Hh&8)yJ!fcY zxWi+bqMqaP)6n$bMbZg>q)V!|TO|Gyx~gV9k6oo=U5xgFz%l=;{Vd5**fOYV;0c%y zk5T&K@%V5ME_NK-#-V|o4ASPehXq3HiNs;5Zt%>du^c3+HijZ#hs zU5$KXb0rZ<9IYf?Cc>J;Dv2>mhX~tA&BvzRki-UCj{bcFt(SeyFJEH+J-qIJ8x1C6 z5s)Z&be7)Z&-iHn?Z5e`V8%6;N-P&A1Qz|m@ZykARz7=qH|`DP>rnP}-VCyxM>j61 zHY0+?CnSWZdQ7ldzqjV66MZoNuoU^hGfrkf^KjmZMH{PZJcTmgn=3whNsn8^Raw>}M8dnZ z`NR=*2`f4G4Ei`iY(r;h4-|A&qxPfRfABtAjC>kD6Rxt>3j(glQHL*IOPCliXLn0> zC*ejJEdgyYzE{p|5ihN%O#c_yoqXMy0~9bW#Jj51wqTrH0SL8OFdC?-B+Eo|oe*ax zNxIm8CI+cXi@n5FN4}xVT(MKmUJwQbi-Cr+0@TSk%*?V}4Hp+Y?qJYBh6dzm^X<{V zF!GZL3Ak$%D*!mPASM&wr>6g8`+IJsa$?aIm#V>hoy^zHa}b`i9O&~fbzt!(ietho zbrOlV^Plav=)0e{xr&V^p+qeTY5X5n$#UL+2JrjfP_f}D>&+6~J^l593*K|uv5{8V39~kJSw8@D3(9O^G5e5@ncg46Z~ZF4$vc@5&?!&VA1wENpt!&ZdKdkSs8vNs25E6| z-ad8@>r$ctmC3y+|KkDNvYfXV5Yi;=W3>w#c)(Q1kKbrSfFdqRc!e^o7}45_1KqoY zc|vP2XQ6BSJnjOxvxsXJ*U69PBhl>*{aK>UZeOKrBIuzS&@Kz33QSaQW(b&Cv7()z 
z6XaQb*7I}@D7T5t{?yZ?nCJ#dPdcfaV1GQM-SF+J-C-31fF!M9 zLZ9#_t()4grezdUK+3VoAyskZz(g5~>JD2z;xiFc$w=tWxkD=~4E}|$=k0?*sEyPKRDrJY$07U`aG%8a9jgC^jvJ}7RXfCPI)!9bK7#}}uQ+T-i>>V5Q37mKNg0zL z{U{YIwAG`Kgx~P1%KEO1&mRW)zyMBOz#apRCWG>r&r@o1zSj7}6B5`K?Nw596GV?} z)clpge3U4dCOv|;rOUqf(ZI|c&6dQS5uBo=+xA~N6#k)-Kj$Wb%P>SR z!>F1k;t5$5dxG`P-mwa-i0y9kA=JDr!oYDnGT=GDp#@r3gsY%k-`xxxl`S<1jW77aBu(gGx;#!i+x-mO>e zJ}-cI^fYn>o_k}tnKhBf$`%ofStL((2bV-2wCD$(cfn)!)0orSK6S0obk|pF-!4?` z>CY;54MYS>I?u+xi3nCGQKOaziEK{PP&Y^gi^YULODLUwmeAqubEW@+aTSO>E^Bt{ zqe5~2exHWujzuxU;u&Hc8K1IGe{U2kXb$!ZLa0Sn1h2q_WbW^}h*S2|bw+X5Koy0V zzF8#HM@IYZLLH%Py(+$iS!6(;U7@LUUkoJlsy7f|!6s{ycZ*zkOdVg{C55Ea=0)qe zBrTGS8i>Sj)13E$rr0ZbLS+JFi-APy5X56((9tuKvQeP-RItF6|`a9XCJn&D>=w?G|r#Os}bdk z#u4Dw5X_$BnP0Gh-#mJq5;%V;wVaklkiOzFn@3e2n2{DsHdyjsiF>FP5@X#F&wu*+ zUkf(0xj;&lsT&M4-%|U#VakxapVJON1ENfd25LPCUZnT`Xa&H%Ld$+KhBhXOA&Sv> zERXJ2^rZl;Uu@*UPXo9hxVD(+&FsBj)705VK;vKydfy%mthXwp9URgDG!D8y2-Rz6fAah~bDC)od1X5(D*W+x8y)-!sbdvw(iKLI$uaL^q-l0W|BsUJ1 z0hhso77ik7#_z$qqxi+Qm#Nd5eK1CFPJL!Xw@9@@|ARBk=@a{s_La&nW&(6Q-woaF z-uHDQJ+%)n55m7Nk&V)2(68zX=QmWrC0+vO7!V#4P+BlGTp<~@A6#(LNT#x;x2$w< zt|<3kpeT*E#72Fm$bJWhW@b*bhaYB6XwyNv{UN;=uw}qiTNC0<2sOq5({xWF&i+d> z{{=#3wHy|KN#spHL-6Ep1^u&7X6o)|6L9G1=)=CxS1$iEW|cwLIxYz$8p&!Q-~u3! 
z5v47!V^UU_Gp(nIX+df*ea}a|3gjw-9*hB#Bg`-UJCD(xoTZ%RE+%s84@@&?B*#un z%2M?eYgT53OjhRQb7z%qDPTkhNFn%%F$f8)c8l=es;?5P=HL# zlZF$G`irG!5gRgTh*l?)6=~MShaRm`W{Ozy~F*Ik2MF(7*B z0I=GK(wx7Yt_t;4yvd%twqIogvulIeNH+xkuz^b0rlyXqWl-gl#WBAz6}tpQ;7}kR z1g)J7k#d`pEY9mc^7_t-W^0a?debsu-@!ejY5a`^{g@&Wc`R6|kUjzVx<&XgKaPLh?nFx=T>~T> z;uIB>Wot0SW5xBmjqO03+*cz*BGIlj-JmVEZ0rxF{pex^Aj!;}9?={EWbrg)9H(;f z=>*GYaAAnqy)u93gQ0~3RlOn`fw1NewkxO1lVS~HKyLCaQACB zwEP!Uh>(4XGVI=o`wEg8igZ|o1kg7OL9e+$OE&E~^1KXduH5F@jMpdW2-X>!#bQmn ztR$(uP4t2lgo?^QnXAyxG-H&4Q;9hLW-4hiTh#d#9ty`&9~-(U`#1`c`c}R--f#Z} zOJXl3vX9W1?|qe;iusfTDofepEMbKsI!r1H1~{|Dg<<=w4o05Yi_}_Y)JaR#E@$5_c1fu&^V-A5WE@ z;65e3=E{21{b(Pw9A(L_PkcwWi!1T>^KJl$y2W&mb`vR-HI5@gysOQkrb0Jlr>>f% z6(G!Xg9%+;9>lE3BD?#u1-$*i7|3Qe#UgQ@YQ$~?A;f_p{1q153#Ur?bW0{biJ(yU zKm=Df72mRdJ|spR1Wj0xzcE+L(d@1ZD@tP%iTv_?Q*%q3oBgMUt-Lk8=l8cSR5wlNB=C}X**b7sG_j`CF2%qR;Y*Hbh>vb)xU7i zn6Im}edCEr`uOvKc8AE!zSV|H-=ZXc?yGy`NUXgL5Ps?|K|Z}Z4i!oMKMlQ|#DIpM za=uY*-29`$v--2#H6bUx2vo(vF7Y8lz?9Q9C?bv-W_GZ<{y>CBH94JLrdD0A&K%WY z-M9{`2(q;e1>za2445X)xoI&t&>^@D6D;SK3lSF~TCrrrSB?2pB*UjtNnH@eKTM(K z$~bmpSEtzF>oYVq8j#u>N_bb4&76J=PbYg$h2z$fBTn`^=Lti5OX;M;A~`lgzfE?8 zxp@c;)y)d;eE-|}`;U$Fm-9bw1R8DWeA{H2s8uck4Xg~C9^YI~UpqG^ot{I_r@onzYAM4znGo}fV_ZUkP;~B zCq2aWsrSz4jsmq4q!;@P>4dnUzPgg9rbKQSB8IjGm{oucgA56Oij5)>qhux{hLASV z>(JW;9*n+?{pZd>s9JZ`g}DRx273~i;9=t4Yznnc2r`STFlzy?kv0UR$fb`>ohX%3 zYTJz6n)F>8kw#vXQh;mO*Bgc~AOWZK-9aH$YL71emly(N z(EZ7SRAjI(aIaPlL-%!wtrbi6__-Clul}#8E02fr`~GF$Nh6tLAI8{yjHQqz*%^#P zi!EgAQVfw@M#!MhVC-X3vM0rd%2u`*yJXA$A^Vd2?)3TozOV26HTQX*d+vSaK6CCl z?{nVgsnUwMX3j#@CXIAxcii!=QL&U*PafV4znH|2h=aqpVGz2XX}WSs)~Xwf;fRun z8&duy{OOW#Ob5YdRLd&*^$3W~YjUEybsp_5<($wq%M`P8E$&YDlBgH7-3*u2rcMUyA30Q;~xyMlJjTbY~_pQyf7*eTYqbEHP-Jz=*t9r`3%z> zMHn0&tF)nR^Exew%eB8|^g4h>U#(pOZdRv(8Gw^lovtNV{4Ahg9+pbetwIu;RIQs1A=~IC-d&Qbl-q-9zU>f)-_c6(E(Yubx=Z z6M?1O>S?i38jm^X67Axc4&wV3Z3ShD(!2RPAQeQR-r5!|F1_i>%|2n=^O9y$MapCF zH}?9+Q`y-mL!5DKg>BFCCarV3YxV;OzKT#og6fLsnn_7e)1|ot7vaz8GZs--rXRfw 
z7~m@tYr!(d)bdvEBTH{r8r76}qnheJ6q<)z%t(4YQ@y(-e*dT$&d>I!wZDy`Vfu*C zIM{smXi?}_Ro0O7J;J_Gq1062WN63tvQ8-gb*n#^7gHqti`80Faifwfe1n`t*cNx^BQvlKrLAJgrj z&-$9CA(Mr(l~O9O$sx|)wbI}u8fE)4hz|}u+c4UKPyuR&H|OA@I)nfGvoAnw(df#C zlDNlb9<+Z_id4xY(#M2dZ#>3EWs!RRpt|m2BdjJzpSGsFfZr^nt6YNpp*qbd?Pu&Q9xY^<^X@ zNY<2DRmxkroL_cAX~Kbr9o5BWEUEG-pTj3F1Jjj7&*SZEBG7%6iRS5pAbk|y%V%~A z675J4PDzWgJXxz^wT}V?TaxEZp1R;~o<#G@v-s7`@lCR$k%30~zO&r!C}uWAuPkEL zbL=6}h@GU02>`SPp!}pso&1!xaF(+e;H;Wd#6T1j+#1q-r)r_-_1V~U627HidjAB^ zFZu2BAc9Co!9;mrp>k4wpaM<(ni5I*0kW40M44}po&<{MOG;|Nc$y`I%@e|_ixydg zMp@!&U(_o?0Z+rMp*7o9vH& z(YJd4*fu5;lnB4X0aFQPbtS)Md;w5yjbu!Mhu2c5R!z(XL}FkrTfk<$=t46c1sy`GVS@&9T4!C!fPX|)dnQb@yv5+3js9SJs`*}aN_dDi?!CHmcqM7CS7 zbMl5&gwm0|b|&rkOshRl&E5Giix1A;2?8JmHhJm&#Ro=3*(GFM9%+U1g=qi$jM##P z)!!|GOys^8qKI5+r8C+#n_%_EQsPH>kQwG;pk37t2N6m*N%?NV+@;X8h&G4s1*}=F zsD!;EM#p~pNPPI+;?oJr5yAM56FVa7=#H9G*XU5>(vThzSNu^ju-9$7v>l0TS9x2k z5-#$i%QWvBgd_HkPOc9g{nh`0J;>6=956m1rIGRjdL52{+S&Usa5T};F~D>-ab`Kl z$wT+0?IHK`-u<}zSs{LP`!u%9XC&cG|4%HEFLjuUUTzb%K7#Ui?l*-%#Ps&9QEAf; zr-O!;Ki|-{2#N6(PCE2^P*#S_Z5njU# zHo_Q1ZsH`zqQ7T&6vfLe(UDsHeS$@LA}}|6;L#}cBIvBJZ(s9H>l@}5kfiBI-yFBW zaI@FAI)Z~pY(QxWHFb?Q98spVy*$*$^CA-0OyBoRD*J&uK zhOH!-kF0@a$m%KWEKLFDI~NB-E#mLAHcwgH%7|+VQcgT8U#%mPW5u3yQZnF;%78FS zVjKSAcv2f~-d)xykU#e6<@dEpjk!pVV-Zq|N(2kX-{56q3>6pU^58 zwhrx$?{P!SMs){4{*SRX zy}{81X2gY;u8F;z2eN@|0p(gs4X-v3IR zlP9O0$FUO0d;d5lPcY0`r2I>mf>*p><{?e930_}Wd&YM>+L8gy&#_@wXi=rV@y)+;w27U3 zW5qvZqVaVIjdwUKHz@>NWny&XqZOUU))T0%mN!4^egE0_ebz$@KdoZH@tdb;lk?mM_2EbX|Kod= z@>7Yx&sjAO&!9z3Q#RGe`l9uJ6LAL%3x+$(XGsin#nO3AjxXokA`Jm5@rDLKGlVydw~HFS+r)1$)}3P=UXLFkJCT$9-iOG7^oSnpgjeS0Hqg}pnS^UBnU zhZkfye|Ej~kq{BCCgRPb*D;iLG!2xp_F(cWpX9N7R>_@rX)6tu3#+e}#M}@m550Fj zwCJX;H8~Aix@ewxlF1x+WwwWEKWdnz+RE@)_f(4&-T+!>4IPWE#5s?%=_=II*B=K3 z@SK0vF5dT&};90E|Wn+u=(bKx<;e8 zAa+K9HWhDT$n>75<&rR{6UIjJPnYT-cD{b;DN}FMN;Oh)8OiZnrCbR%;Rr{s=nEu* z*jV@9*=%~D9VTjHUQg46j0lgRK7ZYr*Iqi!${Nu~6r@E+^i&q13+7}QYo1n5ovOd~ 
z)pZQ44B(FF9k@*umY?7`kxtRp`<5{JEk!p5Zr+E6J<{eOiA5nIb|j4O!t2s2c}9mg`~;qf87+E)_nf3yzPyvfe4_jZbTFWBDiAxlIoRu%_bq|Z zuq|tY(I-L;k)Nqq%7;?|lv!(HXS*C(qCXBFOF;>#eR3y%pXrfsC z8;+X8jZ4zF)>>g?&a)g7Jl_)}U7CUb=maI55y^gHgFGe4{(gllsH1wuN%3{c6RVcL z)TIUqciZiuMtS82c|$ytyto>OBFBlmwk-={%Y@hl%mUFPOjE>_`QtNkwYxiP8$2k| zs1#$^cQhbo@p-Va^`)mC$GNsVIlMp3ocdFJKtmH7)_Qs=^-`hALN~`l|Gswe-JcbGD4xokvwy6Jg4y|z2Rl+F2)E6gAf=iwKDc!^q>jjeie8`|q1^I% zJ(FAD>j7i3|f*Dl#^rYyYq=0t-WPK+!=FPXMI3yJp0SyoU`)>optw7_?{YsOlk&ORb`ftC(25pM8QDZI`2+XXi z&DFyHqCZG4cU^sMJAjEKJO7zZ_fGUCwd3mH{|lnFy8dVQ9m>vek0yHu{5SIN6X0X0 MXLk9Ojzi@C0lC0k`~Uy| literal 0 HcmV?d00001 diff --git a/clang/docs/analyzer/images/uftrace_detailed.png b/clang/docs/analyzer/images/uftrace_detailed.png new file mode 100644 index 0000000000000000000000000000000000000000..fcf681909d07068a05f5cace5eb75ab1d338226f GIT binary patch literal 60862 zcma(2Wk8!v(*O$N4uRqp+_g9~xH}YgD^iMkid!ijpm=eI;qwm)_@mfTvvVq&%mtep`fGJcmC&#ywsKUQfEtCQL&(r;=xpkiw8u6=d}T zR*v$p<*Q8z!z|oH{h4uzQSsGMKE(fRF?e`+IhgR0(Ge2j>U5v%{5Slz{cYtUXY`IEK)d=`_sXnqP#Ov&VAMij> zo!+I(C(?ZOQe`Q|Zj{t?71V6+jJX{|eKAUtZ%=?fdb8e!<+zko>+rV;VCo?P`}$sX zab#GlY`N|`cc-P3p|^Mm!2{gyu3`Dx0&e!l9t)mbYJJfy{yFh+uMm3+z7_`gzq5Nc z{q+Y$DHFR+?(MUZl|OuH4fh=mVvdg*7pi=6YjNJ0Gy+1N23EE>YDZPorQd(yy3?px zW;ZiFkgX*UZfbEJn+=h-7!&7QH{#6{CIE!E-GA@6KJ<_ER7qvt|EFlFIN~!e(PEa) zq+lQw>SZX-{f^*b!m78rf-G!@|DQO9Le}5KjTzai)-qbUj1c5OEHEj((KS6bIE+!O zbXiJ<+b}ua9tx+=m6bt8Pe%Hb=v;z_*>}eSZTx6@G!IC&-jYRu6EKXxJzryk4Up_> zqr={W3#?nOzwb$_p>aOLC&*)}N3wp?cg+vi0wjmX1}5O}cbd;=A^2%3NeKIIVP>V% zcc5H5^ulJtG_&jLEm>o`V>{w!Rk-K6S6QEpU4DGKey{t1h@R)+RtFbi6zQsXyuQQ; zzXjEr^0m}(?vkxzp^W^K(EAZ&JoCEz&3vs}T5?Pqm-cTKdOki2(ga*u8R!zwzxb zYOZ*~Fd?{o(443U(}s$E3dIkW;N0#d$3TlR>!bv^sWF|TeY=MaJyd}BV=4atP=Gld z71m_GfAeO={O(0y;+-*O_gR&iy4QNkrm;N%g!df ziE|P6bC)Tt_ak~yDZVZjMzj*)_Ata>N7ySSW~0yHgidy?-s+U`G?QNYKY7N5<1>yYb&~8g>a47LpStTWix1iP27pt#+F=F|+VHIl^e#S-< zCg>0kotqb;rmxo!Urk)JkqR}2{y6&*w3DSJazLn*^H*?ET%GQht|cY=ViMD-4#Vk^ zd*ub60?Ji!r-lEdd;GMZiff`F8pt 
zI_les_AajYH+W7Le6sn>s;SSxu=ESq@foqL812%xc~hjY#iEH43umUfI2SpZ#aUnT zS)YTKa+x-x;*Hjb%!?sNYy755<3mTdtjPsW=Zi z>~hOk!Y)-EdxWq-S{(z{z~2o>yclVwIgKLEMON33>z?=m4V zjplC{7jJo~f+mHwyupqLu}ZzXp}ii!$=@LDLZZm}S?fV7}MXU`))Qr7n7R9=OE+p<_{4o7BrlX>I<3 z11?B^?u8y=!T%e_y{!30S9CuwvE7g9{eVrKLg z5}cF;^KtsoFEt!&|3{(fOD^;Ozw`#SVK*wF1Uv01Z}@aRN`<|8-GZFgUiju5L*d!) z`W}Zux*fUkqtd~Bi3WWUvh)aANEiMPTHc>9x=qDhQ#$uP?fx*Lhn!8EcLQ%E;2m!3 z1%n6Kb%)|15fgkN=L42J3p3uBCY3fl&VBzh$O=qjNQZ7zwi}S|TrhuZ6BOewSCG9k z-=Z(Et8I>{JCC3(1}AvHI4!FTM{Xpz2wn7=W3-M%GYog$i83fHie2wd8d zXUtan2)2ylJR)j0-;?O3OHy&`JrC^3rj8Djk0LHIg>po&aJDfzKRnv3)QJ*eD2+qN z_C&;777Hfb@PguKnN4;|W4gsKlr?Cmcl7cDs zDY~XWHJaeU;E&;xZx$|coAmPmf>Sm<-bzqZh4o+~o&7#EuS$bLAxsbZ>nSuWusGql z%};2%Ndwrj@ySjj3=#vlSSq62^93~1`e^;Msf(;(PazCLV?ooikYQ>N}8m z_W{EJy)Fzd2TNfFf6Jv8ajf7ZkvJ{&197Cj3Qt*>&-bG8KMoFBy^|FWYUXcYYc;}` zff+Pfp})n{r8{81h_s^hXrvXz3{I4nEO?3TT+ebbnCed#xK{m52wvc*d(4p1gt;qNyCdn25Dc(ZL2}z+GjMt3% zqPHM>s(V|hiHY<@QmI@<&2~h7_;mvDHbV%{II3t=+S7+x~DWn z2FbJwoa$UY9z}qDf{4qj0NtkUfz93_nj%H1iwRRZ^wRjvw#q}LVjZo z1z`3d@*yRUhUH1A& z=$lC1apWG!kmQv(=o=_1>a<~DzPN{;m=kI+yAy}M=j+iLFLLq&H`w)oEd!k79P$B7 zYoC;SHpb9Dqf?09kT@=h`qV&R87?Ug+@reA^12(B)%nf+2*@)L@^?XJBua^qv|$R( zl3wNuHDIx`$(`haZ6JL~ilny|TEI)9l?g)5wEKt9E2C{Lo#-{iur4|~l&CHj-ei$5 zz{2|gcCi`)X0YFXz!~5zw<4t+=+o4IkkUk__#5$9Z7vM+b z>mn1Aw9E(F*h@?~mU$z=eUX$haX23q1Z3+q-kC=V5t|8NI9(^KN4kc39^n4b{TYS< z^S&hwGQ^0)7xEY!wyY1&bm)a(a(;>7vfgte@IV4VVbw9-#QYwqZIRGO*ZZDqq>1e! 
z3wczy8;}};tGUk2A1-f?a`z;NEC*~Wr3oy@6G4(t%EL?UhvRkUml)`Z>o6=8_B@0z z)PKa{od0F3zrqmn1)WK1D21RYygGl%(~5=w^)z?vm>OP9F)52mVDU9}2z;l-_U1^r(iqT8; zL0`XrjKJ7)lttwqT5jFuT26^jVW;CJ8Y1#AE&g;N=ppH$AAWCDDfg+-5j9f0PN;)Z zC`Z17Y?>HP6G;-~_f2@s;MKAyF+GeH1An>-`KPu(I1;#!FMt-wo&8?EaE;+QQRP;w zDB>?(A@*fagl?7O+akO;$)$eM9&s;~yhjFJk?m|1n&?>$9A;|hM&C|8kJYbP94l1n zY1!ShNUcNJy-vfVyv+eBXBko>%T2F;sBdp4833$w#W)72 zqEvZfW?rs=-{sgO8rr{9$8I^aKQ&!o!86{9czQ{-8pU@NqvnofvZ&4rWwwTLRCTVX z9`ytF9x7!)nKzM|VY~2*gPgW^^c64n44ju^D)U8?mQcSZ8GZYaQ>)DTCG>IRo$F5HWik*^*3ZcqqXLeAf1<|nd%Hz7OvSt`@HQj7?U zEv(MA<8>*^n_xk7(@?^Ub&{V;4e z$1|i}<+QKQp2>Op4G27%M0;yko8~sagTR*AoHmN#)hUSVVYu+ON{2Ud6xaUk#C(N* ziw`R#q03%R+a8qzfzADB8sk+;=6JrCf)N8>{b}8Db*@{OzP9i790Q-lLoN6wQP_LlxL=hgQ3Nu2`+pQ8kDOw`%k`P4a9%%q;5H~ z1*#tN+sES42j}_F43eovvk%z~L4^z4i_)K3$7Bk!z-RBF7L0sl?G-upQ>C3>-zZwD zHN1P@@qks8ck^Iv1HSQ?sDI1@UIXF6{foiSIL1&y2%{YU^O&(up4-{$OE7fb&{8U-GecK3 zJ;*1iaoAW?JzDF-^U(ZL(ZK2qR$gsv>}i+YnWNlMfiHW;+ktuk2;31}c|63LaSsUE zV-sNCOJwhr`Z})Y7YU(y>SbhKem1J)g?8dP>%V+&wu6*12g|`Nkel!cI(Hue(s8QYsDS0Fdqu_yCd%1D2Oln=A`$| z`v_9DMJ#}bmfXLN9p8*dWSCJ~rdn%Gy(JnO;XATynBl1oZp?ab?8SX^%Ro-<)z5Ow z0Y_0!;M{kBc1~L?eeWQ@L#F*xRzRMaF;GV6x4r)@S=3r z;=}auPJa&1O|g1zAsWBDo|RJmhoXHnL-A=Q=-AezF&rJ78p3<7(U5u~(`ya0Fh-?I z_*p#Fb!f#Nt*rorynaXUj!4$h|Cib(v@?r%Dr&ZNLY9R7lo1(>yu^!<@lUt}9T(1g z%mx3#vu7Q$%2xM3I{cl1 zCSDEZqLbnYJZ{==xJS0r=#N>fN22GM09~Mu5~jma)3Q>hgiCaD|2N)_7R+RzkLGf5 zuJv~ai!nLOHrx7>j#3y%g37DeL-di79=?wI4rWIaXP2wPDuk)l5@F%B!E#IqmowvU zRRpqwE|eLj7^Ch`=e#iIWseUe{9dI&98K{-vrl;`a`0+SB6}-aC)}2%`qF-l6n{hV?ST_Jz@$zPPjcu1==!uPY zJIJN@37dR^|6QGH$mOG#I}ve^E(Q-B_O;IQTJd(3O3GVGQ6wz`!K{$U5o-;GDT5XF z_GRBA+sWl&O}YI2l4J96uDQf{-1m4pzfn=663mZ$b8Z(#da4u<(x5Ll>-$@62il27 zOlb{G@72L6_zY@DeMEsrAd_80AT4#z^CZo8KNOtN z377m`EL|KN2^<*|)|#-@#;OQrP(-Kw}FtlN8`(Zx&d%BonCr=UZPo zgydecjSk8NhJBJ-3o8N9EFW#~m%Y7JKuB>%b`)sOm4F+oj{*$dG{~IBrrezB(ynac zza7}(^8j59OaQpQ$`nf8iNihAkolwSa{hW8Q{cN?hxzo0)276jA=wLM?xHSzqaPiN z#a75K6EyTRviy&?3GlvRbFP>0b`aIEGHiw$-IV+t_VonuDo9{f)3ovZ_2ZsP7agNE 
ze#LD9h+mB6roSB>y0?i!K4!kj*y>Lv|DU?3AJ`7f;ck5gJuA3aM@12(HKih^nk+yB zMpa{F0tm=9qh6rC^5g3ei;sa0nuw*9Q+rV8DT1RwUw@tbEatgHV#fjd0H!~2iFocY1IUnaP9H3&RAo<0 z+}1=b=4LNI?~(0q|DwZmm4zwf&Uwfu2w(c|y4AmaqK9o;NS_`!sY%iN753~{AL!~6 z@BPm76``gNsZORVtv=H*knykRrdN;SxkBHSa85g&SjcgqgNjodI#|ZP(9KlIGZTGc zC8I!CI~d?2<%o21Ym$mxi=>?n)*aj@ZEMEKZ3Z|uu`^Ow1P>7`TSzVrx+TEtne04(4il`ALmT$YO@$vsw~iL6*sVL!5 z_a9_qL3)7WHZKD|8Wz3>DT_Z(2cwBaNc$vF)>Sk^B4*mG*nJ~IFoNalq@_lZ*ienA!0U>22Z z{Ad8%&yQbEyW6tpI98-9`a`V*=;>r!0F>-`;!ERo4IPNCPHZDlKi=Yo`wGuxwn#hIy!CZ-!R z`l+UpLHrvhU(GQj0|9Nn(;ntI&WZJQy&E^|P!NjQBMp5hO!pC;+%4vsNoQ77hnhLc z!dcP(cT8WN(!0Ca-;d*agQiPbX`IZpNEt%s!p#m zMBN}iB>xM?r~0L%nbRTn+f!j60`8Z)F=cN91ESyWnZFTjElE||zF645i4%%yPCj{7 zLx9ivm!;9|M&2+b-pfQ}Y(?YoFY>P=0{l-4m?=hbu*BiL=f6T)fxaW^Y)NU8w8@$N z%o!;@)^EW+c(ml=S-naxZqu(+r8N}M!K*B;yHGCEXnhuPGJ?Sv+>YTW*ZbezubSLEG(KS-@;>L+A3-lBJvb7xk)G`IruV>Ep}E zE2Op*uQ--iP}4bVtJ`YW(qm1EV4T!o-d>sy6t_>{1>f{r=NOivqGg}c>(}!B5z0wM zh8o+tNFfm=97r0w?FTJJOO(FI7>fUm`@E!y7+0Dvg467yCo-7Kjr*I=m3l6>z-MVs z4Ki~j;IfV7e29RSXk*>PQ6y?!GOpMZA?a`o0A}5a&4{$wi?v>&_Xvsvl`IPOi0|XCk^ZqJ z{ESG-ecoh!;%jt`U|`rSOrFsayS57~`iAR~A%*N?^Hb4i1da?fZcqqR5{hC0V!UNe@CEwAYtmKOgPtwHfmMOp$8m^UwaHZsH3&@DVA zhO+5w!{-vGfjBny&a@@ACm0DlLTusBHlhlx0dRgSV}U;OGOy!SW+YY06Shl)4=%j# zLr#f7LivH@=JVF}PmF0S^x;q|;2AG@2A7zeNco=+>EQmdYJMBBj++FjU-CZ0HVl+B zNf<}WS#6x9lW2`O6epYH@oJIeS)-x&jQoQ{kpvOEAq;y-N5}v|4rqjG6T^LcYAEzb zp)ARfb!0zXj@*gBiazuGQkk?8Lq2EH5AWrG61E_MkbT&W4f$S$I+RN%iDH$9UVFRb z5DLl+`y@u)IsX*-_OG~71236G2q>T{5Jp?Vp)Dc{)F%qWpl668)Q4leA)FR5+Hk@X zK>KCuU9?8F!9#<&FjH%ct^{-&=6149qXA$MNwkFete|7IOFR#MHXVt-XbhjQ^G8`m z+?CY4y|bvtg+Gf{sRZ-q$?=E9yg`0Y9ttyB|IQ+I`MLXbu*Ngevf%UKLDxZPVfo?u zMHJ|ll|U#GS)vgNJPu6^XVUzwtB#{oRAw!Gy9XM{kG;@%4aLk4Xwb-elFzQZC-MWH zylC=!!i=b3YvMRrX|kW02yey4DZ(uiV)U-bhFuXk{Y^4Te5vtz{)SV+Kb-!rdb)S- zt>k(CsvY}4mfHW=O?wxsjF!*0z?;KqG*%z>ty$6b}J65J3uNOf@^eFL`6k>Il@y06%j=(+LmH z_za4pJ8w9*63)CK4@TqB>~DDG9Mv3!u9QDX>RCmMb-u>@sf&Nqdn6Q>@KN;(8&q9p z?e;T?cA`F-5OxXZvNenDe_9O954>csHQR)I$Xl2 
z40g+ke%=KLHt6(W8#OrNUWLNCd{>p4QvTcw%^pLG?I} z%IS{a#sb@^OEW(G1dCozD|4RBfOSh0b%7rrO2pypbhKA=TqN|#cH%U|v~%=8Tbpke z0@VECO=;Sc%b-QJy_Xb}k@to!w(N0<&HQl-s6Fyu;>I+7D&ifQo4U(GnJP9n#mbW& z*uQ>9V)!ZBh3|yh+3aJcP9b! zuy>%WKUnyOE?srdpIY58m!@F>auoG8O!=+#bQ`b6W3c3UHg$myu{z5RZ)T zml}l1ba4=%VwuC^)x^uuBllX6qY0y?YdwcwD}%Rmwz;wR2tUB4M_OFsV}3J@8JXwo zY|WjY5=|P-s44oKgn@jn6r!y~5PKlBatBI{`BA#OYVdrSk*kErFo&%{%8nPngB>H9 z9sC|5%8TYnR`xnci41Z-%(QkM^JA8LS#O>ASwV|xjsaZ)U)QM6d_n2EY@U!I+obp7 z;Kt@mK>PzdX_vtgH2Uzj+Ae@ickSh8dZ7~O-WJ~{OR+fPLh5LJ=9^4XAB~r-Ms%7G z{fD0<4W6u9y_dCEoiAkjJzcMr%@wH{KRdkIk>Tn(|A_`lyGF}y3?_+4>~k2!5!5Fw zK5LLkj}F#c_%0(lfMf>$Ky8?SZ9dn+NGrO$1aurbX)hAy6O;fwxx?~aWMqvoGVquO3 zJv<7fGC~6bf)jTx&Qe(=yR?{;duKBW8E4eGc?~Xv#W;VQUTw#ebBJ#F`o?D}kiQq8 zd+NJGtH3aMHqMS}3Rw5kkVlSojo_;%vRC_Z=yL~6u81k%99!Dg{^p}W4PN zxs4&{Q^`#u|DB;bK|J(7<>BclQyG2-eri#B$QCvQvw z>H#Siw^M@=G7W?<+Lfxw^eS<+AL)-dq3%7 z3qR7&BpzSzZ@7J}?OF)^lu$1%c!xeu8UD0ie4JBgqyAIE+oqFJ^!hhvt_bFwzxe|d zj0MNt2%3xk)(@R5=vDEA`PutF`sBj`W*+Z?H#4SXV%)OPTgkg)~xOsRz~t z&Z?&E@h4p*{R7>xL+yTSOXq+ZyEH9(q&eV;p-9;gy6G%Y5BHYd7G)8JE_)!h3u?I zKyMIeo3dwW{f%%^^QC7D0APs zYx_aB;@taQ5*qnIP#!j+1JOIfKl6@@i0r-1g%KXf><;>t1t+M5BE`t@T8{?M)8IpM zd)_{<>-5*C#K=3Yj^LeJb0gh^9|Ovpc_5+&5$c3%yb$F~hjl&siuKr{o@K(@_44NR zb!s0}!o+X9&TNk%c_@h$^!g-Pki<=_QRHSpM;|ff1UZ?Y5PIWVH#yd3cUd4dNwr$R zyj<50eG_6uS#qE1x}8qw)?s9-(EcW-D@(97tq$WSjo?(6xpd{+Yx!DS-pKn;uUK`) zRj4&r1o6>ocqhK#2u%R!leyfL=EJW3;Egk}kQCdA;R5_- zFxLnFQu;eWWvUW>xIj+?-Xkq2`hAA&N%byXiWsgaibL%f)c58*>b)FU2xJ&f^0mXp z)*S@5-Fck4wQc>7Lw(J$BN;H&hW}UU!Rc&hDjC2*83%l!R1HfHP^h_}ql>(kryM=D z8*Jb=5MxB~17HA>1^s_U3xeoZKAG31<5@(4I`U7M0LsKX(~FKY2A7V!$YgfvH{BRh zPHl2|;(8nGiJB3(VcpHyqpvo_i5bNTMg$xqcO5~0;OC6^8iYJ9)W&t~!tnCu{9m@HA`+hk}y{@l9LxR4o6w7!~qGn`k1ds}nnO${3?}Am3N>=a~9+n?fl&0Zn^IQco?M&MdegpF<}vgJucmpd-r_6;Mv zR5K$!b#|O(>K~AzS2lBe* zV`u!>;2kok9a8`uI85&#ztGSaRmVYrhw)GnCaXuU!b+N(@+ylWzf;9+FS{?KQe=g| z0WB!<1E!TuLJL5D()WCT9`3O0tN`R($C0g9Tnh`Da%2R=TsjCLeYTu^B!k(Au8f56 zQ=M5$tW(JX{t0_pqCfk``m;*BX@Vj9HZDj>)75lqglXRud#aoN5ef!?X?S13 
z9ne_lR@L&?lrtI$bS|tKaJRVZajqg`@X|j!EhVr>l{8S=qX(AbfCcYkW4aocRWnEga+S!;{vGp3CeZwXOE9wvn{0>De=Xjz5&r-+x zhs+ut4HM;eKt6f56xK3I=;!dI3qh?A2D-(hz*!<8uY*BRlDbl@*Hf+LQ+4Dlq}=Xb#{ljIA*#WqcsS|Jh>6sGtONK{_-V<8h9q?ac){2dyTQ zfR_FV!^QlQG#eP2Ii?2C!^y=Y7j-VpVZZvqYaA3HG06O#Dkrv`^BnRvsdlWV$=O}S zIa7lMghz)*oe*W3T(2*?wls<3VK{gF5j1HZaEt`O1h*T$p4C)#@)@PhTwlqNHdy%g zIhX+Uv|3Lbn(_i&0%V#JK_XZzcLSsE1>iaj^!8#-RAkb$K}cY|gBeeuNb+T)o!7YK z+YQ`;GQjutI^YF4p|93R1xfiSdd2ja*!L28&_TX;z;LJWC07V$>1I0!$Mbb{;LqEe zyVoA~=7-;*-j1Nmk$wjFtXyJnX+Z2(p{kZ)ebgl+;EI1iWP6w67f+D(81#bod$W`((K_rh}B;4(SU5c)3+H$mi?7#4coCi!_L6?(A2& zS8y!6IZ7}`G>8thk23EvH34)qfPs7A7Y2$rcVyth8*Z5KgxTa)#6kl%Dv9sYy>YusPPzF=KUKnHQ~U!{yBZj9wnf%ncaW1)hogN)GJdcS@hLQ?KHPysFg zEhh}La??Z%!$;&==V;3Sf7v3u<{$g}+!3HU(C@KJdt~r|x&ppb6sY*?)e##bplkDD zr5zLv;<_AChc@$X7Gt+E!#0eq`Vi?{6lD`4viY$y&H`|L^I*$Z(2nb8(AU$CA!`o9 z4$d%i)ZE)&43z0{sJT;LfCzI2L*zFuXmUr_(#M*3_uUf~9FZ1+@G@=882;p+n*)Wf z#cx~t`^xKoW?O#230dr8y?U80mzXkDUDu2hv6hY*^xj(zuSf>f`&VUy2 zU2=}F5hfC*h=n$a2M%ztb*aKIr!*2K7glo8b2YL93Tj^7Sy?hu&s3cUa0lP_xSyMcER;UBatdj!_E zxw;0HnPD^2NR!U@f`mhyE6S5YGkgm9IpJRwGwR1Fz=C5I#yy+W`&z;l)ZA9*nEfrT zL6g@{+g#0i0*TsYJrQ$o)Fh0@_W{yNOFUNRk!36(Pd8RxtA68AnxV{kjY04E*L?kO zQsMVU&1_g1^@MPTjmMyOcX7Wy$fZ6sf5@!AWd`{c1zh|3j~QwBc806xi~k6+@2h$3 zr1r^ScX!OFJjm)*eaRWeG{1>6Gx_B+>~LV4>vK44n@jA9^d4Go)>CXqeO`-1u>${? 
z28EN#kqu;G8M7lvkm_k9C?~;zLhRG<`8MrB%-> zcBo74HdiS|-7kx*(rvDUu3LC|RrS)Js4w7Ko9)OII0C2TCY;oq?~RI3a{8Fj^gVZG zG{L)uR|pPy@ZwQ7eZ0e|(_pBzZ{y~*!Y)6|$f$4VFb)FoS$P&LsBjeGCT@0bbIFL~ z;)+_xSqqr0NFxj;Hv6dt=D=#V3^iX5F=dVj-7O&v)!4r<#~!!uGht1Zp{N(>4& z6|%QDT-|{U5`Kwt+zOgT;jqmeV^qHLu(uWEsN{C+TFG#K+n1lH>d(GDU?P!tVjK`e zK)!q5d@0IthH{Z@n(+8ulw;b~xnPlfYJT8=uU8dWUBrOYssH<__xS7F=ns`eMsYJH zN37_7VtXTexA~l7g_D=2XFc|4?^-Ar+qT1%RGCCM*5O{?83TW8vVKn&KHgm7nW6{S>!=nV}#PAKjp!9rlRcdL6 zs`dRV5}+D<*ocaZec9!l75!fUFFW*;C^*uKTx0BZG~rpoD0~KZyl2VRpX`cL{Db4KMvq z_nscjW{8hK#}oTiZu8|uvw+ft*5dlDz7cmttGLgZC-pBX#SCH>k=a2jwR`IRCb{q3 zp&>8#U6l(BZ_-P97ox#`E7!a)mMym5SNiZjWw|o{jqCC0Y|Fl$gbUZ;PJBtmj!o3d z*%?Xy$XwlWC`nCM$k+nkziUf$fWpK1v+gtU^vG2Rz6ev#-5MFP{gLyJp* zK@^2n(OsH3`6#r_d2*4Tj^v`aYVaHo@ z3g}>I#$#OgwXTp=RF~%LARzVN5Kw!wgLp51h=}}O$mG9kK?vLbr|LsumtGLpM-njL z!&=?cN*-wDYgnRib6R@X^1J-~(#Q-i6aEywwvi zt=&c=R|(`oi8Uuh=gg^7?`=cz%rQ!FwS&QvFG>)T$uq_(8kFgPBgz?AcL)u*{zV$r zjfIHpbY@u1Uvz7Q7P~5#P5(pWP$ryu47Ik(v$tPKD#i@oe9UnTvjb3CUk29D^T*Mj z?l{j0aj#zQ@1GqU&b8ZoEy(E3H{|?|4DPo4ZYpK8KQ2b~qN7f zy4^X$VS;}|gkD-sDzU20b^*_bi`9Rl_T74*PzQc7MG`Bp+XLwT{ zJh&^%vmFDd{46cW-H!JWXU^p*v6}&&Vc}eS=J%=VhVzFhFLTDW_kvwh`Dw;$O{Pq3 zCCh`w3fcSuxWnN)^jzMh3G)y%HCxL0&-q4=$esnZv|0cVpIsBIZ z>?`WJx+zdKM_q`bx^!qnngUK(P4)46IgTLQoVm<3#ne)$g`+)(D9ayD>&q`s9Cch~=^IVM|Ip6Y|XscKM=!Vs; z6L|eRXd*=lUX{Y&s`}QVZJ3)K-+T9KT;11B^=DabsbZ-acKyx@8u)!dkEn$WaBSyi za(q%vKX>E15R>&E(3-=m+LYaL(08D8onpS2PkVaJePNPQC0pPoDU@3O2j?RWEjlLiK-y(h(QcQQPi28iHZrH~*;;oD zBYIbnYsRnsZg^YVnT+a%9E*3Gc#MurQwoOY0^38<0c1s!>s|VjdP@1d_;5p9cQ%A+ z$M-j1lmSA}JMBW1GT(;>8?u$BqZBuh{1%7<#kB{f!!=@sSeM-uEjtz~h?lg}DiNbTbI-i|>a(n69CN6L4<5ULbj2s2Zag z{epjA(!~2B3w>upiIdKOYD`VIz*3}*1wNg3AJ&ELzv5gb0|Z7vaf5FD5dVX)8NS&Q z!{hzpSgyI3EiXzCLfNodJV6op{VhG`|t*4viBVCR#ZrCL|>n@K}HWi;Y0c`ynq>5lW{`e6D$ee13?Vp#Vq`}B(S#U`B6t`ZEx>qOf(yNOpUr1x3%%Yqxs51w6kn z!=g{wOmSj+2*4DOyA7`3V)r+)+FOA6`0z@*I6ic2z|c+ZnfK;Cy65F{rtBGIA6J{2ww~?-p z7`K+BHXFWuLyUFC8S2hc#(IN(J?KM6orH85F z5Y^3xwoq@RvVN_4uV!F~1UlzS1%GI!0<%}+Fap%7!xZS-hVJM8`a?t!?=s-)AQx&! 
zJ=v-%6#~>&+2N6`OR~7;fl}P!F6yxA97(VuQnxVp_(a~rV;?3+EGQKoSTxF}z3@iDLXTDxm}{(5Po!6G zvYlCUjw`9TFi5wa72b&sXB^1oO&B{tJezSa3rP{~u1Ta$td|k6Ak9B}XeQjaR6E@7 zGPwP;WWozsU z)j0Ag1Bt@Ykx}_?S%)#d9A4X-Oz2k(I4TYs;mnylo>x z*G82!O7$+Zc>3=|)^R+n!X%RQu9`}#KDIsx@#FZD-HDVL{cGYbe?0jk>%{Z z(2wk~xfxkQpWbK&ry$iAaZazvY2T?GBi4}raPQ?<~TyW%o-~b#s z=!Yt$LGq=M9Gi_Ro|KB8f-m2c*6`nAVTv50lAg#)7S5y1{!EEk_!+K=w*&dd^PQO` zYb{cR2x8H{@t$rtu@8Xx^>t}fWfJaiaiw{}$Hq*LW?{I8#1vI10o;iuN=aTLC~&Kl zK6y=gg4Q;Ohx?~l|3j|h<*f5Z$_n*K({7rNKTm@VlTSg>x5P&9EgvTNuqtxqf)dno5pgTfZsfxb zl*weHPA0DZ(5R}w20qrb-@S~wvdh{3>ZONgRDJdS3p7yo-MX=KK%+~g0;}dTJH2)G z&Xw*1wc22sqhY@S_0+hz2EMpxxBrC%9(2mZ3Dy2XQ_*$xgIquv$Y`W`z1 z?1B2pX9nM;diECqYd3Xjd2kiXZnR9&zA}+Y@-cYLlnK5A>Vi}V2Pj{7RbbT4A17hX zlM44Q13IBmx)2msjvSOuSW+Tg2YBQsl>ev2n*%Y*p6dCIHS6oqMY7W_l&O4So{sZz z?*>v~1YI(!Jd^!@7<;Rzw%RZ17mB+(1b1!G(%=rowK%1;xD*ZUZo!LdaciMi@KS=i z6bTlJTX1Vn-v4(l&dnL)yUb0--p`h2ues*@Ee$2 z&s_Ht6h^&%5uA>zod9{!RM}1n;*SDWc{3aiK|SYKQ{)vSmb!6PX_<)btcGi2UF>d@ zQn4OQjm^23wN>C3Y=01n&ZclV5wn)7DS1GL2!08sn)1CJloc7P!Uc;L$>yYu_1(T0 z22*wZqWUVv>{gRpSWR^0j2=mi!2BD?Vt>?ulrHxvq_IGPJU6s_63^B{L~ZV<@ zw!w4n$U#@iI>8iNQK&tiJ>W-yeW4>U$AgH>minTviU5AAmuiFU_2r}`q|M)^>4sUM z2~~UKF`NoWjYi!R74cCp5lHI%HjUA&hZHy5@|r=;(jL%IqlFIoZv8VFOU%QMO%bpf zRa^Iy6aK{mKGSn9?=1D7m=x$PC0p1-gE%*^U~ZI$>L-&wdv-72f-6)^j067!l(r3eDDv2P-FJ(dT!0{KgceC&zM!sQ!b?uza*B8 zxf!$x`nT;rbru>&Fe9F!7&0$ zSgt6^PoNBA?F1kj10fBe7sW0Cv;Yfh-p%y-HDdbrBzQQ&aeora6W+@7tMfviiHKE~ zjX&@QCNpB-HJ{Hl34C2AV=v~sKi}2M6u-0-h{9pL`L&fD;difY!+?;5iX=*gI)1OS zAP>Sw#cKKm9#v;SoC6gLrTefYVso)57>1)R*ixA$aS(lfP#kbA!mNvO=Og63vq#zC zt^3sRkmh-JOi=~|ZdSu!?>6I7D_?DsCtSCeBF4_W2di&uZf(T*ArrcDK2R{TKOTx) zC_MNGDBZHxVbaffu2T0ku)i{>#g!?igw=SHS3s}ptujE+ti<&WPy1>n5JOB9q82um z%Ml$g0>z7r39+!$Lp8kdjR0EG50dfCEBic{kXtZA4A`)g0l{?Bc^x(}jYe$e1`I=D zVL2tGUvuv-uv%OIyUo_RxWdAU00E1=te^u#W=4I{H_AIaT~!F*s6v6(Wp|^9mVM1G zZ`e03qq~N*@>wF&)K}Q$QpGdq>6I_u3gG5t&4T6@!6OfhrZ0!$K0jlsjd?=EaJkM zFE{(3Ao5-#9#lkwUq0EeAUBh zF~|j|{w*@|)S?&OS->v_6?j?{D*%JCp%6)>&_vV_ONnMB&|TJh)rz4LLOhi|=XfAt 
z5N^n?#IOt^dMCrQ!tk{1Fk9(l?6bcwh0mB8`y3$1-*37pp4Z>__x=sDm5>!a_r1Ad zkDk5xUEe_wM1Uo|6=o}ZI(f!(|A+5Hb8^Fj_hGR<@X$&yxzr!(0Yt0i% z%zFXy`)+xZ0A7-o&nH{{R|MTLtM>V`B9puRm&vV3zhgYBkn!6F6!~F6KDC^GYMJ(3 zP|bQfB00ie9j=_T()%i z(5=e`8l=-2wApAX8wtp`s#Zhio7K+tm$6*Q&L$b5TYaNQFaSAIwfj6&f2M8_NT+JO zG`jm5?crvmhFlJ|di`}|Mx?P($|59a7ESey}B|OL3x0m0a+?udh_IxSbAZ_M{WMazk7eC8%p^!V+Q2g`f2HG)v0&; z(nEN$#V-nlMgFMzq}wk`#+U==WS?KPlBxq9$6vrZs+GBeh0|&rA6ZT$qf6&s0fS%^ zes5KnZUQr+fIu8<^N(!kg z)HO1-mm%Nc7?ol1`MzP0`OKd7x2#mH_n)&r|3(L8)3FHF8x;Sl45O*_<@Qz z7KJ9*GSPs@f)H}>Zq2I<%_Q;TBv|#GtF1nZTj?uL&Y~DdgI%y7Vq8vg=kS5*!vC^M zoQi0t&Gc9DN$wv`<|%DTq&KAv(aG1@*={1B>Db$|AJO&nZECss>+f}AWT5F`GIhyY zztgLij*2gC+;r9$M*l`k=8g~U&ChFba_QmjJwk|iuhj7k^poezY<)@&me+8G0wQ~M zq0$s4sf-jAmUMB;=lS{A)53;afN_wdp;ePlDTV*KM(7Azh2VK|S3&@y4tvs@pyF_Wfpc z4#aaxLb93B|E`O_G0MXtbG924VRU^#FUCu>`7))6JJQ+TH_8@Dl1+ujAEjcDSKAU| zixhk><3CRq{)KkICb=!i_csDM7m;wMhRlXufDW^8{@OLI%{>OfEppKI`ST+10`08< zjwM8VLh8QOU)JSrk@?2w`(%Ney5>?UD(i_&?Doe5#(TMJ071$<9BYk#>PWH>*UW1y z()}Y!rH^29@o7a=l+VSR;4mvML^M=k>z)bNFsB8k>Jp&(nrzssv(PFKI9B=50?rSd z^lIil?L!w_x~>QMeTY}$V}!65&11ZSV{xcxQ{vHB#o2yw7kM`LjG=4hCxpA)`ZjA3 zxg4znv(-Zub+iUp6#b)RMzU}0v9Q$`P0pBzGDnU(Et~~KJp0nnG3EqSaRf4A9bz!j zJ{StA$psQdH|ea4=0DQGi<`?>KGO+BvVw zX(>E2&DRS zJReKctyY029i}DTe|_b5hm%{O@crD-HY?M&--<<=BCycP3%ux}RbeZ>3XK%_`*6^buKV4F7_rVqrC{3yV&MQQy#B z?Q#WRddMGMV)SlWcH^qQ`8#P}1?Eyi1`6AV`sUnGq>h~|mWS0}dFHbhqKS>+JQ?EO zjlxe&(8xt+a6Vv4yyA`!a2v=bvcNZ*_vS|+R9TVrSY?7LMk>hbLQf7g0!S4~VPh96 z$i%!djH@a5<3z6w%8BwFWMYp$orG>+%0%3RL5@;Qv5%s~BHIeW`Y~);jmeujtVJHC z(DQPe_qt8Z6!}!cgE%8eEbiBFVx%N;RGZ^g%DXOtB7w;Yiv&##@-VvB!R1seY@u;< zEupwMY~+)V)S!nq)T&XCHWaklR2AF!pY#PvkpnEqP~OC@?%q}sdQvwoefdw01GXOG ze|^4uaqbOWKtd$HVrN?>8F7f~X5nyNE$LJ%oT=!~n+J$! 
zm~YALf&Av;QK={Dln01p+{L<*UV&Pc$}eE)=Xa*sF;v%I#LOEC0NF}IPIx~+$a?&Y z1qIJ}K>Cex64eOmpd-BbJXT|D;E@_3;Uv>8@U@V$5pTv}Q&i4$`T~n{QNb$4Ib87G zx+c!jG2VkD8r32-JS8j!Z`J!!ewZ5Fl%SLY-aP(NhTWKPM~SNQIh#xPJlrY~*ePHU zkKn@>W#$VLni!(YwZSXtgj9C{K<+ku&t1zhrK+(Rvct<-+^CL`PBtN^(^Zcmxpk64 zp}={z#6sOPe*n6C6c1{I(F^V3NFXawDY9Yr^JN%Ymw-8AyE--Qp8%c*trTFQf-V7{ z9@{k?Ei`y2ZJvUCj*Py?)7Y0_OcmsS7&Ld-!at24FR4 zr=%Hb4k&^UAc*tHn`Zi$GfM<1q_-Y)wn*^L&0%l`{BlKDBd7-&H%F2qcW^C8aco*z|`ZJq}C>OiGJ8Eg;J)A@;eDS*x zBn5&85$cN&82XNjOo+TRE=6@z%m!o0D|1`6-=l!4tS!tDTUOg|UoE2j9(ut6-?gDC z#su#PBbpqU5!UG#q*G|82(=yTGv<_kX-wxV4}Jszf9a7LwJP2t7Zv#r?sZ*$G=gwm zY+_dU0j2jzm97TV){LnYH}%tO4=OVedOI@$dV7gtMh>cd(U1u(>!m*F*bm`89n@`X z1F=OnNYH@V)BRcq5X(WH&g%X8p+G64J2QHaYb>zUQ(PC8hs&A{j(Oyi`(({M8i4n= zn{k(92@T%S%yo8RTCa-(u8h%zDT;nMpyzkJ>;R4*YyRVd+w^&DmF<*C|Jx1J`j?Qp z7cq>f@4oZfV=N`=Y@U9+G0gZh^BhY3La7vKS?Z<} z7$Gc!BruKU3FfNN?UtM~Ssx1Ex?$+eJ^fE_(ni=_7I(haWE$nXRFyAuP~u@!1VspqYKW!ymI5uko5^?i z?}rw20ibQ1=1wjm!^1M^ zRYkKaSyeWSSW{TO-4oZ!7fyPD+_rkjJwX`}bc&#@z;(8qF?}c=W{$&#A`aYzR4N#B zF3Etfzsijn;PzU3HU|Vqk(J@p9z{?Mg;=!(+F8&1#FY?9m#nb2q|{=XHn;52kl`9d z+j~!dpah)Q@&~!cQ|IQ0jD3>BPp|xf-?U~e%uK`?fwUvdo1fE5x2NG9&ylhVs$zp& zpQ65KzsaVefGNR-t4n|zFVD9+Aoh!t%tVp0o*?PTS6>__P+5rvQHu5uo3y#)BM%ZW zN@P9~JK}v@MB;mK!IFDG6~O31>EfOTYK$5)Wv4bh-lb-Sk@$Ud^wgIz1ym-@x4AJ& za5+4O3rvx3tGjVp%9i5o`El{Tj48D%v5PWM@?>;Af!Pc*_U0Tpj>EkjinLh<4P`9= zOnoDQi3t1e@`V+jKf-rk3N6V=siie!F4Y5ls{)x$K3gnJ?%o@)nbVn;eIP~jp(7-+ zV>qTOKHh_(fXZ>{T zN#fvOTf?Cl$B(jcs+{8@UK2NW14e1&T+@`DP;?%>J1urnzi-Y%q@nH8`<{$bqEkXy z(KfPW{FueO>kb3yEQ;4mI1Iz%?T2(qH+t?+XLdr32m0d8T*uo39J~i^M``;IM-}M@nBh~lNwhpKx-YZb!Gg>q| zZ26~ecAD(lt17o^p&C2aRCr5^lh9jOGVUYcg9o*A-_H4jO`37|_47QUQ>n-fT>0=etMwEV)NXQJjJDDsg1;c$Z}mul%j z)}sf;COYjMc^Gqk?TD0=|Z;m@8K}>ZS8G#Lg%sqK_}=w3k0fPQo+PDMRw@Mz9FSbR_5 z-0do%)Wt_4`hdLi`rxjRO%qZ?+RIc@5II{AHpss-@4z=4XnFSC9QF?lG|k#7;2RA* zo^4pRduD^nWp4z1;#PZ4hpcPghf+-3UTdn~zk=7dH@o1$ZHm8p6diuXQ}fvnw8MkD zFvZ?Z466HSX}i1apMr0HJjAilF}bxB`YFIq+Rs~|`qU2Y!fi@cJBrI4giT6+xq<#_?3*hy4Wk(c} 
zfI`U6|F|g?og(exG<6EF-QGCJSW>F*y&&t^XB<#A5)eg$utWzjs=$oglzM&t97t<+ zPG0+4_EO2*(N6aC_!w0_rc^prVnOK~emg$G8{T2SulJyrz(J6@mQR_vSYqC4Ufxzy z2%3XE-Q7&v$@g#T0p}|fv%17^2UvtU?A-VFzEawfMmkW?zlG{I=~&;TQQ`j3O$Dzv zBboT1#ztyYJ`3ldv0&luRHmBz8wf_wIZGO1(J`k@fA>yJe&G)^;2FHK=`h1JWpZ8) z7t)u|!cwx?5CpyF+>Sew8Kiha$l~@v0!K;a^%3rd+WnFVL>$>sW?-2*LPHqIZ*qr2 zHY}Gf_lqjkI<_rqhW7i0d_WPBSm4T1ebO!}q*+!~|Xig$DjUXN9b zgO~jFpWlIV{ix^d<`0vFKtT`G1GU5`Ol?(qh}*^=6}iGJu%G`I$2^T4u5B?CR0AMAv_8G znO^v5l~$teSJQ3Jj$(`iJ?tm&0`%4NL?U+)d)<8^0VAi&eTW#>PyBgl)o*5(keq;g zv-I&<*+n9f_NvJC!sXx2P_F@#!f^y@HP(0{w*?rqOYh!0DL|G;pvIt;l zL|V8Wv82FEXByz!YGY*3+BVg<)eTx3OosrUU)QQK*9_O`@pq4Xec1oc&=_ z5RY6zj*CPwr9(H*8;I~sRk>G4Xg{_Wu3D!sY$YI1a>V;w9m!wYL~5vdlc}2SEp5b& zeTw(b3oR+3wBZj<6%y_HKTJq)4(QC1CUrs z{A^GKxii3@>8KA9jEmS;!aI}P*=6+!2X=0!dq*)gGWAc%!(Mj(bSTJ3x#*@j1+Wr5 zQCCYZ>kK^`!3~#F^fa4Z-4Pu_O1p+}oeY%RhwbMDR=htH(J5p2l}fU;IKYVE@4kdE zzi>^0jM8qoghTwK4W4n07z^k@1@UP(9+*xIp4z)2;Xo;10wF%ceA$b#Y8dU~r?{z`o{()S+qW34lIIK}MdZ+8U&MFjJPfe!i8&YBud&!m{qY-B z@0%^9SBSzk{Ti9Xm(My?A#2P)%A#!{OzEyGLKFY-c#|Jb|8|0+_y1h1g4!5!6Uq3pc8Jk#rKd9{6w8P z{QRDF%SJj1=8B{ov<|GIfGz@KZ72#3sYME8D|o-??C$Lp?|*Gd+8?!tYecm`z90y5 zwXP%#EvLK(&r1eu2yjlRH%_AjGLVx1{Kt0&4PL0wvaQV7HXQ9Sr8IE7zQ7&Mv=L6; z%MRT2bAAo7&RVJXjGqxo4HzL2FX&86nVWO}AWAHpS>45^1|bVv?&ZMxQ+!0MOUY*bcVc@Y|;TaGaqFqJSP7JTJyRVJJr?-rixoVF$y=~ z5J-jy{*{nMn$ozp>Io3$rTr-?u$Lnq7Fp2G~rK~Iq6>ltkZq|bc$BB z61Am4407Or%8BnnESK8Ph0F{urA`!3{zl+EZqf#KJ~3jzIVJ8hxtJdLNvBmL&~f%9 z(EM18M&}tegnBxVQ^m|{qoM3X#hVLfNi(Me@So;3 zFHL}bM@g3~lnZH#lD?>zfYsTPA*#E)ft0K%UXfj~T$mPnf^20S$xrBTO+999WZ9`n zTzNDsIyJ>Ux|+gumhtP_!p?9b9%9YK6>I+_^_p9elv3fWIYR$2Yfi8UJ|WH}gI z$(Vrf8yzfI`d|jXkTnP;ku9W?16Wo3R}N|yJp!X3vCC;-6|KJ#J|rusrZu=QKB^;{8NuV&fmuJ4!jux-ZSNmD+GsEzpN%_MDX;Nn zU7Z}1IX9I%w$o`~Lb#PiV1U1f)nJWi)fmtG*oX}p26Lt40483_0#4>4NHcgmLc%b^ zQWm0Ey7QbWkcDrKF@E)7{cO`b)b_6@iOo{<^Azr772g*uO*@1ZveI>7fg8RUoQnWm z+QOb2K*u9}e#ILtw>X(1MWG+m5}Wndt7mlsS`jgvHZ?kG&k$h?)KygC1tYLy@6SblSn zqyn5Vp0NHq081SE^Y3@S@op?70_@#<9+m8YJS39@%-pB7jGYnF 
zFDa!;UXG)0ZXZq1pv}0S#VCPd$gBfoPk!e3hYN(?@Y6R7BI6RbyH&8Y0|zgmKN@a& z#kuI~Hio>VmOAzKC;k0l!!u_!e=B)Va3`_F<|Ko|Hyrq6CU@4EtA0KrC?kFAKCj&M zKun~8OrTK>ZI%hV+Rm>B^fv-x&v#xuj%Ol;8y-*5bvy9h;Vob9AeN|*l^{?}{BViR zz{j8SHq8u9D5OI>>Ycd4i;})L$&fN4QzV;7_BDE`vjysYrIanm7*ar9d?)pp8ru-@lJ1qoc`)tTwTbLA2qfbc^5Z2fQ5Z?PI|YuMds@5?Q7U zqX*u07Z@2Iq+Op7aD#;eyNh` z8)*L9Vz*5^wJ{0=m77mfnEjKk|b2u9Hdg z0=y!j0sB(7q~Kdw=o_L`Uw|AesP@_cYIf8%DlE zO&Lx#oOw0NfixAcfkN~^PNh@$+_!NgJ@$J?q;85Oi>5bTv9O- z3J&5gQg0=fuop&P@4m+iFxLpY#l(ibfS>Ty1pdD(^0>uH6Zt~1M zl!^uDWt`{)s#rEua>J9FGuu!Ru1>L#t<=;dEO_f`(rZK$XGS8V#weau@Em71FS8~P z4at@Ony+8cfX*vrRA7eapl3s9SKMjW!N!o(%$&A7z{k=>L2WG9^eF3rlT*{Ntsi<+ zCZz3};na(>CdJ@!dkOZ9h_^}orNb!W%)(W(PL$ywg>dzWh2$|tLh^hZ8s5rKfK+UV z&T&$y{LUPInCHDI=3@~&B=Ed>g>Zc+nR@}Zqd=5a-#lJH=sP=o4yFR2N0K-GaEqc_ zMu60Q`U{r``)@@`DDwjkNG#T0hDFYuV{{N9%b7Y(y)I#@@r3mH@PJ&NjS%3B9yHjf z2%E@)cHQ&9yA@NH{=I-N@N%EjQT%&H(;m@Z+~a3}(MI^>M{*r`?6tOBV_vpU>vj(KXC z2#(}#XG1;TL%WcCzzArU9NpVbGuQsSJ}kX8o-^M!rfYks0?nO%@2;Ddh__ec%hL~h zvMWkmswr2rG{+R@_kV34R6T-5`qmu#nyu|@F)oiks>mp)dhGliw6@X(@e#U1S)toJ zD$j|^-l4jcCyD2`HD8Vw;fNg-L%Ue&)QW>}6^iicbLBTjP~3o9Ix?5pf37TLI|}|+ zf||e=$ndwFz)J(PcLGDE#I_4ZCd^e?R=uSg`=aL>7{2=vqXfn@f3fz^Pf02Z%qWhB zE}73~X0b#V))7a3JXtz!a$1?2pQ6I0C2Er!%3q;OR&SeSn(W=a0mqyu!aV|Xh3aFi zpZu4JtNg|o<`=@)S}30s`(=6EbcEP!QD3`}{9CG213 z{Kbdc=pV|ZHycqjuvzeW?Oig|6QmB4*2LtJ@kGtO$mFlq4id8GPAUsJPf}D)>sKgd zkv5Oe>C5N57UT)D4q_KJP@gu@`A+wgu+K)|bSYvvRp@RRY8+|EVk5~&Y@falJ{HL*U(1jW$+fbq zSoXtrg)O55-R}6qmxm7vnHq2ugkh5(pQBak(tvtd%0TXdp}vU_fA@m?zpuvggx#m| z+W8n|OKyG7HM$nDb3BsjnlZg->r20_OZAz3Ml2C3t(D-nd#(#R1euk#05u}pSs!r~{Ki2S#|C4;*c z23OjuZZhppFp$ey6c41=d9sAJnlrLWP(4oXn_-1AGafeqiYucm-9z7)?w6YSDN5W# z{jsW)B`A0feL=B5d9-QWM3wlmBq2gxlC{wr%@ywwZ#ZuH{(xTsdkt4HT(bCkDIdG15bCMlxB7+LrG&2)nNdnLdN?KYm)t^T1c;N%i_g($r2h@XY zs_dHv__xIq`JiI-9G8M}6pB9L4#073DjT!=AS$K=gDn&JYdmU$n?p>Iy6_7MmJ}9) z`O&7m$S~dyfAppaB?+Hvk}$=%H54ri#`~1;4fjVaCY&0@Y-s!p2bm 
zuDIZarBFP`>r^1J{Iy1o)@aUpL}LK&yk~G9Kjuv})`jA+X0gL(yzAX!At=gf#<(3m?L4k|PLTA*%X6SC?@ay|%q%VWR#_7EnUijUPyei45uYebwTn zDus5`i7R$ivqH@=YFzOTupo*vE;VolXplNQ5Dz-wX7gVnE{wUyrn5Tapfk7)_HUF7;G6_9g;r zf{-YVHqeQ8O%Qw4vn-|3ZioQc6MADKMVl58QUg&A~h|+v>eb?m7 zTh%QOh|NYQJ9P1jz9pV7RCYuA^S`G6*;=9G|7<@arF4DPA=r4Tf5C6cHDOr2lL!o4 zb6;?OAzfvgP&;mQE{49*IQP7BofoFnt^b5&76kpFB(AaG;31!oY&xFuYCHePWNm=| zXd@1?Ih$&JNI#7p9w!EmuAst=zFCkH@?%lMT@q@!lO=y!pyk@?XEUyuaN5&=b>D0q z@z}E8Igd6Ia-)gMzfi10(NLB45Sr&*!82Fdh0~l_zwZ*o>!ydoubZ8dwf#L*sNIE| zKSv|k&1FX~JYDwvA)o0@e;B))5zYB*8gI`SgaS_GXd*S=WK?XEw^rWQTC}Qyw{-M= zd}aHOij!62@YUl-!;_#_bkfk_*1BuDZz*}Q1dEH7$P7MmR)_CH-%E2+Db@RyM9}L9 zid!M$w51j|L|?O8z=L|4wX2xRP;puyZax3n2-gNFlhWVkW$sYu_Bz*8VL_N(D{$pk%d$pIUm^C;#dkFIN3Xsutb2ay6(^JUG!n zLaM}`KB9HI&~!o*616Bj!QNiXo`cUm6RfDuRSnzl8SywhE*yS^!H!kN+xGAa=9q=r zi5~yKsz`yMOP!(^W_LwazXfwb%Nct2k0uDOJ^k$uvi4cW8{jq!g=O0}eVcM^XL z!~2eW0(y_6HPcVXL6s4#HPPuW@pl+Lt-Z*&UOMfay7t$3MP7gt6iA_#h9fRA&HkyH zqF13Zmpj|(jGPcXWoc@f^2iKR=43*y55M@;B)%Djr1FvP@5N+l%yUlUU|6O@PxmLE za56zB<>Om!Y+6B`6A|KL8oc>;5s;Ph+Xn-)j!(fHERD^nt+%Q;JURi?&v8s=Q*$IC zZAHW&R<~FWGdh2HAJ6J=f{7kMx-#94fBO>}?ujcWM`Js?6p|9M)Y|GOK&^56=_ zZde{70(04JmpJ)DfQVX@e{Phjk@h>>;tvi0h5B3_KW^aZ0egME(U4Qc{@ES!H1S%t ze|*4+td6{je>|SCV-g=fnSFicv{O*|0VhAuL@0HbA@0{pOId-Xn(DQmSWhnhDhB@! 
z{5sGZwV-2D5v<|P$v;YA+jF?O8(_Z@2zXvy{oB+#J)W*T-Q8w#exmt0u!ZaYkO*#rhvh;AQd;SIK)~{ffjK{WE2(ra`feJSiSGJTv9&49uVH*P?5d%)cow|pE`0ECpyUw6w z5PxnyNq1@1#4TWuB0s ztE6|Hs)M-Y}8#?EHL~;W|w;SZN$25fFJ)D%D?X{qhD)wvu*Fy71;8 z^j52HDfTA^5ODzFxQzlP z5(QhN{X#PCwKQ(1{(^lpwc^|6qzOkkEh?z5tFNifrE11XIoTNHj`OhB9+3 z!b%w^X_C#X(U=r?lG7a<^-qunNfP87#!>U88{39%mwfZ9 zK>M5Q=DVdstRnCW-WdxL+ex9l8^r-!h)HQ)pqdsu8oXWATl;9GDg+}L~v6&$Al>PS66%% zMiFcIRXXq^;V|J#6^Egbb5QSZga^Bqn&CKR5~QVG4MaKPCBOznnwzR#+4?Ma5} z>uvrz3bWRl-uY(DC(hHt63iU7-ANG}JKRFkb-qL9HN%Oq@GMi(o1??!gmGC7WzFrz z&;>NPeSTJk=C!AX#^s190tksZ$$N-%eq~jp19h)|abi#!O}DYUi#TkG=2qL5lNdKI zuGPUtH^(kPJumB_t*6!<7{-^52lDY51vsZ@3{Ah9FXkYxqf{e`X0a@(wr@>vB8krR}@6tPa}-Jd)nC?cuF zbdugrf{n4G^O~%zxPF4Eu%jiPsq5ZFLSDFXu{25NC7_@<*Q+VY86e_;msCt{Z=v5+ z_x&aN1EmoSjou8zNDnSO2-(cdU+jVrgG6$RYq^MierSGVtY!XC5cMq*i2Gw9^85wD z#AN3KvO#?I{vMZ-vNk9bs8L^_E2<_7Sm;SL2#_**RD#!6|L=bF1n&*}6KVSWa2hs| z&eCc2V+53;668sr-e2_gFT zJb$u^2Qn2Tqva>)0zyxddWo;^tR&%E(@aULGw!F$rPuLWeH`lbCfHzOooO9~aOMYO z85{}`ax0MM11y&d;3!V)yk|COOs`@>f@u?%0Bt%T|CSq6guV48+pDJ6jZ!}7!>9;c z@X8LPf=&tWhe|YAN%Vg=>b5yRuhg)*FMH!!{dqN2Wk;5NV+;Noon3@i9#0@1hz%-AV!`n z_Hfv%1coo9_}>bagph0-bbZ-Ast6?9Y|NPNvmK-ZxtMJ+Xn9ew)+F&tcu`jiW5#|! 
zvee2sb$@vP|3nVt0Fpa51d__1Dcf+!YTS2pi8FmMq~F^TSfP5tpH_u*qFZ~d^gF%r zDtyEfc}w7&7U~c$YJOjuI($Eqt3%9@ETY3{i=^Q+uN*}}SoqW977CGzH|GBSG0UkD z>!u`8z4AYL-g%(ul*USnJ>&nB8HgIHRpN6O;lEJ-{@IB$lje6^rp^WJ@`R-PjC(__ zN}XyYvbEn2G)iQLw@E+S%?8%;7btC_WkycjX6}CB3EXCEJaE)5AWCG8J<6E&jj7oqFJCg_e=}FD zk@I^Bcdk^z8#SNLMuMFVaDhGOF3U-2_u!YLY~4QUm1;;{eLrS0Zq;MTw}4>2#Wb)a9CCYTTq8q0C*P{MUBM< zUBGYYn$-Aq8*|8MA~^ykd@9H5c@BDXj#v?oN1-i;k-+sepPTNhXd{o0NZ>3%!lxIG zz^Rxty;=5SJ|cmpZDJJyFc(8lEr-L7HZQKv&X&4rV7marn=9<9<*@Rt7<7x31~%k5 z7e1B}UFjZ;+RJNWzVKm$<)yVV&m03!5+n>i5+H=drn@_VN~{Lh;9hTO7Cw>nN+&CM zNDQp_H+7-=SPB2gCxe4-;pHJqGzRpKX5()O-8v`T>=aT_^l}l;pAtWE&3laAt++xM3+NZ>(;YUS*PD(^_er_Q%XLN3K+>R~-*0;j^BdebE-(_9C7#r^TdGfM zf}7v|cp*YTx6k_ixooQ5Y+xo0xmQgAr_bY|I!umJWHN2ng16fl_Y429P^$ZaS#gc@ z$idHD+^SRhv%*}?B?a)$fP{U#(L^0G$?&Cc(Mp(Kc41)PITyHdEBvwuH>V(3?^tMf zk6L!PbA1(eiWR=ujAdV3I)EtOVRP^(|CPRerlMbxzv<)FMaHaJICb&mjY@OALLdTCZIDx&L%NPzJN90!i^xQ)L$rUC-S+5q=i>` zS)RD_kV(E8zkT_^pDL^<20q5*LyJs_TTUMjI?vq?zXLnF(FD85rWGA?8w+d!?{h1{ zXM4Hox`XcK>+?r?OByza_%IC2{dzk4RDr?2fL>Rc7x~ncTW->fgNKoemkRO@ti;ay zYEGXdiz~w+_YEe+NfBbZ1f#+ z$;IRr>v+_x4TfWEy~a%Gd!$9_&hq}g3{|~W#EBI802{CqSWslUDudLyu&v*9nj1PY zUhb@<^)zI;tN-2U;IeIR=QFYUaow~qrYvv}9I&l8YI@QS;RqI!?e$}x5rF+le~wvb z7x>`EL0*aGlUDD^@tw?(sVBwFj4sJFbTSu5k#A$iJFd8=<`_9}aXEgL3MXe19{F`z zhY$7&VMH_Q8BCTVbKn;ky)z2BGu_MVw<82`+%yTV&FYJ%+o{qgq95`0FmZMtSEfwU zPQLi{UWZdsSGuTKPKYt|bE0fj=3jg-!s&AH$KnC zDbx3%D^n%a^4+h-4EbeWzfimTWBQO2Hb&2H;M&NNKH*`kRANK|Yf!^dq&Yc7qc(1% z_tbE-%8{t-te|Yr_OR+L>m;yUDtMVaIzJp0v|}Ku0u^3D^HPM-!vqWg+ohm<#Zn1W zOjmx$LtTN&{%ZHyC+xLKmJz57U$n$^{DuSWf)FPMr2ZW5&SsF}K6qYwS1b*_$th@{ z0K46g;}|G02BBLVad}y3Zc+!FT2Ns^m|q|cQMvM=2Q91xU!F<^$H8SRZhe_rkY@j= z(PZ-BT>_@Sm?gsdIu(qJfBQ{~@}2ao=EFhE#HHeeb^>aR6J|jI3sX)+N%I*11K=CD ze-FCF)szk9&xms-#Ahd=g+0T;XQ>}6?m=SYe>381#whSkt;Ety1n2&#C>N%$jV{ct zfbt$#=Cr4Rz2{0LoFzQoDzy^*w5l^@YNR8p?*G`-j@~ntzIw^HFdFPyeR3H-eZ_g? 
zRgPNP>j=)LH4IqWnd*?-JOCU1QPk9NGem$+Y>7M^R>8>(0Ok!kOU-?>v z95Uon(Phx-zre~Ul;cV^KR$oAPvYV+&$)Q>0^3g+S)!|VTi!!WDft`>lCLn&$OAm# zalSbpJlamqS*K(cmXyiJ%RqXXDNY{qmv7Ix)q6=!)F|T>c)55kKLDW!MR=?4fL)3g zDNfUYgJS<)X5{pF(lizqwz*AN5vE-Z<9VJiq^Ye+4)Kxk=H7%)`OxGLo{CjuG~%uZCW*97Mk-*rg8+|5dnQ2t)!&h;XH)Xve3MFMl-( z;IyKa9jX)g5D=weJzqB%kI&C=;aMZMK7nnjDhDeM(U+yEK_4pb0DtQa3!VRNZ?^Z+ z$>)eLB#{1}@6xs=1O%v4{ljbJC6WmW5*(e5C0P7Rh`)x^62LTc?mqXiksuNP4c5SKX4?SaP4>ut2ajmH?^ND-v6WmpJOoSE7~1Be8#<{7QOzV@rr+_oP9ujVfX&@# zI=S67O2f67TtdVXToe;#8#j0>ny)5-JoFa+p&LoV>^JV8Rd8~P#gbUnAYNJGsbUI| zYl*M;=Lx^7R8@Y zNUMvI?MM5>sf0n5xABZ3tjl=WKlgY-5@FASlECaL-vkG#vOW3p3lk(rxjmF@Df77N zZtrvriL5o1Bnzqq6AUgDOMW@I&ab{#Am>*y*Ijx}R=t<*qZCIsnE2v%TKL^<>hz=J ztl-0+MnCZ+n+@o$tnam>Nr&cu3r*jcCb`_Pl5}mEa6Ohj9t#) zSrT|hlD;-zuk#E7*XnniZB7QeUD7n6#rjK^&f@P){zP|0*`fFPZ5Vm-4EXYsRKt<* z<-J#w=YKg}X|fMSi1N{qNC^n)%a zs_gIna}ll_;L2JC*c*c!K)$yGfy=y|B@ZVbnS7hj^{m!Wjhik<(Waf)b$`qVE0OyC z>xZxN&d~eUT3l^_xjgO!UNqVPBrFT!Ul0ISP!W5;Nv8%EdLlV2mxh1XTn_};u}_*v zg<@?p$=T=a1mYuqK8Z(ZXR?$lLKVMx74lRZhH5DD*EjNad?dBk#ozcFacF^XPg^l8 z%n<;d`dUmR;xxP2S7jedfZgb6Fqn#n`L~vBs^_0OwS)#RqfD8?l{8?ncNUOJx!f@u zV$pWZ^%6*Krg0+|^C1b39Sp5IshcDT+fz!#^OE6KXl!>mAuzxPpITZazxQN*6rN`M zNJFFk1%=lfB~RvOHjm@4P2h~L3}S3ZajVKFlD{!zFbS9Uy`n~s-c}oGQ`NG`0V>&w z0A=`4xRe_~pbUtzvPd-%33Fnod^plQ$SU1=YtLUMD-LEUJArUXqneL9tu{ZiIXsk` zWEq97k{>vmWI9%)GgT-y9?O8{+61|y&7lmUn zt?^BAvb0n(nPD#jdHPiRSfa*C-7hmMyj+lpT#Hg(;pOH?qTaV3sFNFF8D_%x!X_oT z4}@X63Az%eDV5eQREZIMlFkD8B#}i5G$wl?sMWXG^ra`f`Wu?ZvMI!8L?V>9wMd9D~p|fnu;Vd;}<%5~VJxTM<`%voatsp$Go**-Pva!)k0f#%a@5--b7(~JMw z+yl6+ce*3(?-Z_I3l%#l+xBM<&|N{FrDHQa5igv}bFRYFxU~Zol*V*$PrkgxQ>Qmt zUIeuN>ygIkM{oZ)Gi)n`Nn8eJ^%O*-dfcDn27&*jtfwO);X zo~#zg8Fu!BH-@b~RmYmSj(emZi8lOGTM zp@|)FBE?t9sA2zD4r2yRwn=aoPzdzcbRXD>c<>P6<}HVL!tUZd7SJt8AY=+!K^&>Y zt@dYiRvo*_3Ddt0Lx=+{93jl$EEyGg4PSNTj(gp99p+Do=18dloTOxwf&FBQ0>N|u zE&dp>62=1TAi40S?aU!2U4H6^tcx>7a_NHfUDdaH>|XGdJ>MjNvE{&7E}?B++}fYW zvvk$(xyIj}9DH(zSOzcQA`1AWk&CP&d-y0{`<2-0CzMj 
z8f81R#|qorsSp@tVxQWJy=Yh(E1v$f;|9--IvLCM8;spPE-~m8oH{~i%g5O}`JKqz zM|v)FLOyP=3O;xkODk`uFqfZOW6QdntKV7Ksl}G+_2UR0Mn)GXFJ7RqSQFvP9@yH4 zoyl;YoiY#b9Nlt%i!v}s_%QB5CaG4Bm6nI_uoM0H<~Ql6xUhxpjd>-EDRg_%b*G^7 zwSR8z%st*e>Y|bau!fNL_g}u?`IeLgR}nHeoZ))lu&wb2=K5o6qVR)ym)~3@hV9En znYchj0!Uk5Z-rCQWg(qIRR})*kRc#S$xsgGRk; zeaEA@)55l>KVNUH1aRoqA88Vi&`(Pk>gCotin|76wdyX=xii&+{FgRgi`~RP8m70> zlRozQbqN-N%R@Xk+rp51Cb^E{Lyo95w>M{2IWvxF^DNqr{czd=%s(A-Ld}a*?o;= z{M$3#Rx<-iYWT#LO>xj5HE}Ij+|zExLrn-hmO;H*`Ox@dHvg0WgQ+CABqN0%X)VMh zRYXwc(mkBoE$w?(5GVLr2Sgm%dv}J&##4b|e7HlBTD(e4}MEZ>R(4r7uQo> zd0(8YZaKf~8!_T|s3$DPg(-?B4UGPf?zhi^rsF4z_$r=uOXgeE+mUiBjoeu05Q}r~e1tK^8CPY>T~s?q%pCQtGrD?JX~5 z=YvtXmlCW0rkm!+g-chpZqwen0K$KXRrSX5XinGLPfN1(m|4hbgC7A`xIki2O3};x zeVMyA4Zp5?fa4x6NG7=9mK>Juq?r+z|SU-XB#EM?f(DOJYHxXFjyPOjm8 zg7hIaug4HN-VuzyQ2cfCjlaaV zui!Vtg5~Np7003c#4gZPdI3Zj@#vcRNT+l6=&~U7X%;_jgB2A2WL~xUe|XANv|cyh=M(sqATs3KpX=+YNrSZkP!9 zTS0&V3V;dgaTwori1H~gO9T2e#J~|buXqac-}w?Kz(e*=^Jz|1`>??Iug-I<&#AfR z$P2iFYL-R;lYenkWx?FLIbQ7T3`$42Vc*Gn^L9If?0Z-yob6swx94chda(ppFBMi&hT4*cO-<2qtATvO3_!UfI2eKN5`)OGg{3+S>{xYW#{ zqH6wt)W_IW%_`~ogTKrRqEQb5raAJ;?IT|$QYD2UGkTBskcF0S9{*;>mlKsmU z{%HxUaJJ$1U^yUzFrjA6wX8OlMcav>VykOfD)#mB??y{C=~azr0t2{u&asDKHY0gp z=3(nj9tlTY%X>@9~R1_ipk@n@5=qd z3A<@tJIy~pkxzo_>YO)e5OsD&M2B9WLhhKLl2F1*OEYEJ`a#}uaDQp0MbHRm-_ z($5YXnudcf$N}5`Q=bbBkNBRS*vwkdkE+~x$N>}Vv3n{w%N~D26MN9cKUmYOklRM-WCG$E-86RaFB1;7kSE)_^6Q)`z3shJH_%nLws# z!6vS|t2y7pA$TUgQJ>uy=7T^1pAZ{bb-=p@r7`l4m!?8qAF)FBOAZM4pi&uI)3cY0 zR^0be>FCmz+sY+ovA6fih3G9wn^rT9t*|$f@xL2+A3hPy@XYHIw)p;tv2D0LZE&%gH4281u_q@9*-~3r_J&cO zYMMp(tYkxO6`yKeqdgEdKWQ&%LfK5i;XTGEd#;M2j$oczY@-kj8G27SNU?8&!^W*jDxQnWJa zIdigI$mdV9H}aYN;(&7#&~$tF+4P0OmmaziwV{+a*$hHuoA0YZ{Lbf`7BF0_-2mSr1AB@P2~QKTsF>^7ZCd5 z0P4ZX?TCKuzAEB!RQVeOXMheo;Z^5Nq5ei~Z0;52z~bGx!qMU#&74N|gAWu{VMS8| z=f{m-LSxJ#4(_P7XIlBoe3lD~)q%sxmNJ~`^Tx6npx)4+)5AB0nAiW>Fc<7}RL5x8 z4q&9|8FO#uzEj#9Bkp-+(J+>`%9GKneArQa{gJOU__8(UoANwcjwaQ(j1+yMbK_%@ zf4<+&=pf-6NuCoJ-n5T=Ract0TfMkbYJ0pVNZbgh3qSA@@HmOzo;f)^*_f@r`*7?L 
zpZEoFk6+Fq8*KLVr8}xu0H!ZU0sPVB4OC!R>;&S2GFx$l&cv1uhjR zb$Zq&0e<6+tpxsbi2bvZU;(_;JryIJ&-AO@$MSLh^~52kY^nuUTF%PFX>qRjR2-K1+Xed zJUtl}rCxE}6U@26Pw(Z%giozA=Y5?5@_eu3{vu-EAqdCTzq79Aex2X*I=^^o)_XxD z;)5#mP(&l&pBk->k-r}sA_{ndp*9^mjgO`c5uwQ<^%!UiPmG>uh^rhd4Qk9W=5ZE{ zx)e3#NR(fot$=|9qa~K`wI-hk^RDFZr@H7q2t2F4;4T||zHJbe>C!8iyldcNd5N@3 zR-f*VUk!cG%}==qK3M02@B(Kr*oonb?8W8C$ia!T>#^mBy*+QlXqnnuz~fcLsN;H$ z-skVn$ASV@8g)kXdR5x3)I~k-%huypSDj(;3bi)u(!v3IxK$JlJyp~doS$DtKg|LCCO0c4pGU@C zyyxDCy_P#G`=%(s)J5ODDn_SYqcQuhPC@Gw<oR-=YA*i)QV18$p~_{WIQozP)_XV zHDh8<$9A5VuU2w6hCy%a!kKY2R`#2_i~({-CG`Z^Q*S4bq*YCX=7#Wu*JG0eM6A>O zmLyLmBGp`|U@uY8oX8j(;&0{Zui(p4{G&W69lV9&G}gS^ouT)^3#+|{8-S+o0Cfwe zYN<>MYwSh00J8YtR#H5-8m=>KfIR&u9K8oDD|mRfk2qmRg!pODzP&fDHBu%Ag#3oK zi;CylE@uM7`9LJv8WBFfICXUaEewp`GJR;_J2*w~{!ZW>+zbg?Sd`Bo3A8Q62+IQY z%lm9-;ShkC!}0}ap4Iub=Y1JrLwZA(FQE5uzG^rr;73O$95u@U@EN6ew>=!u3e)%S z1oXP-7B)!w5j!vPCwfFBiU77~4MLhsbHij1wkS$7Bis}R-?pDH&YBibvm#Znn}D@Z zd=Q8IC~$e051xRKqBROt?VeVGmbf%POOU{dPM8t)wWHKmjyTbFy#sQ7WRoI3zl|6c zxZA^zth|!N9bCns&x8R4?DZ$+?zKx~dZ}e3${lzKjJT$8KvCPt{5K|emWrw(?WEzQ zSF#gl5VbHZqDBi_aBlnzF(6DmpGY_3EjJ$>7mE%$9^g7oK{AWq2ouAvF}Hf1_M*Ec z2wUA_z}K2BlkZF(f#_-Ht#RoG1o)lWe~RHD3+#=@Dya&TQ3*7_8D7kYCl7w8ApOtT0S8a(MoS@nJk@a{1J~&KH9T)e(Y&5Jo%%}L74TU`-b{=J!xNLD z+~@l4u6QRY!)$7cc?!mr*e)S9cO!`2<&mJ@tSU6ijVPU^k^k@N z*XW2|I3Z8O`}li&BIUO5qW<#k{G1b(0Ft@7ftUSUFXEv@*I;|Z^=+pleWJo7RXIe= zU?Ry<>}j<&^9+CVWFg?F0d9QzObn@Fbcdh*K=U60ydwT?Xu`fh3nAXeI!5|R7st1h zNQnPbkOsauK3ZM%G}OC1k~4zto^Eb}(CSikp||k?=BEEVcDLj9G0Xn_opP5&eyS4$k|R%gLo%tTzIm1U7zFHsX#w9qL{GHMUii-KU;21X!re_H`Sr{WGD3 zShhX#v9@aaRc|$uD8e8<7jS!LC~~HmV^7hC4B>X?27e#TWjS-sBqZKPl}Zo@)vTWT zBa<9fG!}Tz%dy8m6?>=c^z3+2v1U@Rg>>( z;#-vX!oCFy>y?Ek<9GtCcohFA+cF+)<-;jI?ED@1&tTPfhNmJ;9jBTxN7h*yzT|Wh z{}w{;aoSh{zV-UrgAaz!v?fGvP}ycqTqGD_x9zkM#^*=QMp&s#vQtG0`=}9t?}UfM-46iAv9TR!z?-ta3H8_5-m3h$h08BkcZ$AOY_oA?74u z(@N_9u;)s9{SUDIuOt4?c3t^@U-|#m{xnM94&K1YNyFIjr`w>x6Bb}TkAjzm2;m_u znov(lSZdTzkj1W!%&qiIc>tERJu`Yej1-TOM6Zj7U|3-S)aY6!__AVSZHimNOqDeI)LU~7Da5-J 
zTTlxhzEFbN&2EEG5t3bSKG-!7pmrLP65!M`@l+N^Q--F5{}`SG*` zW#?UkeP~z~b>IA*RhvS0o)#mSZDLYlBo+jEF)JXzH8*x5_hN885<^r#BTvH;FnI@f z3j5FBOGWD1Cpds%XTm@$d!}649d*}#K+)bdoZR)#o05i^>g`77$E>yNz23VLqdx zk>4n8$<%fENcDcldR%#swFhn1tD+RKpjO?7`$cU}54{ZhDSKm^D@w$7IUMzt4_d3d z{h!fi{J6`xJRnA2PEQ?%!ctU2f8{=ua+M{MJAVGrYT%9Qre6joy`N~iuhqy?vRF*B z-a=@~k8IR5=m*g_u->5d(JYf$04%w>MKEbYo9O&{)bb;RAiwc(ZK0`}SLmjfK9W?o zyznN_ijD0eTaMO3>ot0*&!u^vbS*r^P`bWL4xW>J5hlqFMf>&7Pq4N{bWiT9udJlx zp6#)#aZD3t&GbH-r3kVV{3!jb%|cW`e_%*H4>G<~T#!3*AI?rgd8VPaA{DXS0g~u; z8ddMf_LcvD-H{AAbx+pO8YlcvzYJ#iPHSSsN6!vNn^{2Dw?%+_n>W-LK94Vt)$_i<> z3M(h79PEJa3$=rm4plp$XifEvp2W0Ja?+EiRkYs*2th_LG;kWS>BI(mynRg~=2OOt zp--WLstSZl)c0M)aMK>*R7Oc2y&u0c4@HEJ`fuH*4es^6`I=QJHT9^-?E0Dvk{jn5YNq9gk$5KMt0 ztv0O14HqpMZ(jq%Mof5F;ajgt9+_YDm6VQqxf8-1zT-Rk%Dd__c3u9YY&O~O|KSXD z*hz+{-Lt6mzLMOa@!>{9z~|``qu);eCS=pmx#k*-LinU zr`~&pV^n3quqk?>d`egBI;rXVp*UsK>_uO`m|Nu`4O?{xoU@x%)&+!%Q{xDnrS{~!1Txbb7@HMlkWzVm<9?_v3VWdb~jP)|Jg z^js2GFBCP~RY}Eb@DJbnuVV+*xgma4y4&%0{;X$)-o&S_E~;r0 z-0xk2NFpQbGCz`(baef{3)5M;H*7&|_&dY)QEdTZ zKR>N&EOAbNr8VKzMO}m**QIw_8en7Da%%mO(APKNziJ<#+87+0dDiKhbVKwh;SRI! zvDvY6pBaGVD4AA0o$3UHkesqyCZUyS68}E^rzS8;rsyv4e%>aXvu13VYv=LL_`196 zOsXW@)1sBH(}B|YaG9etXZ&}&^S8bezbf1kG&k9(7_tbY;kYM()vQQ)n(pHSQtuHs z*!=szD?$8PlEpur;Xgbun#k{Y59sh) zc4u~?(9{Gp%vTp;zzO)by_j$_0Q-dz7jhdvFq?#T0Rb|_H}Dj*GxMCB62Fp$0NYw> z%`ul0-|rt)p||78?O5wSxjWWC#2i^$AKtd+%IQD6KV6$gbEDO;VP19`y`GmVsrv47 zW*d3=Q9wd;1#R2~d!VmgpH1!vfVk+)HEmNopSzNx1wcC)cHjH4G5gb8FVGW5Kt@i8&)0sJenDH^O@$Q$n;wwsl8TSI%Dj4Hj19)_doZ%{3Cm#=$1I!!R<_7RcGMe%Fzl#JrrOUSKH zRkR12&X7p$IYD=m$cq;~D8!;~?_IYa1J!3XE*NZB!m zFT`UV_9ELc`TX`_KAApwf$j_3b!+FhUj^QBaxH1L$xKlx1w37}IoxtKx*!*gx(p+? 
z$Fr|1WV_vG7)lG$dZ&GxY1G{E?#-dPN6wiFW)11@-3Q)fwY2#WVRq&M=u`N-yDv9C z?M!j?6!2o*V`ZhHx*{+MPTu*m1inr+L})#}2OT=d>r80Uai>g-jCOjYu^=BcAc(fjpDFw zVkGBdmqVSKvkF6TH{lL(5ws4UXX>vj&w8k>3;lLb3wkiWEG8J zOyRX}ae=b|ZXz^+h2|K+QQefWja7;qYAQo^QCjx0fG0_=(+8B+WaRIEM)Bze-J2;| zyFAHw-_&hYdFoBrz!XVuB@(ZS(NVxR*|btHkWN$-E->8xS#$|;B#)v z`XbrqikTAj^Czj^K}Yp*zpaReq*0h@Fsjcl*EiL>htOTFUDQl#hfx?r^XH-}d$~U`2ERxDMULpFThnm*NC*zBk|#d$Du9_c6TN zBq76GeB1hb$VM<`|0Pb253irOuyWuR33j){IZ(pJ9B_Hx;74}_px5xZzUYNg=L?T9 zOTJr%bac5fXm8+5x@y=iwBI;@T@b+xDA8xNjClRDG=o_!DB!o0Kf;GBg`C_-_phUa z4cx~ERzF#Oj2?N!J4g**=9H_1Pgq8P*CuZ9ZvzyL+{fs3N_-m&%wO`_KXztVO>RYr zDa3iz`s;!O6yR9)-?w=vJMOU+#ja3bEA#}0R`?#LLyX_qGx>L6K-B}G-OZT zup$**X_zbY?}uXpagReq@P%7Fuc`Qvq3m$W|9GS?I*NCdoX)L5=yZ467yu7ueE(*~ z{ZnFtcwh(3mM2B%u0`PFp5ZfhOe~RKdP@JJKDEZ7nyZMV z_5*<+>cl{0Qdm#3{%d55isH@2|ClS+Jv5QTJ3_vkw@1g=;XK|MMC0dQpmH%P;vMz> zaTtIl8N1w4e6b-glbFBt-_xxC`iuvJtTL|nkNNf}jBWDz-zd?4T+H7)*}oV-|J5S> zp9nZdi|CQiOUvOaGEdFHX+PrG#8AiJ8~8wKbPFv&g8_y>;q-~8fx@R!AOqbi(|v`y z*46;w(-bu*5Q5%>0bu-P^hhx}2)b*!Pm&P4f>;^280ozM2>l<#frI)jhs{9Sg9X5B z44~}FIVfgDE<8g?LdBUaE`+{R!Y-yy# zXRoGi$W8$Dc%(1`zFPR|2sJ*m=b`A;u(CQSD1Rlx(72^au{r&?HW(!lxL?&<^mq;V(OB#JGdBR!+%G)gYfdhg2>)yCM<{tFJoi*UrKUR=0wE? 
zxaVke)^v!2j$JYC97oGu1NAO9!|E&+VJ;WkP$TwNjCl>Qi=4Sj9Zyobe(^>aPC9bf z-lp^uF341PEi35(7<WiC7bJ8bgq0k9{?`ZWH-hI;V&>h&xq;gjqZb0~ts# zLA98pwUgdy(N$$}m9^Mqmj8(dqY@d!aQq2rEucJfy;p?0j~yv)%T>`teEYCrEY8oz zz%459C}o;02r=MtwYA9(oALwJBNH2AN-VG`3!eJlE3lR0`*c<{G|$8dVGIsqN{FJ1 z?+l(fNe7qr>Tme4rqRN*Sgp?8b{;(yYw4NEiG8a3;{Y-%O zx>3mlMH$=SHlN&bd)(%u*tJo7A?eh8|L(lcW^`daB}xHLw;fK0n%kAbB1{iewTDdA zd&4i~I}J_RiGM1ziywJ<4iRD-UJ=AF?dVsAiN zexf5rXnXs4y+ZEgR4WmAO6n!V4RU|TsUu6j77H^C&l}uY{F8FQB>VLhvC0x4n0Dd!jdf{_& z_R`Hw6TV+O;}tDVf|?eu7v?(~7HkA5$+IT>Ejbw7Wn*+#TKA)hYruCC=GUyv@&)xd)sj3|3BT2XK2L&grPB$zg++Q^B z2$1L0{!Xf&^>s>!^5DT2Utl~np__*p>ExO!;`L$srO}r%K^_H?wGPYg^O8{;SKP1^ z0D#Bi3*lxSpVXm#T=)uIwD1QN?8$w+)7Sn4(4aOXKPwq59tqZt2V=jDJff3By+-$G z!YAT+k*814!U#2}J|B>1p85=Z)`ccsCi9Yuqt-@8HGEXd&$3Ja3oydO#gWMwJccupV@u?4+nu?%31Mi!)KZ@*a7CE}==2sXe8YhuNFbQR>YsB*9L zoP^Mcm+H_j%Hg3PwD%VYUi6tXjy6s!O!9wl&;O6{>HoPv{NFtoNIV6&ne_bInFBnx z`916I;2{9I%C6_JidX?``20E4u|6Q2bu7BYK!aPSq?H=y05m4+Z_<4Awz z45aC*U<-EY*ZJ^bq_C$4ru(;WQ@v={V19hX(@5}|pW+lDeuzp&4LZGU>gT4{S3FV% z!*;Eg!~&n+qR9Fk5K?dJq48geAh*wUUT&Mi+9UTLZ|@SOPsHWzo2oYKKx=JK$!8(bZjR z%!g+L5^4FfJgU!i#vBclO-STg(W@ygb2|*ry%B@JHYxD>8+hO=E6bY9OCL>n&COK( z==X{P@hE8l2p(_jae%StCQOixCBL%EbmXZP@!DBjA#n+Le^8|0b9S0|al&~TM$E3u z189-b{kzINGsEa9VIi{E-DJb~f!_(nOZyV!*W}giImd$+=1HTEe{)P39HL=roecPC z*AnnTQ4hbmsGPiuBL~*5-HI-~GzMKeIM^2g*CTE=>#sF^`zE~yqqzOfmYWA_k-?LG-W+EfOzcciByKt;=EWFGBiH0M|ApaWqB~>kQA5(&v>+Rf8_u2YVfjK1cy$u!Txk zVq>bnhO8`$APTXMXPMSds;!>(4QyVUNjN$s(El{feSqR{D64N%gDw=A3L$KzI?jUw zo_g?+c3hMfB1gppXj^DoZNk8Wl3Iw7?61MLWzL-6wZZ_u}0ZRDOt!^qg>#gY5p$BQ)TAX6*C z2>~N7(9$C--UN-j?1qX?tg`It;CuTIzEt9GtW(Mq+ zm|giqgL5rcN9?%=vG+qYDDUX}&UvtEf89g3PXvDoY;uc6^iAp>wMgDK909gN_9wSo z$3o(YrM9%KW_OqGz83F2^wj3~GRu7ZwVt5emqOyOmB5+bLNPpS?L~j;vt`z>Z5bY@ zM~_&8ymP*rhv=)cWHz%vG4;j+&s{IJHL+U6l}GpB5lV@>Hj@DDx`{@QJKu~Hb|r%6 zmR5%f)8ajd05&)^EQL1ROeYS!^9UGm`ok=jCph@h9++KT!_E9mKLWs^Y9g#GA#6bu z?w>&h6Q^G;h(!GaU}|Q5WF0Bq12`cW-sAt&WdGm2hyH)Lss>f4E-*P+CZKldHLxS( 
z>1Nk}Q3Ie1y#}Fcx0NViQb1HSl7&~imJsiP#P-1b>CmpV;?GLP7+_eh%!$+~pW_mM zk*^mA)`Jm<^gc-eJ96M?jd}mF7_tN2ICO74W;j)mf6!7bpNZF?Nx_{Fh8Vw(PkxMc zHC~=S=qyBe4tF~2#twGmuv!G#!WqVDMAIi z9)uog(1e1L@wv3={!IJ^7{MP37AjB{;e0||3Ra!l{w5kDgQI1GEo$JTl&XtT)+Tt7 zaa^m-!W+qo>K%SR1Q;^@zq2EQW8&YM&}r+fhdS+zhJ*~x9@BErR`m1w;F0roqAAH zk5PJWan+C(7j-e;+nU6^UF1Uc2QE0Gtbm zmcVA&r)5oe-4Ygj9(k~7+}uh_Qc^V;DRjr0ot+0 zyrUxDLu8m1iN88el`!seWd13~9e|Xxbt;>Ugrtomr!I+aA#>G_9Ur~(?rPyGmj3yg zZ+)!z2ZNFI<5IWwk8aV0TFNd?{0-iY6Q@vj+KtmMwNKqgJ(8IlhRy3zwjWHnmFyzW zXLqS;?vw?3WBfUDa?{2Kp7q?%iAO0KtirWoU-|N*U3oreL4%T@ee0ViBgO|II8p0L z2AH@v?t@g*=d+Bp$EFbxh~V7rP%_eG;E z5#)`OyVQL2x?fr;Nou#`f%pRrT&of<9%{z7-5k|LOa4;tDV5pvheoU?{+a>@{^Xh+izEM0DSQ-CDab2cf`b~XJErXR3l1s6ydQ!QUJ>OO# z55UmB6mLFnVS7*3@DV~=F;?T|6?x8sbIde#iNV<@$jZNW5iWU?5wU zveEHD{6$e`wck04q6R;X~Pn)hcZAZ+RP=EjRZLYQ`G>Bd`Fa$lV zL3pHn^8hS@8_#ha55gAPiMS9aHoUQJ)1JQ<@nn=}Oa(VH zo3b_78Ou&vuXxX)G8j^F!^x7jptklgpdCBAi7R@aBG5B_$$|z1K8hr01(!hjltF~~ z=C0Rhm9%d5va@hHxs=7Hux^-izT4e5MSFGS{L}susTVT4b(dhBE&^Jj`p27ExT&T+ zQXCA*?oe6&%;h>+tcUQOns4LnRcg?BWfH{0vFGAg|MCNN>O>DG0fTpA+7%x@zxiLy zU3)lGYv1lnDLPUKgGA+!N-F1^DoLdw$>@YArW_i}F$tB*FlE|i?!O_p_n91gvPI$r~ zdn#HDb|*M%oi&RwT(7AlSPpLm|)EWaWRc6p`@xfL(*-3c&N!OhzFdt|`i0J3_ zxV)KY*4ayN8aR0^5gd*c#A1{%a`!y~ix!#2|GD+NZE-Q^0k%l6MZzPMvs`en!K&$6*a(vfp^Wo!%b~6jv(Q z>qPXiCm)v|G;3UvXTq_~I)c=?xs^3B?p{WxC?>vr?%1-f^^SI1JhuFcfjC2#h?W0dd99oO*~DCv#Y2Oq zhI>ia2i;)5VBdzIpx}pvwGB))j8a+XCW8h`^WZTPLUqg8$>L^#nzKV>q>qAa zMofRkFL&x62b#N{1%Cx=TRKV=Tut#OM$h}*D0{2?WXJsCZ1WL3UNCPQ**>1N;Qrj3 zJkp$=qRinLnN}8=A!W+Xu1!86m~IWs@n34)8A{WxylhZ;+3Wam6{pSV+vC=$B@BcP zO%{(T_+T7i~&L4WIQ2YCEW- zo_=Ru>l-Rt!7qBA4j48E3Xo)0Iy^tWvh1P{v(UrxW}-#80Q#A}+3Ut$mlsQ?K|7*? z_jxsj!8*L|j6<5dhpFzRb0CjJDf%n?l`h+(Ua5|h5OCQNhS;2DwYp{^Pt&EpVm^cvi~1h=F#y3XA`)(zdAngr8! 
z;V=c;@s`iFd!L8z?P|~ke`w32B8@4Q@#|Tgd0Pd)b z>V9rB(DbT0r8JdCC*LVKZwPvqQ?S-=`18?c7cWDCorpcy%GeCeh-E~Xs}DXF>Rn4o zS5Z-^F}J$Lt-KZ3RvG-WJf8Vs@24B~Syg9Z)Ok+0H~Q&TQDl|MF!@%WW{a_-ivl-1 z3AZaqqsUF8$f<3 zx<=c+LvBf_&Y8lbgkwVyp&m5Ywh~|NZtu-2TO-Kx!hoDmec^?>o>=eL=tNX8d}Dvz zG9B)1s$=ZDds*Sn*l9j#*vb^|3pEXsSFL!vU44w4IKvkFn|_V9 z-t1C_*(Cfdz%rWvHEhi0o+e*6-1a|(v9Vh38)$i#kH(BMa$>h;^aq4Azuj9jdr_=> zNd4kr=L48m$DMG-4~xqe>e`1}W#{&d;MOKkPhc&o)Vum)15F~B8_maNcX?0P=X0?h z7~WwU!>DRJ{u10r zV45j~1iRqE&r@nAR`R;tZ&js=PRTXeg>$7bj;P2S;x{Hz@uz#j<-ri61D&K8x$MJA@{ynEO?tmtm@kx=Y zm6>q=SI^e4x^6vY{bMg9w|uOpul=Xq2LleQ@VW%8o)0hACExvt zo0TK147j18D@xeAl`SpBg@LNqCRa^w(-7>$8E-LjN*-5u-r_{$PxosSE`{1dh_vcX zFVw+mVR5q#ucpE~wRGX@usHZZMv?|?%N!b^??E{0;rY$&WI3j)5rILuu!Fr#p^4iY zHFg6|ZfIz*#)YG1W+y?xA#?Se`E7ZThUn$cC)Eyk-Fe{m4ddVnZC=Fqt~1fL96B3G z_*Gj8V%u{byC9jRo1g_K5!9qG0^$2CM1YaJR0IM#G4BVssHi-Z-=)E%0ErN;;%%k) zmd0W~>M1R1pFH)WBVO>JaDaJdZuy4DLM(6c!eS)nT%Xu9MbZqEe|Pda75eu%E*ByF+P9 z?2CDmQKR}UsyRamOsA~ARKNEAp^WozJO;l>3DK$Av3a8K&BSk`WEc^S^Z9Hjr)%6N zbi3ckoT3AdAwq=J;M+95{_H$9h5kdqFsqwA-u^C(*~Yo#qHZ1+5JRX)*SEUhAH-`) zhMq^JIeg&7eqN9@u&T}5|JGoVcV|#oU6~%l_UrMXxXnZrd)6<zx6#R0~@D{W*-UhKDbYPK;pdQs6aP(OEli z+1^Wp@7`&ZkH0u^=4WdTzrfBbi#j9+IeR&m!2T2Pt5OK9Zd8NzTB@fE_xJbb&B^w> zKu@CGD}&*=?CFl3XPyYdQ}0q2Js!1Ya#5{q9L-cyrR*hFi?a`=*A;8-#WQP~yqowc z=T4OxIw2NOI&^S5sUqZWd>Y;4Y{I_l%(=@)e5m8klUw3-A5D~&jQ4yTIJJI+|#4y$t6T)Z>UXE=V3q|g3oW!qI z`W%Twx9Aw1P+^#jt3nNG=t9II5w2|KU;gfl-`B7JOFHO`8}VhV)%cLtOHxH^IqJ}C zKqNTZnj?rvf(vXpe5xH(W{XpuM}hY?ERK6>je}fpqtyhcFJAYzhosCBxJBQf;eACq zJd_TTNTkhkCNn%fxt!&{RY}+0vL97Wwoino)2}%64P?QF860yacP(Teo;E%fl%t>7 zWsH`c7f~HLiQ4kq@w)zY4Ms=8+BCOi9@#4qn_6hu>LX3`J77oS1 z0xcpTUAY9w1U~{oCp>uZ=sBosUA)yA^3V`Es)LV*%ZOw=5bTttLlO;gk^5WDD_ct) zD|?!BVRZDs6iI1voU0(Rh9GVi<76~7mUX5*3nk_bF>JFGrRT^Fh*npAQ)LS+{d3Y! 
zX3dnI@Z!-FPyy3GiTOA|7ipRnM^V!|4p}7N`#>b|euVO@x)9 zQcZlk`2wkj?*rrfW|`Q$RyM2GY7nDHd^pT8OWs3-R~B8{<$`#&F72th+r+Q}d=_b{ zi2(lE0zF=u{|$6m+$FLv>c3`=$a+(PzV;!GgDU~KLRV&oUsXq$DiL@&UTqm6jral z2t4l1W^!9MidWDua5CTEgFM5@me*oth$DXA47Fp3sRcmOIxZ##nC61Qzkn}PN<7_d zVmy_gtrGSWfPMZYECszu$@OQb{u+1@Xl$!4_W@$FHhOJhCugw;FZDFjWxO=g)v2@& zFV0GX)aH1)uGc4gvq@)XW=DEe4W$G&3^9ZiA1tG7)BThv%#OU4VsLMZ3msUf#$}U=2hgwv(46Qtouw_@($9 zqRNm1LDiIG-hT*>m}mbTpg69~;k;hyKy5L2dT9+si&uxRe6cIozhbB=2&=mzjRtE| z?Ew1C*%kdwUZX5FktPQ@o+z=Bw_w%iS{)~WRfJpUj(pJ z0@)*TB=JZx4+Co$S+XXXfBocNQ#RTUj8By5ns^LgMR0C|&p|DGieuA_!Kz|z>u6_{ zG8BC;GJUY#%``TR>?5M>$R&6O-L*gTJRGgw;@_4yS)6->*jz#HAW7IT%LNzdgiDs` zDk`RgY#{${l#of4Y#^72A}mv)kK>jA$pg_*OofYo7kYSL!1Uej(abdT1=4pCGxW9W-k3F->_63I*C|Z zg(?1??Ln5<&|<*b)p95WHqE*_bR%g{MmwYKmk;Q9%;#=W(fI{#a!^U}}HR)CKiZ zWyDrp4vmbei5M(`ao!(>?CdTX=d&sRx5bcwRu(;ZzDTu`63l07&HB=kF=&RS%mHEg<`VOwc{8FM!e{whch1Bzi9LS|JA*A^$SeDd>tO8xn1s#M*@%C@qK#KEy(E z&X_p)0Y@jERX#f`r8D1~k`pw-ZKV2NmN=T8M<66x7eGZ}AJKF<4}7z%r75w5x;D}d z7$l6?&VdNJ5)>{1odqhDJ0#;d(1X*`ycUxSbSF!oE&4K|gQ1f!M=y#@p*gZ+zRYN? zl$RN;g%`xb`UdjX?bCn_`s^$5q~f`p3=gv^$j@Ij&Oz6kve=YzbQczrtweL^bIf z(1B?Mng|9jT^fK+4pIM-*MQA`u!uCZFWE5#3R^0B!L|CARKah|i$3Nm>6L?F)Kry= zXe62-X9^>~LwH^mCt&YB*OVBeuZ5J=)$5}YYJ#zJoM<`GY=pj?(06H{+=in+Q4|wr zB*i?|NY>3B#xP%&o0@;aLJV-Rf6qsd(<1JHvh_{erS;)Y8K1?yAubA_yKe~f?`Q;U ziInp9$2XA3=|6n=eCaA`-ml=EL##OIsNTp`kYZq_apJTP*x@E>N>+$8K~zCzS>r4w zy1ok0Ie4#ygV=5Zy|p2lE>{E9M}qA-hU(wO^7I}gQ0`Cts)n?#NfT&}2>k?x0-nvT10&^f7HvflB`JnP!Z2n!6m2R6Vko~S0`|1%OS8%%T{CIQlA7cYJSFmwX?wE$W>EpwUbyL8<9rnBJ J3bq|S|G%9y09^n8 literal 0 HcmV?d00001 From 55ae118db7c36fb88994e072e424cb66d1f34e74 Mon Sep 17 00:00:00 2001 From: Aaron Siddhartha Mondal Date: Tue, 11 Feb 2025 18:43:27 +0100 Subject: [PATCH 22/29] [GitHub] Skip undefcheck if no relevant files changed (#126749) If the list of filtered files was empty the check would process every file in the diff. 
--- llvm/utils/git/code-format-helper.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py index 48a338aca9c8e..cb1e56859d083 100755 --- a/llvm/utils/git/code-format-helper.py +++ b/llvm/utils/git/code-format-helper.py @@ -353,6 +353,8 @@ def pr_comment_text_for_diff(self, diff: str) -> str: def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]: files = self.filter_changed_files(changed_files) + if not files: + return None # Use git to find files that have had a change in the number of undefs regex = "([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)" @@ -379,10 +381,6 @@ def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str # Each file is prefixed like: # diff --git a/file b/file for file in re.split("^diff --git ", stdout, 0, re.MULTILINE): - # We skip checking in MIR files as undef is a valid token and not - # going away. - if file.endswith(".mir"): - continue # search for additions of undef if re.search(r"^[+](?!\s*#\s*).*(\bundef\b|UndefValue::get)", file, re.MULTILINE): files.append(re.match("a/([^ ]+)", file.splitlines()[0])[1]) From 918848d03bb9d0e06dea2ef588bda17ff961923c Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 11 Feb 2025 09:51:18 -0800 Subject: [PATCH 23/29] [lldb] Devirtualize GetValueProperties (NFC) (#126583) Nobody is overriding GetValueProperties, so in practice we're always using `m_collection_sp`, which means we don't need to check the pointer. The temlated helpers were already operating on `m_collection_sp` directly so this makes the rest of the class consistent. 
--- .../lldb/Core/UserSettingsController.h | 4 +-- lldb/source/Core/UserSettingsController.cpp | 33 ++++--------------- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/lldb/include/lldb/Core/UserSettingsController.h b/lldb/include/lldb/Core/UserSettingsController.h index 32da7e05f7040..29e892fdba45b 100644 --- a/lldb/include/lldb/Core/UserSettingsController.h +++ b/lldb/include/lldb/Core/UserSettingsController.h @@ -38,9 +38,7 @@ class Properties { virtual ~Properties(); - virtual lldb::OptionValuePropertiesSP GetValueProperties() const { - // This function is virtual in case subclasses want to lazily implement - // creating the properties. + lldb::OptionValuePropertiesSP GetValueProperties() const { return m_collection_sp; } diff --git a/lldb/source/Core/UserSettingsController.cpp b/lldb/source/Core/UserSettingsController.cpp index b57c1b0eef9b4..5408d64b40647 100644 --- a/lldb/source/Core/UserSettingsController.cpp +++ b/lldb/source/Core/UserSettingsController.cpp @@ -40,64 +40,45 @@ Properties::~Properties() = default; lldb::OptionValueSP Properties::GetPropertyValue(const ExecutionContext *exe_ctx, llvm::StringRef path, Status &error) const { - OptionValuePropertiesSP properties_sp(GetValueProperties()); - if (properties_sp) - return properties_sp->GetSubValue(exe_ctx, path, error); - return lldb::OptionValueSP(); + return m_collection_sp->GetSubValue(exe_ctx, path, error); } Status Properties::SetPropertyValue(const ExecutionContext *exe_ctx, VarSetOperationType op, llvm::StringRef path, llvm::StringRef value) { - OptionValuePropertiesSP properties_sp(GetValueProperties()); - if (properties_sp) - return properties_sp->SetSubValue(exe_ctx, op, path, value); - return Status::FromErrorString("no properties"); + return m_collection_sp->SetSubValue(exe_ctx, op, path, value); } void Properties::DumpAllPropertyValues(const ExecutionContext *exe_ctx, Stream &strm, uint32_t dump_mask, bool is_json) { - OptionValuePropertiesSP 
properties_sp(GetValueProperties()); - if (!properties_sp) - return; - if (is_json) { - llvm::json::Value json = properties_sp->ToJSON(exe_ctx); + llvm::json::Value json = m_collection_sp->ToJSON(exe_ctx); strm.Printf("%s", llvm::formatv("{0:2}", json).str().c_str()); } else - properties_sp->DumpValue(exe_ctx, strm, dump_mask); + m_collection_sp->DumpValue(exe_ctx, strm, dump_mask); } void Properties::DumpAllDescriptions(CommandInterpreter &interpreter, Stream &strm) const { strm.PutCString("Top level variables:\n\n"); - OptionValuePropertiesSP properties_sp(GetValueProperties()); - if (properties_sp) - return properties_sp->DumpAllDescriptions(interpreter, strm); + return m_collection_sp->DumpAllDescriptions(interpreter, strm); } Status Properties::DumpPropertyValue(const ExecutionContext *exe_ctx, Stream &strm, llvm::StringRef property_path, uint32_t dump_mask, bool is_json) { - OptionValuePropertiesSP properties_sp(GetValueProperties()); - if (properties_sp) { - return properties_sp->DumpPropertyValue(exe_ctx, strm, property_path, + return m_collection_sp->DumpPropertyValue(exe_ctx, strm, property_path, dump_mask, is_json); - } - return Status::FromErrorString("empty property list"); } size_t Properties::Apropos(llvm::StringRef keyword, std::vector &matching_properties) const { - OptionValuePropertiesSP properties_sp(GetValueProperties()); - if (properties_sp) { - properties_sp->Apropos(keyword, matching_properties); - } + m_collection_sp->Apropos(keyword, matching_properties); return matching_properties.size(); } From 71478ecdb48075051e6e746c4c51b9caeb4c21b6 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Tue, 11 Feb 2025 10:00:09 -0800 Subject: [PATCH 24/29] [WebKit Checkers] Treat const Objective-C ivar as a safe origin (#126353) Like const C++ member variables, treat const Ref, RefPtr, CheckedRef, CheckedPtr Objective-C ivars as a safe pointer origin in WebKit checkers. 
--- .../lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp | 9 +++++---- .../Analysis/Checkers/WebKit/uncounted-obj-arg.mm | 13 +++++++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index abf5d3ec193a4..5d28982c41fc4 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -163,10 +163,11 @@ bool isConstOwnerPtrMemberExpr(const clang::Expr *E) { if (OCE->getOperator() == OO_Star && OCE->getNumArgs() == 1) E = OCE->getArg(0); } - auto *ME = dyn_cast(E); - if (!ME) - return false; - auto *D = ME->getMemberDecl(); + const ValueDecl *D = nullptr; + if (auto *ME = dyn_cast(E)) + D = ME->getMemberDecl(); + else if (auto *IVR = dyn_cast(E)) + D = IVR->getDecl(); if (!D) return false; auto T = D->getType(); diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm index 9ad1880e9d118..08319016023e3 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm @@ -1,11 +1,14 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s -// expected-no-diagnostics #import "mock-types.h" #import "mock-system-header.h" #import "../../Inputs/system-header-simulator-for-objc-dealloc.h" -@interface Foo : NSObject +@interface Foo : NSObject { + const Ref _obj1; + const RefPtr _obj2; + Ref _obj3; +} @property (nonatomic, readonly) RefPtr countable; @@ -17,6 +20,11 @@ @implementation Foo - (void)execute { self._protectedRefCountable->method(); + _obj1->method(); + _obj1.get().method(); + (*_obj2).method(); + _obj3->method(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} } - (RefPtr)_protectedRefCountable { @@ -30,6 +38,7 @@ - (void)execute { void ref() const; void 
deref() const; Ref copy() const; + void method(); }; @interface WrapperObj : NSObject From a2263eba4d3be0daa96bd154de3b8f2a67aa67fb Mon Sep 17 00:00:00 2001 From: Vigneshwar Jayakumar Date: Tue, 11 Feb 2025 12:30:53 -0600 Subject: [PATCH 25/29] AMDGPU: Handle gfx950 XDL-write-VGPR-VALU-Mem-Exp wait state change (#126727) --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17 +- .../AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll | 12 +- .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll | 72 +++--- ....amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll | 190 +++++++------- ...m.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll | 234 +++++++++--------- .../AMDGPU/llvm.amdgcn.smfmac.gfx950.ll | 202 +++++++-------- .../CodeGen/AMDGPU/mai-hazards-gfx940.mir | 36 ++- 7 files changed, 389 insertions(+), 374 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 73b44680aad5d..36b9003a0ee65 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2606,12 +2606,14 @@ static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { return NumPasses + 3; } -static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { - // 2 pass -> 5 - // 4 pass -> 7 - // 8 pass -> 11 - // 16 pass -> 19 - return NumPasses + 3; +static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses, + bool IsGFX950) { + // xdl def cycles | gfx940 | gfx950 + // 2 pass | 5 5 + // 4 pass | 7 8 + // 8 pass | 11 12 + // 16 pass | 19 20 + return NumPasses + 3 + (NumPasses != 2 && IsGFX950); } static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) { @@ -2762,7 +2764,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } else if (ST.hasGFX940Insts()) { NeedWaitStates = isXDL(ST, *MFMA) - ? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(NumPasses) + ? 
GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates( + NumPasses, ST.hasGFX950Insts()) : GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates( NumPasses); } else { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll index 8d380516df8b5..452033f332659 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll @@ -49,7 +49,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x ; GCN-NEXT: v_mov_b32_e32 v9, s17 ; GCN-NEXT: v_mov_b32_e32 v10, s18 ; GCN-NEXT: v_mov_b32_e32 v11, s19 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 4 ; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1 @@ -122,7 +122,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0 ; GCN-NEXT: v_mov_b32_e32 v9, s17 ; GCN-NEXT: v_mov_b32_e32 v10, s18 ; GCN-NEXT: v_mov_b32_e32 v11, s19 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 4 ; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1 @@ -179,7 +179,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -224,7 +224,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0, ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; 
GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -417,7 +417,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac(<8 x bfloat> ; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 1 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 ; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 @@ -459,7 +459,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac_flags(<8 x bf ; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 1 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 ; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll index 44cb4e803ffad..4628a9c15391b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll @@ -19,7 +19,7 @@ define <4 x float> @test_mfma_f32_16x16x32_f16(<8 x half> %arg0, <8 x half> %arg ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -39,7 +39,7 @@ define <4 x float> @test_mfma_f32_16x16x32_f16__flags(<8 x half> %arg0, <8 x hal ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -67,7 +67,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp ; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -88,7 +88,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0) @@ -114,7 +114,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr ; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -135,7 +135,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 3, i32 2, i32 1) @@ -186,7 +186,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal ; SDAG-NEXT: v_mov_b32_e32 v9, s17 ; SDAG-NEXT: v_mov_b32_e32 v10, s18 ; SDAG-NEXT: v_mov_b32_e32 v11, s19 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 4 ; 
SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1 @@ -253,7 +253,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 4 ; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 @@ -316,7 +316,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, < ; SDAG-NEXT: v_mov_b32_e32 v9, s17 ; SDAG-NEXT: v_mov_b32_e32 v10, s18 ; SDAG-NEXT: v_mov_b32_e32 v11, s19 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 4 ; SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1 @@ -383,7 +383,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, < ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 4 ; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 @@ -430,7 +430,7 @@ define <16 x float> @test_mfma_f32_32x32x16_f16__mac(<8 x half> %arg0, <8 x half ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -475,7 +475,7 @@ define <16 x float> @test_mfma_f32_32x32x16_f16__mac__flags(<8 x half> %arg0, <8 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: 
v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -776,7 +776,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar ; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] ; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 ; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 @@ -813,7 +813,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar ; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 @@ -855,7 +855,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x hal ; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 ; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 ; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 @@ -892,7 +892,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x hal ; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], 
s[0:1] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 @@ -919,7 +919,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -939,7 +939,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8__flags(<4 x i32> %arg0, <4 x i32> %a ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -971,7 +971,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa ; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -992,7 +992,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x64.i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 0, i32 0, i32 0) @@ -1022,7 +1022,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr ; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 
blgp:1 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -1043,7 +1043,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x64.i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 3, i32 2, i32 1) @@ -1097,7 +1097,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32> ; SDAG-NEXT: v_mov_b32_e32 v1, s17 ; SDAG-NEXT: v_mov_b32_e32 v2, s18 ; SDAG-NEXT: v_mov_b32_e32 v3, s19 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 @@ -1169,7 +1169,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32> ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 4 ; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 @@ -1233,7 +1233,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__flags(<4 x i32> %arg0, <4 ; SDAG-NEXT: v_mov_b32_e32 v1, s17 ; SDAG-NEXT: v_mov_b32_e32 v2, s18 ; SDAG-NEXT: v_mov_b32_e32 v3, s19 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 @@ -1305,7 +1305,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__flags(<4 x 
i32> %arg0, <4 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 4 ; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 @@ -1352,7 +1352,7 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac(<4 x i32> %arg0, <4 x i32> %ar ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1397,7 +1397,7 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac__flags(<4 x i32> %arg0, <4 x i ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1717,7 +1717,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0 ; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] ; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 ; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 @@ -1754,7 +1754,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0 ; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 ; GISEL-NEXT: 
global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 @@ -1801,7 +1801,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32> ; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 ; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 ; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 @@ -1838,7 +1838,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32> ; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 @@ -1865,7 +1865,7 @@ define <4 x float> @test_mfma_f32_16x16x32_bf16(<8 x bfloat> %arg0, <8 x bfloat> ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1885,7 +1885,7 @@ define <4 x float> @test_mfma_f32_16x16x32_bf16__flags(<8 x bfloat> %arg0, <8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1913,7 +1913,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs ; GCN-NEXT: v_accvgpr_write_b32 a3, s3 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: 
v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; GCN-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0) @@ -1939,7 +1939,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt ; GCN-NEXT: v_accvgpr_write_b32 a3, s3 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; GCN-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2, i32 3, i32 2, i32 1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll index 9a8282231ac15..25b857f8f47dd 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll @@ -24,7 +24,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp0(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -48,7 +48,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -72,7 +72,7 @@ 
define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -96,7 +96,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -120,7 +120,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -144,7 +144,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -168,7 +168,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -192,7 
+192,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -217,7 +217,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp0__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -242,7 +242,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp1(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -267,7 +267,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp1__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -292,7 +292,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp2(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -317,7 +317,7 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp2__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -342,7 +342,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp3(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -367,7 +367,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp3__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -392,7 +392,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp4(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -417,7 +417,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp4__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -442,7 +442,7 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp0(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -467,7 +467,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp0__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -492,7 +492,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp1(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:1 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -518,7 +518,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp1__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -543,7 +543,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp2(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:1 blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -567,7 +567,7 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp2__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -592,7 +592,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp3(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:1 blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -617,7 +617,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp3__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -642,7 +642,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp4(<8 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:1 blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -667,7 +667,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp4__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] cbsz:1 blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -692,7 +692,7 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp0(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -717,7 +717,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp0__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -742,7 +742,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp1(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:2 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -767,7 +767,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp1__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -791,7 +791,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -815,7 +815,7 
@@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -839,7 +839,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -863,7 +863,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -889,7 +889,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp0(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -914,7 +914,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp0__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -939,7 +939,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp1(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -964,7 +964,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp1__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -988,7 +988,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1012,7 +1012,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1036,7 +1036,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp4(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; 
GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1060,7 +1060,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1084,7 +1084,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1108,7 +1108,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1132,7 +1132,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp4(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1156,7 +1156,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 
a[0:3], v[0:5], v[6:9], a[0:3] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1181,7 +1181,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp0(<4 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1206,7 +1206,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp0__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1231,7 +1231,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp1(<4 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:4 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1256,7 +1256,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp1__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4 blgp:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1280,7 +1280,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp2(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: 
v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1304,7 +1304,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1328,7 +1328,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp3(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:4 blgp:3 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1352,7 +1352,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:3 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1376,7 +1376,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp4(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3], v12, v13 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1400,7 +1400,7 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1429,7 +1429,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__sgpr_scaleA__sgpr_ ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1450,7 +1450,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__sgpr_scaleA__vgpr_ ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v20 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1471,7 +1471,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__vgpr_scaleA__sgpr_ ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, s0 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1512,7 +1512,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[12:19], v[4:11], a[0:3], v2, v3 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1545,7 
+1545,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[4:11], v[12:19], a[0:3], v2, v3 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1574,7 +1574,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], s20, v12 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1599,7 +1599,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], s20, v12 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1628,7 +1628,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v12, s20 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1653,7 +1653,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v12, s20 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, 
a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1682,7 +1682,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, s20 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1707,7 +1707,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, s20 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1728,7 +1728,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, s16 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1757,7 +1757,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, s24 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1782,7 +1782,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, s24 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 -; 
GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1803,7 +1803,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__ ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1825,7 +1825,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1843,7 +1843,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1866,7 +1866,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1885,7 +1885,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], 
v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1927,7 +1927,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[14:15] ; SDAG-NEXT: s_endpgm ; @@ -1953,7 +1953,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[30:31] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 2, i32 3, i32 %scale0, i32 1, i32 %scale1) @@ -1993,7 +1993,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[4:5] ; SDAG-NEXT: s_endpgm ; @@ -2020,7 +2020,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0] ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5] ; GISEL-NEXT: s_endpgm %result = call <4 x float> 
@llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 65, i32 1, i32 -2) @@ -2040,7 +2040,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_a( ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2062,7 +2062,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_b( ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2083,7 +2083,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_1(<8 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 1 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2104,7 +2104,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_1_0_a( ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 1, 0 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2129,7 +2129,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v8i32_fp6( ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; 
GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2153,7 +2153,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp8( ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2176,7 +2176,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp6( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2199,7 +2199,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp6_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2223,7 +2223,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v8i32_fp4( ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2247,7 +2247,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp8( ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 
op_sel_hi:[0,0,0] cbsz:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2271,7 +2271,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v6i32_fp4( ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2295,7 +2295,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v6i32_fp4__v8i32_fp8( ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2318,7 +2318,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp4( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2341,7 +2341,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp4_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2365,4 +2365,4 @@ declare <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v4i32(<8 declare <4 x float> 
@llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v6i32(<8 x i32>, <6 x i32>, <4 x float>, i32 immarg, i32 immarg, i32 immarg, i32, i32 immarg, i32) #1 attributes #0 = { "amdgpu-flat-work-group-size"="512,512" } -attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } +attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } \ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll index 05f8739e7cb89..3d959393a8fa7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll @@ -40,7 +40,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp0(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -85,7 +85,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp0(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -137,7 +137,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -182,7 +182,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -234,7 +234,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_2__cbsz1__blgp1(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -279,7 +279,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_2__cbsz1__blgp1(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -331,7 +331,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_3__cbsz1__blgp1(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -376,7 +376,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_3__cbsz1__blgp1(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: 
v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -428,7 +428,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_3__cbsz1__blgp1(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -473,7 +473,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_3__cbsz1__blgp1(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -525,7 +525,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_0__cbsz1__blgp1(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -570,7 +570,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_0__cbsz1__blgp1(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -622,7 +622,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_3__cbsz1__blgp1(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; 
SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -667,7 +667,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_3__cbsz1__blgp1(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -719,7 +719,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_2__cbsz1__blgp1(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -764,7 +764,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_2__cbsz1__blgp1(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -815,7 +815,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp0__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -868,7 +868,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp1(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 
op_sel_hi:[0,0,0] blgp:1 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -913,7 +913,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp1(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] blgp:1 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -963,7 +963,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp1__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] blgp:1 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1015,7 +1015,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp2(<8 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] blgp:2 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1064,7 +1064,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp2__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] blgp:2 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1116,7 +1116,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp3(<8 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], 
a[0:15], v30, v31 op_sel_hi:[0,0,0] blgp:3 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1165,7 +1165,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp3__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] blgp:3 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1215,7 +1215,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp4(<8 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] blgp:4 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1264,7 +1264,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp4__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15] blgp:4 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1317,7 +1317,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp0(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:1 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1362,7 +1362,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp0(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], 
v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:1 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1412,7 +1412,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp0__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:1 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1465,7 +1465,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp1(<8 x ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:1 blgp:1 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1510,7 +1510,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp1(<8 x ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:1 blgp:1 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1561,7 +1561,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp1__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:1 blgp:1 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1613,7 +1613,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp2(<8 x ; 
GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:1 blgp:2 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1662,7 +1662,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp2__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] cbsz:1 blgp:2 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1714,7 +1714,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp3(<8 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:1 blgp:3 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1763,7 +1763,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp3__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] cbsz:1 blgp:3 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1813,7 +1813,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp4(<8 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:1 blgp:4 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1862,7 +1862,7 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp4__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15] cbsz:1 blgp:4 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1914,7 +1914,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp0(<6 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:2 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1963,7 +1963,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp0__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:2 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2015,7 +2015,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp1(<6 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:2 blgp:1 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2064,7 +2064,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp1__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:2 blgp:1 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2113,7 +2113,7 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp2(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:2 blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2161,7 +2161,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp2__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:2 blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2210,7 +2210,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp3(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:2 blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2258,7 +2258,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp3__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:2 blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2311,7 +2311,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp0(<6 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:3 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2360,7 +2360,7 
@@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp0__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:3 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2412,7 +2412,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp1(<6 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:3 blgp:1 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2461,7 +2461,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp1__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:3 blgp:1 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2510,7 +2510,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp2(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:3 blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2558,7 +2558,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp2__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:3 blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2607,7 +2607,7 @@ define 
<16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp4(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:3 blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2655,7 +2655,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp4__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15] cbsz:3 blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2704,7 +2704,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp3(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:3 blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2752,7 +2752,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp3__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:3 blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2801,7 +2801,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp4(<6 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:2 blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ 
-2849,7 +2849,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp4__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15] cbsz:2 blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2899,7 +2899,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp0(<4 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:4 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2948,7 +2948,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp0__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15] cbsz:4 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2998,7 +2998,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp1(<4 x ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:4 blgp:1 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3047,7 +3047,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp1__cons ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15] cbsz:4 blgp:1 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3096,7 +3096,7 @@ 
define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp2(<4 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:4 blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3144,7 +3144,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp2__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15] cbsz:4 blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3193,7 +3193,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp3(<4 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:4 blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3241,7 +3241,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp3__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15] cbsz:4 blgp:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3290,7 +3290,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp4(<4 x ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:7], a[0:15], v24, v25 op_sel_hi:[0,0,0] cbsz:4 blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, 
a2 @@ -3338,7 +3338,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp4__cons ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:4 blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3393,7 +3393,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__sgpr_scaleA__sgpr_ ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3441,7 +3441,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__sgpr_scaleA__vgpr_ ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3489,7 +3489,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgpr_scaleA__sgpr_ ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, s0 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3567,7 +3567,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[32:39], v[0:7], a[0:15], v14, v15 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -3637,7 +3637,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[32:39], a[0:15], v14, v15 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3691,7 +3691,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], s20, v24 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3741,7 +3741,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], s20, v24 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3795,7 +3795,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v24, s20 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3845,7 +3845,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v24, s20 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: 
s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3899,7 +3899,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, s20 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3949,7 +3949,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, s20 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3995,7 +3995,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, s28 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4041,7 +4041,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, s28 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4111,7 +4111,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[32:39], v[0:7], a[0:15], v14, v15 
op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4177,7 +4177,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[32:39], v[0:7], a[0:15], v14, v15 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4224,7 +4224,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_inlineimm__ ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 33, -2 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4272,7 +4272,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, -2 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4316,7 +4316,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, -2 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4365,7 +4365,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: 
v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[0,0,0] ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4410,7 +4410,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4477,7 +4477,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[2:3] offset:48 ; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[2:3] offset:32 ; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[2:3] offset:16 @@ -4520,7 +4520,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[2:3] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[2:3] offset:16 ; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[2:3] offset:32 @@ -4576,7 +4576,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 ; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 @@ -4619,7 +4619,7 @@ define amdgpu_kernel 
void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 @@ -4765,7 +4765,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__nonmac(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 @@ -4912,7 +4912,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 @@ -5059,7 +5059,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgprcd_nonma ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 @@ -5206,7 +5206,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: 
global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 @@ -5247,7 +5247,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_0_a( ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5294,7 +5294,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_0_b( ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5341,7 +5341,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_1(<8 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 0, 1 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5388,7 +5388,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_1_0_a( ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 1, 0 op_sel_hi:[0,0,0] ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5441,7 +5441,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp6( ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] blgp:2 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 
3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5486,7 +5486,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp6( ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] blgp:2 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5538,7 +5538,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp8( ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:2 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5583,7 +5583,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp8( ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:2 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5634,7 +5634,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp6( ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:2 blgp:2 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5678,7 +5678,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp6( ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 
a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:2 blgp:2 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5727,7 +5727,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp6_ ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:2 blgp:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5779,7 +5779,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp4( ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] blgp:4 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5824,7 +5824,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp4( ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] blgp:4 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5876,7 +5876,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp8( ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:4 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5921,7 +5921,7 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp8( ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:4 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5972,7 +5972,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v6i32_fp4( ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] blgp:4 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -6023,7 +6023,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v6i32_fp4__v8i32_fp8( ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:4 ; GCN-NEXT: s_nop 7 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -6074,7 +6074,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp4( ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:4 blgp:4 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -6118,7 +6118,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp4( ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:4 blgp:4 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 3 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; 
GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -6167,7 +6167,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp4_ ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:4 blgp:4 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -6204,4 +6204,4 @@ declare <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v6i32(<8 attributes #0 = { "amdgpu-flat-work-group-size"="512,512" } attributes #1 = { "amdgpu-flat-work-group-size"="128,128" } -attributes #2 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } \ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll index 66c02a9bd0c6a..6b922fcd9b550 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll @@ -33,7 +33,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) % ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -59,7 +59,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) % ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_smfmac_f32_16x16x64_f16 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[6:7] ; GISEL-NEXT: s_endpgm bb: @@ -81,7 +81,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_f16(<8 x half> %arg0, <16 x 
half> % ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 a[0:3], v[0:3], v[4:11], v16 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -92,7 +92,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_f16(<8 x half> %arg0, <16 x half> % ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x64_f16 v[12:15], v[0:3], v[4:11], v16 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -112,7 +112,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_f16__flags0(<8 x half> %arg0, <16 x ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 a[0:3], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -123,7 +123,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_f16__flags0(<8 x half> %arg0, <16 x ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x64_f16 v[12:15], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -143,7 +143,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_f16__flags1(<8 x half> %arg0, <16 x ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 a[0:3], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -154,7 +154,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_f16__flags1(<8 
x half> %arg0, <16 x ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x64_f16 v[12:15], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -187,7 +187,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_f16__sgpr(<8 x half> inreg %arg0, < ; SDAG-NEXT: v_mov_b32_e32 v12, s28 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 a[0:3], v[8:11], v[0:7], v12 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -246,7 +246,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x32_f16__vgpr(ptr addrspace(1) % ; SDAG-NEXT: v_smfmac_f32_32x32x32_f16 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] offset:32 ; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[6:7] offset:48 ; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[6:7] @@ -279,7 +279,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x32_f16__vgpr(ptr addrspace(1) % ; GISEL-NEXT: v_smfmac_f32_32x32x32_f16 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v16, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[6:7] ; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[6:7] offset:16 ; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] offset:32 @@ -317,7 +317,7 @@ define <16 x float> @test_smfmac_f32_32x32x32_f16(<8 x half> %arg0, <16 x half> ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x32_f16 a[0:15], v[0:3], v[4:11], v28 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 
; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -389,7 +389,7 @@ define <16 x float> @test_smfmac_f32_32x32x32_f16__flags0(<8 x half> %arg0, <16 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x32_f16 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -461,7 +461,7 @@ define <16 x float> @test_smfmac_f32_32x32x32_f16__flags1(<8 x half> %arg0, <16 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x32_f16 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -561,7 +561,7 @@ define <16 x float> @test_smfmac_f32_32x32x32_f16__sgpr(<8 x half> inreg %arg0, ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x32_f16 a[0:15], v[28:31], v[0:7], v10 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -650,7 +650,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_bf16__vgpr(ptr addrspace(1) ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; GCN-NEXT: s_endpgm bb: @@ -672,7 +672,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_bf16(<8 x bfloat> %arg0, <16 x bflo ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 a[0:3], v[0:3], v[4:11], v16 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -692,7 +692,7 @@ define <4 x float> 
@test_smfmac_f32_16x16x64_bf16__flags0(<8 x bfloat> %arg0, <1 ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 a[0:3], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -712,7 +712,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_bf16__flags1(<8 x bfloat> %arg0, <1 ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 a[0:3], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -745,7 +745,7 @@ define <4 x float> @test_smfmac_f32_16x16x64_bf16__sgpr(<8 x bfloat> inreg %arg0 ; GCN-NEXT: v_mov_b32_e32 v12, s28 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 a[0:3], v[8:11], v[0:7], v12 -; GCN-NEXT: s_nop 6 +; GCN-NEXT: s_nop 7 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -788,7 +788,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x32_bf16__vgpr(ptr addrspace(1) ; GCN-NEXT: v_smfmac_f32_32x32x32_bf16 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; GCN-NEXT: v_mov_b32_e32 v16, 0 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 1 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] offset:32 ; GCN-NEXT: global_store_dwordx4 v16, v[12:15], s[6:7] offset:48 ; GCN-NEXT: global_store_dwordx4 v16, v[0:3], s[6:7] @@ -826,7 +826,7 @@ define <16 x float> @test_smfmac_f32_32x32x32_bf16(<8 x bfloat> %arg0, <16 x bfl ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_smfmac_f32_32x32x32_bf16 a[0:15], v[0:3], v[4:11], v28 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -871,7 +871,7 @@ define <16 
x float> @test_smfmac_f32_32x32x32_bf16__flags0(<8 x bfloat> %arg0, < ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_smfmac_f32_32x32x32_bf16 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -916,7 +916,7 @@ define <16 x float> @test_smfmac_f32_32x32x32_bf16__flags1(<8 x bfloat> %arg0, < ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_smfmac_f32_32x32x32_bf16 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -989,7 +989,7 @@ define <16 x float> @test_smfmac_f32_32x32x32_bf16__sgpr(<8 x bfloat> inreg %arg ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_smfmac_f32_32x32x32_bf16 a[0:15], v[28:31], v[0:7], v10 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 +; GCN-NEXT: s_nop 3 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1046,7 +1046,7 @@ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr(ptr addrspace(1) % ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -1072,7 +1072,7 @@ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr(ptr addrspace(1) % ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: @@ -1094,7 +1094,7 @@ define <4 x i32> @test_smfmac_i32_16x16x128_i8(<4 x i32> %arg0, <8 x i32> %arg1, ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; 
SDAG-NEXT: v_smfmac_i32_16x16x128_i8 a[0:3], v[0:3], v[4:11], v16 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1105,7 +1105,7 @@ define <4 x i32> @test_smfmac_i32_16x16x128_i8(<4 x i32> %arg0, <8 x i32> %arg1, ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[12:15], v[0:3], v[4:11], v16 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -1125,7 +1125,7 @@ define <4 x i32> @test_smfmac_i32_16x16x128_i8__flags0(<4 x i32> %arg0, <8 x i32 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 a[0:3], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1136,7 +1136,7 @@ define <4 x i32> @test_smfmac_i32_16x16x128_i8__flags0(<4 x i32> %arg0, <8 x i32 ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[12:15], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -1156,7 +1156,7 @@ define <4 x i32> @test_smfmac_i32_16x16x128_i8__flags1(<4 x i32> %arg0, <8 x i32 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 a[0:3], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1167,7 +1167,7 @@ define <4 x i32> @test_smfmac_i32_16x16x128_i8__flags1(<4 x i32> %arg0, <8 x i32 ; GISEL: ; %bb.0: ; GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[12:15], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -1200,7 +1200,7 @@ define <4 x i32> @test_smfmac_i32_16x16x128_i8__sgpr(<4 x i32> inreg %arg0, <8 x ; SDAG-NEXT: v_mov_b32_e32 v12, s28 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 a[0:3], v[8:11], v[0:7], v12 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1265,7 +1265,7 @@ define amdgpu_kernel void @test_smfmac_i32_32x32x64_i8__vgpr(ptr addrspace(1) %a ; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 ; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] @@ -1298,7 +1298,7 @@ define amdgpu_kernel void @test_smfmac_i32_32x32x64_i8__vgpr(ptr addrspace(1) %a ; GISEL-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v16, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 @@ -1336,7 +1336,7 @@ define <16 x i32> @test_smfmac_i32_32x32x64_i8(<4 x i32> %arg0, <8 x i32> %arg1, ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 a[0:15], v[0:3], v[4:11], v28 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ 
-1408,7 +1408,7 @@ define <16 x i32> @test_smfmac_i32_32x32x64_i8__flags0(<4 x i32> %arg0, <8 x i32 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1480,7 +1480,7 @@ define <16 x i32> @test_smfmac_i32_32x32x64_i8__flags1(<4 x i32> %arg0, <8 x i32 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1580,7 +1580,7 @@ define <16 x i32> @test_smfmac_i32_32x32x64_i8__sgpr(<4 x i32> inreg %arg0, <8 x ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 a[0:15], v[28:31], v[0:7], v10 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1675,7 +1675,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_bf8__vgpr(ptr addrspace ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -1701,7 +1701,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_bf8__vgpr(ptr addrspace ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: @@ -1723,7 +1723,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_bf8(<4 x i32> %arg0, <8 x i32> ; 
SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 a[0:3], v[0:3], v[4:11], v16 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1734,7 +1734,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_bf8(<4 x i32> %arg0, <8 x i32> ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[12:15], v[0:3], v[4:11], v16 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -1754,7 +1754,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_bf8__flags0(<4 x i32> %arg0, < ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 a[0:3], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1765,7 +1765,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_bf8__flags0(<4 x i32> %arg0, < ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[12:15], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -1785,7 +1785,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_bf8__flags1(<4 x i32> %arg0, < ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 a[0:3], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1796,7 +1796,7 @@ define <4 x float> 
@test_smfmac_f32_16x16x128_bf8_bf8__flags1(<4 x i32> %arg0, < ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[12:15], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -1829,7 +1829,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_bf8__sgpr(<4 x i32> inreg %arg ; SDAG-NEXT: v_mov_b32_e32 v12, s28 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 a[0:3], v[8:11], v[0:7], v12 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1890,7 +1890,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_fp8__vgpr(ptr addrspace ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -1916,7 +1916,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_fp8__vgpr(ptr addrspace ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: @@ -1938,7 +1938,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_fp8(<4 x i32> %arg0, <8 x i32> ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 a[0:3], v[0:3], v[4:11], v16 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1949,7 +1949,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_fp8(<4 x 
i32> %arg0, <8 x i32> ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[12:15], v[0:3], v[4:11], v16 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -1969,7 +1969,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_fp8__flags0(<4 x i32> %arg0, < ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 a[0:3], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1980,7 +1980,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_fp8__flags0(<4 x i32> %arg0, < ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[12:15], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -2000,7 +2000,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_fp8__flags1(<4 x i32> %arg0, < ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 a[0:3], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2011,7 +2011,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_fp8__flags1(<4 x i32> %arg0, < ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[12:15], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -2044,7 
+2044,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_fp8__sgpr(<4 x i32> inreg %arg ; SDAG-NEXT: v_mov_b32_e32 v12, s28 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 a[0:3], v[8:11], v[0:7], v12 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2105,7 +2105,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_bf8__vgpr(ptr addrspace ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -2131,7 +2131,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_bf8__vgpr(ptr addrspace ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: @@ -2153,7 +2153,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_bf8(<4 x i32> %arg0, <8 x i32> ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 a[0:3], v[0:3], v[4:11], v16 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2164,7 +2164,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_bf8(<4 x i32> %arg0, <8 x i32> ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[12:15], v[0:3], v[4:11], v16 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -2184,7 +2184,7 @@ define <4 x float> 
@test_smfmac_f32_16x16x128_fp8_bf8__flags0(<4 x i32> %arg0, < ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 a[0:3], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2195,7 +2195,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_bf8__flags0(<4 x i32> %arg0, < ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[12:15], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -2215,7 +2215,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_bf8__flags1(<4 x i32> %arg0, < ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 a[0:3], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2226,7 +2226,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_bf8__flags1(<4 x i32> %arg0, < ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[12:15], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -2259,7 +2259,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_bf8__sgpr(<4 x i32> inreg %arg ; SDAG-NEXT: v_mov_b32_e32 v12, s28 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 a[0:3], v[8:11], v[0:7], v12 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -2320,7 +2320,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_fp8__vgpr(ptr addrspace ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; @@ -2346,7 +2346,7 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_fp8__vgpr(ptr addrspace ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: s_nop 6 ; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1] ; GISEL-NEXT: s_endpgm bb: @@ -2368,7 +2368,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8(<4 x i32> %arg0, <8 x i32> ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 a[0:3], v[0:3], v[4:11], v16 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2379,7 +2379,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8(<4 x i32> %arg0, <8 x i32> ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[12:15], v[0:3], v[4:11], v16 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -2399,7 +2399,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8__flags0(<4 x i32> %arg0, < ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 a[0:3], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2410,7 
+2410,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8__flags0(<4 x i32> %arg0, < ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[12:15], v[0:3], v[4:11], v16 cbsz:1 abid:3 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -2430,7 +2430,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8__flags1(<4 x i32> %arg0, < ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 a[0:3], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2441,7 +2441,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8__flags1(<4 x i32> %arg0, < ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[12:15], v[0:3], v[4:11], v16 cbsz:3 abid:1 -; GISEL-NEXT: s_nop 6 +; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: v_mov_b32_e32 v0, v12 ; GISEL-NEXT: v_mov_b32_e32 v1, v13 ; GISEL-NEXT: v_mov_b32_e32 v2, v14 @@ -2474,7 +2474,7 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8__sgpr(<4 x i32> inreg %arg ; SDAG-NEXT: v_mov_b32_e32 v12, s28 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 a[0:3], v[8:11], v[0:7], v12 -; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2539,7 +2539,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_bf8__vgpr(ptr addrspace( ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 ; 
SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] @@ -2572,7 +2572,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_bf8__vgpr(ptr addrspace( ; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v16, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 @@ -2610,7 +2610,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8(<4 x i32> %arg0, <8 x i32> ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 a[0:15], v[0:3], v[4:11], v28 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2682,7 +2682,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8__flags0(<4 x i32> %arg0, < ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2754,7 +2754,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8__flags1(<4 x i32> %arg0, < ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2854,7 +2854,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8__sgpr(<4 x i32> inreg %arg ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 a[0:15], v[28:31], v[0:7], v10 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: 
s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2953,7 +2953,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_fp8__vgpr(ptr addrspace( ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 ; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] @@ -2986,7 +2986,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_fp8__vgpr(ptr addrspace( ; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v16, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 @@ -3024,7 +3024,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8(<4 x i32> %arg0, <8 x i32> ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 a[0:15], v[0:3], v[4:11], v28 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3096,7 +3096,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8__flags0(<4 x i32> %arg0, < ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3168,7 +3168,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8__flags1(<4 x i32> %arg0, < ; SDAG-NEXT: 
s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3268,7 +3268,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8__sgpr(<4 x i32> inreg %arg ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 a[0:15], v[28:31], v[0:7], v10 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3367,7 +3367,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_bf8__vgpr(ptr addrspace( ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 ; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] @@ -3400,7 +3400,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_bf8__vgpr(ptr addrspace( ; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v16, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 @@ -3438,7 +3438,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8(<4 x i32> %arg0, <8 x i32> ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 a[0:15], v[0:3], v[4:11], v28 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ 
-3510,7 +3510,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8__flags0(<4 x i32> %arg0, < ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3582,7 +3582,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8__flags1(<4 x i32> %arg0, < ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3682,7 +3682,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8__sgpr(<4 x i32> inreg %arg ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 a[0:15], v[28:31], v[0:7], v10 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3781,7 +3781,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_fp8__vgpr(ptr addrspace( ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 ; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 ; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] @@ -3814,7 +3814,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_fp8__vgpr(ptr addrspace( ; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; GISEL-NEXT: v_mov_b32_e32 v16, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; 
GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 @@ -3852,7 +3852,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8(<4 x i32> %arg0, <8 x i32> ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 a[0:15], v[0:3], v[4:11], v28 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3924,7 +3924,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__flags0(<4 x i32> %arg0, < ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3996,7 +3996,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__flags1(<4 x i32> %arg0, < ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4096,7 +4096,7 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__sgpr(<4 x i32> inreg %arg ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 a[0:15], v[28:31], v[0:7], v10 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: s_nop 3 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4156,4 +4156,4 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__sgpr(<4 x i32> inreg %arg ret <16 x float> %result } -attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } +attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } \ No newline at end of file diff --git 
a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index 1eb7ec4c142f2..ef30c9a44b2b5 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -734,7 +734,8 @@ body: | ... # GCN-LABEL: name: smfmac16x16_write_vgpr_flat_read # GCN: V_SMFMAC -# GCN-NEXT: S_NOP 6 +# GFX940-NEXT: S_NOP 6 +# GFX950-NEXT: S_NOP 7 # GCN-NEXT: FLAT_STORE_DWORD name: smfmac16x16_write_vgpr_flat_read body: | @@ -745,7 +746,8 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_flat_read # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: FLAT_STORE_DWORD name: xdl_smfma16x16_write_vgpr_flat_read body: | @@ -756,7 +758,8 @@ body: | # GCN-LABEL: name: smfmac32x32_write_vgpr_flat_read # GCN: V_SMFMAC # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: FLAT_STORE_DWORD name: smfmac32x32_write_vgpr_flat_read body: | @@ -768,7 +771,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: FLAT_STORE_DWORD name: xdl_smfma32x32_write_vgpr_flat_read body: | @@ -823,7 +827,8 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_read # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: xdl_smfma16x16_write_vgpr_valu_read body: | @@ -835,7 +840,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: xdl_smfma32x32_write_vgpr_valu_read body: | @@ -881,7 +887,8 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_accv_read # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 name: xdl_smfma16x16_write_vgpr_accv_read 
body: | @@ -893,7 +900,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 name: xdl_smfma32x32_write_vgpr_accv_read body: | @@ -1028,7 +1036,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32_sdwa name: xdl_smfma32x32_write_vgpr_valu_sdwa_write body: | @@ -1762,7 +1771,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read # GCN: V_MFMA -# GCN-NEXT: S_NOP 6 +# GFX940-NEXT: S_NOP 6 +# GFX950-NEXT: S_NOP 7 # GCN-NEXT: BUFFER_STORE_DWORD name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read body: | @@ -1772,7 +1782,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read # GCN: V_MFMA -# GCN-NEXT: S_NOP 6 +# GFX940-NEXT: S_NOP 6 +# GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MOV_B32 name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read body: | @@ -1782,7 +1793,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read # GCN: V_MFMA -# GCN-NEXT: S_NOP 6 +# GFX940-NEXT: S_NOP 6 +# GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_DOT name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read body: | From 1188b1ff7b956cb65d8ddda5f1e56c432f1a57c7 Mon Sep 17 00:00:00 2001 From: Vigneshwar Jayakumar Date: Tue, 11 Feb 2025 12:32:23 -0600 Subject: [PATCH 26/29] AMDGPU: Handle gfx950 XDL Write-VGPR-VALU-WAW wait state change (#126132) There are additional wait states for XDL write VALU WAW hazard in gfx950 compared to gfx940. 
--- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17 +++++++------ .../CodeGen/AMDGPU/mai-hazards-gfx940.mir | 24 ++++++++++++------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 36b9003a0ee65..b0f087737afa7 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2598,12 +2598,14 @@ static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { return NumPasses + 2; } -static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses) { - // 2 pass -> 5 - // 4 pass -> 7 - // 8 pass -> 11 - // 16 pass -> 19 - return NumPasses + 3; +static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses, + bool IsGFX950) { + // xdl def cycles | gfx940 | gfx950 + // 2 pass | 5 5 + // 4 pass | 7 8 + // 8 pass | 11 12 + // 16 pass | 19 20 + return NumPasses + 3 + (NumPasses != 2 && IsGFX950); } static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses, @@ -2851,7 +2853,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } else if (ST.hasGFX940Insts()) { NeedWaitStates = isXDL(ST, *MFMA) - ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(NumPasses) + ? 
GFX940_XDL_N_PassWriteVgprVALUWawWaitStates( + NumPasses, ST.hasGFX950Insts()) : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses); } else { switch (NumPasses) { diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index ef30c9a44b2b5..0af37ad8c896e 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -958,7 +958,8 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_write # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: xdl_smfma16x16_write_vgpr_valu_write body: | @@ -970,7 +971,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: xdl_smfma32x32_write_vgpr_valu_write body: | @@ -991,7 +993,8 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_f16_write # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_FMA_F16_e64 name: xdl_smfma16x16_write_vgpr_valu_f16_write body: | @@ -1003,7 +1006,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_FMA_F16_e64 name: xdl_smfma32x32_write_vgpr_valu_f16_write body: | @@ -1024,7 +1028,8 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_sdwa_write # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32_sdwa name: xdl_smfma16x16_write_vgpr_valu_sdwa_write body: | @@ -1761,7 +1766,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write # GCN: V_MFMA -# GCN-NEXT: S_NOP 6 +# GFX940-NEXT: S_NOP 6 +# GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MOV_B32 name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write body: | @@ -2072,7 +2078,8 @@ body: | ... 
# GCN-LABEL: name: smfmac16x16_read_vgpr_srcc_valu_write # GCN: V_SMFMAC -# GCN-NEXT: S_NOP 6 +# GFX940-NEXT: S_NOP 6 +# GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MOV_B32 name: smfmac16x16_read_vgpr_srcc_valu_write body: | @@ -2102,7 +2109,8 @@ body: | # GCN-LABEL: name: smfmac32x32_read_vgpr_srcc_valu_write # GCN: V_SMFMAC # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: smfmac32x32_read_vgpr_srcc_valu_write body: | From 71e623d878ecbf66324e15b3a3b2e983e2d7942a Mon Sep 17 00:00:00 2001 From: Elvin Wang Date: Tue, 11 Feb 2025 10:33:07 -0800 Subject: [PATCH 27/29] [llvm] Avoid out-of-order evaluation in DebugInfo (#125116) This is an upstream proposal from https://github.com/intel/intel-graphics-compiler/commit/e60884cb98c4332a0eecff8396eb353c5b86cd35 We observed StripNonLineTableDebugInfo malfunctioning during debugging; it is caused by out-of-order evaluation, which is a C++-level semantic ambiguity issue (see https://en.cppreference.com/w/cpp/language/eval_order). The solution is simply to separate one line into two. --- llvm/lib/IR/DebugInfo.cpp | 6 +- .../strip-nonlinetable-debuginfo-pr125116.ll | 88 +++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/Util/strip-nonlinetable-debuginfo-pr125116.ll diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 4ce518009bd3e..ea1d79d436041 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -798,7 +798,11 @@ class DebugTypeInfoRemoval { return getReplacementMDNode(N); }; - Replacements[N] = doRemap(N); + // Separate recursive doRemap and operator [] into 2 lines to avoid + // out-of-order evaluations since both of them can access the same memory + // location in map Replacements. + auto Value = doRemap(N); + Replacements[N] = Value; } /// Do the remapping traversal. 
diff --git a/llvm/test/Transforms/Util/strip-nonlinetable-debuginfo-pr125116.ll b/llvm/test/Transforms/Util/strip-nonlinetable-debuginfo-pr125116.ll new file mode 100644 index 0000000000000..dafee60a491e7 --- /dev/null +++ b/llvm/test/Transforms/Util/strip-nonlinetable-debuginfo-pr125116.ll @@ -0,0 +1,88 @@ +; Test if StripNonLineTableDebugInfo crashes or produces invalid IR, +; this test contains a slightly complex debug info structure, +; which may trigger the bug mentioned in pr#125116 +; +; RUN: opt < %s -p=strip-nonlinetable-debuginfo -S | FileCheck %s +; +; CHECK-NOT: DIBasicType +; CHECK-NOT: DIDerivedType +; CHECK-NOT: DICompositeType +; CHECK-NOT: DILocation(line: 604, column: 1, scope: null) + +define void @main() !dbg !34 { + ret void, !dbg !68 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!31, !32} +!llvm.ident = !{!33} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7 (tags/RELEASE_370/final)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, globals: !2) +!1 = !DIFile(filename: "no filename", directory: "") +!2 = !{} +!3 = !{!4, !22} +!4 = !DIDerivedType(tag: DW_TAG_typedef, name: "float3x3", file: !1, line: 361, baseType: !5) +!5 = !DICompositeType(tag: DW_TAG_class_type, name: "matrix", file: !1, line: 246, size: 288, align: 32, elements: !6, templateParams: !17) +!6 = !{!7, !9, !10, !11, !12, !13, !14, !15, !16} +!7 = !DIDerivedType(tag: DW_TAG_member, name: "_11", scope: !5, file: !1, line: 246, baseType: !8, size: 32, align: 32, flags: DIFlagPublic) +!8 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) +!9 = !DIDerivedType(tag: DW_TAG_member, name: "_12", scope: !5, file: !1, line: 246, baseType: !8, size: 32, align: 32, offset: 32, flags: DIFlagPublic) +!10 = !DIDerivedType(tag: DW_TAG_member, name: "_13", scope: !5, file: !1, line: 246, baseType: !8, size: 32, align: 32, offset: 64, flags: DIFlagPublic) +!11 = 
!DIDerivedType(tag: DW_TAG_member, name: "_21", scope: !5, file: !1, line: 246, baseType: !8, size: 32, align: 32, offset: 96, flags: DIFlagPublic) +!12 = !DIDerivedType(tag: DW_TAG_member, name: "_22", scope: !5, file: !1, line: 246, baseType: !8, size: 32, align: 32, offset: 128, flags: DIFlagPublic) +!13 = !DIDerivedType(tag: DW_TAG_member, name: "_23", scope: !5, file: !1, line: 246, baseType: !8, size: 32, align: 32, offset: 160, flags: DIFlagPublic) +!14 = !DIDerivedType(tag: DW_TAG_member, name: "_31", scope: !5, file: !1, line: 246, baseType: !8, size: 32, align: 32, offset: 192, flags: DIFlagPublic) +!15 = !DIDerivedType(tag: DW_TAG_member, name: "_32", scope: !5, file: !1, line: 246, baseType: !8, size: 32, align: 32, offset: 224, flags: DIFlagPublic) +!16 = !DIDerivedType(tag: DW_TAG_member, name: "_33", scope: !5, file: !1, line: 246, baseType: !8, size: 32, align: 32, offset: 256, flags: DIFlagPublic) +!17 = !{!18, !19, !21} +!18 = !DITemplateTypeParameter(name: "element", type: !8) +!19 = !DITemplateValueParameter(name: "row_count", type: !20, value: i32 3) +!20 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!21 = !DITemplateValueParameter(name: "col_count", type: !20, value: i32 3) +!22 = !DIDerivedType(tag: DW_TAG_typedef, name: "float4", file: !1, baseType: !23) +!23 = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !1, size: 128, align: 32, elements: !24, templateParams: !29) +!24 = !{!25, !26, !27, !28} +!25 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !23, file: !1, baseType: !8, size: 32, align: 32, flags: DIFlagPublic) +!26 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !23, file: !1, baseType: !8, size: 32, align: 32, offset: 32, flags: DIFlagPublic) +!27 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !23, file: !1, baseType: !8, size: 32, align: 32, offset: 64, flags: DIFlagPublic) +!28 = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !23, file: !1, baseType: !8, 
size: 32, align: 32, offset: 96, flags: DIFlagPublic) +!29 = !{!18, !30} +!30 = !DITemplateValueParameter(name: "element_count", type: !20, value: i32 4) +!31 = !{i32 2, !"Dwarf Version", i32 4} +!32 = !{i32 2, !"Debug Info Version", i32 3} +!33 = !{!"clang version 3.7 (tags/RELEASE_370/final)"} +!34 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 581, type: !35, scopeLine: 582, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!35 = !DISubroutineType(types: !36) +!36 = !{null, !37, !58} +!37 = !DICompositeType(tag: DW_TAG_structure_type, name: "VertexInput", file: !1, line: 254, size: 416, align: 32, elements: !38) +!38 = !{!39, !40, !48, !57} +!39 = !DIDerivedType(tag: DW_TAG_member, name: "Position", scope: !37, file: !1, line: 256, baseType: !22, size: 128, align: 32) +!40 = !DIDerivedType(tag: DW_TAG_member, name: "TexCoord", scope: !37, file: !1, line: 257, baseType: !41, size: 64, align: 32, offset: 128) +!41 = !DIDerivedType(tag: DW_TAG_typedef, name: "float2", file: !1, baseType: !42) +!42 = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !1, size: 64, align: 32, elements: !43, templateParams: !46) +!43 = !{!44, !45} +!44 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !42, file: !1, baseType: !8, size: 32, align: 32, flags: DIFlagPublic) +!45 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !42, file: !1, baseType: !8, size: 32, align: 32, offset: 32, flags: DIFlagPublic) +!46 = !{!18, !47} +!47 = !DITemplateValueParameter(name: "element_count", type: !20, value: i32 2) +!48 = !DIDerivedType(tag: DW_TAG_member, name: "Normal", scope: !37, file: !1, line: 258, baseType: !49, size: 96, align: 32, offset: 192) +!49 = !DIDerivedType(tag: DW_TAG_typedef, name: "float3", file: !1, baseType: !50) +!50 = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !1, size: 96, align: 32, elements: !51, templateParams: !55) +!51 = !{!52, !53, !54} +!52 = !DIDerivedType(tag: DW_TAG_member, name: "x", 
scope: !50, file: !1, baseType: !8, size: 32, align: 32, flags: DIFlagPublic) +!53 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !50, file: !1, baseType: !8, size: 32, align: 32, offset: 32, flags: DIFlagPublic) +!54 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !50, file: !1, baseType: !8, size: 32, align: 32, offset: 64, flags: DIFlagPublic) +!55 = !{!18, !56} +!56 = !DITemplateValueParameter(name: "element_count", type: !20, value: i32 3) +!57 = !DIDerivedType(tag: DW_TAG_member, name: "Tangent", scope: !37, file: !1, line: 259, baseType: !22, size: 128, align: 32, offset: 288) +!58 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !59) +!59 = !DICompositeType(tag: DW_TAG_structure_type, name: "VertexOutput", file: !1, line: 269, size: 672, align: 32, elements: !60) +!60 = !{!61, !62, !63, !64, !65, !66, !67} +!61 = !DIDerivedType(tag: DW_TAG_member, name: "Position", scope: !59, file: !1, line: 271, baseType: !22, size: 128, align: 32) +!62 = !DIDerivedType(tag: DW_TAG_member, name: "TexCoord", scope: !59, file: !1, line: 272, baseType: !41, size: 64, align: 32, offset: 128) +!63 = !DIDerivedType(tag: DW_TAG_member, name: "TangentInView", scope: !59, file: !1, line: 273, baseType: !49, size: 96, align: 32, offset: 192) +!64 = !DIDerivedType(tag: DW_TAG_member, name: "BitangentInView", scope: !59, file: !1, line: 274, baseType: !49, size: 96, align: 32, offset: 288) +!65 = !DIDerivedType(tag: DW_TAG_member, name: "NormalInView", scope: !59, file: !1, line: 275, baseType: !49, size: 96, align: 32, offset: 384) +!66 = !DIDerivedType(tag: DW_TAG_member, name: "EyeDirectionInView", scope: !59, file: !1, line: 276, baseType: !49, size: 96, align: 32, offset: 480) +!67 = !DIDerivedType(tag: DW_TAG_member, name: "PositionInView", scope: !59, file: !1, line: 277, baseType: !49, size: 96, align: 32, offset: 576) +!68 = !DILocation(line: 604, column: 1, scope: !34) From 9d7177a2d7e63f90effea848e897cbf96690d154 Mon Sep 17 00:00:00 2001 From: Renaud 
Kauffmann Date: Tue, 11 Feb 2025 10:47:38 -0800 Subject: [PATCH 28/29] [flang][NFCI] Stop tracking memory source after a load in a more explicit manner. (#126156) Typically, we do not track memory sources after a load because of the dynamic nature of the load and the fact that the alias analysis is a simple static analysis. However, the code is written in a way that makes it seem like we are continuing to track memory but in reality we are only doing so when we know that the tracked memory is a leaf and therefore when there will only be one more iteration through the switch statement. In other words, we are iterating one more time, to gather data about a box, anticipating that this will be the last time. This is a hack that helped avoid cut-and-paste from other case statements but gives the wrong impression about the intention of the code and makes it confusing. To make it clear that there is no more tracking, we gather all the necessary data from the memref of the load, in the case statement for the load, and exit the loop. I am also limiting this data gathering to the case when we load a box reference while we were actually following data, which, as tests have shown, is the only case where we need it. Other cases will be handled conservatively, but this can change in the future, on a case-by-case basis. --------- Co-authored-by: Joel E.
Denny --- .../lib/Optimizer/Analysis/AliasAnalysis.cpp | 46 ++++++++--- .../AliasAnalysis/alias-analysis-2.fir | 8 +- .../AliasAnalysis/alias-analysis-target.fir | 82 +++++++++++++++++++ 3 files changed, 122 insertions(+), 14 deletions(-) create mode 100644 flang/test/Analysis/AliasAnalysis/alias-analysis-target.fir diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index 01f3a0326db21..5827d1c3c529e 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -51,7 +51,7 @@ static bool hasGlobalOpTargetAttr(mlir::Value v, fir::AddrOfOp op) { v, fir::GlobalOp::getTargetAttrName(globalOpName)); } -mlir::Value getOriginalDef(mlir::Value v) { +static mlir::Value getOriginalDef(mlir::Value v) { mlir::Operation *defOp; bool breakFromLoop = false; while (!breakFromLoop && (defOp = v.getDefiningOp())) { @@ -578,16 +578,6 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, breakFromLoop = true; }) .Case([&](auto op) { - // If the load is from a leaf source, return the leaf. Do not track - // through indirections otherwise. - // TODO: Add support to fir.alloca and fir.allocmem - auto def = getOriginalDef(op.getMemref()); - if (isDummyArgument(def) || - def.template getDefiningOp()) { - v = def; - defOp = v.getDefiningOp(); - return; - } // If load is inside target and it points to mapped item, // continue tracking. Operation *loadMemrefOp = op.getMemref().getDefiningOp(); @@ -600,6 +590,40 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, defOp = v.getDefiningOp(); return; } + + // If we are loading a box reference, but following the data, + // we gather the attributes of the box to populate the source + // and stop tracking. 
+ if (auto boxTy = mlir::dyn_cast(ty); + boxTy && followingData) { + + if (mlir::isa(boxTy.getEleTy())) + attributes.set(Attribute::Pointer); + + auto def = getOriginalDef(op.getMemref()); + if (auto addrOfOp = def.template getDefiningOp()) { + global = addrOfOp.getSymbol(); + + if (hasGlobalOpTargetAttr(def, addrOfOp)) + attributes.set(Attribute::Target); + + type = SourceKind::Global; + } + + // TODO: Add support to fir.alloca and fir.allocmem + // if (auto allocOp = def.template getDefiningOp()) { + // ... + // } + + if (isDummyArgument(def)) { + defOp = nullptr; + v = def; + } + + breakFromLoop = true; + return; + } + // No further tracking for addresses loaded from memory for now. type = SourceKind::Indirect; breakFromLoop = true; diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-2.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-2.fir index ca97c5900281d..24cfaf6ed7ecc 100644 --- a/flang/test/Analysis/AliasAnalysis/alias-analysis-2.fir +++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-2.fir @@ -47,13 +47,15 @@ // CHECK-DAG: arg2.load#0 <-> arg2.addr#0: MustAlias // CHECK-DAG: boxp1.addr#0 <-> arg2.addr#0: MayAlias -// TODO: Can the address in a pointer alias the address of a pointer, even when the +// TODO: Can the address in a pointer alias the address of a pointer, when the // pointer has no box. Should this be NoAlias? -// T3: CHECK-DAG: p1.addr#0 <-> p1.tgt#0: MayAlias +// T3 from . +// CHECK-DAG: p1.addr#0 <-> p1.tgt#0: MayAlias // The addresses stored in two different pointers can alias, even if one has no // box. In this program, they happen to be the same address. 
-// T4: CHECK-DAG: p1.tgt#0 <-> boxp1.addr#0: MayAlias +// T4: +// CHECK-DAG: p1.tgt#0 <-> boxp1.addr#0: MayAlias func.func @_QFPtest(%arg0: !fir.ref {fir.bindc_name = "v1", fir.target}, %arg1: !fir.ref {fir.bindc_name = "v2", fir.target}, %arg2: !fir.ref>> ) attributes {test.ptr = "func"} { diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-target.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-target.fir new file mode 100644 index 0000000000000..8e88b508d56e3 --- /dev/null +++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-target.fir @@ -0,0 +1,82 @@ +// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' 2>&1 | FileCheck %s + +// The test was obtained from +// bbc test.f90 -emit-fir +// module mod +// real, pointer :: p0 +// real, allocatable :: alloc +// real, allocatable, target :: t_alloc +// real, target :: t +// real :: v +// end module +// +// subroutine test(n) +// use mod +// integer :: n +// real r1 +// p0 => t_alloc +// v = alloc +// r1 = p0 +// end subroutine test + +// Checking that aliasing can only happen with an entity with the target attribute +// +// CHECK-DAG: r1#0 <-> t_alloc#0: NoAlias +// CHECK-DAG: r1#0 <-> alloc#0: NoAlias +// CHECK-DAG: t_alloc#0 <-> alloc#0: NoAlias +// CHECK-DAG: r1#0 <-> p0.ptr#0: NoAlias +// CHECK-DAG: t_alloc#0 <-> p0.ptr#0: MayAlias +// CHECK-DAG: alloc#0 <-> p0.ptr#0: NoAlias + +fir.global @_QMmodEalloc : !fir.box> { + %0 = fir.zero_bits !fir.heap + %1 = fir.embox %0 : (!fir.heap) -> !fir.box> + fir.has_value %1 : !fir.box> +} +fir.global @_QMmodEp0 : !fir.box> { + %0 = fir.zero_bits !fir.ptr + %1 = fir.embox %0 : (!fir.ptr) -> !fir.box> + fir.has_value %1 : !fir.box> +} +fir.global @_QMmodEt target : f32 { + %0 = fir.zero_bits f32 + fir.has_value %0 : f32 +} +fir.global @_QMmodEt_alloc target : !fir.box> { + %0 = fir.zero_bits !fir.heap + %1 = fir.embox %0 : (!fir.heap) -> !fir.box> + fir.has_value %1 : !fir.box> +} +fir.global @_QMmodEv : f32 { + %0 = 
fir.zero_bits f32 + fir.has_value %0 : f32 +} +func.func @_QPtest(%arg0: !fir.ref {fir.bindc_name = "n"}) { + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.address_of(@_QMmodEalloc) : !fir.ref>> + %2 = fir.declare %1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMmodEalloc"} : (!fir.ref>>) -> !fir.ref>> + %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEn"} : (!fir.ref, !fir.dscope) -> !fir.ref + %4 = fir.address_of(@_QMmodEp0) : !fir.ref>> + %5 = fir.declare %4 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMmodEp0"} : (!fir.ref>>) -> !fir.ref>> + %6 = fir.alloca f32 {bindc_name = "r1", uniq_name = "_QFtestEr1"} + %7 = fir.declare %6 {test.ptr="r1", uniq_name = "_QFtestEr1"} : (!fir.ref) -> !fir.ref + %8 = fir.address_of(@_QMmodEt) : !fir.ref + %9 = fir.declare %8 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMmodEt"} : (!fir.ref) -> !fir.ref + %10 = fir.address_of(@_QMmodEt_alloc) : !fir.ref>> + %11 = fir.declare %10 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMmodEt_alloc"} : (!fir.ref>>) -> !fir.ref>> + %12 = fir.address_of(@_QMmodEv) : !fir.ref + %13 = fir.declare %12 {uniq_name = "_QMmodEv"} : (!fir.ref) -> !fir.ref + %14 = fir.load %11 : !fir.ref>> + %15 = fir.box_addr %14 {test.ptr="t_alloc"}: (!fir.box>) -> !fir.heap + %16 = fir.embox %15 : (!fir.heap) -> !fir.box> + fir.store %16 to %5 : !fir.ref>> + %17 = fir.load %2 : !fir.ref>> + %18 = fir.box_addr %17 {test.ptr="alloc"} : (!fir.box>) -> !fir.heap + %19 = fir.load %18 : !fir.heap + fir.store %19 to %13 : !fir.ref + %20 = fir.load %5 : !fir.ref>> + %21 = fir.box_addr %20 {test.ptr="p0.ptr"} : (!fir.box>) -> !fir.ptr + %22 = fir.load %21 : !fir.ptr + fir.store %22 to %7 : !fir.ref + return +} From 070f84ebc89b11df616a83a56df9ac56efbab783 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Tue, 11 Feb 2025 13:58:21 -0500 Subject: [PATCH 29/29] [Clang] [OpenMP] Add support for '#pragma omp stripe'. 
(#119891) Implement basic parsing and semantic support for `#pragma omp stripe` construct introduced in https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-6-0.pdf, section 11.7. --- clang/bindings/python/clang/cindex.py | 3 + clang/docs/OpenMPSupport.rst | 2 + clang/docs/ReleaseNotes.rst | 1 + clang/include/clang-c/Index.h | 4 + clang/include/clang/AST/RecursiveASTVisitor.h | 3 + clang/include/clang/AST/StmtOpenMP.h | 82 +- clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Sema/SemaOpenMP.h | 3 + .../include/clang/Serialization/ASTBitCodes.h | 1 + clang/lib/AST/StmtOpenMP.cpp | 21 + clang/lib/AST/StmtPrinter.cpp | 5 + clang/lib/AST/StmtProfile.cpp | 4 + clang/lib/Basic/OpenMPKinds.cpp | 3 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 + clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/Parse/ParseOpenMP.cpp | 5 +- clang/lib/Sema/SemaExceptionSpec.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 316 +++- clang/lib/Sema/TreeTransform.h | 11 + clang/lib/Serialization/ASTReaderStmt.cpp | 11 + clang/lib/Serialization/ASTWriterStmt.cpp | 5 + clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 + clang/test/Index/openmp-stripe.c | 11 + clang/test/OpenMP/stripe_ast_print.cpp | 202 +++ clang/test/OpenMP/stripe_codegen.cpp | 1549 +++++++++++++++++ clang/test/OpenMP/stripe_messages.cpp | 163 ++ clang/tools/libclang/CIndex.cpp | 7 + clang/tools/libclang/CXCursor.cpp | 5 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 7 + 29 files changed, 2404 insertions(+), 32 deletions(-) create mode 100644 clang/test/Index/openmp-stripe.c create mode 100644 clang/test/OpenMP/stripe_ast_print.cpp create mode 100644 clang/test/OpenMP/stripe_codegen.cpp create mode 100644 clang/test/OpenMP/stripe_messages.cpp diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index 806e1b40f3c9e..722562220eeea 100644 --- a/clang/bindings/python/clang/cindex.py +++
b/clang/bindings/python/clang/cindex.py @@ -1410,6 +1410,9 @@ def is_unexposed(self): # OpenMP scope directive. OMP_SCOPE_DIRECTIVE = 306 + # OpenMP stripe directive. + OMP_STRIPE_DIRECTIVE = 310 + # OpenACC Compute Construct. OPEN_ACC_COMPUTE_DIRECTIVE = 320 diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 725624ee8c66c..88af120d06edb 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -374,6 +374,8 @@ implementation. +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Loop transformation constructs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | work distribute construct | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | task_iteration | :none:`unclaimed` | :none:`unclaimed` | | diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6344c4b36e357..b8a04167b9ca8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -291,6 +291,7 @@ Python Binding Changes OpenMP Support -------------- - Added support 'no_openmp_constructs' assumption clause. +- Added support for 'omp stripe' directive. 
Improvements ^^^^^^^^^^^^ diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 61e361faabdaf..ed6bd797684d9 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2158,6 +2158,10 @@ enum CXCursorKind { */ CXCursor_OMPAssumeDirective = 309, + /** OpenMP assume directive. + */ + CXCursor_OMPStripeDirective = 310, + /** OpenACC Compute Construct. */ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 06c762c080de0..560de7da9913a 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3056,6 +3056,9 @@ DEF_TRAVERSE_STMT(OMPSimdDirective, DEF_TRAVERSE_STMT(OMPTileDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPStripeDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPUnrollDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 9ec49b8683dc8..b4d866d855323 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -994,7 +994,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { static bool classof(const Stmt *T) { Stmt::StmtClass C = T->getStmtClass(); return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || - C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass; + C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass || + C == OMPStripeDirectiveClass; + ; } }; @@ -5560,7 +5562,7 @@ class OMPTileDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPTileDirectiveClass, llvm::omp::OMPD_tile, StartLoc, EndLoc, NumLoops) { - setNumGeneratedLoops(3 * NumLoops); + setNumGeneratedLoops(2 * NumLoops); } void setPreInits(Stmt *PreInits) { @@ -5621,6 +5623,82 @@ class 
OMPTileDirective final : public OMPLoopTransformationDirective { } }; +/// This represents the '#pragma omp stripe' loop transformation directive. +class OMPStripeDirective final : public OMPLoopTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + /// Default list of offsets. + enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPStripeDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumLoops) + : OMPLoopTransformationDirective(OMPStripeDirectiveClass, + llvm::omp::OMPD_stripe, StartLoc, EndLoc, + NumLoops) { + setNumGeneratedLoops(2 * NumLoops); + } + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for '#pragma omp stripe'. + /// + /// \param C Context of the AST. + /// \param StartLoc Location of the introducer (e.g. the 'omp' token). + /// \param EndLoc Location of the directive's end (e.g. the tok::eod). + /// \param Clauses The directive's clauses. + /// \param NumLoops Number of associated loops (number of items in the + /// 'sizes' clause). + /// \param AssociatedStmt The outermost associated loop. + /// \param TransformedStmt The loop nest after striping, or nullptr in + /// dependent contexts. + /// \param PreInits Helper preinits statements for the loop nest. + static OMPStripeDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef Clauses, unsigned NumLoops, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits); + + /// Build an empty '#pragma omp stripe' AST node for deserialization. + /// + /// \param C Context of the AST. + /// \param NumClauses Number of clauses to allocate. + /// \param NumLoops Number of associated loops to allocate. 
+ static OMPStripeDirective * + CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned NumLoops); + + /// Gets/sets the associated loops after striping. + /// + /// This is in de-sugared format stored as a CompoundStmt. + /// + /// \code + /// for (...) + /// ... + /// \endcode + /// + /// Note that if the generated loops a become associated loops of another + /// directive, they may need to be hoisted before them. + Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPStripeDirectiveClass; + } +}; + /// This represents the '#pragma omp unroll' loop transformation directive. /// /// \code diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index d47e0a8157fc6..3533c5f50742e 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -231,6 +231,7 @@ def OMPParallelDirective : StmtNode; def OMPSimdDirective : StmtNode; def OMPLoopTransformationDirective : StmtNode; def OMPTileDirective : StmtNode; +def OMPStripeDirective : StmtNode; def OMPUnrollDirective : StmtNode; def OMPReverseDirective : StmtNode; def OMPInterchangeDirective : StmtNode; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index fa244da36a322..64f0cfa0676af 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -440,6 +440,9 @@ class SemaOpenMP : public SemaBase { StmtResult ActOnOpenMPTileDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + StmtResult ActOnOpenMPStripeDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '#pragma omp unroll' after parsing of its clauses /// and the associated statement. 
StmtResult ActOnOpenMPUnrollDirective(ArrayRef Clauses, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 3c184db5b2adf..ad93d50f6a82b 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1939,6 +1939,7 @@ enum StmtCode { STMT_OMP_PARALLEL_DIRECTIVE, STMT_OMP_SIMD_DIRECTIVE, STMT_OMP_TILE_DIRECTIVE, + STMP_OMP_STRIPE_DIRECTIVE, STMT_OMP_UNROLL_DIRECTIVE, STMT_OMP_REVERSE_DIRECTIVE, STMT_OMP_INTERCHANGE_DIRECTIVE, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 4f441c2f92dc7..f23647ef12404 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -417,6 +417,27 @@ OMPTileDirective::Create(const ASTContext &C, SourceLocation StartLoc, return Dir; } +OMPStripeDirective * +OMPStripeDirective::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, ArrayRef Clauses, + unsigned NumLoops, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits) { + OMPStripeDirective *Dir = createDirective( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc, + NumLoops); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + return Dir; +} + +OMPStripeDirective *OMPStripeDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned NumLoops) { + return createEmptyDirective( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation(), NumLoops); +} + OMPTileDirective *OMPTileDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned NumLoops) { diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 3ce932a9dd352..4b45190fa33ef 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -764,6 +764,11 @@ void StmtPrinter::VisitOMPTileDirective(OMPTileDirective *Node) { PrintOMPExecutableDirective(Node); } +void 
StmtPrinter::VisitOMPStripeDirective(OMPStripeDirective *Node) { + Indent() << "#pragma omp stripe"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPUnrollDirective(OMPUnrollDirective *Node) { Indent() << "#pragma omp unroll"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 8b4b8ba19f75b..77ee6611f623f 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1007,6 +1007,10 @@ void StmtProfiler::VisitOMPTileDirective(const OMPTileDirective *S) { VisitOMPLoopTransformationDirective(S); } +void StmtProfiler::VisitOMPStripeDirective(const OMPStripeDirective *S) { + VisitOMPLoopTransformationDirective(S); +} + void StmtProfiler::VisitOMPUnrollDirective(const OMPUnrollDirective *S) { VisitOMPLoopTransformationDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 956d92a7e95f0..1ff342cb22a03 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -700,7 +700,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || - DKind == OMPD_interchange; + DKind == OMPD_interchange || DKind == OMPD_stripe; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { @@ -827,6 +827,7 @@ void clang::getOpenMPCaptureRegions( case OMPD_single: case OMPD_target_data: case OMPD_taskgroup: + case OMPD_stripe: // These directives (when standalone) use OMPD_unknown as the region, // but when they're constituents of a compound directive, and other // leafs from that directive have specific regions, then these directives diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 8e694b95dc7e7..a2cc54b05259c 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -187,6 +187,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { PreInits = LD->getPreInits(); } else if (const auto *Tile = dyn_cast(&S)) { PreInits = Tile->getPreInits(); + } else if (const auto *Stripe = dyn_cast(&S)) { + PreInits = Stripe->getPreInits(); } else if (const auto *Unroll = dyn_cast(&S)) { PreInits = Unroll->getPreInits(); } else if (const auto *Reverse = dyn_cast(&S)) { @@ -2820,6 +2822,12 @@ void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPStripeDirective(const OMPStripeDirective &S) { + // Emit the de-sugared statement. + OMPTransformDirectiveScopeRAII StripeScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { // Emit the de-sugared statement. OMPTransformDirectiveScopeRAII ReverseScope(*this, &S); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index e7a5100a9fa29..f7ce46cf1bcf2 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3837,6 +3837,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); void EmitOMPTileDirective(const OMPTileDirective &S); + void EmitOMPStripeDirective(const OMPStripeDirective &S); void EmitOMPUnrollDirective(const OMPUnrollDirective &S); void EmitOMPReverseDirective(const OMPReverseDirective &S); void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index a455659ca8f2c..42e6aac681c1c 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2548,9 +2548,10 @@ StmtResult Parser::ParseOpenMPExecutableDirective( } } - if (DKind == OMPD_tile && !SeenClauses[unsigned(OMPC_sizes)]) { + 
if ((DKind == OMPD_tile || DKind == OMPD_stripe) && + !SeenClauses[unsigned(OMPC_sizes)]) { Diag(Loc, diag::err_omp_required_clause) - << getOpenMPDirectiveName(OMPD_tile) << "sizes"; + << getOpenMPDirectiveName(DKind) << "sizes"; } StmtResult AssociatedStmt; diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 77a1bbcc74e50..8c8ba1da88ebf 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1488,6 +1488,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPSectionsDirectiveClass: case Stmt::OMPSimdDirectiveClass: case Stmt::OMPTileDirectiveClass: + case Stmt::OMPStripeDirectiveClass: case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPReverseDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 39ce65381a98c..616296027d811 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4386,6 +4386,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_master: case OMPD_section: case OMPD_tile: + case OMPD_stripe: case OMPD_unroll: case OMPD_reverse: case OMPD_interchange: @@ -6197,6 +6198,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPTileDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); break; + case OMPD_stripe: + Res = ActOnOpenMPStripeDirective(ClausesWithImplicit, AStmt, StartLoc, + EndLoc); + break; case OMPD_unroll: Res = ActOnOpenMPUnrollDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); @@ -14147,6 +14152,8 @@ bool SemaOpenMP::checkTransformableLoopNest( Stmt *DependentPreInits; if (auto *Dir = dyn_cast(Transform)) DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); else if (auto *Dir = dyn_cast(Transform)) DependentPreInits = Dir->getPreInits(); else if (auto *Dir = dyn_cast(Transform)) @@ -14219,6 +14226,14 @@ 
static void collectLoopStmts(Stmt *AStmt, MutableArrayRef LoopStmts) { "Expecting a loop statement for each affected loop"); } +/// Build and return a DeclRefExpr for the floor induction variable using the +/// SemaRef and the provided parameters. +static Expr *makeFloorIVRef(Sema &SemaRef, ArrayRef FloorIndVars, + int I, QualType IVTy, DeclRefExpr *OrigCntVar) { + return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy, + OrigCntVar->getExprLoc()); +} + StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, @@ -14356,22 +14371,21 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, Stmt *LoopStmt = LoopStmts[I]; // Commonly used variables. One of the constraints of an AST is that every - // node object must appear at most once, hence we define lamdas that create - // a new AST node at every use. + // node object must appear at most once, hence we define a lambda that + // creates a new AST node at every use. auto MakeTileIVRef = [&SemaRef = this->SemaRef, &TileIndVars, I, IVTy, OrigCntVar]() { return buildDeclRefExpr(SemaRef, TileIndVars[I], IVTy, OrigCntVar->getExprLoc()); }; - auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy, - OrigCntVar]() { - return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy, - OrigCntVar->getExprLoc()); - }; // For init-statement: auto .tile.iv = .floor.iv SemaRef.AddInitializerToDecl( - TileIndVars[I], SemaRef.DefaultLvalueConversion(MakeFloorIVRef()).get(), + TileIndVars[I], + SemaRef + .DefaultLvalueConversion( + makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar)) + .get(), /*DirectInit=*/false); Decl *CounterDecl = TileIndVars[I]; StmtResult InitStmt = new (Context) @@ -14382,9 +14396,10 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, // For cond-expression: // .tile.iv < min(.floor.iv + DimTileSize, NumIterations) - ExprResult EndOfTile = - SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_Add, - 
MakeFloorIVRef(), MakeDimTileSize(I)); + ExprResult EndOfTile = SemaRef.BuildBinOp( + CurScope, LoopHelper.Cond->getExprLoc(), BO_Add, + makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar), + MakeDimTileSize(I)); if (!EndOfTile.isUsable()) return StmtError(); ExprResult IsPartialTile = @@ -14445,15 +14460,6 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, DeclRefExpr *OrigCntVar = cast(LoopHelper.Counters[0]); QualType IVTy = NumIterations->getType(); - // Commonly used variables. One of the constraints of an AST is that every - // node object must appear at most once, hence we define lamdas that create - // a new AST node at every use. - auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy, - OrigCntVar]() { - return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy, - OrigCntVar->getExprLoc()); - }; - // For init-statement: auto .floor.iv = 0 SemaRef.AddInitializerToDecl( FloorIndVars[I], @@ -14467,16 +14473,18 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, return StmtError(); // For cond-expression: .floor.iv < NumIterations - ExprResult CondExpr = - SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, - MakeFloorIVRef(), NumIterations); + ExprResult CondExpr = SemaRef.BuildBinOp( + CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar), + NumIterations); if (!CondExpr.isUsable()) return StmtError(); // For incr-statement: .floor.iv += DimTileSize - ExprResult IncrStmt = - SemaRef.BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign, - MakeFloorIVRef(), MakeDimTileSize(I)); + ExprResult IncrStmt = SemaRef.BuildBinOp( + CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign, + makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar), + MakeDimTileSize(I)); if (!IncrStmt.isUsable()) return StmtError(); @@ -14491,6 +14499,262 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, buildPreInits(Context, 
PreInits)); } +StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + Scope *CurScope = SemaRef.getCurScope(); + + const auto *SizesClause = + OMPExecutableDirective::getSingleClause(Clauses); + if (!SizesClause || llvm::is_contained(SizesClause->getSizesRefs(), nullptr)) + return StmtError(); + unsigned NumLoops = SizesClause->getNumSizes(); + + // Empty statement should only be possible if there already was an error. + if (!AStmt) + return StmtError(); + + // Verify and diagnose loop nest. + SmallVector LoopHelpers(NumLoops); + Stmt *Body = nullptr; + SmallVector, 4> OriginalInits; + if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers, + Body, OriginalInits)) + return StmtError(); + + // Delay striping to when template is completely instantiated. + if (SemaRef.CurContext->isDependentContext()) + return OMPStripeDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, AStmt, nullptr, nullptr); + + assert(LoopHelpers.size() == NumLoops && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + assert(OriginalInits.size() == NumLoops && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + + // Collect all affected loop statements. + SmallVector LoopStmts(NumLoops, nullptr); + collectLoopStmts(AStmt, LoopStmts); + + SmallVector PreInits; + CaptureVars CopyTransformer(SemaRef); + + // Create iteration variables for the generated loops. 
+ SmallVector FloorIndVars; + SmallVector StripeIndVars; + FloorIndVars.resize(NumLoops); + StripeIndVars.resize(NumLoops); + for (unsigned I : llvm::seq(NumLoops)) { + OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I]; + + assert(LoopHelper.Counters.size() == 1 && + "Expect single-dimensional loop iteration space"); + auto *OrigCntVar = cast(LoopHelper.Counters.front()); + std::string OrigVarName = OrigCntVar->getNameInfo().getAsString(); + DeclRefExpr *IterVarRef = cast(LoopHelper.IterationVarRef); + QualType CntTy = IterVarRef->getType(); + + // Iteration variable for the floor (i.e. outer) loop. + { + std::string FloorCntName = + (Twine(".floor_") + llvm::utostr(I) + ".iv." + OrigVarName).str(); + VarDecl *FloorCntDecl = + buildVarDecl(SemaRef, {}, CntTy, FloorCntName, nullptr, OrigCntVar); + FloorIndVars[I] = FloorCntDecl; + } + + // Iteration variable for the stripe (i.e. inner) loop. + { + std::string StripeCntName = + (Twine(".stripe_") + llvm::utostr(I) + ".iv." + OrigVarName).str(); + + // Reuse the iteration variable created by checkOpenMPLoop. It is also + // used by the expressions to derive the original iteration variable's + // value from the logical iteration number. + auto *StripeCntDecl = cast(IterVarRef->getDecl()); + StripeCntDecl->setDeclName( + &SemaRef.PP.getIdentifierTable().get(StripeCntName)); + StripeIndVars[I] = StripeCntDecl; + } + + addLoopPreInits(Context, LoopHelper, LoopStmts[I], OriginalInits[I], + PreInits); + } + + // Once the original iteration values are set, append the innermost body. + Stmt *Inner = Body; + + auto MakeDimStripeSize = [&](int I) -> Expr * { + Expr *DimStripeSizeExpr = SizesClause->getSizesRefs()[I]; + if (isa(DimStripeSizeExpr)) + return AssertSuccess(CopyTransformer.TransformExpr(DimStripeSizeExpr)); + + // When the stripe size is not a constant but a variable, it is possible to + // pass non-positive numbers.
For instance: + // \code{c} + // int a = 0; + // #pragma omp stripe sizes(a) + // for (int i = 0; i < 42; ++i) + // body(i); + // \endcode + // Although there is no meaningful interpretation of the stripe size, the + // body should still be executed 42 times to avoid surprises. To preserve + // the invariant that every loop iteration is executed exactly once and not + // cause an infinite loop, apply a minimum stripe size of one. + // Build expr: + // \code{c} + // (TS <= 0) ? 1 : TS + // \endcode + QualType DimTy = DimStripeSizeExpr->getType(); + uint64_t DimWidth = Context.getTypeSize(DimTy); + IntegerLiteral *Zero = IntegerLiteral::Create( + Context, llvm::APInt::getZero(DimWidth), DimTy, {}); + IntegerLiteral *One = + IntegerLiteral::Create(Context, llvm::APInt(DimWidth, 1), DimTy, {}); + Expr *Cond = AssertSuccess(SemaRef.BuildBinOp( + CurScope, {}, BO_LE, + AssertSuccess(CopyTransformer.TransformExpr(DimStripeSizeExpr)), Zero)); + Expr *MinOne = new (Context) ConditionalOperator( + Cond, {}, One, {}, + AssertSuccess(CopyTransformer.TransformExpr(DimStripeSizeExpr)), DimTy, + VK_PRValue, OK_Ordinary); + return MinOne; + }; + + // Create stripe loops from the inside to the outside. 
+ for (int I = NumLoops - 1; I >= 0; --I) { + OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I]; + Expr *NumIterations = LoopHelper.NumIterations; + auto *OrigCntVar = cast(LoopHelper.Counters[0]); + QualType IVTy = NumIterations->getType(); + Stmt *LoopStmt = LoopStmts[I]; + + // For init-statement: auto .stripe.iv = .floor.iv + SemaRef.AddInitializerToDecl( + StripeIndVars[I], + SemaRef + .DefaultLvalueConversion( + makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar)) + .get(), + /*DirectInit=*/false); + Decl *CounterDecl = StripeIndVars[I]; + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1), + OrigCntVar->getBeginLoc(), OrigCntVar->getEndLoc()); + if (!InitStmt.isUsable()) + return StmtError(); + + // For cond-expression: + // .stripe.iv < min(.floor.iv + DimStripeSize, NumIterations) + ExprResult EndOfStripe = SemaRef.BuildBinOp( + CurScope, LoopHelper.Cond->getExprLoc(), BO_Add, + makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar), + MakeDimStripeSize(I)); + if (!EndOfStripe.isUsable()) + return StmtError(); + ExprResult IsPartialStripe = + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + NumIterations, EndOfStripe.get()); + if (!IsPartialStripe.isUsable()) + return StmtError(); + ExprResult MinStripeAndIterSpace = SemaRef.ActOnConditionalOp( + LoopHelper.Cond->getBeginLoc(), LoopHelper.Cond->getEndLoc(), + IsPartialStripe.get(), NumIterations, EndOfStripe.get()); + if (!MinStripeAndIterSpace.isUsable()) + return StmtError(); + ExprResult CondExpr = SemaRef.BuildBinOp( + CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + makeFloorIVRef(SemaRef, StripeIndVars, I, IVTy, OrigCntVar), + MinStripeAndIterSpace.get()); + if (!CondExpr.isUsable()) + return StmtError(); + + // For incr-statement: ++.stripe.iv + ExprResult IncrStmt = SemaRef.BuildUnaryOp( + CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, + makeFloorIVRef(SemaRef, StripeIndVars, I, IVTy, OrigCntVar)); + if 
(!IncrStmt.isUsable()) + return StmtError(); + + // Statements to set the original iteration variable's value from the + // logical iteration number. + // Generated for loop is: + // \code + // Original_for_init; + // for (auto .stripe.iv = .floor.iv; + // .stripe.iv < min(.floor.iv + DimStripeSize, NumIterations); + // ++.stripe.iv) { + // Original_Body; + // Original_counter_update; + // } + // \endcode + // FIXME: If the innermost body is a loop itself, inserting these + // statements stops it being recognized as a perfectly nested loop (e.g. + // for applying another loop transformation). If this is the case, sink the + // expressions further into the inner loop. + SmallVector BodyParts; + BodyParts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end()); + if (auto *SourceCXXFor = dyn_cast(LoopStmt)) + BodyParts.push_back(SourceCXXFor->getLoopVarStmt()); + BodyParts.push_back(Inner); + Inner = CompoundStmt::Create(Context, BodyParts, FPOptionsOverride(), + Inner->getBeginLoc(), Inner->getEndLoc()); + Inner = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, + IncrStmt.get(), Inner, LoopHelper.Init->getBeginLoc(), + LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc()); + } + + // Create grid loops from the inside to the outside. 
+ for (int I = NumLoops - 1; I >= 0; --I) { + auto &LoopHelper = LoopHelpers[I]; + Expr *NumIterations = LoopHelper.NumIterations; + DeclRefExpr *OrigCntVar = cast(LoopHelper.Counters[0]); + QualType IVTy = NumIterations->getType(); + + // For init-statement: auto .floor.iv = 0 + SemaRef.AddInitializerToDecl( + FloorIndVars[I], + SemaRef.ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(), + /*DirectInit=*/false); + Decl *CounterDecl = FloorIndVars[I]; + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1), + OrigCntVar->getBeginLoc(), OrigCntVar->getEndLoc()); + if (!InitStmt.isUsable()) + return StmtError(); + + // For cond-expression: .floor.iv < NumIterations + ExprResult CondExpr = SemaRef.BuildBinOp( + CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar), + NumIterations); + if (!CondExpr.isUsable()) + return StmtError(); + + // For incr-statement: .floor.iv += DimStripeSize + ExprResult IncrStmt = SemaRef.BuildBinOp( + CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign, + makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar), + MakeDimStripeSize(I)); + if (!IncrStmt.isUsable()) + return StmtError(); + + Inner = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, + IncrStmt.get(), Inner, LoopHelper.Init->getBeginLoc(), + LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc()); + } + + return OMPStripeDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, AStmt, Inner, + buildPreInits(Context, PreInits)); +} + StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 813b172c4d89e..fc1e3f7d58f4d 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9545,6 +9545,17 @@ TreeTransform::TransformOMPTileDirective(OMPTileDirective *D) { return Res; } +template
+StmtResult +TreeTransform::TransformOMPStripeDirective(OMPStripeDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPUnrollDirective(OMPUnrollDirective *D) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index dc953ddeee85c..a89eee601e437 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2454,6 +2454,10 @@ void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) { VisitOMPLoopTransformationDirective(D); } +void ASTStmtReader::VisitOMPStripeDirective(OMPStripeDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void ASTStmtReader::VisitOMPUnrollDirective(OMPUnrollDirective *D) { VisitOMPLoopTransformationDirective(D); } @@ -3574,6 +3578,13 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMP_OMP_STRIPE_DIRECTIVE: { + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPStripeDirective::CreateEmpty(Context, NumClauses, NumLoops); + break; + } + case STMT_OMP_UNROLL_DIRECTIVE: { assert(Record[ASTStmtReader::NumStmtFields] == 1 && "Unroll directive accepts only a single loop"); unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index e5caf3debc023..6a779f1618287 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2459,6 +2459,11 @@ void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) { Code = serialization::STMT_OMP_TILE_DIRECTIVE; } +void 
ASTStmtWriter::VisitOMPStripeDirective(OMPStripeDirective *D) { + VisitOMPLoopTransformationDirective(D); + Code = serialization::STMP_OMP_STRIPE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPUnrollDirective(OMPUnrollDirective *D) { VisitOMPLoopTransformationDirective(D); Code = serialization::STMT_OMP_UNROLL_DIRECTIVE; diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index e3ec7c57571c8..0ff50bb835d1c 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1815,6 +1815,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: case Stmt::OMPReverseDirectiveClass: case Stmt::OMPTileDirectiveClass: + case Stmt::OMPStripeDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: diff --git a/clang/test/Index/openmp-stripe.c b/clang/test/Index/openmp-stripe.c new file mode 100644 index 0000000000000..4816c18659165 --- /dev/null +++ b/clang/test/Index/openmp-stripe.c @@ -0,0 +1,11 @@ +// RUN: c-index-test -test-load-source local %s -fopenmp=libomp -fopenmp-version=60 | FileCheck %s + +void test() { +#pragma omp stripe sizes(5) + for (int i = 0; i < 65; i += 1) + ; +} + +// CHECK: openmp-stripe.c:4:1: OMPStripeDirective= Extent=[4:1 - 4:28] +// CHECK: openmp-stripe.c:4:26: IntegerLiteral= Extent=[4:26 - 4:27] +// CHECK: openmp-stripe.c:5:3: ForStmt= Extent=[5:3 - 6:6] diff --git a/clang/test/OpenMP/stripe_ast_print.cpp b/clang/test/OpenMP/stripe_ast_print.cpp new file mode 100644 index 0000000000000..e632caf6d7753 --- /dev/null +++ b/clang/test/OpenMP/stripe_ast_print.cpp @@ -0,0 +1,202 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp 
-fopenmp-version=60 -ast-dump %s \ +// RUN: | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s \ +// RUN: | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-dump-all %s \ +// RUN: | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s \ +// RUN: | FileCheck %s --check-prefix=PRINT + +// placeholder for loop body code. +extern "C" void body(...); + + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp stripe sizes(5, 5) + // DUMP: OMPStripeDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp stripe sizes(5,5) + // PRINT: for (int i = 7; i < 17; i += 3) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 3) + // PRINT: for (int j = 7; j < 17; j += 3) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 3) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2(int start1, int start2, int end1, int end2) { + // PRINT: #pragma omp stripe sizes(5, 5) + // DUMP: OMPStripeDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp stripe sizes(5,5) + // PRINT: for (int i = start1; i < end1; i += 1) + // DUMP-NEXT: ForStmt + for (int i = start1; i < end1; i += 1) + // PRINT: for (int j = start2; j < end2; j += 1) + // DUMP: ForStmt + for (int j = start2; j < end2; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo3( +// DUMP-LABEL: FunctionDecl {{.*}} foo3 +void foo3() { 
+ // PRINT: #pragma omp for + // DUMP: OMPForDirective + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for + // PRINT: #pragma omp stripe sizes(5) + // DUMP-NEXT: OMPStripeDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp stripe sizes(5) + for (int i = 7; i < 17; i += 3) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} + + +// PRINT-LABEL: void foo4( +// DUMP-LABEL: FunctionDecl {{.*}} foo4 +void foo4() { + // PRINT: #pragma omp for collapse(3) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 3 + // DUMP-NEXT: IntegerLiteral {{.*}} 3 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(3) + // PRINT: #pragma omp stripe sizes(5, 5) + // DUMP: OMPStripeDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp stripe sizes(5, 5) + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5(int start, int end, int step) { + // PRINT: #pragma omp for collapse(2) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 2 + // DUMP-NEXT: IntegerLiteral {{.*}} 2 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(2) + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: #pragma omp stripe sizes(5) + // DUMP: OMPStripeDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: IntegerLiteral {{.*}} 5 + #pragma omp stripe sizes(5) + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP-NEXT: ForStmt + for 
(int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo6 +template +void foo6(T start, T end) { + // PRINT: #pragma omp stripe sizes(Stripe) + // DUMP: OMPStripeDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: DeclRefExpr {{.*}} 'Stripe' 'T' + #pragma omp stripe sizes(Stripe) + // PRINT-NEXT: for (T i = start; i < end; i += Step) + // DUMP-NEXT: ForStmt + for (T i = start; i < end; i += Step) + // PRINT-NEXT: body(i); + // DUMP: CallExpr + body(i); +} + +// Also test instantiating the template. +void tfoo6() { + foo6(0, 42); +} + + +// PRINT-LABEL: template void foo7(int start, int stop, int step) { +// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo7 +template +void foo7(int start, int stop, int step) { + // PRINT: #pragma omp stripe sizes(Stripe) + // DUMP: OMPStripeDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: DeclRefExpr {{.*}} 'Stripe' 'int' + #pragma omp stripe sizes(Stripe) + // PRINT-NEXT: for (int i = start; i < stop; i += step) + // DUMP-NEXT: ForStmt + for (int i = start; i < stop; i += step) + // PRINT-NEXT: body(i); + // DUMP: CallExpr + body(i); +} +void tfoo7() { + foo7<5>(0, 42, 2); +} + + +// PRINT-LABEL: void foo8( +// DUMP-LABEL: FunctionDecl {{.*}} foo8 +void foo8(int a) { + // PRINT: #pragma omp stripe sizes(a) + // DUMP: OMPStripeDirective + // DUMP-NEXT: OMPSizesClause + // DUMP-NEXT: ImplicitCastExpr + // DUMP-NEXT: DeclRefExpr {{.*}} 'a' + #pragma omp stripe sizes(a) + // PRINT-NEXT: for (int i = 7; i < 19; i += 3) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 19; i += 3) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} diff --git a/clang/test/OpenMP/stripe_codegen.cpp b/clang/test/OpenMP/stripe_codegen.cpp new file mode 100644 index 0000000000000..a2d3c88ecbd48 --- /dev/null +++ b/clang/test/OpenMP/stripe_codegen.cpp @@ -0,0 +1,1549 @@ +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 4 +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest\ +// RUN: -std=c++20 -fopenmp -fopenmp-version=60 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest\ +// RUN: -std=c++20 -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fclang-abi-compat=latest\ +// RUN: -std=c++20 -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=CHECK2 +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...) {} + + +struct S { + int i; + S() { +#pragma omp stripe sizes(5) + for (i = 7; i < 17; i += 3) + body(i); + } +} s; + +extern "C" void foo1(int start, int end, int step) { + int i; +#pragma omp stripe sizes(5) + for (i = start; i < end; i += step) + body(i); +} + +extern "C" void foo2(int start, int end, int step) { +#pragma omp stripe sizes(5,5) + for (int i = 7; i < 17; i+=3) + for (int j = 7; j < 17; j+=3) + body(i,j); +} + +extern "C" void foo3() { +#pragma omp for +#pragma omp stripe sizes(5,5) + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + +extern "C" void foo4() { +#pragma omp for collapse(2) + for (int k = 7; k < 17; k += 3) +#pragma omp stripe sizes(5,5) + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + + +extern "C" void foo5() { +#pragma omp for collapse(3) +#pragma omp stripe sizes(5) + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(i, j); +} + + +extern "C" void foo6() { +#pragma 
omp parallel for +#pragma omp stripe sizes(5) + for (int i = 7; i < 17; i += 3) + body(i); +} + + +template +void foo7(T start, T end) { +#pragma omp stripe sizes(Stripe) + for (T i = start; i < end; i += Step) + body(i); +} + +extern "C" void tfoo7() { + foo7(0, 42); +} + + +extern "C" void foo8(int a) { +#pragma omp stripe sizes(a) + for (int i = 7; i < 17; i += 3) + body(i); +} + + +typedef struct { double array[12]; } data_t; +extern "C" void foo9(data_t data) { +#pragma omp stripe sizes(5) + for (double v : data.array) + body(v); +} + + +extern "C" void foo10(data_t data) { +#pragma omp stripe sizes(5) + for (double c = 42.0; double v : data.array) + body(c, v); +} + + +#endif /* HEADER */ + +// CHECK1-LABEL: define dso_local void @body( +// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define internal void @__cxx_global_var_init( +// CHECK1-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_ZN1SC1Ev( +// CHECK1-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_ZN1SC2Ev( +// CHECK1-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = 
load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo1( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo2( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo3( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSTRIPE_0_IV_I:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: [[DOTSTRIPE_1_IV_J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 0 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 5 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP7]], 4 +// CHECK1-NEXT: br i1 [[CMP2]], label [[FOR_BODY:%.*]], label [[FOR_END32:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3:%.*]] +// CHECK1: for.cond3: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 4, [[ADD4]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK1: cond.true6: +// CHECK1-NEXT: br label [[COND_END9:%.*]] +// CHECK1: cond.false7: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK1-NEXT: br label [[COND_END9]] +// CHECK1: cond.end9: +// CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 4, [[COND_TRUE6]] ], [ [[ADD8]], [[COND_FALSE7]] ] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP9]], [[COND10]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[FOR_BODY12:%.*]], label [[FOR_END29:%.*]] +// CHECK1: for.body12: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 7, [[MUL13]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: br label 
[[FOR_COND15:%.*]] +// CHECK1: for.cond15: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP15]], 5 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp slt i32 4, [[ADD16]] +// CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK1: cond.true18: +// CHECK1-NEXT: br label [[COND_END21:%.*]] +// CHECK1: cond.false19: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP16]], 5 +// CHECK1-NEXT: br label [[COND_END21]] +// CHECK1: cond.end21: +// CHECK1-NEXT: [[COND22:%.*]] = phi i32 [ 4, [[COND_TRUE18]] ], [ [[ADD20]], [[COND_FALSE19]] ] +// CHECK1-NEXT: [[CMP23:%.*]] = icmp slt i32 [[TMP14]], [[COND22]] +// CHECK1-NEXT: br i1 [[CMP23]], label [[FOR_BODY24:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body24: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP17]], 3 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 7, [[MUL25]] +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP18]], i32 noundef [[TMP19]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: br label [[FOR_COND15]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: br label [[FOR_INC27:%.*]] +// CHECK1: for.inc27: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: [[INC28:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[INC28]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: for.end29: +// CHECK1-NEXT: br label [[FOR_INC30:%.*]] +// CHECK1: for.inc30: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[TMP22]], 5 +// CHECK1-NEXT: store i32 [[ADD31]], ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1: for.end32: +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD33]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo4( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca 
i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSTRIPE_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSTRIPE_1_IV_J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP8]], 1 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 1 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL4]] +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 5 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP9]], 4 +// CHECK1-NEXT: br i1 [[CMP7]], label [[FOR_BODY:%.*]], label [[FOR_END37:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND8:%.*]] +// CHECK1: for.cond8: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp slt i32 4, [[ADD9]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK1: cond.true11: +// CHECK1-NEXT: br label [[COND_END14:%.*]] +// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP13]], 5 +// CHECK1-NEXT: br label [[COND_END14]] +// CHECK1: cond.end14: +// CHECK1-NEXT: [[COND15:%.*]] = phi i32 [ 4, [[COND_TRUE11]] ], [ [[ADD13]], [[COND_FALSE12]] ] +// CHECK1-NEXT: [[CMP16:%.*]] = icmp slt i32 [[TMP11]], [[COND15]] +// CHECK1-NEXT: br i1 [[CMP16]], label [[FOR_BODY17:%.*]], label [[FOR_END34:%.*]] +// CHECK1: for.body17: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 7, [[MUL18]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: br label [[FOR_COND20:%.*]] +// CHECK1: for.cond20: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP17]], 5 +// CHECK1-NEXT: [[CMP22:%.*]] = icmp slt i32 4, [[ADD21]] +// CHECK1-NEXT: br i1 [[CMP22]], label [[COND_TRUE23:%.*]], label [[COND_FALSE24:%.*]] +// CHECK1: cond.true23: +// CHECK1-NEXT: br label [[COND_END26:%.*]] +// CHECK1: cond.false24: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP18]], 5 +// CHECK1-NEXT: br label [[COND_END26]] +// CHECK1: cond.end26: +// CHECK1-NEXT: [[COND27:%.*]] = phi i32 [ 4, [[COND_TRUE23]] ], [ [[ADD25]], 
[[COND_FALSE24]] ] +// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP16]], [[COND27]] +// CHECK1-NEXT: br i1 [[CMP28]], label [[FOR_BODY29:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body29: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: [[MUL30:%.*]] = mul nsw i32 [[TMP19]], 3 +// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 7, [[MUL30]] +// CHECK1-NEXT: store i32 [[ADD31]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP20]], i32 noundef [[TMP21]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK1-NEXT: br label [[FOR_COND20]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: br label [[FOR_INC32:%.*]] +// CHECK1: for.inc32: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: [[INC33:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[INC33]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND8]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: for.end34: +// CHECK1-NEXT: br label [[FOR_INC35:%.*]] +// CHECK1: for.inc35: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP24]], 5 +// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: for.end37: +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[ADD38:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD38]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo5( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSTRIPE_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFLOOR_0_IV_I11:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSTRIPE_0_IV_I12:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J13:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 5 +// 
CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 4, [[ADD]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 5 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[ADD4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[SUB6]], 1 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD7]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = zext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 1, [[CONV]] +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i64 [[MUL]], 4 +// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL8]], 1 +// CHECK1-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], 
ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK1-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP14]], label [[COND_TRUE15:%.*]], label [[COND_FALSE16:%.*]] +// CHECK1: cond.true15: +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK1-NEXT: br label [[COND_END17:%.*]] +// CHECK1: cond.false16: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: br label [[COND_END17]] +// CHECK1: cond.end17: +// CHECK1-NEXT: [[COND18:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE15]] ], [ [[TMP13]], [[COND_FALSE16]] ] +// CHECK1-NEXT: store i64 [[COND18]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB20:%.*]] = sub i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: 
[[SUB21:%.*]] = sub i32 [[SUB20]], 1 +// CHECK1-NEXT: [[ADD22:%.*]] = add i32 [[SUB21]], 1 +// CHECK1-NEXT: [[DIV23:%.*]] = udiv i32 [[ADD22]], 1 +// CHECK1-NEXT: [[MUL24:%.*]] = mul i32 1, [[DIV23]] +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[MUL24]], 4 +// CHECK1-NEXT: [[CONV26:%.*]] = zext i32 [[MUL25]] to i64 +// CHECK1-NEXT: [[DIV27:%.*]] = sdiv i64 [[TMP17]], [[CONV26]] +// CHECK1-NEXT: [[MUL28:%.*]] = mul nsw i64 [[DIV27]], 5 +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i64 0, [[MUL28]] +// CHECK1-NEXT: [[CONV30:%.*]] = trunc i64 [[ADD29]] to i32 +// CHECK1-NEXT: store i32 [[CONV30]], ptr [[DOTFLOOR_0_IV_I11]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CONV31:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB32:%.*]] = sub i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[SUB33:%.*]] = sub i32 [[SUB32]], 1 +// CHECK1-NEXT: [[ADD34:%.*]] = add i32 [[SUB33]], 1 +// CHECK1-NEXT: [[DIV35:%.*]] = udiv i32 [[ADD34]], 1 +// CHECK1-NEXT: [[MUL36:%.*]] = mul i32 1, [[DIV35]] +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[MUL36]], 4 +// CHECK1-NEXT: [[CONV38:%.*]] = zext i32 [[MUL37]] to i64 +// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[TMP22]], [[CONV38]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB40:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[SUB41:%.*]] = sub i32 [[SUB40]], 1 +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[SUB41]], 1 +// CHECK1-NEXT: [[DIV43:%.*]] = udiv i32 [[ADD42]], 1 +// CHECK1-NEXT: [[MUL44:%.*]] = mul i32 1, [[DIV43]] +// CHECK1-NEXT: [[MUL45:%.*]] = mul i32 
[[MUL44]], 4 +// CHECK1-NEXT: [[CONV46:%.*]] = zext i32 [[MUL45]] to i64 +// CHECK1-NEXT: [[MUL47:%.*]] = mul nsw i64 [[DIV39]], [[CONV46]] +// CHECK1-NEXT: [[SUB48:%.*]] = sub nsw i64 [[TMP21]], [[MUL47]] +// CHECK1-NEXT: [[DIV49:%.*]] = sdiv i64 [[SUB48]], 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i64 [[DIV49]], 1 +// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i64 [[CONV31]], [[MUL50]] +// CHECK1-NEXT: [[CONV52:%.*]] = trunc i64 [[ADD51]] to i32 +// CHECK1-NEXT: store i32 [[CONV52]], ptr [[DOTSTRIPE_0_IV_I12]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB53:%.*]] = sub i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[SUB54:%.*]] = sub i32 [[SUB53]], 1 +// CHECK1-NEXT: [[ADD55:%.*]] = add i32 [[SUB54]], 1 +// CHECK1-NEXT: [[DIV56:%.*]] = udiv i32 [[ADD55]], 1 +// CHECK1-NEXT: [[MUL57:%.*]] = mul i32 1, [[DIV56]] +// CHECK1-NEXT: [[MUL58:%.*]] = mul i32 [[MUL57]], 4 +// CHECK1-NEXT: [[CONV59:%.*]] = zext i32 [[MUL58]] to i64 +// CHECK1-NEXT: [[DIV60:%.*]] = sdiv i64 [[TMP28]], [[CONV59]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB61:%.*]] = sub i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[SUB62:%.*]] = sub i32 [[SUB61]], 1 +// CHECK1-NEXT: [[ADD63:%.*]] = add i32 [[SUB62]], 1 +// CHECK1-NEXT: [[DIV64:%.*]] = udiv i32 [[ADD63]], 1 +// CHECK1-NEXT: [[MUL65:%.*]] = mul i32 1, [[DIV64]] +// CHECK1-NEXT: [[MUL66:%.*]] = mul i32 [[MUL65]], 4 +// CHECK1-NEXT: [[CONV67:%.*]] = zext i32 [[MUL66]] to i64 +// CHECK1-NEXT: [[MUL68:%.*]] = mul nsw i64 [[DIV60]], [[CONV67]] +// CHECK1-NEXT: [[SUB69:%.*]] = sub nsw i64 [[TMP27]], [[MUL68]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, 
ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB70:%.*]] = sub i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[SUB71:%.*]] = sub i32 [[SUB70]], 1 +// CHECK1-NEXT: [[ADD72:%.*]] = add i32 [[SUB71]], 1 +// CHECK1-NEXT: [[DIV73:%.*]] = udiv i32 [[ADD72]], 1 +// CHECK1-NEXT: [[MUL74:%.*]] = mul i32 1, [[DIV73]] +// CHECK1-NEXT: [[MUL75:%.*]] = mul i32 [[MUL74]], 4 +// CHECK1-NEXT: [[CONV76:%.*]] = zext i32 [[MUL75]] to i64 +// CHECK1-NEXT: [[DIV77:%.*]] = sdiv i64 [[TMP34]], [[CONV76]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB78:%.*]] = sub i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[SUB79:%.*]] = sub i32 [[SUB78]], 1 +// CHECK1-NEXT: [[ADD80:%.*]] = add i32 [[SUB79]], 1 +// CHECK1-NEXT: [[DIV81:%.*]] = udiv i32 [[ADD80]], 1 +// CHECK1-NEXT: [[MUL82:%.*]] = mul i32 1, [[DIV81]] +// CHECK1-NEXT: [[MUL83:%.*]] = mul i32 [[MUL82]], 4 +// CHECK1-NEXT: [[CONV84:%.*]] = zext i32 [[MUL83]] to i64 +// CHECK1-NEXT: [[MUL85:%.*]] = mul nsw i64 [[DIV77]], [[CONV84]] +// CHECK1-NEXT: [[SUB86:%.*]] = sub nsw i64 [[TMP33]], [[MUL85]] +// CHECK1-NEXT: [[DIV87:%.*]] = sdiv i64 [[SUB86]], 4 +// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[DIV87]], 4 +// CHECK1-NEXT: [[SUB89:%.*]] = sub nsw i64 [[SUB69]], [[MUL88]] +// CHECK1-NEXT: [[MUL90:%.*]] = mul nsw i64 [[SUB89]], 3 +// CHECK1-NEXT: [[ADD91:%.*]] = add nsw i64 7, [[MUL90]] +// CHECK1-NEXT: [[CONV92:%.*]] = trunc i64 [[ADD91]] to i32 +// CHECK1-NEXT: store i32 [[CONV92]], ptr [[J13]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I12]], align 4 +// CHECK1-NEXT: [[MUL93:%.*]] = mul nsw i32 [[TMP39]], 3 +// CHECK1-NEXT: [[ADD94:%.*]] = add nsw i32 7, 
[[MUL93]] +// CHECK1-NEXT: store i32 [[ADD94]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[J13]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP40]], i32 noundef [[TMP41]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD95:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK1-NEXT: store i64 [[ADD95]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo6( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo6.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define internal void @foo6.omp_outlined( +// CHECK1-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTSTRIPE_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br 
label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 4, [[ADD2]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] +// CHECK1: cond.true4: +// CHECK1-NEXT: br label [[COND_END7:%.*]] +// CHECK1: cond.false5: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK1-NEXT: br label [[COND_END7]] +// CHECK1: cond.end7: +// CHECK1-NEXT: 
[[COND8:%.*]] = phi i32 [ 4, [[COND_TRUE4]] ], [ [[ADD6]], [[COND_FALSE5]] ] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[COND8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 7, [[MUL10]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @tfoo7( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_(i32 noundef 0, i32 noundef 42) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR0]] 
comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo8( +// CHECK1-SAME: i32 noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo9( +// CHECK1-SAME: ptr noundef byval([[STRUCT_DATA_T:%.*]]) align 8 [[DATA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo10( +// CHECK1-SAME: ptr noundef byval([[STRUCT_DATA_T:%.*]]) align 8 [[DATA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define internal void @_GLOBAL__sub_I_stripe_codegen.cpp( +// CHECK1-SAME: ) #[[ATTR1]] section ".text.startup" { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__cxx_global_var_init() +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define internal void @__cxx_global_var_init( +// CHECK2-SAME: ) #[[ATTR0:[0-9]+]] section ".text.startup" { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_ZN1SC1Ev( +// CHECK2-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK2-NEXT: 
ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_ZN1SC2Ev( +// CHECK2-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @body( +// CHECK2-SAME: ...) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo1( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo10( +// CHECK2-SAME: ptr noundef byval([[STRUCT_DATA_T:%.*]]) align 8 [[DATA:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo2( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: 
define dso_local void @foo3( +// CHECK2-SAME: ) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSTRIPE_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSTRIPE_1_IV_J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 0 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 5 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP7]], 4 +// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY:%.*]], label [[FOR_END32:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3:%.*]] +// CHECK2: for.cond3: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 4, [[ADD4]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK2: cond.true6: +// CHECK2-NEXT: br label [[COND_END9:%.*]] +// CHECK2: cond.false7: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK2-NEXT: br label [[COND_END9]] +// CHECK2: 
cond.end9: +// CHECK2-NEXT: [[COND10:%.*]] = phi i32 [ 4, [[COND_TRUE6]] ], [ [[ADD8]], [[COND_FALSE7]] ] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP9]], [[COND10]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[FOR_BODY12:%.*]], label [[FOR_END29:%.*]] +// CHECK2: for.body12: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 7, [[MUL13]] +// CHECK2-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND15:%.*]] +// CHECK2: for.cond15: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP15]], 5 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp slt i32 4, [[ADD16]] +// CHECK2-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK2: cond.true18: +// CHECK2-NEXT: br label [[COND_END21:%.*]] +// CHECK2: cond.false19: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP16]], 5 +// CHECK2-NEXT: br label [[COND_END21]] +// CHECK2: cond.end21: +// CHECK2-NEXT: [[COND22:%.*]] = phi i32 [ 4, [[COND_TRUE18]] ], [ [[ADD20]], [[COND_FALSE19]] ] +// CHECK2-NEXT: [[CMP23:%.*]] = icmp slt i32 [[TMP14]], [[COND22]] +// CHECK2-NEXT: br i1 [[CMP23]], label [[FOR_BODY24:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body24: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP17]], 3 +// CHECK2-NEXT: [[ADD26:%.*]] = add nsw i32 7, [[MUL25]] +// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP18]], i32 noundef [[TMP19]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND15]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: br label [[FOR_INC27:%.*]] +// CHECK2: for.inc27: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: [[INC28:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: store i32 [[INC28]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND3]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2: for.end29: +// CHECK2-NEXT: br label [[FOR_INC30:%.*]] +// CHECK2: for.inc30: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: [[ADD31:%.*]] = add nsw i32 [[TMP22]], 5 +// CHECK2-NEXT: store i32 [[ADD31]], ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: for.end32: +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK2-NEXT: store i32 [[ADD33]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo4( +// 
CHECK2-SAME: ) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSTRIPE_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSTRIPE_1_IV_J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK2-NEXT: store 
i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP8]], 1 +// CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 1 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL4]] +// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 5 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP9]], 4 +// CHECK2-NEXT: br i1 [[CMP7]], label [[FOR_BODY:%.*]], label [[FOR_END37:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND8:%.*]] +// CHECK2: for.cond8: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP12]], 5 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp slt i32 4, [[ADD9]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK2: cond.true11: +// CHECK2-NEXT: br label [[COND_END14:%.*]] +// CHECK2: cond.false12: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP13]], 5 +// CHECK2-NEXT: br label [[COND_END14]] +// CHECK2: cond.end14: +// CHECK2-NEXT: [[COND15:%.*]] = phi i32 [ 4, [[COND_TRUE11]] ], [ [[ADD13]], [[COND_FALSE12]] ] +// CHECK2-NEXT: [[CMP16:%.*]] = icmp slt i32 [[TMP11]], [[COND15]] +// CHECK2-NEXT: br i1 [[CMP16]], label [[FOR_BODY17:%.*]], label [[FOR_END34:%.*]] +// CHECK2: for.body17: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 7, [[MUL18]] +// CHECK2-NEXT: store i32 [[ADD19]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND20:%.*]] +// CHECK2: for.cond20: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP17]], 5 +// CHECK2-NEXT: [[CMP22:%.*]] = icmp slt i32 4, [[ADD21]] +// CHECK2-NEXT: br i1 [[CMP22]], label [[COND_TRUE23:%.*]], label [[COND_FALSE24:%.*]] +// CHECK2: cond.true23: +// CHECK2-NEXT: br label [[COND_END26:%.*]] +// CHECK2: cond.false24: +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP18]], 5 +// CHECK2-NEXT: br label [[COND_END26]] +// CHECK2: cond.end26: 
+// CHECK2-NEXT: [[COND27:%.*]] = phi i32 [ 4, [[COND_TRUE23]] ], [ [[ADD25]], [[COND_FALSE24]] ] +// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP16]], [[COND27]] +// CHECK2-NEXT: br i1 [[CMP28]], label [[FOR_BODY29:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body29: +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: [[MUL30:%.*]] = mul nsw i32 [[TMP19]], 3 +// CHECK2-NEXT: [[ADD31:%.*]] = add nsw i32 7, [[MUL30]] +// CHECK2-NEXT: store i32 [[ADD31]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP20]], i32 noundef [[TMP21]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSTRIPE_1_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND20]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: br label [[FOR_INC32:%.*]] +// CHECK2: for.inc32: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: [[INC33:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK2-NEXT: store i32 [[INC33]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND8]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: for.end34: +// CHECK2-NEXT: br label [[FOR_INC35:%.*]] +// CHECK2: for.inc35: +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP24]], 5 +// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTFLOOR_1_IV_J]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: for.end37: +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// 
CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK2-NEXT: store i32 [[ADD38]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo5( +// CHECK2-SAME: ) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSTRIPE_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFLOOR_0_IV_I11:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSTRIPE_0_IV_I12:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J13:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 5 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 4, [[ADD]] +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 5 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[ADD4]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[ADD7:%.*]] = add i32 [[SUB6]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD7]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = zext i32 [[DIV]] to i64 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 1, [[CONV]] +// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i64 [[MUL]], 4 +// CHECK2-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL8]], 1 +// CHECK2-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load 
i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK2-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK2-NEXT: [[CMP14:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP14]], label [[COND_TRUE15:%.*]], label [[COND_FALSE16:%.*]] +// CHECK2: cond.true15: +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK2-NEXT: br label [[COND_END17:%.*]] +// CHECK2: cond.false16: +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: br label [[COND_END17]] +// CHECK2: cond.end17: +// CHECK2-NEXT: [[COND18:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE15]] ], [ [[TMP13]], [[COND_FALSE16]] ] +// CHECK2-NEXT: store i64 [[COND18]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK2-NEXT: [[SUB20:%.*]] = sub i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[SUB21:%.*]] = sub i32 [[SUB20]], 1 +// CHECK2-NEXT: [[ADD22:%.*]] = add i32 [[SUB21]], 1 +// CHECK2-NEXT: [[DIV23:%.*]] = udiv i32 [[ADD22]], 1 +// CHECK2-NEXT: [[MUL24:%.*]] = mul i32 1, [[DIV23]] +// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[MUL24]], 4 +// CHECK2-NEXT: [[CONV26:%.*]] = zext i32 [[MUL25]] to i64 +// CHECK2-NEXT: [[DIV27:%.*]] = sdiv i64 [[TMP17]], [[CONV26]] +// CHECK2-NEXT: [[MUL28:%.*]] = mul nsw i64 [[DIV27]], 5 +// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i64 0, [[MUL28]] +// CHECK2-NEXT: [[CONV30:%.*]] = trunc i64 [[ADD29]] to i32 +// CHECK2-NEXT: store i32 [[CONV30]], ptr [[DOTFLOOR_0_IV_I11]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CONV31:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB32:%.*]] = sub i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: [[SUB33:%.*]] = sub i32 [[SUB32]], 1 +// CHECK2-NEXT: [[ADD34:%.*]] = add i32 [[SUB33]], 1 +// CHECK2-NEXT: [[DIV35:%.*]] = udiv i32 [[ADD34]], 1 +// CHECK2-NEXT: [[MUL36:%.*]] = mul i32 1, [[DIV35]] +// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[MUL36]], 4 +// CHECK2-NEXT: [[CONV38:%.*]] = zext i32 [[MUL37]] to i64 +// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[TMP22]], [[CONV38]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB40:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: [[SUB41:%.*]] = sub i32 [[SUB40]], 1 +// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[SUB41]], 1 +// CHECK2-NEXT: [[DIV43:%.*]] = udiv i32 [[ADD42]], 1 +// CHECK2-NEXT: 
[[MUL44:%.*]] = mul i32 1, [[DIV43]] +// CHECK2-NEXT: [[MUL45:%.*]] = mul i32 [[MUL44]], 4 +// CHECK2-NEXT: [[CONV46:%.*]] = zext i32 [[MUL45]] to i64 +// CHECK2-NEXT: [[MUL47:%.*]] = mul nsw i64 [[DIV39]], [[CONV46]] +// CHECK2-NEXT: [[SUB48:%.*]] = sub nsw i64 [[TMP21]], [[MUL47]] +// CHECK2-NEXT: [[DIV49:%.*]] = sdiv i64 [[SUB48]], 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i64 [[DIV49]], 1 +// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i64 [[CONV31]], [[MUL50]] +// CHECK2-NEXT: [[CONV52:%.*]] = trunc i64 [[ADD51]] to i32 +// CHECK2-NEXT: store i32 [[CONV52]], ptr [[DOTSTRIPE_0_IV_I12]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB53:%.*]] = sub i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[SUB54:%.*]] = sub i32 [[SUB53]], 1 +// CHECK2-NEXT: [[ADD55:%.*]] = add i32 [[SUB54]], 1 +// CHECK2-NEXT: [[DIV56:%.*]] = udiv i32 [[ADD55]], 1 +// CHECK2-NEXT: [[MUL57:%.*]] = mul i32 1, [[DIV56]] +// CHECK2-NEXT: [[MUL58:%.*]] = mul i32 [[MUL57]], 4 +// CHECK2-NEXT: [[CONV59:%.*]] = zext i32 [[MUL58]] to i64 +// CHECK2-NEXT: [[DIV60:%.*]] = sdiv i64 [[TMP28]], [[CONV59]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB61:%.*]] = sub i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[SUB62:%.*]] = sub i32 [[SUB61]], 1 +// CHECK2-NEXT: [[ADD63:%.*]] = add i32 [[SUB62]], 1 +// CHECK2-NEXT: [[DIV64:%.*]] = udiv i32 [[ADD63]], 1 +// CHECK2-NEXT: [[MUL65:%.*]] = mul i32 1, [[DIV64]] +// CHECK2-NEXT: [[MUL66:%.*]] = mul i32 [[MUL65]], 4 +// CHECK2-NEXT: [[CONV67:%.*]] = zext i32 [[MUL66]] to i64 +// CHECK2-NEXT: [[MUL68:%.*]] = mul nsw i64 [[DIV60]], [[CONV67]] +// CHECK2-NEXT: [[SUB69:%.*]] = 
sub nsw i64 [[TMP27]], [[MUL68]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB70:%.*]] = sub i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[SUB71:%.*]] = sub i32 [[SUB70]], 1 +// CHECK2-NEXT: [[ADD72:%.*]] = add i32 [[SUB71]], 1 +// CHECK2-NEXT: [[DIV73:%.*]] = udiv i32 [[ADD72]], 1 +// CHECK2-NEXT: [[MUL74:%.*]] = mul i32 1, [[DIV73]] +// CHECK2-NEXT: [[MUL75:%.*]] = mul i32 [[MUL74]], 4 +// CHECK2-NEXT: [[CONV76:%.*]] = zext i32 [[MUL75]] to i64 +// CHECK2-NEXT: [[DIV77:%.*]] = sdiv i64 [[TMP34]], [[CONV76]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB78:%.*]] = sub i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[SUB79:%.*]] = sub i32 [[SUB78]], 1 +// CHECK2-NEXT: [[ADD80:%.*]] = add i32 [[SUB79]], 1 +// CHECK2-NEXT: [[DIV81:%.*]] = udiv i32 [[ADD80]], 1 +// CHECK2-NEXT: [[MUL82:%.*]] = mul i32 1, [[DIV81]] +// CHECK2-NEXT: [[MUL83:%.*]] = mul i32 [[MUL82]], 4 +// CHECK2-NEXT: [[CONV84:%.*]] = zext i32 [[MUL83]] to i64 +// CHECK2-NEXT: [[MUL85:%.*]] = mul nsw i64 [[DIV77]], [[CONV84]] +// CHECK2-NEXT: [[SUB86:%.*]] = sub nsw i64 [[TMP33]], [[MUL85]] +// CHECK2-NEXT: [[DIV87:%.*]] = sdiv i64 [[SUB86]], 4 +// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[DIV87]], 4 +// CHECK2-NEXT: [[SUB89:%.*]] = sub nsw i64 [[SUB69]], [[MUL88]] +// CHECK2-NEXT: [[MUL90:%.*]] = mul nsw i64 [[SUB89]], 3 +// CHECK2-NEXT: [[ADD91:%.*]] = add nsw i64 7, [[MUL90]] +// CHECK2-NEXT: [[CONV92:%.*]] = trunc i64 [[ADD91]] to i32 +// CHECK2-NEXT: store i32 [[CONV92]], ptr [[J13]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I12]], align 4 +// CHECK2-NEXT: [[MUL93:%.*]] = mul 
nsw i32 [[TMP39]], 3 +// CHECK2-NEXT: [[ADD94:%.*]] = add nsw i32 7, [[MUL93]] +// CHECK2-NEXT: store i32 [[ADD94]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[J13]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP40]], i32 noundef [[TMP41]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[ADD95:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK2-NEXT: store i64 [[ADD95]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo6( +// CHECK2-SAME: ) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo6.omp_outlined) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define internal void @foo6.omp_outlined( +// CHECK2-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTSTRIPE_0_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br 
label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 4, [[ADD2]] +// CHECK2-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] +// CHECK2: cond.true4: +// CHECK2-NEXT: br label [[COND_END7:%.*]] +// CHECK2: cond.false5: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK2-NEXT: br label [[COND_END7]] +// CHECK2: cond.end7: +// CHECK2-NEXT: 
[[COND8:%.*]] = phi i32 [ 4, [[COND_TRUE4]] ], [ [[ADD6]], [[COND_FALSE5]] ] +// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[COND8]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 7, [[MUL10]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTSTRIPE_0_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo8( +// CHECK2-SAME: i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo9( +// CHECK2-SAME: ptr noundef byval([[STRUCT_DATA_T:%.*]]) align 8 [[DATA:%.*]]) 
#[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @tfoo7( +// CHECK2-SAME: ) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_(i32 noundef 0, i32 noundef 42) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR1]] comdat { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define internal void @_GLOBAL__sub_I_stripe_codegen.cpp( +// CHECK2-SAME: ) #[[ATTR0]] section ".text.startup" { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @__cxx_global_var_init() +// CHECK2-NEXT: ret void +// +//. +// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +//. +// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +//. 
diff --git a/clang/test/OpenMP/stripe_messages.cpp b/clang/test/OpenMP/stripe_messages.cpp new file mode 100644 index 0000000000000..d05b8566f11de --- /dev/null +++ b/clang/test/OpenMP/stripe_messages.cpp @@ -0,0 +1,163 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error@+1 {{expected '('}} + #pragma omp stripe sizes + ; + + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp stripe sizes( + ; + + // expected-error@+1 {{expected expression}} + #pragma omp stripe sizes() + ; + + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp stripe sizes(5 + for (int i = 0; i < 7; ++i); + + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp stripe sizes(5, + ; + + // expected-error@+1 {{expected expression}} + #pragma omp stripe sizes(5,) + ; + + // expected-error@+2 {{expected expression}} + // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} + #pragma omp stripe sizes(5+ + ; + + // expected-error@+1 {{expected expression}} + #pragma omp stripe sizes(5+) + ; + + // expected-error@+1 {{expected expression}} + #pragma omp stripe sizes(for) + ; + + // expected-error@+1 {{argument to 'sizes' clause must be a strictly positive integer value}} + #pragma omp stripe sizes(0) + for (int i = 0; i < 7; ++i) + ; + + // expected-warning@+2 {{extra tokens at the end of '#pragma omp stripe' are ignored}} + // expected-error@+1 {{directive '#pragma omp stripe' requires the 'sizes' clause}} + #pragma omp stripe foo + ; + + // expected-error@+1 {{directive '#pragma omp stripe' cannot contain more than one 'sizes' clause}} + #pragma omp stripe sizes(5) sizes(5) + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+1 {{unexpected OpenMP clause 
'collapse' in directive '#pragma omp stripe'}} + #pragma omp stripe sizes(5) collapse(2) + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp stripe sizes(5) + } + + // expected-error@+2 {{statement after '#pragma omp stripe' must be a for loop}} + #pragma omp stripe sizes(5) + int b = 0; + + // expected-error@+3 {{statement after '#pragma omp stripe' must be a for loop}} + #pragma omp stripe sizes(5,5) + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+2 {{statement after '#pragma omp stripe' must be a for loop}} + #pragma omp stripe sizes(5,5) + for (int i = 0; i < 7; ++i) { + int k = 3; + for (int j = 0; j < 7; ++j) + ; + } + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp stripe sizes(5,5) + for (int i = 0; i < 7; ++i) + for (int j = i; j < 7; ++j) + ; + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp stripe sizes(5,5) + for (int i = 0; i < 7; ++i) + for (int j = 0; j < i; ++j) + ; + + // expected-error@+3 {{expected loop invariant expression}} + #pragma omp stripe sizes(5,5) + for (int i = 0; i < 7; ++i) + for (int j = 0; j < i; ++j) + ; + + // expected-error@+5 {{expected 3 for loops after '#pragma omp for', but found only 2}} + // expected-note@+1 {{as specified in 'collapse' clause}} + #pragma omp for collapse(3) + #pragma omp stripe sizes(5) + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+2 {{statement after '#pragma omp stripe' must be a for loop}} + #pragma omp stripe sizes(5) + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + // expected-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} + #pragma omp stripe sizes(5) + for (int i = 0; i/3<7; ++i) + ; + + // expected-error@+2 {{expression must have integral or unscoped enumeration type, not 'struct S'}} + struct S{} s; + #pragma omp stripe sizes(s) + for (int i = 0; i < 7; ++i) + ; +} + + +template <typename T> +static
void templated_func() { + // In a template context, but expression itself not instantiation-dependent + + // expected-error@+1 {{argument to 'sizes' clause must be a strictly positive integer value}} + #pragma omp stripe sizes(0) + for (int i = 0; i < 7; ++i) + ; +} + +template <int S> +static void templated_func_value_dependent() { + // expected-error@+1 {{argument to 'sizes' clause must be a strictly positive integer value}} + #pragma omp stripe sizes(S) + for (int i = 0; i < 7; ++i) + ; +} + +template <typename T> +static void templated_func_type_dependent() { + constexpr T s = 0; + // expected-error@+1 {{argument to 'sizes' clause must be a strictly positive integer value}} + #pragma omp stripe sizes(s) + for (int i = 0; i < 7; ++i) + ; +} + +void template_inst() { + templated_func<int>(); + // expected-note@+1 {{in instantiation of function template specialization 'templated_func_value_dependent<0>' requested here}} + templated_func_value_dependent<0>(); + // expected-note@+1 {{in instantiation of function template specialization 'templated_func_type_dependent<int>' requested here}} + templated_func_type_dependent<int>(); +} diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 285ac31420007..c710816bd24d0 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2203,6 +2203,7 @@ class EnqueueVisitor : public ConstStmtVisitor<EnqueueVisitor, void>, void VisitOMPLoopTransformationDirective(const OMPLoopTransformationDirective *D); void VisitOMPTileDirective(const OMPTileDirective *D); + void VisitOMPStripeDirective(const OMPStripeDirective *D); void VisitOMPUnrollDirective(const OMPUnrollDirective *D); void VisitOMPReverseDirective(const OMPReverseDirective *D); void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D); @@ -3334,6 +3335,10 @@ void EnqueueVisitor::VisitOMPTileDirective(const OMPTileDirective *D) { VisitOMPLoopTransformationDirective(D); } +void EnqueueVisitor::VisitOMPStripeDirective(const OMPStripeDirective *D) { +
VisitOMPLoopTransformationDirective(D); +} + void EnqueueVisitor::VisitOMPUnrollDirective(const OMPUnrollDirective *D) { VisitOMPLoopTransformationDirective(D); } @@ -6286,6 +6291,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPSimdDirective"); case CXCursor_OMPTileDirective: return cxstring::createRef("OMPTileDirective"); + case CXCursor_OMPStripeDirective: + return cxstring::createRef("OMPStripeDirective"); case CXCursor_OMPUnrollDirective: return cxstring::createRef("OMPUnrollDirective"); case CXCursor_OMPReverseDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 127f22bc5bdca..9ca0ce36bb7f2 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -677,6 +677,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPTileDirectiveClass: K = CXCursor_OMPTileDirective; break; + case Stmt::OMPStripeDirectiveClass: + K = CXCursor_OMPStripeDirective; + break; case Stmt::OMPUnrollDirectiveClass: K = CXCursor_OMPUnrollDirective; break; @@ -684,7 +687,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, K = CXCursor_OMPReverseDirective; break; case Stmt::OMPInterchangeDirectiveClass: - K = CXCursor_OMPTileDirective; + K = CXCursor_OMPInterchangeDirective; break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 37b92f2339df9..210acbff5af20 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -1195,6 +1195,13 @@ def OMP_Tile : Directive<"tile"> { let association = AS_Loop; let category = CA_Executable; } +def OMP_Stripe : Directive<"stripe"> { + let allowedOnceClauses = [ + VersionedClause, + ]; + let association = AS_Loop; + let category = CA_Executable; +} def OMP_Unknown : Directive<"unknown"> { let isDefault = true; let 
association = AS_None;