diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 2a956ad5b2909..5d9b68d4a7f2a 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2629,7 +2629,7 @@ with the current table size. .. code-block:: c++ typedef void (*__funcref funcref_t)(); - static __funcref table[0]; + static funcref_t table[0]; size_t getSize() { return __builtin_wasm_table_size(table); @@ -2651,10 +2651,10 @@ or -1. It will return -1 if not enough space could be allocated. .. code-block:: c++ typedef void (*__funcref funcref_t)(); - static __funcref table[0]; + static funcref_t table[0]; // grow returns the new table size or -1 on error. - int grow(__funcref fn, int delta) { + int grow(funcref_t fn, int delta) { int prevSize = __builtin_wasm_table_grow(table, fn, delta); if (prevSize == -1) return -1; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f10af8f5bd6b2..ee1ad214d81df 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10606,6 +10606,9 @@ def warn_noreturn_function_has_return_expr : Warning< def warn_falloff_noreturn_function : Warning< "function declared 'noreturn' should not return">, InGroup; +def warn_noreturn_coroutine : Warning< + "coroutine %0 cannot be declared 'noreturn' as it always returns a coroutine handle">, + InGroup; def err_noreturn_block_has_return_expr : Error< "block declared 'noreturn' should not return">; def err_carries_dependency_missing_on_first_decl : Error< diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 589869d018657..ce7d9be8d2faa 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -697,10 +697,12 @@ static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body, return; SourceLocation LBrace = Body->getBeginLoc(), RBrace = 
Body->getEndLoc(); auto EmitDiag = [&](SourceLocation Loc, unsigned DiagID) { - if (IsCoroutine) - S.Diag(Loc, DiagID) << FSI->CoroutinePromise->getType(); - else + if (IsCoroutine) { + if (DiagID != 0) + S.Diag(Loc, DiagID) << FSI->CoroutinePromise->getType(); + } else { S.Diag(Loc, DiagID); + } }; // cpu_dispatch functions permit empty function bodies for ICC compatibility. diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 3a22097152df5..0e4f3b20c78cd 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1176,6 +1176,10 @@ void Sema::CheckCompletedCoroutineBody(FunctionDecl *FD, Stmt *&Body) { for (AddrLabelExpr *ALE : Fn->AddrLabels) Diag(ALE->getBeginLoc(), diag::err_coro_invalid_addr_of_label); + // Coroutines always return a handle, so they can't be [[noreturn]]. + if (FD->isNoReturn()) + Diag(FD->getLocation(), diag::warn_noreturn_coroutine) << FD; + CoroutineStmtBuilder Builder(*this, *FD, *Fn, Body); if (Builder.isInvalid() || !Builder.buildStatements()) return FD->setInvalidDecl(); diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index ec38674a2c3e7..0394edb7889ba 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3910,7 +3910,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, FnRetType = FD->getReturnType(); if (FD->hasAttrs()) Attrs = &FD->getAttrs(); - if (FD->isNoReturn()) + if (FD->isNoReturn() && !getCurFunction()->isCoroutine()) Diag(ReturnLoc, diag::warn_noreturn_function_has_return_expr) << FD; if (FD->isMain() && RetValExp) if (isa(RetValExp)) diff --git a/clang/test/SemaCXX/coroutine-noreturn.cpp b/clang/test/SemaCXX/coroutine-noreturn.cpp new file mode 100644 index 0000000000000..4516b4e720ec0 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-noreturn.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -std=c++20 -fsyntax-only -Winvalid-noreturn -verify + +#include "Inputs/std-coroutine.h" + +struct 
Promise; + +struct Awaitable { + bool await_ready(); + void await_suspend(std::coroutine_handle<>); + void await_resume(); +}; + +struct Coro : std::coroutine_handle<> { + using promise_type = Promise; +}; + +struct Promise { + Coro get_return_object(); + std::suspend_always initial_suspend() noexcept; + std::suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +[[noreturn]] Coro test() { // expected-warning {{coroutine 'test' cannot be declared 'noreturn' as it always returns a coroutine handle}} + co_await Awaitable{}; +} + +// NO warning here. This could be a regular function returning a `Coro` object. +[[noreturn]] Coro test2(); diff --git a/flang-rt/include/flang-rt/runtime/io-stmt.h b/flang-rt/include/flang-rt/runtime/io-stmt.h index a364ddfd9b3c7..33cc91271ab12 100644 --- a/flang-rt/include/flang-rt/runtime/io-stmt.h +++ b/flang-rt/include/flang-rt/runtime/io-stmt.h @@ -627,7 +627,7 @@ class OpenStatementState : public ExternalIoStatementBase { Fortran::common::optional action_; Convert convert_{Convert::Unknown}; OwningPtr path_; - std::size_t pathLength_; + std::size_t pathLength_{}; Fortran::common::optional isUnformatted_; Fortran::common::optional access_; }; diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index d89314d44bf67..874926da2ceb7 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -81,9 +81,14 @@ class ObjectFile : public std::enable_shared_from_this, enum BinaryType { eBinaryTypeInvalid = 0, eBinaryTypeUnknown, - eBinaryTypeKernel, /// kernel binary - eBinaryTypeUser, /// user process binary - eBinaryTypeStandalone /// standalone binary / firmware + /// kernel binary + eBinaryTypeKernel, + /// user process binary, dyld addr + eBinaryTypeUser, + /// user process binary, dyld_all_image_infos addr + eBinaryTypeUserAllImageInfos, + /// standalone binary / firmware + eBinaryTypeStandalone }; struct LoadableData { 
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 4e356a7c8f5d9..a19322ff1e263 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -5599,9 +5599,13 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value, // struct main_bin_spec // { // uint32_t version; // currently 2 - // uint32_t type; // 0 == unspecified, 1 == kernel, + // uint32_t type; // 0 == unspecified, + // // 1 == kernel // // 2 == user process, + // dyld mach-o binary addr // // 3 == standalone binary + // // 4 == user process, + // // dyld_all_image_infos addr // uint64_t address; // UINT64_MAX if address not specified // uint64_t slide; // slide, UINT64_MAX if unspecified // // 0 if no slide needs to be applied to @@ -5652,6 +5656,7 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value, // convert the "main bin spec" type into our // ObjectFile::BinaryType enum const char *typestr = "unrecognized type"; + type = eBinaryTypeInvalid; switch (binspec_type) { case 0: type = eBinaryTypeUnknown; @@ -5669,6 +5674,10 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value, type = eBinaryTypeStandalone; typestr = "standalone"; break; + case 4: + type = eBinaryTypeUserAllImageInfos; + typestr = "userland dyld_all_image_infos"; + break; } LLDB_LOGF(log, "LC_NOTE 'main bin spec' found, version %d type %d " diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp index eef9bd4a175ec..281f3a0db8f69 100644 --- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp +++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp @@ -114,6 +114,7 @@ ProcessMachCore::ProcessMachCore(lldb::TargetSP target_sp, : PostMortemProcess(target_sp, listener_sp, core_file), m_core_aranges(), m_core_range_infos(), m_core_module_sp(), 
m_dyld_addr(LLDB_INVALID_ADDRESS), + m_dyld_all_image_infos_addr(LLDB_INVALID_ADDRESS), m_mach_kernel_addr(LLDB_INVALID_ADDRESS) {} // Destructor @@ -320,6 +321,9 @@ bool ProcessMachCore::LoadBinariesViaMetadata() { } else if (type == ObjectFile::eBinaryTypeUser) { m_dyld_addr = objfile_binary_value; m_dyld_plugin_name = DynamicLoaderMacOSXDYLD::GetPluginNameStatic(); + } else if (type == ObjectFile::eBinaryTypeUserAllImageInfos) { + m_dyld_all_image_infos_addr = objfile_binary_value; + m_dyld_plugin_name = DynamicLoaderMacOSXDYLD::GetPluginNameStatic(); } else { const bool force_symbol_search = true; const bool notify = true; @@ -466,6 +470,7 @@ void ProcessMachCore::LoadBinariesViaExhaustiveSearch() { addr_t saved_user_dyld_addr = m_dyld_addr; m_mach_kernel_addr = LLDB_INVALID_ADDRESS; m_dyld_addr = LLDB_INVALID_ADDRESS; + m_dyld_all_image_infos_addr = LLDB_INVALID_ADDRESS; addr_t better_kernel_address = DynamicLoaderDarwinKernel::SearchForDarwinKernel(this); @@ -507,6 +512,12 @@ void ProcessMachCore::LoadBinariesAndSetDYLD() { "image at 0x%" PRIx64, __FUNCTION__, m_dyld_addr); m_dyld_plugin_name = DynamicLoaderMacOSXDYLD::GetPluginNameStatic(); + } else if (m_dyld_all_image_infos_addr != LLDB_INVALID_ADDRESS) { + LLDB_LOGF(log, + "ProcessMachCore::%s: Using user process dyld " + "dyld_all_image_infos at 0x%" PRIx64, + __FUNCTION__, m_dyld_all_image_infos_addr); + m_dyld_plugin_name = DynamicLoaderMacOSXDYLD::GetPluginNameStatic(); } } else { if (m_dyld_addr != LLDB_INVALID_ADDRESS) { @@ -515,6 +526,11 @@ void ProcessMachCore::LoadBinariesAndSetDYLD() { "image at 0x%" PRIx64, __FUNCTION__, m_dyld_addr); m_dyld_plugin_name = DynamicLoaderMacOSXDYLD::GetPluginNameStatic(); + } else if (m_dyld_all_image_infos_addr != LLDB_INVALID_ADDRESS) { + LLDB_LOGF(log, + "ProcessMachCore::%s: Using user process dyld " + "dyld_all_image_infos at 0x%" PRIx64, + __FUNCTION__, m_dyld_all_image_infos_addr); } else if (m_mach_kernel_addr != LLDB_INVALID_ADDRESS) { LLDB_LOGF(log, 
"ProcessMachCore::%s: Using kernel " @@ -763,19 +779,32 @@ void ProcessMachCore::Initialize() { } addr_t ProcessMachCore::GetImageInfoAddress() { - // If we found both a user-process dyld and a kernel binary, we need to - // decide which to prefer. + // The DynamicLoader plugin will call back in to this Process + // method to find the virtual address of one of these: + // 1. The xnu mach kernel binary Mach-O header + // 2. The dyld binary Mach-O header + // 3. dyld's dyld_all_image_infos object + // + // DynamicLoaderMacOSX will accept either the dyld Mach-O header + // address or the dyld_all_image_infos interchangeably, no need + // to distinguish between them. It disambiguates by the Mach-O + // file magic number at the start. if (GetCorefilePreference() == eKernelCorefile) { - if (m_mach_kernel_addr != LLDB_INVALID_ADDRESS) { + if (m_mach_kernel_addr != LLDB_INVALID_ADDRESS) return m_mach_kernel_addr; - } - return m_dyld_addr; + if (m_dyld_addr != LLDB_INVALID_ADDRESS) + return m_dyld_addr; } else { - if (m_dyld_addr != LLDB_INVALID_ADDRESS) { + if (m_dyld_addr != LLDB_INVALID_ADDRESS) return m_dyld_addr; - } - return m_mach_kernel_addr; + if (m_mach_kernel_addr != LLDB_INVALID_ADDRESS) + return m_mach_kernel_addr; } + + // m_dyld_addr and m_mach_kernel_addr both + // invalid, return m_dyld_all_image_infos_addr + // in case it has a useful value. 
+ return m_dyld_all_image_infos_addr; } lldb_private::ObjectFile *ProcessMachCore::GetCoreObjectFile() { diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h index 8996ae116614b..6ba9f2354edf9 100644 --- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h +++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h @@ -131,6 +131,7 @@ class ProcessMachCore : public lldb_private::PostMortemProcess { VMRangeToPermissions m_core_range_infos; lldb::ModuleSP m_core_module_sp; lldb::addr_t m_dyld_addr; + lldb::addr_t m_dyld_all_image_infos_addr; lldb::addr_t m_mach_kernel_addr; llvm::StringRef m_dyld_plugin_name; }; diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionSave.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionSave.h new file mode 100644 index 0000000000000..73aafabfd0b0c --- /dev/null +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionSave.h @@ -0,0 +1,28 @@ +//===- TransactionSave.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a region pass that simply calls Context::save() to save the IR state. 
+// +#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_TRANSACTIONSAVE_H +#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_TRANSACTIONSAVE_H + +#include "llvm/SandboxIR/Pass.h" +#include "llvm/SandboxIR/Region.h" + +namespace llvm::sandboxir { + +class TransactionSave : public RegionPass { +public: + TransactionSave() : RegionPass("tr-save") {} + bool runOnRegion(Region &Rgn, const Analyses &A) final; +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_TRANSACTIONSAVE_H diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp index 3a45113b0a2ea..23c87702eb133 100644 --- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -77,9 +77,18 @@ static const uint32_t DefaultCutoffsData[] = { const ArrayRef ProfileSummaryBuilder::DefaultCutoffs = DefaultCutoffsData; +// An entry for the 0th percentile to correctly calculate hot/cold count +// thresholds when -profile-summary-cutoff-hot/cold is 0. If the hot cutoff is +// 0, no sample counts are treated as hot. If the cold cutoff is 0, all sample +// counts are treated as cold. Assumes there are no UINT64_MAX sample counts. 
+static const ProfileSummaryEntry ZeroCutoffEntry = {0, UINT64_MAX, 0}; + const ProfileSummaryEntry & ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS, uint64_t Percentile) { + if (Percentile == 0) + return ZeroCutoffEntry; + auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) { return Entry.Cutoff < Percentile; }); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 38272b9d4840d..895d1e77bf1c4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -114,9 +114,63 @@ bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO, llvm_unreachable("unknown operand type"); } -void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { +// Lower true16 D16 Pseudo instruction to d16_lo/d16_hi MCInst based on +// Dst/Data's .l/.h selection +void AMDGPUMCInstLower::lowerT16D16Helper(const MachineInstr *MI, + MCInst &OutMI) const { unsigned Opcode = MI->getOpcode(); const auto *TII = static_cast(ST.getInstrInfo()); + const SIRegisterInfo &TRI = TII->getRegisterInfo(); + const auto *Info = AMDGPU::getT16D16Helper(Opcode); + + llvm::AMDGPU::OpName OpName; + if (TII->isDS(Opcode)) { + if (MI->mayLoad()) + OpName = llvm::AMDGPU::OpName::vdst; + else if (MI->mayStore()) + OpName = llvm::AMDGPU::OpName::data0; + else + llvm_unreachable("LDS load or store expected"); + } else { + OpName = AMDGPU::hasNamedOperand(Opcode, llvm::AMDGPU::OpName::vdata) + ? llvm::AMDGPU::OpName::vdata + : llvm::AMDGPU::OpName::vdst; + } + + // select Dst/Data + int VDstOrVDataIdx = AMDGPU::getNamedOperandIdx(Opcode, OpName); + const MachineOperand &MIVDstOrVData = MI->getOperand(VDstOrVDataIdx); + + // select hi/lo MCInst + bool IsHi = AMDGPU::isHi16Reg(MIVDstOrVData.getReg(), TRI); + Opcode = IsHi ? 
Info->HiOp : Info->LoOp; + + int MCOpcode = TII->pseudoToMCOpcode(Opcode); + assert(MCOpcode != -1 && + "Pseudo instruction doesn't have a target-specific version"); + OutMI.setOpcode(MCOpcode); + + // lower operands + for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) { + const MachineOperand &MO = MI->getOperand(I); + MCOperand MCOp; + if (I == VDstOrVDataIdx) + MCOp = MCOperand::createReg(TRI.get32BitRegister(MIVDstOrVData.getReg())); + else + lowerOperand(MO, MCOp); + OutMI.addOperand(MCOp); + } + + if (AMDGPU::hasNamedOperand(MCOpcode, AMDGPU::OpName::vdst_in)) { + MCOperand MCOp; + lowerOperand(MIVDstOrVData, MCOp); + OutMI.addOperand(MCOp); + } +} + +void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + unsigned Opcode = MI->getOpcode(); + const auto *TII = static_cast(ST.getInstrInfo()); // FIXME: Should be able to handle this with lowerPseudoInstExpansion. We // need to select it to the subtarget specific version, and there's no way to @@ -137,6 +191,9 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { Opcode == AMDGPU::SI_TCRETURN_GFX) { // TODO: How to use branch immediate and avoid register+add? 
Opcode = AMDGPU::S_SETPC_B64; + } else if (AMDGPU::getT16D16Helper(Opcode)) { + lowerT16D16Helper(MI, OutMI); + return; } int MCOpcode = TII->pseudoToMCOpcode(Opcode); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h index 7176cc5d3439b..5ddf1ca2ab06d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h @@ -39,6 +39,8 @@ class AMDGPUMCInstLower { /// Lower a MachineInstr to an MCInst void lower(const MachineInstr *MI, MCInst &OutMI) const; + + void lowerT16D16Helper(const MachineInstr *MI, MCInst &OutMI) const; }; namespace { diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 8fa708b74dde3..ea6e703eba5d9 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -16,6 +16,12 @@ let WantsRoot = true in { def ScratchSVAddr : ComplexPattern; } +class True16D16Table { + Instruction T16Op = !cast(NAME); + Instruction HiOp = !cast(hiOp); + Instruction LoOp = !cast(loOp); +} + //===----------------------------------------------------------------------===// // FLAT classes //===----------------------------------------------------------------------===// @@ -226,6 +232,12 @@ class FLAT_Load_Pseudo { + def "" : FLAT_Load_Pseudo; + let True16Predicate = UseRealTrue16Insts in + def _t16 : FLAT_Load_Pseudo, True16D16Table; +} + class FLAT_Store_Pseudo : FLAT_Pseudo< opName, @@ -662,12 +674,12 @@ def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>; let SubtargetPredicate = HasD16LoadStore in { let TiedSourceNotRead = 1 in { -def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>; def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>; -def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>; +defm FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo_t16 <"flat_load_ubyte_d16">; def 
FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>; -def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>; +defm FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo_t16 <"flat_load_sbyte_d16">; def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>; +defm FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo_t16 <"flat_load_short_d16">; } def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>; @@ -1049,6 +1061,11 @@ class FlatLoadPat_D16 : (inst $vaddr, $offset, 0, $in) >; +class FlatLoadPat_D16_t16 : GCNPat < + (vt (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset))), + (inst $vaddr, $offset, (i32 0)) +>; + class FlatSignedLoadPat_D16 : GCNPat < (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in), (inst $vaddr, $offset, 0, $in) @@ -1371,16 +1388,29 @@ def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; -def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; +foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in +let True16Predicate = p in { + def : FlatLoadPat ; + def : FlatLoadPat ; + def : FlatLoadPat ; + def : FlatLoadPat ; + def : FlatStorePat ; + def : FlatStorePat ; +} + +let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in { + def : FlatLoadPat_D16_t16; + def : FlatLoadPat_D16_t16; + def : FlatLoadPat_D16_t16; + def : FlatLoadPat_D16_t16; +} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts + def : FlatLoadPat ; def : FlatLoadPat ; @@ -2761,3 +2791,11 @@ defm SCRATCH_STORE_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_ defm SCRATCH_LOAD_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x53>; defm SCRATCH_STORE_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x54>; + +def True16D16Table : 
GenericTable { + let FilterClass = "True16D16Table"; + let CppTypeName = "True16D16Info"; + let Fields = ["T16Op", "HiOp", "LoOp"]; + let PrimaryKey = ["T16Op"]; + let PrimaryKeyName = "getT16D16Helper"; +} diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 4fd68b52b53bb..e30e257da6873 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2483,8 +2483,15 @@ class getHasExt { + // This type of operand is only used in pseudo instructions helping + // code generation and thus doesn't need encoding and decoding methods. + // It also doesn't need to support AGPRs, because GFX908/A/40 do not + // support True16. + defvar VLdSt_16 = RegisterOperand; + RegisterOperand ret = - !cond(!eq(RC.Size, 32) : AVLdSt_32, + !cond(!eq(RC.Size, 16) : VLdSt_16, + !eq(RC.Size, 32) : AVLdSt_32, !eq(RC.Size, 64) : AVLdSt_64, !eq(RC.Size, 96) : AVLdSt_96, !eq(RC.Size, 128) : AVLdSt_128, diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp index 1123696509818..b4d740422b94a 100644 --- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp @@ -212,6 +212,12 @@ uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF) { uint64_t CodeSize = 0; for (const MachineBasicBlock &MBB : MF) { + // The amount of padding to align code can be both underestimated and + // overestimated. If inline asm is used, getInstSizeInBytes() will + // return a maximum size of a single instruction, where the real size may + // differ. At this point CodeSize may be already off. + CodeSize = alignTo(CodeSize, MBB.getAlignment()); + for (const MachineInstr &MI : MBB) { // TODO: CodeSize should account for multiple functions. 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 59afcbed35294..c521d0dd3ad2d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -430,6 +430,7 @@ struct FP4FP8DstByteSelInfo { #define GET_VOPDPairs_IMPL #define GET_VOPTrue16Table_DECL #define GET_VOPTrue16Table_IMPL +#define GET_True16D16Table_IMPL #define GET_WMMAOpcode2AddrMappingTable_DECL #define GET_WMMAOpcode2AddrMappingTable_IMPL #define GET_WMMAOpcode3AddrMappingTable_DECL diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index e458b6b9604b6..103993e6435de 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -113,6 +113,12 @@ struct CvtScaleF32_F32F16ToF8F4_Info { unsigned Opcode; }; +struct True16D16Info { + unsigned T16Op; + unsigned HiOp; + unsigned LoOp; +}; + #define GET_MIMGBaseOpcode_DECL #define GET_MIMGDim_DECL #define GET_MIMGEncoding_DECL @@ -123,6 +129,7 @@ struct CvtScaleF32_F32F16ToF8F4_Info { #define GET_MAIInstInfoTable_DECL #define GET_isMFMA_F8F6F4Table_DECL #define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL +#define GET_True16D16Table_DECL #include "AMDGPUGenSearchableTables.inc" namespace IsaInfo { diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 676d23e1ebdf0..38670ba304e53 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_component_library(LLVMVectorize SandboxVectorizer/Passes/RegionsFromMetadata.cpp SandboxVectorizer/Passes/SeedCollection.cpp SandboxVectorizer/Passes/TransactionAcceptOrRevert.cpp + SandboxVectorizer/Passes/TransactionSave.cpp SandboxVectorizer/SandboxVectorizer.cpp SandboxVectorizer/SandboxVectorizerPassBuilder.cpp SandboxVectorizer/Scheduler.cpp diff --git 
a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def index 722c6f5db4192..f745073a1eba6 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def @@ -19,6 +19,7 @@ REGION_PASS("null", ::llvm::sandboxir::NullPass) REGION_PASS("print-instruction-count", ::llvm::sandboxir::PrintInstructionCount) +REGION_PASS("tr-save", ::llvm::sandboxir::TransactionSave) REGION_PASS("tr-accept", ::llvm::sandboxir::TransactionAlwaysAccept) REGION_PASS("tr-accept-or-revert", ::llvm::sandboxir::TransactionAcceptOrRevert) REGION_PASS("bottom-up-vec", ::llvm::sandboxir::BottomUpVec) diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/SeedCollection.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/SeedCollection.cpp index 0001c9bb7c7e4..f3b62e36e5115 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/SeedCollection.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/SeedCollection.cpp @@ -82,8 +82,6 @@ bool SeedCollection::runOnFunction(Function &F, const Analyses &A) { // Create a region containing the seed slice. auto &Ctx = F.getContext(); Region Rgn(Ctx, A.getTTI()); - // TODO: Replace save() with a save pass in the pass pipeline. - Ctx.save(); Rgn.setAux(SeedSlice); // Run the region pass pipeline. Change |= RPM.runOnRegion(Rgn, A); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionSave.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionSave.cpp new file mode 100644 index 0000000000000..8d39d971273b4 --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionSave.cpp @@ -0,0 +1,20 @@ +//===- TransactionSave.cpp - Save the IR state ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionSave.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InstructionCost.h" + +namespace llvm::sandboxir { + +bool TransactionSave::runOnRegion(Region &Rgn, const Analyses &A) { + Rgn.getContext().save(); + return false; +} + +} // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp index 4f17aa213bbb0..5837cc16fcbac 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp @@ -36,7 +36,7 @@ SandboxVectorizerPass::SandboxVectorizerPass() : FPM("fpm") { // - Bottom-up Vectorizer pass that starts from a seed // - Accept or revert IR state pass FPM.setPassPipeline( - "seed-collection", + "seed-collection", sandboxir::SandboxVectorizerPassBuilder::createFunctionPass); } else { // Create the user-defined pipeline. 
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp index e552f0570dd9d..389f9cc4cae7c 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp @@ -7,6 +7,7 @@ #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/SeedCollection.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAcceptOrRevert.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAlwaysAccept.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionSave.h" namespace llvm::sandboxir { diff --git a/llvm/test/Analysis/ProfileSummary/basic.ll b/llvm/test/Analysis/ProfileSummary/basic.ll index 966a1117c47d1..c4f48ccafde86 100644 --- a/llvm/test/Analysis/ProfileSummary/basic.ll +++ b/llvm/test/Analysis/ProfileSummary/basic.ll @@ -2,12 +2,16 @@ ; RUN: opt < %s -disable-output -profile-summary-hot-count=500 -passes=print-profile-summary -S 2>&1 | FileCheck %s -check-prefixes=OVERRIDE-HOT ; RUN: opt < %s -disable-output -profile-summary-cold-count=0 -passes=print-profile-summary -S 2>&1 | FileCheck %s -check-prefixes=OVERRIDE-COLD ; RUN: opt < %s -disable-output -profile-summary-cold-count=200 -profile-summary-hot-count=1000 -passes=print-profile-summary -S 2>&1 | FileCheck %s -check-prefixes=OVERRIDE-BOTH +; RUN: opt < %s -disable-output -profile-summary-cutoff-hot=0 -passes=print-profile-summary -S 2>&1 | FileCheck %s -check-prefixes=HOT-CUTOFF-0 +; RUN: opt < %s -disable-output -profile-summary-cutoff-cold=0 -profile-summary-hot-count=18446744073709551615 -passes=print-profile-summary -S 2>&1 | FileCheck %s -check-prefixes=COLD-CUTOFF-0 define void @f1() !prof !20 { ; CHECK-LABEL: f1 :hot ; OVERRIDE-HOT-LABEL: f1 ; OVERRIDE-COLD-LABEL: f1 :hot ; OVERRIDE-BOTH-LABEL: f1 +; 
HOT-CUTOFF-0-LABEL: f1{{$}} +; COLD-CUTOFF-0-LABEL: f1 :cold ret void } @@ -17,6 +21,8 @@ define void @f2() !prof !21 { ; OVERRIDE-HOT-LABEL: f2 :cold ; OVERRIDE-COLD-LABEL: f2 ; OVERRIDE-BOTH-LABEL: f2 +; HOT-CUTOFF-0-LABEL: f2 :cold +; COLD-CUTOFF-0-LABEL: f2 :cold ret void } @@ -26,6 +32,8 @@ define void @f3() !prof !22 { ; OVERRIDE-HOT-LABEL: f3 ; OVERRIDE-COLD-LABEL: f3 ; OVERRIDE-BOTH-LABEL: f3 +; HOT-CUTOFF-0-LABEL: f3{{$}} +; COLD-CUTOFF-0-LABEL: f3 :cold ret void } diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll index 21a2ae80574e0..db9a89a2a7370 100644 --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -3,8 +3,10 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX10,GFX10_DEFAULT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX10,FLATSCR_GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX11 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 
-verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s define <2 x half> @chain_hi_to_lo_private() { ; GFX900-LABEL: chain_hi_to_lo_private: @@ -156,14 +158,23 @@ define <2 x half> @chain_hi_to_lo_arithmatic(ptr addrspace(5) %base, half %in) { ; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v0, v1 ; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: chain_hi_to_lo_arithmatic: -; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 -; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, v1 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: chain_hi_to_lo_arithmatic: +; GFX11-TRUE16: ; %bb.0: ; %bb +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l +; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v1 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: chain_hi_to_lo_arithmatic: +; GFX11-FAKE16: ; %bb.0: ; %bb +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 +; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v1 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] bb: %arith_lo = fadd half %in, 1.0 %load_hi = load half, ptr addrspace(5) %base @@ -361,18 +372,31 @@ define <2 x half> @chain_hi_to_lo_flat() { ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: chain_hi_to_lo_flat: -; GFX11: ; %bb.0: ; %bb -; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 2 -; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: flat_load_u16 v0, v[0:1] -; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[1:2] -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat: +; GFX11-TRUE16: ; %bb.0: ; %bb +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2 +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat: +; GFX11-FAKE16: ; %bb.0: ; %bb +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 2 +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] bb: %gep_lo = getelementptr inbounds half, ptr null, i64 1 %load_lo = load half, ptr %gep_lo @@ -403,14 +427,23 @@ define <2 x half> @chain_hi_to_lo_flat_different_bases(ptr %base_lo, ptr %base_h ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: chain_hi_to_lo_flat_different_bases: -; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: flat_load_u16 v0, v[0:1] -; GFX11-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[2:3] -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_different_bases: +; GFX11-TRUE16: ; %bb.0: ; %bb +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_different_bases: +; GFX11-FAKE16: ; %bb.0: ; %bb +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] bb: %load_lo = load half, ptr %base_lo %load_hi = load half, ptr %base_hi @@ -864,17 +897,31 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) { ; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: chain_hi_to_lo_flat_other_dep: -; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_other_dep: +; GFX11-TRUE16: ; %bb.0: ; %bb +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: flat_load_d16_b16 v2, v[0:1] offset:2 glc dlc +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-TRUE16-NEXT: 
flat_load_d16_hi_b16 v0, v[0:1] glc dlc +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_other_dep: +; GFX11-FAKE16: ; %bb.0: ; %bb +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] bb: %gep_lo = getelementptr inbounds i16, ptr addrspace(0) %ptr, i64 1 %load_lo = load volatile i16, ptr addrspace(0) %gep_lo diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir b/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir index 76eaf350301e4..9ae536af6f0e9 100644 --- a/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir +++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir @@ -31,3 +31,92 @@ body: | WAVE_BARRIER ... + +# CHECK: align4: ; @align4 +# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}} ; encoding: [A,A,0x85,0xbf] +# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf] +# CHECK: .p2align 2 +# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +# CHECK: ; codeLenInByte = 16 + +--- +name: align4 +tracksRegLiveness: true +body: | + bb.0: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.2, implicit $scc + + bb.1: + S_BARRIER + + bb.2 (align 4): + S_ENDPGM 0 +... 
+ +# CHECK: align8: ; @align8 +# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}} ; encoding: [A,A,0x85,0xbf] +# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf] +# CHECK: .p2align 3 +# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +# CHECK: ; codeLenInByte = 20 +--- +name: align8 +tracksRegLiveness: true +body: | + bb.0: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.2, implicit $scc + + bb.1: + S_BARRIER + + bb.2 (align 8): + S_ENDPGM 0 +... + +# CHECK: align16: ; @align16 +# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}} ; encoding: [A,A,0x85,0xbf] +# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf] +# CHECK: .p2align 4 +# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +# CHECK: ; codeLenInByte = 20 +--- +name: align16 +tracksRegLiveness: true +body: | + bb.0: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.2, implicit $scc + + bb.1: + S_BARRIER + + bb.2 (align 16): + S_ENDPGM 0 +... + +# CHECK: align32: ; @align32 +# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] +# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}} ; encoding: [A,A,0x85,0xbf] +# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf] +# CHECK: .p2align 5 +# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +# CHECK: ; codeLenInByte = 36 +--- +name: align32 +tracksRegLiveness: true +body: | + bb.0: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.2, implicit $scc + + bb.1: + S_BARRIER + + bb.2 (align 32): + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll index 4c68b8d35260f..91f9aa1c5fe3b 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll @@ -2,8 +2,9 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,CIVI %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,CIVI,CIVI-HSA %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10PLUS %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11-FAKE16 %s ; GCN-LABEL: {{^}}store_flat_i32: ; GCN-DAG: s_load_{{dwordx2|b64}} s[[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]], @@ -224,7 +225,8 @@ define amdgpu_kernel void @store_flat_i8_neg_offset(ptr %fptr, i8 %x) #0 { ; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc{{$}} ; GFX10: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} -; GFX11: flat_load_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc dlc{{$}} +; GFX11-TRUE16: flat_load_d16_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc dlc{{$}} +; GFX11-FAKE16: flat_load_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc dlc{{$}} define amdgpu_kernel void @load_flat_i8_max_offset(ptr %fptr) #0 { 
%fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4095 %val = load volatile i8, ptr %fptr.offset @@ -234,7 +236,9 @@ define amdgpu_kernel void @load_flat_i8_max_offset(ptr %fptr) #0 { ; GCN-LABEL: {{^}}load_flat_i8_max_offset_p1: ; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GFX10PLUS: flat_load_{{ubyte|u8}} v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} +; GFX10: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} +; GFX11-TRUE16: flat_load_d16_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} +; GFX11-FAKE16: flat_load_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}} define amdgpu_kernel void @load_flat_i8_max_offset_p1(ptr %fptr) #0 { %fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4096 %val = load volatile i8, ptr %fptr.offset diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and.ll b/llvm/test/Transforms/InstCombine/select-icmp-and.ll index c854a770aa74c..1218799ab3dc5 100644 --- a/llvm/test/Transforms/InstCombine/select-icmp-and.ll +++ b/llvm/test/Transforms/InstCombine/select-icmp-and.ll @@ -809,3 +809,97 @@ define i8 @select_bittest_to_xor(i8 %x) { %masksel = select i1 %cmp, i8 %or, i8 %and ret i8 %masksel } + +define i8 @select_trunc_bittest_to_sub(i8 %x) { +; CHECK-LABEL: @select_trunc_bittest_to_sub( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[RET:%.*]] = select i1 [[TRUNC]], i8 3, i8 4 +; CHECK-NEXT: ret i8 [[RET]] +; + %trunc = trunc i8 %x to i1 + %ret = select i1 %trunc, i8 3, i8 4 + ret i8 %ret +} + +define i8 @select_trunc_nuw_bittest_to_sub(i8 %x) { +; CHECK-LABEL: @select_trunc_nuw_bittest_to_sub( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[RET:%.*]] = select i1 [[TRUNC]], i8 3, i8 4 +; CHECK-NEXT: ret i8 [[RET]] +; + %trunc = trunc nuw i8 %x to i1 + %ret = select i1 %trunc, i8 3, i8 4 + ret i8 %ret +} + +define i8 @select_trunc_nsw_bittest_to_sub(i8 
%x) { +; CHECK-LABEL: @select_trunc_nsw_bittest_to_sub( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nsw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[RET:%.*]] = select i1 [[TRUNC]], i8 3, i8 4 +; CHECK-NEXT: ret i8 [[RET]] +; + %trunc = trunc nsw i8 %x to i1 + %ret = select i1 %trunc, i8 3, i8 4 + ret i8 %ret +} + +define i8 @select_trunc_nuw_bittest_to_sub_extra_use(i8 %x) { +; CHECK-LABEL: @select_trunc_nuw_bittest_to_sub_extra_use( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: call void @use1(i1 [[TRUNC]]) +; CHECK-NEXT: [[RET:%.*]] = select i1 [[TRUNC]], i8 3, i8 4 +; CHECK-NEXT: ret i8 [[RET]] +; + %trunc = trunc nuw i8 %x to i1 + call void @use1(i1 %trunc) + %ret = select i1 %trunc, i8 3, i8 4 + ret i8 %ret +} + +define i8 @neg_select_trunc_bittest_to_sub_extra_use(i8 %x) { +; CHECK-LABEL: @neg_select_trunc_bittest_to_sub_extra_use( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[X:%.*]] to i1 +; CHECK-NEXT: call void @use1(i1 [[TRUNC]]) +; CHECK-NEXT: [[RET:%.*]] = select i1 [[TRUNC]], i8 3, i8 4 +; CHECK-NEXT: ret i8 [[RET]] +; + %trunc = trunc i8 %x to i1 + call void @use1(i1 %trunc) + %ret = select i1 %trunc, i8 3, i8 4 + ret i8 %ret +} + +define i8 @select_trunc_nuw_bittest_to_shl_not(i8 %x) { +; CHECK-LABEL: @select_trunc_nuw_bittest_to_shl_not( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[RET:%.*]] = select i1 [[TRUNC]], i8 0, i8 4 +; CHECK-NEXT: ret i8 [[RET]] +; + %trunc = trunc nuw i8 %x to i1 + %ret = select i1 %trunc, i8 0, i8 4 + ret i8 %ret +} + +define i8 @select_trunc_bittest_to_shl(i8 %x) { +; CHECK-LABEL: @select_trunc_bittest_to_shl( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[RET:%.*]] = select i1 [[TRUNC]], i8 4, i8 0 +; CHECK-NEXT: ret i8 [[RET]] +; + %trunc = trunc i8 %x to i1 + %ret = select i1 %trunc, i8 4, i8 0 + ret i8 %ret +} + +define i8 @neg_select_trunc_bittest_to_shl_extra_use(i8 %x) { +; CHECK-LABEL: @neg_select_trunc_bittest_to_shl_extra_use( +; CHECK-NEXT: 
[[TRUNC:%.*]] = trunc i8 [[X:%.*]] to i1 +; CHECK-NEXT: call void @use1(i1 [[TRUNC]]) +; CHECK-NEXT: [[RET:%.*]] = select i1 [[TRUNC]], i8 4, i8 0 +; CHECK-NEXT: ret i8 [[RET]] +; + %trunc = trunc i8 %x to i1 + call void @use1(i1 %trunc) + %ret = select i1 %trunc, i8 4, i8 0 + ret i8 %ret +} diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll index ad23126b681fd..c076c0e849fa9 100644 --- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s define void @store_load(ptr %ptr) { ; CHECK-LABEL: define void @store_load( diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll index f4fcc5742f8a7..38c0816504481 100644 --- a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s declare void @foo() diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll index 4218ca830dccf..f2eb124494b5e 100644 --- 
a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=false -sbvec-passes="seed-collection" %s -S | FileCheck %s --check-prefix=POW2 -; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=true -sbvec-passes="seed-collection" %s -S | FileCheck %s --check-prefix=NON-POW2 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=false -sbvec-passes="seed-collection" %s -S | FileCheck %s --check-prefix=POW2 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=true -sbvec-passes="seed-collection" %s -S | FileCheck %s --check-prefix=NON-POW2 define void @pow2(ptr %ptr, float %val) { ; POW2-LABEL: define void @pow2( diff --git a/llvm/test/Transforms/SandboxVectorizer/cross_bbs.ll b/llvm/test/Transforms/SandboxVectorizer/cross_bbs.ll index 75d7cc8d9cefe..c559f294f9695 100644 --- a/llvm/test/Transforms/SandboxVectorizer/cross_bbs.ll +++ b/llvm/test/Transforms/SandboxVectorizer/cross_bbs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s define void @cross_bbs(ptr %ptr) { ; CHECK-LABEL: define void @cross_bbs( diff --git a/llvm/test/Transforms/SandboxVectorizer/pack.ll b/llvm/test/Transforms/SandboxVectorizer/pack.ll index c1e22d3dc2e73..c5abddb7fba01 100644 --- a/llvm/test/Transforms/SandboxVectorizer/pack.ll +++ b/llvm/test/Transforms/SandboxVectorizer/pack.ll @@ 
-1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s define void @pack_constants(ptr %ptr) { ; CHECK-LABEL: define void @pack_constants( diff --git a/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll index 3e4ef6787a563..081267da77e5f 100644 --- a/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll +++ b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s define i32 @repeated_splat(ptr %ptr, i32 %v) #0 { ; CHECK-LABEL: define i32 @repeated_splat( diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll index fd4847da920d1..7741d8c64c8fc 100644 --- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll +++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s ; This used to crash because the newly added pack instructions would not 
update ; the DAG and scheduler, leading to def-after-use. diff --git a/llvm/test/Transforms/SandboxVectorizer/special_opcodes.ll b/llvm/test/Transforms/SandboxVectorizer/special_opcodes.ll index 1aca7cf2a8bd8..edb8d615e0055 100644 --- a/llvm/test/Transforms/SandboxVectorizer/special_opcodes.ll +++ b/llvm/test/Transforms/SandboxVectorizer/special_opcodes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection" %s -S | FileCheck %s ; This file includes tests for opcodes that need special checks. diff --git a/llvm/utils/gn/secondary/bolt/include/bolt/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/include/bolt/Core/BUILD.gn index 2d7c1a70abe95..bf5cbf1ff0cd9 100644 --- a/llvm/utils/gn/secondary/bolt/include/bolt/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/include/bolt/Core/BUILD.gn @@ -1,6 +1,14 @@ import("//llvm/lib/Target/write_target_def_file.gni") +bolt_targets_to_build = [] +foreach(target, llvm_targets_to_build) { + if (target == "AArch64" || target == "RISCV" || target == "X86") { + bolt_targets_to_build += [ target ] + } +} + write_target_def_file("TargetConfig.def") { key = "BOLT_ENUM_TARGETS" value = "BOLT_TARGET" + all_targets = bolt_targets_to_build } diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn index 433a7f43bb780..71493498ef03f 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn @@ -19,7 +19,9 @@ static_library("Vectorize") { "SandboxVectorizer/Legality.cpp", "SandboxVectorizer/Passes/BottomUpVec.cpp", "SandboxVectorizer/Passes/RegionsFromMetadata.cpp", + 
"SandboxVectorizer/Passes/SeedCollection.cpp", "SandboxVectorizer/Passes/TransactionAcceptOrRevert.cpp", + "SandboxVectorizer/Passes/TransactionSave.cpp", "SandboxVectorizer/SandboxVectorizer.cpp", "SandboxVectorizer/SandboxVectorizerPassBuilder.cpp", "SandboxVectorizer/Scheduler.cpp", diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index 26c2565cd79ac..1680d521d919f 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -1043,8 +1043,8 @@ LogicalResult emitc::VerbatimOp::verify() { return success(); } -static ParseResult parseVariadicTypeFmtArgs(AsmParser &p, - SmallVector ¶ms) { +[[maybe_unused]] static ParseResult +parseVariadicTypeFmtArgs(AsmParser &p, SmallVector ¶ms) { Type type; if (p.parseType(type)) return failure(); @@ -1059,7 +1059,8 @@ static ParseResult parseVariadicTypeFmtArgs(AsmParser &p, return success(); } -static void printVariadicTypeFmtArgs(AsmPrinter &p, ArrayRef params) { +[[maybe_unused]] static void printVariadicTypeFmtArgs(AsmPrinter &p, + ArrayRef params) { llvm::interleaveComma(params, p, [&](Type type) { p.printType(type); }); }