diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index 398734c1a366d..64d59a5a6449d 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -138,6 +138,11 @@ jobs: arches=arm64 else arches=x86_64 + # Disable Flang builds on macOS x86_64. The FortranLower library takes + # 2-3 hours to build on macOS, much slower than on Linux. + # The long build time causes the release build to time out on x86_64, + # so we need to disable flang there. + target_cmake_flags="$target_cmake_flags -DLLVM_RELEASE_ENABLE_PROJECTS='clang;lld;lldb;clang-tools-extra;bolt;polly;mlir'" fi target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_BOOTSTRAP_DARWIN_osx_ARCHS=$arches -DBOOTSTRAP_BOOTSTRAP_DARWIN_osx_BUILTIN_ARCHS=$arches" fi diff --git a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp index f3199ebb00b3d..217b4f23e8572 100644 --- a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp +++ b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp @@ -219,7 +219,7 @@ void InstrumentationRuntimeLibrary::link( } outs() << "BOLT-INFO: output linked against instrumentation runtime " "library, lib entry point is 0x" - << Twine::utohexstr(RuntimeFiniAddress) << "\n"; + << Twine::utohexstr(RuntimeStartAddress) << "\n"; outs() << "BOLT-INFO: clear procedure is 0x" << Twine::utohexstr( Linker.lookupSymbol("__bolt_instr_clear_counters").value_or(0)) diff --git a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp index 71a4cee4bdc6e..6e412e576e5f9 100644 --- a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp @@ -8,6 +8,8 @@ #include "ConstCorrectnessCheck.h" #include "../utils/FixItHintUtils.h" +#include "../utils/Matchers.h" +#include "../utils/OptionsUtils.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -41,7 +43,9 @@ ConstCorrectnessCheck::ConstCorrectnessCheck(StringRef Name, TransformValues(Options.get("TransformValues", true)), TransformReferences(Options.get("TransformReferences", true)), TransformPointersAsValues( - Options.get("TransformPointersAsValues", false)) { + Options.get("TransformPointersAsValues", false)), + AllowedTypes( + utils::options::parseStringList(Options.get("AllowedTypes", ""))) { if (AnalyzeValues == false && AnalyzeReferences == false) this->configurationDiag( "The check 'misc-const-correctness' will not " @@ -57,6 +61,8 @@ void ConstCorrectnessCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "TransformValues", TransformValues); Options.store(Opts, "TransformReferences", TransformReferences); Options.store(Opts, "TransformPointersAsValues", TransformPointersAsValues); + Options.store(Opts, "AllowedTypes", + utils::options::serializeStringList(AllowedTypes)); } void ConstCorrectnessCheck::registerMatchers(MatchFinder *Finder) { @@ -73,6 +79,12 @@ void ConstCorrectnessCheck::registerMatchers(MatchFinder *Finder) { hasType(referenceType(pointee(hasCanonicalType(templateTypeParmType())))), hasType(referenceType(pointee(substTemplateTypeParmType())))); + const auto AllowedType = hasType(qualType(anyOf( + hasDeclaration(namedDecl(matchers::matchesAnyListedName(AllowedTypes))), + references(namedDecl(matchers::matchesAnyListedName(AllowedTypes))), + pointerType(pointee(hasDeclaration( + 
namedDecl(matchers::matchesAnyListedName(AllowedTypes)))))))); + const auto AutoTemplateType = varDecl( anyOf(hasType(autoType()), hasType(referenceType(pointee(autoType()))), hasType(pointerType(pointee(autoType()))))); @@ -87,7 +99,8 @@ void ConstCorrectnessCheck::registerMatchers(MatchFinder *Finder) { unless(anyOf(ConstType, ConstReference, TemplateType, hasInitializer(isInstantiationDependent()), AutoTemplateType, RValueReference, FunctionPointerRef, - hasType(cxxRecordDecl(isLambda())), isImplicit()))); + hasType(cxxRecordDecl(isLambda())), isImplicit(), + AllowedType))); // Match the function scope for which the analysis of all local variables // shall be run. diff --git a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h index bba060e555d00..87dddc4faf781 100644 --- a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h +++ b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h @@ -45,6 +45,7 @@ class ConstCorrectnessCheck : public ClangTidyCheck { const bool TransformValues; const bool TransformReferences; const bool TransformPointersAsValues; + const std::vector<StringRef> AllowedTypes; }; } // namespace clang::tidy::misc diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 1a23f6cca7756..8b9fffa3f64cd 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -372,6 +372,15 @@ void enhanceLocatedSymbolsFromIndex(llvm::MutableArrayRef<LocatedSymbol> Result, }); } +bool objcMethodIsTouched(const SourceManager &SM, const ObjCMethodDecl *OMD, + SourceLocation Loc) { + unsigned NumSels = OMD->getNumSelectorLocs(); + for (unsigned I = 0; I < NumSels; ++I) + if (SM.getSpellingLoc(OMD->getSelectorLoc(I)) == Loc) + return true; + return false; +} + // Decls are more complicated. // The AST contains at least a declaration, maybe a definition. // These are up-to-date, and so generally preferred over index results. @@ -430,6 +439,26 @@ locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier, continue; } } + // Special case: - (void)^method {} should jump to overrides, but the decl + // shouldn't, only the definition. Note that an Objective-C method can + // override a parent class or protocol. + // + // FIXME: Support jumping from a protocol decl to overrides on go-to + // definition. + if (const auto *OMD = llvm::dyn_cast<ObjCMethodDecl>(D)) { + if (OMD->isThisDeclarationADefinition() && TouchedIdentifier && + objcMethodIsTouched(SM, OMD, TouchedIdentifier->location())) { + llvm::SmallVector<const ObjCMethodDecl *, 4> Overrides; + OMD->getOverriddenMethods(Overrides); + if (!Overrides.empty()) { + for (const auto *Override : Overrides) + AddResultDecl(Override); + LocateASTReferentMetric.record(1, "objc-overriden-method"); + } + AddResultDecl(OMD); + continue; + } + } // Special case: the cursor is on an alias, prefer other results. // This targets "using ns::^Foo", where the target is more interesting.
@@ -1283,6 +1312,12 @@ std::vector<LocatedSymbol> findImplementations(ParsedAST &AST, Position Pos, } else if (const auto *RD = dyn_cast<CXXRecordDecl>(ND)) { IDs.insert(getSymbolID(RD)); QueryKind = RelationKind::BaseOf; + } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(ND)) { + IDs.insert(getSymbolID(OMD)); + QueryKind = RelationKind::OverriddenBy; + } else if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(ND)) { + IDs.insert(getSymbolID(ID)); + QueryKind = RelationKind::BaseOf; + } } return findImplementors(std::move(IDs), QueryKind, Index, AST.tuPath()); @@ -1302,6 +1337,21 @@ void getOverriddenMethods(const CXXMethodDecl *CMD, } } +// Recursively finds all the overridden methods of `OMD` in the complete type +// hierarchy. +void getOverriddenMethods(const ObjCMethodDecl *OMD, + llvm::DenseSet<SymbolID> &OverriddenMethods) { + if (!OMD) + return; + llvm::SmallVector<const ObjCMethodDecl *, 4> Overrides; + OMD->getOverriddenMethods(Overrides); + for (const ObjCMethodDecl *Base : Overrides) { + if (auto ID = getSymbolID(Base)) + OverriddenMethods.insert(ID); + getOverriddenMethods(Base, OverriddenMethods); + } +} + std::optional<std::string> stringifyContainerForMainFileRef(const Decl *Container) { // FIXME We might also want to display the signature here @@ -1438,6 +1488,12 @@ ReferencesResult findReferences(ParsedAST &AST, Position Pos, uint32_t Limit, getOverriddenMethods(CMD, OverriddenMethods); } } + // Special case: Objective-C methods can override a parent class or + // protocol, so we should be sure to report references to those. + if (const auto *OMD = llvm::dyn_cast<ObjCMethodDecl>(ND)) { + OverriddenBy.Subjects.insert(getSymbolID(OMD)); + getOverriddenMethods(OMD, OverriddenMethods); + } } } diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index 7a9703c744e93..1ce28c91a420c 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -1335,6 +1335,42 @@ TEST_F(SymbolCollectorTest, OverrideRelationsMultipleInheritance) { OverriddenBy(CBar, DBar), OverriddenBy(CBaz, DBaz))); } +TEST_F(SymbolCollectorTest, ObjCOverrideRelationsSimpleInheritance) { + std::string Header = R"cpp( + @interface A + - (void)foo; + @end + @interface B : A + - (void)foo; // A::foo + - (void)bar; + @end + @interface C : B + - (void)bar; // B::bar + @end + @interface D : C + - (void)foo; // B::foo + - (void)bar; // C::bar + @end + )cpp"; + runSymbolCollector(Header, /*Main=*/"", + {"-xobjective-c++", "-Wno-objc-root-class"}); + const Symbol &AFoo = findSymbol(Symbols, "A::foo"); + const Symbol &BFoo = findSymbol(Symbols, "B::foo"); + const Symbol &DFoo = findSymbol(Symbols, "D::foo"); + + const Symbol &BBar = findSymbol(Symbols, "B::bar"); + const Symbol &CBar = findSymbol(Symbols, "C::bar"); + const Symbol &DBar = findSymbol(Symbols, "D::bar"); + + std::vector<Relation> Result; + for (const Relation &R : Relations) + if (R.Predicate == RelationKind::OverriddenBy) + Result.push_back(R); + EXPECT_THAT(Result, UnorderedElementsAre( + OverriddenBy(AFoo, BFoo), OverriddenBy(BBar, CBar), + OverriddenBy(BFoo, DFoo), OverriddenBy(CBar, DBar))); +} + TEST_F(SymbolCollectorTest, CountReferences) { const std::string Header = R"( class W; diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index 7d824d659ad2c..475b56b1dc230 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -411,6 +411,85 @@ TEST(LocateSymbol, FindOverrides) { sym("foo", Code.range("2"), 
std::nullopt))); } +TEST(LocateSymbol, FindOverridesFromDefObjC) { + auto Code = Annotations(R"objc( + @protocol Fooey + - (void)foo; + @end + @interface Base + - (void)foo; + @end + @interface Foo : Base + - (void)$1[[foo]]; + @end + + @interface Bar : Foo + - (void)$2[[foo]]; + @end + @implementation Bar + - (void)$3[[fo^o]] {} + @end + )objc"); + TestTU TU = TestTU::withCode(Code.code()); + TU.ExtraArgs.push_back("-xobjective-c++"); + auto AST = TU.build(); + EXPECT_THAT( + locateSymbolAt(AST, Code.point(), TU.index().get()), + UnorderedElementsAre(sym("foo", Code.range("1"), std::nullopt), + sym("foo", Code.range("2"), Code.range("3")))); +} + +TEST(LocateSymbol, NoOverridesFromDeclObjC) { + auto Code = Annotations(R"objc( + @protocol Fooey + - (void)foo; + @end + @interface Base + - (void)foo; + @end + @interface Foo : Base + - (void)foo; + @end + + @interface Bar : Foo + - (void)$2[[fo^o]]; + @end + @implementation Bar + - (void)$3[[foo]] {} + @end + )objc"); + TestTU TU = TestTU::withCode(Code.code()); + TU.ExtraArgs.push_back("-xobjective-c++"); + auto AST = TU.build(); + EXPECT_THAT( + locateSymbolAt(AST, Code.point(), TU.index().get()), + UnorderedElementsAre(sym("foo", Code.range("2"), Code.range("3")))); +} + +TEST(LocateSymbol, ObjCNoOverridesOnUsage) { + auto Code = Annotations(R"objc( + @interface Foo + - (void)foo; + @end + + @interface Bar : Foo + - (void)$1[[foo]]; + @end + @implementation Bar + - (void)$2[[foo]] {} + @end + void doSomething(Bar *bar) { + [bar fo^o]; + } + )objc"); + TestTU TU = TestTU::withCode(Code.code()); + TU.ExtraArgs.push_back("-xobjective-c++"); + auto AST = TU.build(); + EXPECT_THAT( + locateSymbolAt(AST, Code.point(), TU.index().get()), + UnorderedElementsAre(sym("foo", Code.range("1"), Code.range("2")))); +} + TEST(LocateSymbol, WithIndexPreferredLocation) { Annotations SymbolHeader(R"cpp( class $p[[Proto]] {}; @@ -1834,6 +1913,41 @@ TEST(FindImplementations, Inheritance) { } } +TEST(FindImplementations, InheritanceObjC) { + llvm::StringRef Test = R"objc( + @interface $base^Base + - (void)fo$foo^o; + @end + @protocol Protocol + - (void)$protocol^protocol; + @end + @interface $ChildDecl[[Child]] : Base + - (void)concrete; + - (void)$fooDecl[[foo]]; + @end + @implementation $ChildDef[[Child]] + - (void)concrete {} + - (void)$fooDef[[foo]] {} + - (void)$protocolDef[[protocol]] {} + @end + )objc"; + + Annotations Code(Test); + auto TU = TestTU::withCode(Code.code()); + TU.ExtraArgs.push_back("-xobjective-c++"); + auto AST = TU.build(); + auto Index = TU.index(); + EXPECT_THAT(findImplementations(AST, Code.point("base"), Index.get()), + UnorderedElementsAre(sym("Child", Code.range("ChildDecl"), + Code.range("ChildDef")))); + EXPECT_THAT(findImplementations(AST, Code.point("foo"), Index.get()), + UnorderedElementsAre( + sym("foo", Code.range("fooDecl"), Code.range("fooDef")))); + EXPECT_THAT(findImplementations(AST, Code.point("protocol"), Index.get()), + UnorderedElementsAre(sym("protocol", Code.range("protocolDef"), + Code.range("protocolDef")))); +} + TEST(FindImplementations, CaptureDefinition) { llvm::StringRef Test = R"cpp( struct Base { @@ -1963,6 +2077,7 @@ void checkFindRefs(llvm::StringRef Test, bool UseIndex = false) { Annotations T(Test); auto TU = TestTU::withCode(T.code()); TU.ExtraArgs.push_back("-std=c++20"); + TU.ExtraArgs.push_back("-xobjective-c++"); auto AST = TU.build(); std::vector> ExpectedLocations; @@ -2260,6 +2375,25 @@ TEST(FindReferences, IncludeOverrides) { checkFindRefs(Test, /*UseIndex=*/true); } +TEST(FindReferences, 
IncludeOverridesObjC) { + llvm::StringRef Test = + R"objc( + @interface Base + - (void)$decl(Base)[[f^unc]]; + @end + @interface Derived : Base + - (void)$overridedecl(Derived::func)[[func]]; + @end + @implementation Derived + - (void)$overridedef[[func]] {} + @end + void test(Derived *derived, Base *base) { + [derived func]; // No references to the overrides. + [base $(test)[[func]]]; + })objc"; + checkFindRefs(Test, /*UseIndex=*/true); +} + TEST(FindReferences, RefsToBaseMethod) { llvm::StringRef Test = R"cpp( @@ -2284,6 +2418,27 @@ TEST(FindReferences, RefsToBaseMethod) { checkFindRefs(Test, /*UseIndex=*/true); } +TEST(FindReferences, RefsToBaseMethodObjC) { + llvm::StringRef Test = + R"objc( + @interface BaseBase + - (void)$(BaseBase)[[func]]; + @end + @interface Base : BaseBase + - (void)$(Base)[[func]]; + @end + @interface Derived : Base + - (void)$decl(Derived)[[fu^nc]]; + @end + void test(BaseBase *bb, Base *b, Derived *d) { + // refs to overridden methods in complete type hierarchy are reported. + [bb $(test)[[func]]]; + [b $(test)[[func]]]; + [d $(test)[[fu^nc]]]; + })objc"; + checkFindRefs(Test, /*UseIndex=*/true); +} + TEST(FindReferences, MainFileReferencesOnly) { llvm::StringRef Test = R"cpp( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 6b8fe22242417..41ff1c1016f25 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -106,6 +106,11 @@ Changes in existing checks ` check to allow specifying additional C++ member functions to match. +- Improved :doc:`misc-const-correctness + ` check by adding the option + `AllowedTypes`, that excludes specified types from const-correctness + checking. + - Improved :doc:`misc-redundant-expression ` check by providing additional examples and fixing some macro related false positives. diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst index 8ac1ad56bc8cf..2e7e0f3602ab9 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/const-correctness.rst @@ -80,9 +80,10 @@ This limitation affects the capability to add ``const`` to methods which is not Options ------- -.. option:: AnalyzeValues (default = true) +.. option:: AnalyzeValues - Enable or disable the analysis of ordinary value variables, like ``int i = 42;`` + Enable or disable the analysis of ordinary value variables, like + ``int i = 42;``. Default is `true`. .. code-block:: c++ @@ -96,9 +97,10 @@ Options // No warning int const a[] = {42, 42, 42}; -.. option:: AnalyzeReferences (default = true) +.. option:: AnalyzeReferences - Enable or disable the analysis of reference variables, like ``int &ref = i;`` + Enable or disable the analysis of reference variables, like + ``int &ref = i;``. Default is `true`. .. code-block:: c++ @@ -108,11 +110,11 @@ Options // No warning int const& ref = i; -.. option:: WarnPointersAsValues (default = false) +.. option:: WarnPointersAsValues This option enables the suggestion for ``const`` of the pointer itself. Pointer values have two possibilities to be ``const``, the pointer - and the value pointing to. + and the value pointing to. Default is `false`. .. code-block:: c++ @@ -123,9 +125,10 @@ Options // No warning const int *const pointer_variable = &value; -.. option:: TransformValues (default = true) +.. 
option:: TransformValues - Provides fixit-hints for value types that automatically add ``const`` if its a single declaration. + Provides fixit-hints for value types that automatically add ``const`` if + it is a single declaration. Default is `true`. .. code-block:: c++ @@ -143,10 +146,10 @@ Options int result = value * 3; result -= 10; -.. option:: TransformReferences (default = true) +.. option:: TransformReferences - Provides fixit-hints for reference types that automatically add ``const`` if its a single - declaration. + Provides fixit-hints for reference types that automatically add ``const`` if + it is a single declaration. Default is `true`. .. code-block:: c++ @@ -163,10 +166,10 @@ Options int result = ref_value * 3; result -= 10; -.. option:: TransformPointersAsValues (default = false) +.. option:: TransformPointersAsValues - Provides fixit-hints for pointers if their pointee is not changed. This does not analyze if the - value-pointed-to is unchanged! + Provides fixit-hints for pointers if their pointee is not changed. This does + not analyze if the value-pointed-to is unchanged! Default is `false`. Requires 'WarnPointersAsValues' to be 'true'. @@ -196,3 +199,13 @@ Options // The following pointer may not become a 'int *const'. int *changing_pointee = &value; changing_pointee = &result; + +.. option:: AllowedTypes + + A semicolon-separated list of names of types that will be excluded from + const-correctness checking. Regular expressions are accepted, e.g. + ``[Rr]ef(erence)?$`` matches every type with suffix ``Ref``, ``ref``, + ``Reference`` and ``reference``. If a name in the list contains the sequence + `::`, it is matched against the qualified type name + (i.e. ``namespace::Type``), otherwise it is matched against only the type + name (i.e. ``Type``). The default is an empty string.
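As a quick illustration of the new option (a hedged sketch, not part of the patch; the type names `BufferRef` and `Buffer` are invented for the example): with `AllowedTypes: '[Rr]ef(erence)?$'` configured, the check would behave roughly like this:

```c++
// Hypothetical example of the AllowedTypes behaviour described above.
struct BufferRef {}; // suffix matches [Rr]ef(erence)?$ -> excluded from the check
struct Buffer {};    // no match -> analyzed as usual

void demo() {
  BufferRef ref = {}; // not flagged: its type is on the allowed list
  Buffer buf = {};    // flagged: can be declared 'Buffer const buf'
}
```

This mirrors the intent of the option: reference-like wrapper types whose constness is not meaningful can be opted out wholesale instead of suppressing individual warnings.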
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-allowed-types.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-allowed-types.cpp new file mode 100644 index 0000000000000..a73b4a08d0a71 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-allowed-types.cpp @@ -0,0 +1,180 @@ +// RUN: %check_clang_tidy %s misc-const-correctness %t -- \ +// RUN: -config="{CheckOptions: {\ +// RUN: misc-const-correctness.AllowedTypes: '[Pp]ointer$;[Pp]tr$;[Rr]ef(erence)?$;qualified::Type;::fully::QualifiedType;ConstTemplate', \ +// RUN: misc-const-correctness.TransformPointersAsValues: true, \ +// RUN: misc-const-correctness.TransformReferences: true, \ +// RUN: misc-const-correctness.WarnPointersAsValues: true } \ +// RUN: }" -- -fno-delayed-template-parsing + +struct SmartPointer { +}; + +struct smart_pointer { +}; + +struct SmartPtr { +}; + +struct smart_ptr { +}; + +struct SmartReference { +}; + +struct smart_reference { +}; + +struct SmartRef { +}; + +struct smart_ref { +}; + +struct OtherType { +}; + +template <typename T> struct ConstTemplate { +}; + +namespace qualified { +struct Type { +}; +} // namespace qualified + +namespace fully { +struct QualifiedType { +}; +} // namespace fully + +void negativeSmartPointer() { + SmartPointer p1 = {}; + SmartPointer* p2 = {}; + SmartPointer& p3 = p1; +} + +void negative_smart_pointer() { + smart_pointer p1 = {}; + smart_pointer* p2 = {}; + smart_pointer& p3 = p1; +} + +void negativeSmartPtr() { + SmartPtr p1 = {}; + SmartPtr* p2 = {}; + SmartPtr& p3 = p1; +} + +void negative_smart_ptr() { + smart_ptr p1 = {}; + smart_ptr* p2 = {}; + smart_ptr& p3 = p1; +} + +void negativeSmartReference() { + SmartReference p1 = {}; + SmartReference* p2 = {}; + SmartReference& p3 = p1; +} + +void negative_smart_reference() { + smart_reference p1 = {}; + smart_reference* p2 = {}; + smart_reference& p3 = p1; +} + +void negativeSmartRef() { + SmartRef p1 = {}; + SmartRef* p2 = {}; + SmartRef& p3 = p1; +} + +void negative_smart_ref() { + smart_ref p1 = {}; + smart_ref* p2 = {}; + smart_ref& p3 = p1; +} + +void positiveOtherType() { + OtherType t = {}; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 't' of type 'OtherType' can be declared 'const' + // CHECK-FIXES: OtherType const t = {}; +} + +void negativeSomeComplex() { + ConstTemplate<int> t1 = {}; + ConstTemplate<int>* t2 = {}; + ConstTemplate<int>& t3 = t1; +} + +void negativeQualified() { + qualified::Type t1 = {}; + qualified::Type* t2 = {}; + qualified::Type& t3 = t1; + + using qualified::Type; + Type t4 = {}; + Type* t5 = {}; + Type& t6 = t4; +} + +void negativeFullyQualified() { + fully::QualifiedType t1 = {}; + fully::QualifiedType* t2 = {}; + fully::QualifiedType& t3 = t1; + + using fully::QualifiedType; + QualifiedType t4 = {}; + QualifiedType* t5 = {}; + QualifiedType& t6 = t4; +} + +using MySP = SmartPointer; +using MyTemplate = ConstTemplate<int>; +template <typename T> using MyTemplate2 = ConstTemplate<T>; + +void positiveTypedefs() { + MySP p1 = {}; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p1' of type 'MySP' (aka 'SmartPointer') can be declared 'const' + // CHECK-FIXES: MySP const p1 = {}; + + MySP* p2 = {}; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p2' of type 'MySP *' (aka 'SmartPointer *') can be declared 'const' + // CHECK-FIXES: MySP* const p2 = {}; + + MySP& p3 = p1; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p3' of type 'MySP &' (aka 'SmartPointer &') can be declared 'const' + // CHECK-FIXES: MySP const& p3 = p1; + + MyTemplate 
t1 = {}; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 't1' of type 'MyTemplate' (aka 'ConstTemplate<int>') can be declared 'const' + // CHECK-FIXES: MyTemplate const t1 = {}; + + MyTemplate* t2 = {}; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 't2' of type 'MyTemplate *' (aka 'ConstTemplate<int> *') can be declared 'const' + // CHECK-FIXES: MyTemplate* const t2 = {}; + + MyTemplate& t3 = t1; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 't3' of type 'MyTemplate &' (aka 'ConstTemplate<int> &') can be declared 'const' + // CHECK-FIXES: MyTemplate const& t3 = t1; + + MyTemplate2<int> t4 = {}; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 't4' of type 'MyTemplate2<int>' (aka 'ConstTemplate<int>') can be declared 'const' + // CHECK-FIXES: MyTemplate2<int> const t4 = {}; + + MyTemplate2<int>* t5 = {}; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 't5' of type 'MyTemplate2<int> *' (aka 'ConstTemplate<int> *') can be declared 'const' + // CHECK-FIXES: MyTemplate2<int>* const t5 = {}; + + MyTemplate2<int>& t6 = t4; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 't6' of type 'MyTemplate2<int> &' (aka 'ConstTemplate<int> &') can be declared 'const' + // CHECK-FIXES: MyTemplate2<int> const& t6 = t4; +} + +template <typename T> +class Vector {}; + +void positiveSmartPtrWrapped() { + Vector<SmartPtr> vec = {}; + // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'vec' of type 'Vector<SmartPtr>' can be declared 'const' + // CHECK-FIXES: Vector<SmartPtr> const vec = {}; +} diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake index 23e99493087ff..a1c68fc51dbd0 100644 --- a/clang/cmake/caches/Release.cmake +++ b/clang/cmake/caches/Release.cmake @@ -48,10 +48,8 @@ set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") set(STAGE1_PROJECTS "clang") -# Building Flang on Windows requires compiler-rt, so we need to build it in -# stage1. compiler-rt is also required for building the Flang tests on -# macOS. -set(STAGE1_RUNTIMES "compiler-rt") +# Build all runtimes so we can statically link them into the stage2 compiler. +set(STAGE1_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind") if (LLVM_RELEASE_ENABLE_PGO) list(APPEND STAGE1_PROJECTS "lld") @@ -90,9 +88,20 @@ else() set(CLANG_BOOTSTRAP_TARGETS ${LLVM_RELEASE_FINAL_STAGE_TARGETS} CACHE STRING "") endif() +if (LLVM_RELEASE_ENABLE_LTO) + # Enable LTO for the runtimes. We need to configure stage1 clang to default + # to using lld as the linker because the stage1 toolchain will be used to + # build and link the runtimes. + # FIXME: We can't use LLVM_ENABLE_LTO=Thin here, because it causes the CMake + # step for the libcxx build to fail. CMAKE_INTERPROCEDURAL_OPTIMIZATION does + # enable ThinLTO, though. 
+ set(RUNTIMES_CMAKE_ARGS "-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON -DLLVM_ENABLE_LLD=ON" CACHE STRING "") +endif() + # Stage 1 Common Config set(LLVM_ENABLE_RUNTIMES ${STAGE1_RUNTIMES} CACHE STRING "") set(LLVM_ENABLE_PROJECTS ${STAGE1_PROJECTS} CACHE STRING "") +set(LIBCXX_STATICALLY_LINK_ABI_IN_STATIC_LIBRARY ON CACHE STRING "") # stage2-instrumented and Final Stage Config: # Options that need to be set in both the instrumented stage (if we are doing @@ -102,6 +111,16 @@ set_instrument_and_final_stage_var(LLVM_ENABLE_LTO "${LLVM_RELEASE_ENABLE_LTO}" if (LLVM_RELEASE_ENABLE_LTO) set_instrument_and_final_stage_var(LLVM_ENABLE_LLD "ON" BOOL) endif() +set_instrument_and_final_stage_var(LLVM_ENABLE_LIBCXX "ON" BOOL) +set_instrument_and_final_stage_var(LLVM_STATIC_LINK_CXX_STDLIB "ON" BOOL) +set(RELEASE_LINKER_FLAGS "-rtlib=compiler-rt --unwindlib=libunwind") +if(NOT ${CMAKE_HOST_SYSTEM_NAME} MATCHES "Darwin") + set(RELEASE_LINKER_FLAGS "${RELEASE_LINKER_FLAGS} -static-libgcc") +endif() + +set_instrument_and_final_stage_var(CMAKE_EXE_LINKER_FLAGS ${RELEASE_LINKER_FLAGS} STRING) +set_instrument_and_final_stage_var(CMAKE_SHARED_LINKER_FLAGS ${RELEASE_LINKER_FLAGS} STRING) +set_instrument_and_final_stage_var(CMAKE_MODULE_LINKER_FLAGS ${RELEASE_LINKER_FLAGS} STRING) # Final Stage Config (stage2) set_final_stage_var(LLVM_ENABLE_RUNTIMES "${LLVM_RELEASE_ENABLE_RUNTIMES}" STRING) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 5d9b68d4a7f2a..86295a3146510 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -1640,6 +1640,7 @@ Conditional ``explicit`` __cpp_conditional_explicit C+ ``static operator()`` __cpp_static_call_operator C++23 C++03 Attributes on Lambda-Expressions C++23 C++11 Attributes on Structured Bindings __cpp_structured_bindings C++26 C++03 +Packs in Structured Bindings __cpp_structured_bindings C++26 C++03 Static assert with user-generated message __cpp_static_assert >= 202306L C++26 C++11 Pack Indexing __cpp_pack_indexing C++26 C++03 ``= delete ("should have a reason");`` __cpp_deleted_function C++26 C++03 diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5780f5d61d579..db42fc5cc0da7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -71,6 +71,8 @@ C++ Language Changes C++2c Feature Support ^^^^^^^^^^^^^^^^^^^^^ +- Implemented `P1061R10 Structured Bindings can introduce a Pack <https://wg21.link/P1061R10>`_. + C++23 Feature Support ^^^^^^^^^^^^^^^^^^^^^ @@ -168,6 +170,8 @@ Bug Fixes to C++ Support ^^^^^^^^^^^^^^^^^^^^^^^^ - Clang is now better at keeping track of friend function template instance contexts. (#GH55509) +- Clang now prints the correct instantiation context for diagnostics suppressed + by template argument deduction. - The initialization kind of elements of structured bindings direct-list-initialized from an array is corrected to direct-initialization. - Clang no longer crashes when a coroutine is declared ``[[noreturn]]``. (#GH127327) @@ -266,13 +270,16 @@ clang-format libclang -------- +- Fixed a buffer overflow in ``CXString`` implementation. The fix may result in + increased memory allocation. + Code Completion --------------- Static Analyzer --------------- -- Clang currently support extending lifetime of object bound to +- Clang currently support extending lifetime of object bound to reference members of aggregates in CFG and ExprEngine, that are created from default member initializer. 
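For context on the P1061R10 release note above, here is a minimal sketch of what a binding pack enables (illustrative only; `tail_sum` is an invented name, and a compiler with C++26 pack support is assumed):

```c++
// C++26 (P1061): a structured-binding declaration may introduce a pack.
#include <tuple>

template <typename... Ts>
int tail_sum(std::tuple<int, Ts...> t) {
  auto [head, ...rest] = t;   // 'rest' binds the remaining tuple elements
  return (head + ... + rest); // fold over the binding pack
}

int main() { return tail_sum(std::tuple{1, 2, 3}) == 6 ? 0 : 1; }
```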
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index d977868b8a2c6..8213334b61c22 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1681,19 +1681,27 @@ for more details. permitted to produce more precise results than performing the same operations separately. - The C standard permits intermediate floating-point results within an + The C and C++ standards permit intermediate floating-point results within an expression to be computed with more precision than their type would normally allow. This permits operation fusing, and Clang takes advantage - of this by default. This behavior can be controlled with the ``FP_CONTRACT`` - and ``clang fp contract`` pragmas. Please refer to the pragma documentation - for a description of how the pragmas interact with this option. + of this by default (``on``). Fusion across statements is not compliant with + the C and C++ standards but can be enabled using ``-ffp-contract=fast``. + + Fusion can be controlled with the ``FP_CONTRACT`` and ``clang fp contract`` + pragmas. Please note that pragmas will be ignored with + ``-ffp-contract=fast``, and refer to the pragma documentation for a + description of how the pragmas interact with the different ``-ffp-contract`` + option values. Valid values are: - * ``fast`` (fuse across statements disregarding pragmas, default for CUDA) - * ``on`` (fuse in the same statement unless dictated by pragmas, default for languages other than CUDA/HIP) - * ``off`` (never fuse) - * ``fast-honor-pragmas`` (fuse across statements unless dictated by pragmas, default for HIP) + * ``fast``: enable fusion across statements disregarding pragmas, breaking + compliance with the C and C++ standards (default for CUDA). + * ``on``: enable C and C++ standards-compliant fusion in the same statement + unless dictated by pragmas (default for languages other than CUDA/HIP). + * ``off``: disable fusion. + * ``fast-honor-pragmas``: fuse across statements unless dictated by pragmas + (default for HIP). .. 
option:: -f[no-]honor-infinities diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 0e5df338dd2e5..0e8b0189540bd 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4771,6 +4771,12 @@ def HLSLAll : LangBuiltin<"HLSL_LANG"> { let Prototype = "bool(...)"; } +def HLSLAnd : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_and"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLAny : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_any"]; let Attributes = [NoThrow, Const]; let Prototype = "bool(...)"; } diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td index 327dc88cffdb4..61e48b31c244b 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.td +++ b/clang/include/clang/Basic/BuiltinsNVPTX.td @@ -21,12 +21,14 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures { !strconcat(f, "|", newer.Features)); } +let Features = "sm_120a" in def SM_120a : SMFeatures; +let Features = "sm_101a" in def SM_101a : SMFeatures; let Features = "sm_100a" in def SM_100a : SMFeatures; - -def SM_100 : SM<"100", [SM_100a]>; - let Features = "sm_90a" in def SM_90a : SMFeatures; +def SM_120 : SM<"120", [SM_120a]>; +def SM_101 : SM<"101", [SM_101a, SM_120]>; +def SM_100 : SM<"100", [SM_100a, SM_101]>; def SM_90 : SM<"90", [SM_90a, SM_100]>; def SM_89 : SM<"89", [SM_90]>; def SM_87 : SM<"87", [SM_89]>; diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 793cab1f4e84a..c4eb7b7cac1d6 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -82,6 +82,10 @@ enum class OffloadArch { SM_90a, SM_100, SM_100a, + SM_101, + SM_101a, + SM_120, + SM_120a, GFX600, GFX601, GFX602, diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index b15e0415360ea..45d39807b35c8 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_CIR_DIALECT_IR_CIROPS -#define LLVM_CLANG_CIR_DIALECT_IR_CIROPS +#ifndef CLANG_CIR_DIALECT_IR_CIROPS_TD +#define CLANG_CIR_DIALECT_IR_CIROPS_TD include "clang/CIR/Dialect/IR/CIRDialect.td" include "clang/CIR/Dialect/IR/CIRTypes.td" @@ -115,6 +115,165 @@ def ConstantOp : CIR_Op<"const", let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// ReturnOp +//===----------------------------------------------------------------------===// + +def ReturnOp : CIR_Op<"return", [ParentOneOf<["FuncOp", "ScopeOp"]>, + Terminator]> { + let summary = "Return from function"; + let description = [{ + The "return" operation represents a return operation within a function. + The operation takes an optional operand and produces no results. + The operand type must match the signature of the function that contains + the operation. + + ```mlir + func @foo() -> i32 { + ... + cir.return %0 : i32 + } + ``` + }]; + + // The return operation takes an optional input operand to return. This + // value must match the return type of the enclosing function. + let arguments = (ins Variadic<CIR_AnyType>:$input); + + // The return operation only emits the input in the format if it is present. + let assemblyFormat = "($input^ `:` type($input))? attr-dict "; + + // Allow building a ReturnOp with no return operand. 
+ let builders = [ + OpBuilder<(ins), [{ build($_builder, $_state, std::nullopt); }]> + ]; + + // Provide extra utility definitions on the c++ operation class definition. + let extraClassDeclaration = [{ + bool hasOperand() { return getNumOperands() != 0; } + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// YieldOp +//===----------------------------------------------------------------------===// + +def YieldOp : CIR_Op<"yield", [ReturnLike, Terminator, + ParentOneOf<["ScopeOp"]>]> { + let summary = "Represents the default branching behaviour of a region"; + let description = [{ + The `cir.yield` operation terminates regions on different CIR operations, + and it is used to represent the default branching behaviour of a region. + Said branching behaviour is determined by the parent operation. For + example, a yield in a `switch-case` region implies a fallthrough, while + a yield in a `cir.if` region implies a branch to the exit block, and so + on. + + In some cases, it might yield an SSA value and the semantics of how the + values are yielded is defined by the parent operation. For example, a + `cir.ternary` operation yields a value from one of its regions. + + As a general rule, `cir.yield` must be explicitly used whenever a region has + more than one block and no terminator, or within `cir.switch` regions not + `cir.return` terminated. + + Examples: + ```mlir + cir.if %4 { + ... + cir.yield + } + + cir.switch (%5) [ + case (equal, 3) { + ... + cir.yield + }, ... + ] + + cir.scope { + ... + cir.yield + } + + %x = cir.scope { + ... + cir.yield %val + } + + %y = cir.ternary { + ... + cir.yield %val : i32 + } : i32 + ``` + }]; + + let arguments = (ins Variadic<CIR_AnyType>:$args); + let assemblyFormat = "($args^ `:` type($args))? attr-dict"; + let builders = [ + OpBuilder<(ins), [{ /* nothing to do */ }]>, + ]; +} + +//===----------------------------------------------------------------------===// +// ScopeOp +//===----------------------------------------------------------------------===// + +def ScopeOp : CIR_Op<"scope", [ + DeclareOpInterfaceMethods<RegionBranchOpInterface>, + RecursivelySpeculatable, AutomaticAllocationScope, NoRegionArguments]> { + let summary = "Represents a C/C++ scope"; + let description = [{ + `cir.scope` contains one region and defines a strict "scope" for all new + values produced within its blocks. + + The region can contain an arbitrary number of blocks but usually defaults + to one and can optionally return a value (useful for representing values + coming out of C++ full-expressions) via `cir.yield`: + + + ```mlir + %rvalue = cir.scope { + ... + cir.yield %value + } + ``` + + The blocks can be terminated by `cir.yield`, `cir.return` or `cir.throw`. + If `cir.scope` yields no value, the `cir.yield` can be left out, and + will be inserted implicitly. + }]; + + let results = (outs Optional<CIR_AnyType>:$results); + let regions = (region AnyRegion:$scopeRegion); + + let hasVerifier = 1; + let skipDefaultBuilders = 1; + let assemblyFormat = [{ + custom<OmittedTerminatorRegion>($scopeRegion) (`:` type($results)^)? attr-dict + }]; + + let extraClassDeclaration = [{ + /// Determine whether the scope is empty, meaning it contains a single block + /// terminated by a cir.yield. + bool isEmpty() { + auto &entry = getRegion().front(); + return getRegion().hasOneBlock() && + llvm::isa<YieldOp>(entry.front()); + } + }]; + + let builders = [ + // Scopes for yielding values. + OpBuilder<(ins + "llvm::function_ref<void(mlir::OpBuilder &, mlir::Type &, mlir::Location)>":$scopeBuilder)>, + // Scopes without yielding values. 
+ OpBuilder<(ins "llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>":$scopeBuilder)> + ]; +} + //===----------------------------------------------------------------------===// // GlobalOp //===----------------------------------------------------------------------===// @@ -158,25 +317,86 @@ def GlobalOp : CIR_Op<"global"> { // FuncOp //===----------------------------------------------------------------------===// -// TODO(CIR): For starters, cir.func has only name, nothing else. The other -// properties of a function will be added over time as more of ClangIR is -// upstreamed. +// TODO(CIR): FuncOp is still a tiny shell of what it will become. Many more +// properties and attributes will be added as upstreaming continues. -def FuncOp : CIR_Op<"func"> { +def FuncOp : CIR_Op<"func", [ + AutomaticAllocationScope, CallableOpInterface, FunctionOpInterface, + IsolatedFromAbove +]> { let summary = "Declare or define a function"; let description = [{ The `cir.func` operation defines a function, similar to the `mlir::FuncOp` built-in. }]; - let arguments = (ins SymbolNameAttr:$sym_name); + let arguments = (ins SymbolNameAttr:$sym_name, + TypeAttrOf<FuncType>:$function_type, + OptionalAttr<DictArrayAttr>:$arg_attrs, + OptionalAttr<DictArrayAttr>:$res_attrs); + + let regions = (region AnyRegion:$body); let skipDefaultBuilders = 1; - let builders = [OpBuilder<(ins "llvm::StringRef":$sym_name)>]; + let builders = [OpBuilder<(ins "llvm::StringRef":$sym_name, + "FuncType":$type)>]; + + let extraClassDeclaration = [{ + /// Returns the region on the current operation that is callable. This may + /// return null in the case of an external callable object, e.g. an external + /// function. + ::mlir::Region *getCallableRegion(); + + /// Returns the results types that the callable region produces when + /// executed. + llvm::ArrayRef<mlir::Type> getCallableResults() { + return getFunctionType().getReturnTypes(); + } + + /// Returns the argument types of this function. + llvm::ArrayRef<mlir::Type> getArgumentTypes() { + return getFunctionType().getInputs(); + } + + /// Returns 0 or 1 result type of this function (0 in the case of a function + /// returning void) + llvm::ArrayRef<mlir::Type> getResultTypes() { + return getFunctionType().getReturnTypes(); + } + + /// Hook for OpTrait::FunctionOpInterfaceTrait, called after verifying that + /// the 'type' attribute is present and checks if it holds a function type. + /// Ensures getType, getNumFuncArguments, and getNumFuncResults can be + /// called safely. + llvm::LogicalResult verifyType(); + + //===------------------------------------------------------------------===// + // SymbolOpInterface Methods + //===------------------------------------------------------------------===// + + bool isDeclaration(); + }]; let hasCustomAssemblyFormat = 1; let hasVerifier = 1; } -#endif // LLVM_CLANG_CIR_DIALECT_IR_CIROPS +//===----------------------------------------------------------------------===// +// TrapOp +//===----------------------------------------------------------------------===// + +def TrapOp : CIR_Op<"trap", [Terminator]> { + let summary = "Exit the program abnormally"; + let description = [{ + The cir.trap operation causes the program to exit abnormally. + Implementations may use different mechanisms: for example, one + implementation may lower this operation to a call to abort, while another + may lower it to an illegal instruction. 
+ }]; + + let assemblyFormat = "attr-dict"; +} + +#endif // CLANG_CIR_DIALECT_IR_CIROPS_TD diff --git a/clang/include/clang/CIR/TypeEvaluationKind.h b/clang/include/clang/CIR/TypeEvaluationKind.h new file mode 100644 index 0000000000000..5d65eeb9d25b9 --- /dev/null +++ b/clang/include/clang/CIR/TypeEvaluationKind.h @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CIR_TYPEEVALUATIONKIND_H +#define CLANG_CIR_TYPEEVALUATIONKIND_H + +namespace cir { + +// This is copied from clang/lib/CodeGen/CodeGenFunction.h. That file (1) is +// not available as an include from ClangIR files, and (2) has lots of stuff +// that we don't want in ClangIR. +enum TypeEvaluationKind { TEK_Scalar, TEK_Complex, TEK_Aggregate }; + +} // namespace cir + +#endif // CLANG_CIR_TYPEEVALUATIONKIND_H diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5ad187926e710..e6932821d8af8 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -932,9 +932,20 @@ def W_Joined : Joined<["-"], "W">, Group<W_Group>, def Xanalyzer : Separate<["-"], "Xanalyzer">, HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">, Group<StaticAnalyzer_Group>; -def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>, - HelpText<"Pass <arg> to the compiliation if the target matches <arch>">, - MetaVarName<"<arch> <arg>">; +def Xarch__ + : JoinedAndSeparate<["-"], "Xarch_">, + Flags<[NoXarchOption]>, + HelpText<"Pass <arg> to the compilation if the target matches <arch>">, + DocBrief< + [{Specifies that the argument should only be used if the compilation + target matches the specified architecture. This can be used with the target + CPU, triple architecture, or offloading host and device. It is most useful + for separating behavior undesirable on one of the targets when combining many + compilation jobs, as is common with offloading. For example, -Xarch_x86_64, + -Xarch_gfx90a, and -Xarch_device are all valid selectors. 
-Xarch_device will + forward the argument to the offloading device while -Xarch_host will target + the host system, which can be used to suppress incompatible GPU arguments.}]>, + MetaVarName<"<arch> <arg>">; def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[NoXarchOption]>, HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">; def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[NoXarchOption]>, @@ -3986,11 +3997,15 @@ defm assumptions : BoolFOption<"assumptions", "Disable codegen and compile-time checks for C++23's [[assume]] attribute">, PosFlag<SetTrue>>; + +let Visibility = [ClangOption, FlangOption] in { def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>, HelpText<"Enable the loop vectorization passes">; def fno_vectorize : Flag<["-"], "fno-vectorize">, Group<f_Group>; def : Flag<["-"], "ftree-vectorize">, Alias<fvectorize>; def : Flag<["-"], "fno-tree-vectorize">, Alias<fno_vectorize>; +} + def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group<f_Group>, HelpText<"Enable the superword-level parallelism vectorization passes">; def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group<f_Group>; @@ -7332,6 +7347,10 @@ def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">, def mlink_bitcode_file : Separate<["-"], "mlink-bitcode-file">, HelpText<"Link the given bitcode file before performing optimizations.">; + +def vectorize_loops : Flag<["-"], "vectorize-loops">, + HelpText<"Run the Loop vectorization passes">, + MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>; } // let Visibility = [CC1Option, FC1Option] let Visibility = [CC1Option] in { @@ -7447,9 +7466,6 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt", PosFlag, NegFlag>; -def vectorize_loops : Flag<["-"], "vectorize-loops">, - HelpText<"Run the Loop vectorization passes">, - MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>; def vectorize_slp : Flag<["-"], "vectorize-slp">, HelpText<"Run the SLP vectorization passes">, MarshallingInfoFlag<CodeGenOpts<"VectorizeSLP">>; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index c55b964650323..093e9a06b00ce 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1908,7 +1908,23 @@ class Sema final : public SemaBase { /// '\#pragma clang attribute push' directives to the given declaration. void AddPragmaAttributes(Scope *S, Decl *D); - void PrintPragmaAttributeInstantiationPoint(); + using InstantiationContextDiagFuncRef = + llvm::function_ref<void(SourceLocation, PartialDiagnostic)>; + auto getDefaultDiagFunc() { + return [this](SourceLocation Loc, PartialDiagnostic PD) { + // This bypasses a lot of the filters in the diag engine, as it's + // to be used to attach notes to diagnostics which have already + // been filtered through. 
+ DiagnosticBuilder Builder(Diags.Report(Loc, PD.getDiagID())); + PD.Emit(Builder); + }; + } + + void PrintPragmaAttributeInstantiationPoint( + InstantiationContextDiagFuncRef DiagFunc); + void PrintPragmaAttributeInstantiationPoint() { + PrintPragmaAttributeInstantiationPoint(getDefaultDiagFunc()); + } void DiagnoseUnterminatedPragmaAttribute(); @@ -13263,18 +13279,22 @@ class Sema final : public SemaBase { void pushCodeSynthesisContext(CodeSynthesisContext Ctx); void popCodeSynthesisContext(); - void PrintContextStack() { + void PrintContextStack(InstantiationContextDiagFuncRef DiagFunc) { if (!CodeSynthesisContexts.empty() && CodeSynthesisContexts.size() != LastEmittedCodeSynthesisContextDepth) { - PrintInstantiationStack(); + PrintInstantiationStack(DiagFunc); LastEmittedCodeSynthesisContextDepth = CodeSynthesisContexts.size(); } if (PragmaAttributeCurrentTargetDecl) - PrintPragmaAttributeInstantiationPoint(); + PrintPragmaAttributeInstantiationPoint(DiagFunc); } + void PrintContextStack() { PrintContextStack(getDefaultDiagFunc()); } /// Prints the current instantiation stack through a series of /// notes. - void PrintInstantiationStack(); + void PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc); + void PrintInstantiationStack() { + PrintInstantiationStack(getDefaultDiagFunc()); + } /// Determines whether we are currently in a context where /// template argument substitution failures are not considered diff --git a/clang/include/module.modulemap b/clang/include/module.modulemap index fb8e445cb4b72..8489619832a47 100644 --- a/clang/include/module.modulemap +++ b/clang/include/module.modulemap @@ -135,7 +135,16 @@ module Clang_Frontend { module Clang_FrontendTool { requires cplusplus umbrella "clang/FrontendTool" module * { export * } } module Clang_Index { requires cplusplus umbrella "clang/Index" module * { export * } } -module Clang_Lex { requires cplusplus umbrella "clang/Lex" module * { export * } } + +module Clang_Lex { + requires cplusplus + umbrella "clang/Lex" + + textual header "clang/Lex/HLSLRootSignatureTokenKinds.def" + + module * { export * } +} + module Clang_Parse { requires cplusplus umbrella "clang/Parse" module * { export * } } module Clang_Rewrite { requires cplusplus umbrella "clang/Rewrite/Core" module * { export * } } module Clang_RewriteFrontend { requires cplusplus umbrella "clang/Rewrite/Frontend" module * { export * } } diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 3b814be266330..7c33d3a165a08 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -390,7 +390,6 @@ APValue &APValue::operator=(const APValue &RHS) { if (this != &RHS) *this = APValue(RHS); - AllowConstexprUnknown = RHS.AllowConstexprUnknown; return *this; } diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index f45fb0eca3714..68d042eca2492 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -100,6 +100,10 @@ static const OffloadArchToStringMap arch_names[] = { SM(90a), // Hopper SM(100), // Blackwell SM(100a), // Blackwell + SM(101), // Blackwell + SM(101a), // Blackwell + SM(120), // Blackwell + SM(120a), // Blackwell GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -228,6 +232,10 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) { return CudaVersion::CUDA_120; case OffloadArch::SM_100: case OffloadArch::SM_100a: + case OffloadArch::SM_101: + case OffloadArch::SM_101a: + case OffloadArch::SM_120: + case OffloadArch::SM_120a: return CudaVersion::CUDA_128; default: 
llvm_unreachable("invalid enum"); diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 547cf3dfa2be7..5931a77a85fec 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -185,7 +185,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { // Set __CUDA_ARCH__ for the GPU specified. - std::string CUDAArchCode = [this] { + llvm::StringRef CUDAArchCode = [this] { switch (GPU) { case OffloadArch::GFX600: case OffloadArch::GFX601: @@ -290,14 +290,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::SM_100: case OffloadArch::SM_100a: return "1000"; + case OffloadArch::SM_101: + case OffloadArch::SM_101a: + return "1010"; + case OffloadArch::SM_120: + case OffloadArch::SM_120a: + return "1200"; } llvm_unreachable("unhandled OffloadArch"); }(); Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); - if (GPU == OffloadArch::SM_90a) - Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); - if (GPU == OffloadArch::SM_100a) - Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1"); + switch(GPU) { + case OffloadArch::SM_90a: + case OffloadArch::SM_100a: + case OffloadArch::SM_101a: + case OffloadArch::SM_120a: + Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1"); + break; + default: + // Do nothing if this is not an enhanced architecture. + break; + } } } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp new file mode 100644 index 0000000000000..b802705ca8fdc --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -0,0 +1,70 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Emit Expr nodes with scalar CIR types as CIR code. 
+// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" + +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" + +#include "mlir/IR/Value.h" + +#include <cassert> + +using namespace clang; +using namespace clang::CIRGen; + +namespace { + +class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { + CIRGenFunction &cgf; + CIRGenBuilderTy &builder; + bool ignoreResultAssign; + +public: + ScalarExprEmitter(CIRGenFunction &cgf, CIRGenBuilderTy &builder, + bool ira = false) + : cgf(cgf), builder(builder), ignoreResultAssign(ira) {} + + //===--------------------------------------------------------------------===// + // Visitor Methods + //===--------------------------------------------------------------------===// + + mlir::Value Visit(Expr *e) { + return StmtVisitor<ScalarExprEmitter, mlir::Value>::Visit(e); + } + + mlir::Value VisitStmt(Stmt *s) { + llvm_unreachable("Statement passed to ScalarExprEmitter"); + } + + mlir::Value VisitExpr(Expr *e) { + cgf.getCIRGenModule().errorNYI( + e->getSourceRange(), "scalar expression kind: ", e->getStmtClassName()); + return {}; + } + + mlir::Value VisitIntegerLiteral(const IntegerLiteral *e) { + mlir::Type type = cgf.convertType(e->getType()); + return builder.create<cir::ConstantOp>( + cgf.getLoc(e->getExprLoc()), type, + builder.getAttr<cir::IntAttr>(type, e->getValue())); + } +}; +} // namespace + +/// Emit the computation of the specified expression of scalar type. +mlir::Value CIRGenFunction::emitScalarExpr(const Expr *e) { + assert(e && hasScalarEvaluationKind(e->getType()) && + "Invalid scalar expression to emit"); + + return ScalarExprEmitter(*this, builder).Visit(const_cast<Expr *>(e)); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp new file mode 100644 index 0000000000000..bba2f71a87627 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -0,0 +1,220 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Internal per-function state used for AST-to-ClangIR code gen +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" + +#include "clang/AST/GlobalDecl.h" + +#include <cassert> + +namespace clang::CIRGen { + +CIRGenFunction::CIRGenFunction(CIRGenModule &cgm, CIRGenBuilderTy &builder, + bool suppressNewContext) + : CIRGenTypeCache(cgm), cgm{cgm}, builder(builder) {} + +CIRGenFunction::~CIRGenFunction() {} + +// This is copied from clang/lib/CodeGen/CodeGenFunction.cpp +cir::TypeEvaluationKind CIRGenFunction::getEvaluationKind(QualType type) { + type = type.getCanonicalType(); + while (true) { + switch (type->getTypeClass()) { +#define TYPE(name, parent) +#define ABSTRACT_TYPE(name, parent) +#define NON_CANONICAL_TYPE(name, parent) case Type::name: +#define DEPENDENT_TYPE(name, parent) case Type::name: +#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(name, parent) case Type::name: +#include "clang/AST/TypeNodes.inc" + llvm_unreachable("non-canonical or dependent type in IR-generation"); + + case Type::ArrayParameter: + case Type::HLSLAttributedResource: + llvm_unreachable("NYI"); + + case Type::Auto: + case Type::DeducedTemplateSpecialization: + llvm_unreachable("undeduced type in IR-generation"); + + // Various scalar types. 
+ case Type::Builtin: + case Type::Pointer: + case Type::BlockPointer: + case Type::LValueReference: + case Type::RValueReference: + case Type::MemberPointer: + case Type::Vector: + case Type::ExtVector: + case Type::ConstantMatrix: + case Type::FunctionProto: + case Type::FunctionNoProto: + case Type::Enum: + case Type::ObjCObjectPointer: + case Type::Pipe: + case Type::BitInt: + return cir::TEK_Scalar; + + // Complexes. + case Type::Complex: + return cir::TEK_Complex; + + // Arrays, records, and Objective-C objects. + case Type::ConstantArray: + case Type::IncompleteArray: + case Type::VariableArray: + case Type::Record: + case Type::ObjCObject: + case Type::ObjCInterface: + return cir::TEK_Aggregate; + + // We operate on atomic values according to their underlying type. + case Type::Atomic: + type = cast<AtomicType>(type)->getValueType(); + continue; + } + llvm_unreachable("unknown type kind!"); + } +} + +mlir::Type CIRGenFunction::convertTypeForMem(QualType t) { + return cgm.getTypes().convertTypeForMem(t); +} + +mlir::Type CIRGenFunction::convertType(QualType t) { + return cgm.getTypes().convertType(t); +} + +mlir::Location CIRGenFunction::getLoc(SourceLocation srcLoc) { + // Some AST nodes might contain invalid source locations (e.g. + // CXXDefaultArgExpr), workaround that to still get something out. + if (srcLoc.isValid()) { + const SourceManager &sm = getContext().getSourceManager(); + PresumedLoc pLoc = sm.getPresumedLoc(srcLoc); + StringRef filename = pLoc.getFilename(); + return mlir::FileLineColLoc::get(builder.getStringAttr(filename), + pLoc.getLine(), pLoc.getColumn()); + } + // Do our best... + assert(currSrcLoc && "expected to inherit some source location"); + return *currSrcLoc; +} + +mlir::Location CIRGenFunction::getLoc(SourceRange srcLoc) { + // Some AST nodes might contain invalid source locations (e.g. + // CXXDefaultArgExpr), workaround that to still get something out. + if (srcLoc.isValid()) { + mlir::Location beg = getLoc(srcLoc.getBegin()); + mlir::Location end = getLoc(srcLoc.getEnd()); + SmallVector<mlir::Location, 2> locs = {beg, end}; + mlir::Attribute metadata; + return mlir::FusedLoc::get(locs, metadata, &getMLIRContext()); + } + if (currSrcLoc) { + return *currSrcLoc; + } + // We're brave, but time to give up. + return builder.getUnknownLoc(); +} + +mlir::Location CIRGenFunction::getLoc(mlir::Location lhs, mlir::Location rhs) { + SmallVector<mlir::Location, 2> locs = {lhs, rhs}; + mlir::Attribute metadata; + return mlir::FusedLoc::get(locs, metadata, &getMLIRContext()); +} + +void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType, + cir::FuncOp fn, cir::FuncType funcType, + SourceLocation loc, + SourceLocation startLoc) { + assert(!curFn && + "CIRGenFunction can only be used for one function at a time"); + + fnRetTy = returnType; + curFn = fn; + + mlir::Block *entryBB = &fn.getBlocks().front(); + builder.setInsertionPointToStart(entryBB); +} + +void CIRGenFunction::finishFunction(SourceLocation endLoc) {} + +mlir::LogicalResult CIRGenFunction::emitFunctionBody(const clang::Stmt *body) { + auto result = mlir::LogicalResult::success(); + if (const CompoundStmt *block = dyn_cast<CompoundStmt>(body)) + emitCompoundStmtWithoutScope(*block); + else + result = emitStmt(body, /*useCurrentScope=*/true); + return result; +} + +cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn, + cir::FuncType funcType) { + const auto funcDecl = cast<FunctionDecl>(gd.getDecl()); + SourceLocation loc = funcDecl->getLocation(); + Stmt *body = funcDecl->getBody(); + SourceRange bodyRange = + body ? 
body->getSourceRange() : funcDecl->getLocation();
+
+  SourceLocRAIIObject fnLoc{*this, loc.isValid() ? getLoc(loc)
+                                                 : builder.getUnknownLoc()};
+
+  // This will be used once more code is upstreamed.
+  [[maybe_unused]] mlir::Block *entryBB = fn.addEntryBlock();
+
+  startFunction(gd, funcDecl->getReturnType(), fn, funcType, loc,
+                bodyRange.getBegin());
+
+  if (isa<CXXDestructorDecl>(funcDecl))
+    getCIRGenModule().errorNYI(bodyRange, "C++ destructor definition");
+  else if (isa<CXXConstructorDecl>(funcDecl))
+    getCIRGenModule().errorNYI(bodyRange, "C++ constructor definition");
+  else if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
+           funcDecl->hasAttr<CUDAGlobalAttr>())
+    getCIRGenModule().errorNYI(bodyRange, "CUDA kernel");
+  else if (isa<CXXMethodDecl>(funcDecl) &&
+           cast<CXXMethodDecl>(funcDecl)->isLambdaStaticInvoker())
+    getCIRGenModule().errorNYI(bodyRange, "Lambda static invoker");
+  else if (funcDecl->isDefaulted() && isa<CXXMethodDecl>(funcDecl) &&
+           (cast<CXXMethodDecl>(funcDecl)->isCopyAssignmentOperator() ||
+            cast<CXXMethodDecl>(funcDecl)->isMoveAssignmentOperator()))
+    getCIRGenModule().errorNYI(bodyRange, "Default assignment operator");
+  else if (body) {
+    if (mlir::failed(emitFunctionBody(body))) {
+      fn.erase();
+      return nullptr;
+    }
+  } else
+    llvm_unreachable("no definition for normal function");
+
+  // This code to insert a cir.return or cir.trap at the end of the function is
+  // temporary until the function return code, including
+  // CIRGenFunction::LexicalScope::emitImplicitReturn(), is upstreamed.
+  mlir::Block &lastBlock = fn.getRegion().back();
+  if (lastBlock.empty() || !lastBlock.mightHaveTerminator() ||
+      !lastBlock.getTerminator()->hasTrait<mlir::OpTrait::IsTerminator>()) {
+    builder.setInsertionPointToEnd(&lastBlock);
+    if (mlir::isa<cir::VoidType>(funcType.getReturnType())) {
+      builder.create<cir::ReturnOp>(getLoc(bodyRange.getEnd()));
+    } else {
+      builder.create<cir::TrapOp>(getLoc(bodyRange.getEnd()));
+    }
+  }
+
+  if (mlir::failed(fn.verifyBody()))
+    return nullptr;
+
+  finishFunction(bodyRange.getEnd());
+
+  return fn;
+}
+
+} // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
new file mode 100644
index 0000000000000..92fbea16d3aa1
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -0,0 +1,141 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Internal per-function state used for AST-to-ClangIR code gen
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_LIB_CIR_CODEGEN_CIRGENFUNCTION_H
+#define CLANG_LIB_CIR_CODEGEN_CIRGENFUNCTION_H
+
+#include "CIRGenBuilder.h"
+#include "CIRGenModule.h"
+#include "CIRGenTypeCache.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Type.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "clang/CIR/TypeEvaluationKind.h"
+
+#include "llvm/ADT/ScopedHashTable.h"
+
+namespace {
+class ScalarExprEmitter;
+} // namespace
+
+namespace clang::CIRGen {
+
+class CIRGenFunction : public CIRGenTypeCache {
+public:
+  CIRGenModule &cgm;
+
+private:
+  friend class ::ScalarExprEmitter;
+  /// The builder is a helper class to create IR inside a function. The
+  /// builder is stateful, in particular it keeps an "insertion point": this
+  /// is where the next operations will be introduced.
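+  /// Code that needs to emit into a different block (e.g.
+  /// CIRGenModule::emitGlobalFunctionDefinition) saves and restores the
+  /// insertion point with an mlir::OpBuilder::InsertionGuard.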
+ CIRGenBuilderTy &builder; + +public: + clang::QualType fnRetTy; + + /// This is the current function or global initializer that is generated code + /// for. + mlir::Operation *curFn = nullptr; + + clang::ASTContext &getContext() const { return cgm.getASTContext(); } + + CIRGenBuilderTy &getBuilder() { return builder; } + + CIRGenModule &getCIRGenModule() { return cgm; } + const CIRGenModule &getCIRGenModule() const { return cgm; } + + mlir::Type convertTypeForMem(QualType T); + + mlir::Type convertType(clang::QualType T); + mlir::Type convertType(const TypeDecl *T) { + return convertType(getContext().getTypeDeclType(T)); + } + + /// Return the cir::TypeEvaluationKind of QualType \c type. + static cir::TypeEvaluationKind getEvaluationKind(clang::QualType type); + + static bool hasScalarEvaluationKind(clang::QualType type) { + return getEvaluationKind(type) == cir::TEK_Scalar; + } + + CIRGenFunction(CIRGenModule &cgm, CIRGenBuilderTy &builder, + bool suppressNewContext = false); + ~CIRGenFunction(); + + CIRGenTypes &getTypes() const { return cgm.getTypes(); } + + mlir::MLIRContext &getMLIRContext() { return cgm.getMLIRContext(); } + + /// Use to track source locations across nested visitor traversals. + /// Always use a `SourceLocRAIIObject` to change currSrcLoc. + std::optional currSrcLoc; + class SourceLocRAIIObject { + CIRGenFunction &cgf; + std::optional oldLoc; + + public: + SourceLocRAIIObject(CIRGenFunction &cgf, mlir::Location value) : cgf(cgf) { + if (cgf.currSrcLoc) + oldLoc = cgf.currSrcLoc; + cgf.currSrcLoc = value; + } + + /// Can be used to restore the state early, before the dtor + /// is run. + void restore() { cgf.currSrcLoc = oldLoc; } + ~SourceLocRAIIObject() { restore(); } + }; + + /// Helpers to convert Clang's SourceLocation to a MLIR Location. + mlir::Location getLoc(clang::SourceLocation srcLoc); + mlir::Location getLoc(clang::SourceRange srcLoc); + mlir::Location getLoc(mlir::Location lhs, mlir::Location rhs); + + const clang::LangOptions &getLangOpts() const { return cgm.getLangOpts(); } + + void finishFunction(SourceLocation endLoc); + mlir::LogicalResult emitFunctionBody(const clang::Stmt *body); + + // Build CIR for a statement. useCurrentScope should be true if no + // new scopes need be created when finding a compound statement. + mlir::LogicalResult + emitStmt(const clang::Stmt *s, bool useCurrentScope, + llvm::ArrayRef attrs = std::nullopt); + + mlir::LogicalResult emitSimpleStmt(const clang::Stmt *s, + bool useCurrentScope); + + void emitCompoundStmt(const clang::CompoundStmt &s); + + void emitCompoundStmtWithoutScope(const clang::CompoundStmt &s); + + mlir::LogicalResult emitReturnStmt(const clang::ReturnStmt &s); + + /// Emit the computation of the specified expression of scalar type. + mlir::Value emitScalarExpr(const clang::Expr *e); + cir::FuncOp generateCode(clang::GlobalDecl gd, cir::FuncOp fn, + cir::FuncType funcType); + + /// Emit code for the start of a function. + /// \param loc The location to be associated with the function. + /// \param startLoc The location of the function body. 
+ void startFunction(clang::GlobalDecl gd, clang::QualType retTy, + cir::FuncOp fn, cir::FuncType funcType, + clang::SourceLocation loc, clang::SourceLocation startLoc); +}; + +} // namespace clang::CIRGen + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index cbecdf925aa5d..c1d3265200e3b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "CIRGenModule.h" +#include "CIRGenFunction.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclBase.h" @@ -102,19 +103,30 @@ void CIRGenModule::emitGlobal(clang::GlobalDecl gd) { void CIRGenModule::emitGlobalFunctionDefinition(clang::GlobalDecl gd, mlir::Operation *op) { auto const *funcDecl = cast(gd.getDecl()); - if (clang::IdentifierInfo *identifier = funcDecl->getIdentifier()) { - auto funcOp = builder.create( - getLoc(funcDecl->getSourceRange()), identifier->getName()); - theModule.push_back(funcOp); - } else { + if (funcDecl->getIdentifier() == nullptr) { errorNYI(funcDecl->getSourceRange().getBegin(), "function definition with a non-identifier for a name"); + return; + } + cir::FuncType funcType = + cast(convertType(funcDecl->getType())); + + cir::FuncOp funcOp = dyn_cast_if_present(op); + if (!funcOp || funcOp.getFunctionType() != funcType) { + funcOp = getAddrOfFunction(gd, funcType, /*ForVTable=*/false, + /*DontDefer=*/true, ForDefinition); + } + + CIRGenFunction cgf(*this, builder); + { + mlir::OpBuilder::InsertionGuard guard(builder); + cgf.generateCode(gd, funcOp, funcType); } } void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, bool isTentative) { - mlir::Type type = getTypes().convertType(vd->getType()); + mlir::Type type = convertType(vd->getType()); if (clang::IdentifierInfo *identifier = vd->getIdentifier()) { auto varOp = builder.create(getLoc(vd->getSourceRange()), identifier->getName(), type); @@ -223,6 +235,56 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) { } } +cir::FuncOp CIRGenModule::getAddrOfFunction(clang::GlobalDecl gd, + mlir::Type funcType, bool forVTable, + bool dontDefer, + ForDefinition_t isForDefinition) { + assert(!cast(gd.getDecl())->isConsteval() && + "consteval function should never be emitted"); + + if (!funcType) { + const auto *fd = cast(gd.getDecl()); + funcType = convertType(fd->getType()); + } + + cir::FuncOp func = getOrCreateCIRFunction( + cast(gd.getDecl())->getIdentifier()->getName(), funcType, gd, + forVTable, dontDefer, /*isThunk=*/false, isForDefinition); + return func; +} + +cir::FuncOp CIRGenModule::getOrCreateCIRFunction( + StringRef mangledName, mlir::Type funcType, GlobalDecl gd, bool forVTable, + bool dontDefer, bool isThunk, ForDefinition_t isForDefinition, + mlir::ArrayAttr extraAttrs) { + auto *funcDecl = llvm::cast_or_null(gd.getDecl()); + bool invalidLoc = !funcDecl || + funcDecl->getSourceRange().getBegin().isInvalid() || + funcDecl->getSourceRange().getEnd().isInvalid(); + cir::FuncOp funcOp = createCIRFunction( + invalidLoc ? 
theModule->getLoc() : getLoc(funcDecl->getSourceRange()), + mangledName, mlir::cast(funcType), funcDecl); + return funcOp; +} + +cir::FuncOp +CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name, + cir::FuncType funcType, + const clang::FunctionDecl *funcDecl) { + cir::FuncOp func; + { + mlir::OpBuilder::InsertionGuard guard(builder); + + func = builder.create(loc, name, funcType); + theModule.push_back(func); + } + return func; +} + +mlir::Type CIRGenModule::convertType(QualType type) { + return genTypes.convertType(type); +} + DiagnosticBuilder CIRGenModule::errorNYI(SourceLocation loc, llvm::StringRef feature) { unsigned diagID = diags.getCustomDiagID( diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 29bb4036218e4..bf3a4d1130f15 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -17,6 +17,8 @@ #include "CIRGenTypeCache.h" #include "CIRGenTypes.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" + #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" @@ -36,6 +38,8 @@ class VarDecl; namespace CIRGen { +enum ForDefinition_t : bool { NotForDefinition = false, ForDefinition = true }; + /// This class organizes the cross-function state that is used while generating /// CIR code. class CIRGenModule : public CIRGenTypeCache { @@ -71,6 +75,7 @@ class CIRGenModule : public CIRGenTypeCache { CIRGenBuilderTy &getBuilder() { return builder; } clang::ASTContext &getASTContext() const { return astContext; } CIRGenTypes &getTypes() { return genTypes; } + const clang::LangOptions &getLangOpts() const { return langOpts; } mlir::MLIRContext &getMLIRContext() { return *builder.getContext(); } /// Helpers to convert the presumed location of Clang's SourceLocation to an @@ -80,16 +85,37 @@ class CIRGenModule : public CIRGenTypeCache { void emitTopLevelDecl(clang::Decl *decl); + /// Return the address of the given function. If funcType is non-null, then + /// this function will use the specified type if it has to create it. + // TODO: this is a bit weird as `GetAddr` given we give back a FuncOp? + cir::FuncOp + getAddrOfFunction(clang::GlobalDecl gd, mlir::Type funcType = nullptr, + bool forVTable = false, bool dontDefer = false, + ForDefinition_t isForDefinition = NotForDefinition); + /// Emit code for a single global function or variable declaration. Forward /// declarations are emitted lazily. 
void emitGlobal(clang::GlobalDecl gd); + mlir::Type convertType(clang::QualType type); + void emitGlobalDefinition(clang::GlobalDecl gd, mlir::Operation *op = nullptr); void emitGlobalFunctionDefinition(clang::GlobalDecl gd, mlir::Operation *op); void emitGlobalVarDefinition(const clang::VarDecl *vd, bool isTentative = false); + cir::FuncOp + getOrCreateCIRFunction(llvm::StringRef mangledName, mlir::Type funcType, + clang::GlobalDecl gd, bool forVTable, + bool dontDefer = false, bool isThunk = false, + ForDefinition_t isForDefinition = NotForDefinition, + mlir::ArrayAttr extraAttrs = {}); + + cir::FuncOp createCIRFunction(mlir::Location loc, llvm::StringRef name, + cir::FuncType funcType, + const clang::FunctionDecl *funcDecl); + const llvm::Triple &getTriple() const { return target.getTriple(); } /// Helpers to emit "not yet implemented" error diagnostics diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp new file mode 100644 index 0000000000000..f42f30cc5a433 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -0,0 +1,128 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Emit Stmt nodes as CIR code. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenBuilder.h" +#include "CIRGenFunction.h" + +#include "mlir/IR/Builders.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Stmt.h" + +using namespace clang; +using namespace clang::CIRGen; +using namespace cir; + +void CIRGenFunction::emitCompoundStmtWithoutScope(const CompoundStmt &s) { + for (auto *curStmt : s.body()) { + if (emitStmt(curStmt, /*useCurrentScope=*/false).failed()) + getCIRGenModule().errorNYI(curStmt->getSourceRange(), "statement"); + } +} + +void CIRGenFunction::emitCompoundStmt(const CompoundStmt &s) { + mlir::Location scopeLoc = getLoc(s.getSourceRange()); + auto scope = builder.create( + scopeLoc, [&](mlir::OpBuilder &b, mlir::Type &type, mlir::Location loc) { + emitCompoundStmtWithoutScope(s); + }); + + // This code to insert a cir.yield at the end of the scope is temporary until + // CIRGenFunction::LexicalScope::cleanup() is upstreamed. + if (!scope.getRegion().empty()) { + mlir::Block &lastBlock = scope.getRegion().back(); + if (lastBlock.empty() || !lastBlock.mightHaveTerminator() || + !lastBlock.getTerminator()->hasTrait()) { + builder.setInsertionPointToEnd(&lastBlock); + builder.create(getLoc(s.getEndLoc())); + } + } +} + +// Build CIR for a statement. useCurrentScope should be true if no new scopes +// need to be created when finding a compound statement. +mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, + bool useCurrentScope, + ArrayRef attr) { + if (mlir::succeeded(emitSimpleStmt(s, useCurrentScope))) + return mlir::success(); + + // Only a subset of simple statements are supported at the moment. When more + // kinds of statements are supported, a + // switch (s->getStmtClass()) { + // will be added here. + return mlir::failure(); +} + +mlir::LogicalResult CIRGenFunction::emitSimpleStmt(const Stmt *s, + bool useCurrentScope) { + switch (s->getStmtClass()) { + default: + // Only compound and return statements are supported right now. 
+    return mlir::failure();
+  case Stmt::CompoundStmtClass:
+    if (useCurrentScope)
+      emitCompoundStmtWithoutScope(cast<CompoundStmt>(*s));
+    else
+      emitCompoundStmt(cast<CompoundStmt>(*s));
+    break;
+  case Stmt::ReturnStmtClass:
+    return emitReturnStmt(cast<ReturnStmt>(*s));
+  }
+
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) {
+  mlir::Location loc = getLoc(s.getSourceRange());
+  const Expr *rv = s.getRetValue();
+
+  if (getContext().getLangOpts().ElideConstructors && s.getNRVOCandidate() &&
+      s.getNRVOCandidate()->isNRVOVariable()) {
+    getCIRGenModule().errorNYI(s.getSourceRange(),
+                               "named return value optimization");
+  } else if (!rv) {
+    // No return expression. Do nothing.
+    // TODO(CIR): In the future when function returns are fully implemented,
+    // this section will do nothing. But for now a ReturnOp is necessary.
+    builder.create<cir::ReturnOp>(loc);
+  } else if (rv->getType()->isVoidType()) {
+    // No return value. Emit the return expression for its side effects.
+    // TODO(CIR): Once emitAnyExpr(e) has been upstreamed, get rid of the check
+    // and just call emitAnyExpr(rv) here.
+    if (CIRGenFunction::hasScalarEvaluationKind(rv->getType())) {
+      emitScalarExpr(rv);
+    } else {
+      getCIRGenModule().errorNYI(s.getSourceRange(),
+                                 "non-scalar function return type");
+    }
+    builder.create<cir::ReturnOp>(loc);
+  } else if (fnRetTy->isReferenceType()) {
+    getCIRGenModule().errorNYI(s.getSourceRange(),
+                               "function return type that is a reference");
+  } else {
+    mlir::Value value = nullptr;
+    switch (CIRGenFunction::getEvaluationKind(rv->getType())) {
+    case cir::TEK_Scalar:
+      value = emitScalarExpr(rv);
+      if (value) { // Change this to an assert once emitScalarExpr is complete
+        builder.create<cir::ReturnOp>(loc, llvm::ArrayRef(value));
+      }
+      break;
+    default:
+      getCIRGenModule().errorNYI(s.getSourceRange(),
+                                 "non-scalar function return type");
+      break;
+    }
+  }
+
+  return mlir::success();
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index 8519854556b1c..551b43ef121b3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -3,6 +3,7 @@
 #include "CIRGenModule.h"
 
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/GlobalDecl.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/TargetInfo.h"
 
@@ -56,7 +57,7 @@ bool CIRGenTypes::isFuncTypeConvertible(const FunctionType *ft) {
   return true;
 }
 
-mlir::Type CIRGenTypes::ConvertFunctionTypeInternal(QualType qft) {
+mlir::Type CIRGenTypes::convertFunctionTypeInternal(QualType qft) {
   assert(qft.isCanonical());
   const FunctionType *ft = cast<FunctionType>(qft.getTypePtr());
   // First, check whether we can build the full function type. If the function
@@ -198,7 +199,7 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
 
   case Type::FunctionNoProto:
   case Type::FunctionProto:
-    resultType = ConvertFunctionTypeInternal(type);
+    resultType = convertFunctionTypeInternal(type);
     break;
 
   case Type::BitInt: {
@@ -224,3 +225,19 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
   typeCache[ty] = resultType;
   return resultType;
 }
+
+mlir::Type CIRGenTypes::convertTypeForMem(clang::QualType qualType,
+                                          bool forBitField) {
+  assert(!qualType->isConstantMatrixType() && "Matrix types NYI");
+
+  mlir::Type convertedType = convertType(qualType);
+
+  assert(!forBitField && "Bit fields NYI");
+
+  // If this is a bit-precise integer type in a bitfield representation, map
+  // this integer to the target-specified size.
+  if (forBitField && qualType->isBitIntType())
+    assert(!qualType->isBitIntType() && "Bit field with type _BitInt NYI");
+
+  return convertedType;
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.h b/clang/lib/CIR/CodeGen/CIRGenTypes.h
index 71427e1200027..f280e17ebddc6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.h
@@ -15,11 +15,14 @@
 
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 
+#include "clang/AST/Type.h"
+
 #include "llvm/ADT/SmallPtrSet.h"
 
 namespace clang {
 class ASTContext;
 class FunctionType;
+class GlobalDecl;
 class QualType;
 class Type;
 } // namespace clang
@@ -40,8 +43,8 @@ class CIRGenTypes {
   clang::ASTContext &astContext;
   CIRGenBuilderTy &builder;
 
-  /// Heper for ConvertType.
-  mlir::Type ConvertFunctionTypeInternal(clang::QualType ft);
+  /// Helper for convertType.
+  mlir::Type convertFunctionTypeInternal(clang::QualType ft);
 
 public:
   CIRGenTypes(CIRGenModule &cgm);
@@ -61,6 +64,13 @@ class CIRGenTypes {
 
   /// Convert a Clang type into a mlir::Type.
   mlir::Type convertType(clang::QualType type);
+
+  /// Convert type T into an mlir::Type. This differs from convertType in that
+  /// it is used to convert to the memory representation for a type. For
+  /// example, the scalar representation for bool is i1, but the memory
+  /// representation is usually i8 or i32, depending on the target.
+  // TODO: convert this comment to account for MLIR's equivalence
+  mlir::Type convertTypeForMem(clang::QualType, bool forBitField = false);
 };
 
 } // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt
index 782b814d75daa..5602efae1ba41 100644
--- a/clang/lib/CIR/CodeGen/CMakeLists.txt
+++ b/clang/lib/CIR/CodeGen/CMakeLists.txt
@@ -8,7 +8,10 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 
 add_clang_library(clangCIR
   CIRGenerator.cpp
+  CIRGenExprScalar.cpp
+  CIRGenFunction.cpp
   CIRGenModule.cpp
+  CIRGenStmt.cpp
   CIRGenTypes.cpp
 
   DEPENDS
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index f98d8b60f6ff8..10ad7fb4e6542 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -14,6 +14,7 @@
 
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 
+#include "mlir/Interfaces/FunctionImplementation.h"
 #include "mlir/Support/LogicalResult.h"
 
 #include "clang/CIR/Dialect/IR/CIROpsDialect.cpp.inc"
@@ -34,6 +35,70 @@ void cir::CIRDialect::initialize() {
       >();
 }
 
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+// Checks if a region's termination omission is valid and, if so, creates and
+// inserts the omitted terminator into the region.
+LogicalResult ensureRegionTerm(OpAsmParser &parser, Region &region,
+                               SMLoc errLoc) {
+  Location eLoc = parser.getEncodedSourceLoc(parser.getCurrentLocation());
+  OpBuilder builder(parser.getBuilder().getContext());
+
+  // Insert an empty block in case the region is empty, to ensure the
+  // terminator will be inserted.
+  if (region.empty())
+    builder.createBlock(&region);
+
+  Block &block = region.back();
+  // Region is properly terminated: nothing to do.
+  if (!block.empty() && block.back().hasTrait<OpTrait::IsTerminator>())
+    return success();
+
+  // Check for invalid terminator omissions.
+  if (!region.hasOneBlock())
+    return parser.emitError(errLoc,
+                            "multi-block region must not omit terminator");
+
+  // Terminator was omitted correctly: recreate it.
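+  // For example, a parsed "cir.scope { }" reaches this point with an empty
+  // single-block region; the implicit cir.yield is materialized below so the
+  // region verifier still sees a terminated block.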
+ builder.setInsertionPointToEnd(&block); + builder.create(eLoc); + return success(); +} + +// True if the region's terminator should be omitted. +bool omitRegionTerm(mlir::Region &r) { + const auto singleNonEmptyBlock = r.hasOneBlock() && !r.back().empty(); + const auto yieldsNothing = [&r]() { + auto y = dyn_cast(r.back().getTerminator()); + return y && y.getArgs().empty(); + }; + return singleNonEmptyBlock && yieldsNothing(); +} + +//===----------------------------------------------------------------------===// +// CIR Custom Parsers/Printers +//===----------------------------------------------------------------------===// + +static mlir::ParseResult parseOmittedTerminatorRegion(mlir::OpAsmParser &parser, + mlir::Region ®ion) { + auto regionLoc = parser.getCurrentLocation(); + if (parser.parseRegion(region)) + return failure(); + if (ensureRegionTerm(parser, region, regionLoc).failed()) + return failure(); + return success(); +} + +static void printOmittedTerminatorRegion(mlir::OpAsmPrinter &printer, + cir::ScopeOp &op, + mlir::Region ®ion) { + printer.printRegion(region, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/!omitRegionTerm(region)); +} + //===----------------------------------------------------------------------===// // ConstantOp //===----------------------------------------------------------------------===// @@ -73,6 +138,92 @@ OpFoldResult cir::ConstantOp::fold(FoldAdaptor /*adaptor*/) { return getValue(); } +//===----------------------------------------------------------------------===// +// ReturnOp +//===----------------------------------------------------------------------===// + +static mlir::LogicalResult checkReturnAndFunction(cir::ReturnOp op, + cir::FuncOp function) { + // ReturnOps currently only have a single optional operand. + if (op.getNumOperands() > 1) + return op.emitOpError() << "expects at most 1 return operand"; + + // Ensure returned type matches the function signature. + auto expectedTy = function.getFunctionType().getReturnType(); + auto actualTy = + (op.getNumOperands() == 0 ? cir::VoidType::get(op.getContext()) + : op.getOperand(0).getType()); + if (actualTy != expectedTy) + return op.emitOpError() << "returns " << actualTy + << " but enclosing function returns " << expectedTy; + + return mlir::success(); +} + +mlir::LogicalResult cir::ReturnOp::verify() { + // Returns can be present in multiple different scopes, get the + // wrapping function and start from there. + auto *fnOp = getOperation()->getParentOp(); + while (!isa(fnOp)) + fnOp = fnOp->getParentOp(); + + // Make sure return types match function return type. + if (checkReturnAndFunction(*this, cast(fnOp)).failed()) + return failure(); + + return success(); +} + +//===----------------------------------------------------------------------===// +// ScopeOp +//===----------------------------------------------------------------------===// + +/// Given the region at `index`, or the parent operation if `index` is None, +/// return the successor regions. These are the regions that may be selected +/// during the flow of control. `operands` is a set of optional attributes that +/// correspond to a constant value for each operand, or null if that operand is +/// not a constant. +void cir::ScopeOp::getSuccessorRegions( + mlir::RegionBranchPoint point, SmallVectorImpl ®ions) { + // The only region always branch back to the parent operation. 
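+  // That is: control leaving the scope region flows to the op's results,
+  // while control entering from the parent op flows into the scope region.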
+ if (!point.isParent()) { + regions.push_back(RegionSuccessor(getODSResults(0))); + return; + } + + // If the condition isn't constant, both regions may be executed. + regions.push_back(RegionSuccessor(&getScopeRegion())); +} + +void cir::ScopeOp::build( + OpBuilder &builder, OperationState &result, + function_ref scopeBuilder) { + assert(scopeBuilder && "the builder callback for 'then' must be present"); + + OpBuilder::InsertionGuard guard(builder); + Region *scopeRegion = result.addRegion(); + builder.createBlock(scopeRegion); + + mlir::Type yieldTy; + scopeBuilder(builder, yieldTy, result.location); + + if (yieldTy) + result.addTypes(TypeRange{yieldTy}); +} + +LogicalResult cir::ScopeOp::verify() { + if (getRegion().empty()) { + return emitOpError() << "cir.scope must not be empty since it should " + "include at least an implicit cir.yield "; + } + + mlir::Block &lastBlock = getRegion().back(); + if (lastBlock.empty() || !lastBlock.mightHaveTerminator() || + !lastBlock.getTerminator()->hasTrait()) + return emitOpError() << "last block of cir.scope must be terminated"; + return success(); +} + //===----------------------------------------------------------------------===// // GlobalOp //===----------------------------------------------------------------------===// @@ -154,23 +305,101 @@ parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, //===----------------------------------------------------------------------===// void cir::FuncOp::build(OpBuilder &builder, OperationState &result, - StringRef name) { + StringRef name, FuncType type) { + result.addRegion(); result.addAttribute(SymbolTable::getSymbolAttrName(), builder.getStringAttr(name)); + result.addAttribute(getFunctionTypeAttrName(result.name), + TypeAttr::get(type)); } ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) { + llvm::SMLoc loc = parser.getCurrentLocation(); + mlir::Builder &builder = parser.getBuilder(); + StringAttr nameAttr; if (parser.parseSymbolName(nameAttr, SymbolTable::getSymbolAttrName(), state.attributes)) return failure(); + llvm::SmallVector arguments; + llvm::SmallVector resultTypes; + llvm::SmallVector resultAttrs; + bool isVariadic = false; + if (function_interface_impl::parseFunctionSignatureWithArguments( + parser, /*allowVariadic=*/true, arguments, isVariadic, resultTypes, + resultAttrs)) + return failure(); + llvm::SmallVector argTypes; + for (OpAsmParser::Argument &arg : arguments) + argTypes.push_back(arg.type); + + if (resultTypes.size() > 1) { + return parser.emitError( + loc, "functions with multiple return types are not supported"); + } + + mlir::Type returnType = + (resultTypes.empty() ? cir::VoidType::get(builder.getContext()) + : resultTypes.front()); + + cir::FuncType fnType = cir::FuncType::get(argTypes, returnType, isVariadic); + if (!fnType) + return failure(); + state.addAttribute(getFunctionTypeAttrName(state.name), + TypeAttr::get(fnType)); + + // Parse the optional function body. + auto *body = state.addRegion(); + OptionalParseResult parseResult = parser.parseOptionalRegion( + *body, arguments, /*enableNameShadowing=*/false); + if (parseResult.has_value()) { + if (failed(*parseResult)) + return failure(); + // Function body was parsed, make sure its not empty. + if (body->empty()) + return parser.emitError(loc, "expected non-empty function body"); + } + return success(); } +bool cir::FuncOp::isDeclaration() { + // TODO(CIR): This function will actually do something once external function + // declarations and aliases are upstreamed. 
+ return false; +} + +mlir::Region *cir::FuncOp::getCallableRegion() { + // TODO(CIR): This function will have special handling for aliases and a + // check for an external function, once those features have been upstreamed. + return &getBody(); +} + void cir::FuncOp::print(OpAsmPrinter &p) { p << ' '; - // For now the only property a function has is its name p.printSymbolName(getSymName()); + cir::FuncType fnType = getFunctionType(); + function_interface_impl::printFunctionSignature( + p, *this, fnType.getInputs(), fnType.isVarArg(), fnType.getReturnTypes()); + + // Print the body if this is not an external function. + Region &body = getOperation()->getRegion(0); + if (!body.empty()) { + p << ' '; + p.printRegion(body, /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/true); + } +} + +// Hook for OpTrait::FunctionLike, called after verifying that the 'type' +// attribute is present. This can check for preconditions of the +// getNumArguments hook not failing. +LogicalResult cir::FuncOp::verifyType() { + auto type = getFunctionType(); + if (!isa(type)) + return emitOpError("requires '" + getFunctionTypeAttrName().str() + + "' attribute of function type"); + return success(); } // TODO(CIR): The properties of functions that require verification haven't diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4688381040be2..4625bf8088be6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -859,24 +859,6 @@ static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E, StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList); } -static llvm::Value *emitModfBuiltin(CodeGenFunction &CGF, const CallExpr *E, - llvm::Intrinsic::ID IntrinsicID) { - llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *IntPartDest = CGF.EmitScalarExpr(E->getArg(1)); - - llvm::Value *Call = - CGF.Builder.CreateIntrinsic(IntrinsicID, {Val->getType()}, Val); - - llvm::Value *FractionalResult = CGF.Builder.CreateExtractValue(Call, 0); - llvm::Value *IntegralResult = CGF.Builder.CreateExtractValue(Call, 1); - - QualType DestPtrType = E->getArg(1)->getType()->getPointeeType(); - LValue IntegralLV = CGF.MakeNaturalAlignAddrLValue(IntPartDest, DestPtrType); - CGF.EmitStoreOfScalar(IntegralResult, IntegralLV); - - return FractionalResult; -} - /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); @@ -4130,15 +4112,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_frexpf128: case Builtin::BI__builtin_frexpf16: return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp)); - case Builtin::BImodf: - case Builtin::BImodff: - case Builtin::BImodfl: - case Builtin::BI__builtin_modf: - case Builtin::BI__builtin_modff: - case Builtin::BI__builtin_modfl: - if (Builder.getIsFPConstrained()) - break; // TODO: Emit constrained modf intrinsic once one exists. 
- return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf)); case Builtin::BI__builtin_isgreater: case Builtin::BI__builtin_isgreaterequal: case Builtin::BI__builtin_isless: @@ -19506,6 +19479,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef{Op0}, nullptr, "hlsl.all"); } + case Builtin::BI__builtin_hlsl_and: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + return Builder.CreateAnd(Op0, Op1, "hlsl.and"); + } case Builtin::BI__builtin_hlsl_any: { Value *Op0 = EmitScalarExpr(E->getArg(0)); return Builder.CreateIntrinsic( diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 826ec4da8ea28..feb2448297542 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2278,6 +2278,10 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::SM_90a: case OffloadArch::SM_100: case OffloadArch::SM_100a: + case OffloadArch::SM_101: + case OffloadArch::SM_101a: + case OffloadArch::SM_120: + case OffloadArch::SM_120a: case OffloadArch::GFX600: case OffloadArch::GFX601: case OffloadArch::GFX602: diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index 7faf6821a6cdc..c9108938bca50 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -771,6 +771,10 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder, case VTableComponent::CK_DeletingDtorPointer: { GlobalDecl GD = component.getGlobalDecl(); + const bool IsThunk = + nextVTableThunkIndex < layout.vtable_thunks().size() && + layout.vtable_thunks()[nextVTableThunkIndex].first == componentIndex; + if (CGM.getLangOpts().CUDA) { // Emit NULL for methods we can't codegen on this // side. Otherwise we'd end up with vtable with unresolved @@ -782,9 +786,12 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder, CGM.getLangOpts().CUDAIsDevice ? MD->hasAttr() : (MD->hasAttr() || !MD->hasAttr()); - if (!CanEmitMethod) + if (!CanEmitMethod) { + if (IsThunk) + nextVTableThunkIndex++; return builder.add( llvm::ConstantExpr::getNullValue(CGM.GlobalsInt8PtrTy)); + } // Method is acceptable, continue processing as usual. } @@ -830,9 +837,7 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder, fnPtr = DeletedVirtualFn; // Thunks. - } else if (nextVTableThunkIndex < layout.vtable_thunks().size() && - layout.vtable_thunks()[nextVTableThunkIndex].first == - componentIndex) { + } else if (IsThunk) { auto &thunkInfo = layout.vtable_thunks()[nextVTableThunkIndex].second; nextVTableThunkIndex++; diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 3f1a24791ddd8..818b6dabaa144 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -80,6 +80,42 @@ llvm::MDNode *CodeGenTBAA::getChar() { return Char; } +llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) { + assert(PtrDepth >= 1 && "Pointer must have some depth"); + + // Populate at least PtrDepth elements in AnyPtrs. These are the type nodes + // for "any" pointers of increasing pointer depth, and are organized in the + // hierarchy: any pointer <- any p2 pointer <- any p3 pointer <- ... + // + // Note that AnyPtrs[Idx] is actually the node for pointer depth (Idx+1), + // since there is no node for pointer depth 0. + // + // These "any" pointer type nodes are used in pointer TBAA. 
The type node of + // a concrete pointer type has the "any" pointer type node of appropriate + // pointer depth as its parent. The "any" pointer type nodes are also used + // directly for accesses to void pointers, or to specific pointers that we + // conservatively do not distinguish in pointer TBAA (e.g. pointers to + // members). Essentially, this establishes that e.g. void** can alias with + // any type that can unify with T**, ignoring things like qualifiers. Here, T + // is a variable that represents an arbitrary type, including pointer types. + // As such, each depth is naturally a subtype of the previous depth, and thus + // transitively of all previous depths. + if (AnyPtrs.size() < PtrDepth) { + AnyPtrs.reserve(PtrDepth); + auto Size = Module.getDataLayout().getPointerSize(); + // Populate first element. + if (AnyPtrs.empty()) + AnyPtrs.push_back(createScalarTypeNode("any pointer", getChar(), Size)); + // Populate further elements. + for (size_t Idx = AnyPtrs.size(); Idx < PtrDepth; ++Idx) { + auto Name = ("any p" + llvm::Twine(Idx + 1) + " pointer").str(); + AnyPtrs.push_back(createScalarTypeNode(Name, AnyPtrs[Idx - 1], Size)); + } + } + + return AnyPtrs[PtrDepth - 1]; +} + static bool TypeHasMayAlias(QualType QTy) { // Tagged types have declarations, and therefore may have attributes. if (auto *TD = QTy->getAsTagDecl()) @@ -202,9 +238,8 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // they involve a significant representation difference. We don't // currently do so, however. if (Ty->isPointerType() || Ty->isReferenceType()) { - llvm::MDNode *AnyPtr = createScalarTypeNode("any pointer", getChar(), Size); if (!CodeGenOpts.PointerTBAA) - return AnyPtr; + return getAnyPtr(); // C++ [basic.lval]p11 permits objects to accessed through an l-value of // similar type. Two types are similar under C++ [conv.qual]p2 if the // decomposition of the types into pointers, member pointers, and arrays has @@ -232,7 +267,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // common idioms and there is no good alternative to re-write the code // without strict-aliasing violations. if (Ty->isVoidType()) - return AnyPtr; + return getAnyPtr(PtrDepth); assert(!isa(Ty)); // When the underlying type is a builtin type, we compute the pointee type @@ -256,7 +291,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // similar-types rule. const auto *RT = Ty->getAs(); if (!RT) - return AnyPtr; + return getAnyPtr(PtrDepth); // For unnamed structs or unions C's compatible types rule applies. Two // compatible types in different compilation units can have different @@ -270,7 +305,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // compatibility rule, but it doesn't matter because you can never have a // pointer to an anonymous struct or union. if (!RT->getDecl()->getDeclName()) - return AnyPtr; + return getAnyPtr(PtrDepth); // For non-builtin types use the mangled name of the canonical type. llvm::raw_svector_ostream TyOut(TyName); @@ -281,7 +316,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { OutName += std::to_string(PtrDepth); OutName += " "; OutName += TyName; - return createScalarTypeNode(OutName, AnyPtr, Size); + return createScalarTypeNode(OutName, getAnyPtr(PtrDepth), Size); } // Accesses to arrays are accesses to objects of their element types. 
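To make the node shapes above concrete, here is a small standalone sketch. It is an illustration under assumptions, not clang's own helper (clang's createScalarTypeNode additionally threads the access size and new-struct-path handling through); it builds the same lazily grown "any pointer" chain with LLVM's MDBuilder API:

```cpp
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/raw_ostream.h"

#include <cassert>
#include <string>
#include <vector>

// Mirrors the cache invariant: anyPtrs[idx] holds the node for pointer depth
// idx + 1, and each depth's node is parented on the previous depth's node.
static llvm::MDNode *getAnyPtr(llvm::MDBuilder &mdb,
                               std::vector<llvm::MDNode *> &anyPtrs,
                               llvm::MDNode *charNode, unsigned ptrDepth) {
  assert(ptrDepth >= 1 && "Pointer must have some depth");
  if (anyPtrs.size() < ptrDepth) {
    anyPtrs.reserve(ptrDepth);
    // Depth 1 is the plain "any pointer" node, parented on char.
    if (anyPtrs.empty())
      anyPtrs.push_back(mdb.createTBAAScalarTypeNode("any pointer", charNode));
    // Deeper nodes chain onto the node for the previous depth.
    for (size_t idx = anyPtrs.size(); idx < ptrDepth; ++idx) {
      std::string name = "any p" + std::to_string(idx + 1) + " pointer";
      anyPtrs.push_back(mdb.createTBAAScalarTypeNode(name, anyPtrs[idx - 1]));
    }
  }
  return anyPtrs[ptrDepth - 1];
}

int main() {
  llvm::LLVMContext ctx;
  llvm::MDBuilder mdb(ctx);
  llvm::MDNode *root = mdb.createTBAARoot("Simple C/C++ TBAA");
  llvm::MDNode *omnipotentChar =
      mdb.createTBAAScalarTypeNode("omnipotent char", root);

  std::vector<llvm::MDNode *> anyPtrs;
  // Builds "any pointer" <- "any p2 pointer" <- "any p3 pointer" and prints
  // the deepest node.
  getAnyPtr(mdb, anyPtrs, omnipotentChar, 3)->print(llvm::errs());
  llvm::errs() << "\n";
}
```

With pointer TBAA enabled, a concrete node such as "p2 int" is then parented on "any p2 pointer", so an access through void** (which uses the "any" node directly) may still alias an int** access, while pointers of unrelated depth or pointee type remain distinguishable.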
diff --git a/clang/lib/CodeGen/CodeGenTBAA.h b/clang/lib/CodeGen/CodeGenTBAA.h index ab3b05df7713b..0aae171d168df 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.h +++ b/clang/lib/CodeGen/CodeGenTBAA.h @@ -139,6 +139,7 @@ class CodeGenTBAA { llvm::MDNode *Root; llvm::MDNode *Char; + llvm::SmallVector AnyPtrs; /// getRoot - This is the mdnode for the root of the metadata type graph /// for this translation unit. @@ -148,6 +149,10 @@ class CodeGenTBAA { /// considered to be equivalent to it. llvm::MDNode *getChar(); + /// getAnyPtr - This is the mdnode for any pointer type of (at least) the + /// given pointer depth. + llvm::MDNode *getAnyPtr(unsigned PtrDepth = 1); + /// CollectFields - Collect information about the fields of a type for /// !tbaa.struct metadata formation. Return false for an unsupported type. bool CollectFields(uint64_t BaseOffset, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 96af466e067a8..7c50970068fa9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -511,39 +511,6 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args, } } -/// Vectorize at all optimization levels greater than 1 except for -Oz. -/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is -/// enabled. -static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (A->getOption().matches(options::OPT_O4) || - A->getOption().matches(options::OPT_Ofast)) - return true; - - if (A->getOption().matches(options::OPT_O0)) - return false; - - assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); - - // Vectorize -Os. - StringRef S(A->getValue()); - if (S == "s") - return true; - - // Don't vectorize -Oz, unless it's the slp vectorizer. - if (S == "z") - return isSlpVec; - - unsigned OptLevel = 0; - if (S.getAsInteger(10, OptLevel)) - return false; - - return OptLevel > 1; - } - - return false; -} - /// Add -x lang to \p CmdArgs for \p Input. static void addDashXForInput(const ArgList &Args, const InputInfo &Input, ArgStringList &CmdArgs) { diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2d01943ca1ac4..1a2299a92c54e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3133,3 +3133,36 @@ void tools::renderCommonIntegerOverflowOptions(const ArgList &Args, if (use_fwrapv_pointer) CmdArgs.push_back("-fwrapv-pointer"); } + +/// Vectorize at all optimization levels greater than 1 except for -Oz. +/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is +/// enabled. +bool tools::shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + return true; + + if (A->getOption().matches(options::OPT_O0)) + return false; + + assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); + + // Vectorize -Os. + StringRef S(A->getValue()); + if (S == "s") + return true; + + // Don't vectorize -Oz, unless it's the slp vectorizer. 
+ if (S == "z") + return isSlpVec; + + unsigned OptLevel = 0; + if (S.getAsInteger(10, OptLevel)) + return false; + + return OptLevel > 1; + } + + return false; +} diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index b6ddd99b87279..783a1f834b33d 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -265,6 +265,8 @@ bool shouldRecordCommandLine(const ToolChain &TC, void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); +bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args, + bool isSlpVec); } // end namespace tools } // end namespace driver } // end namespace clang diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 9ad795edd724d..45c26cf8c3159 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -149,6 +149,16 @@ void Flang::addCodegenOptions(const ArgList &Args, !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + // Enable vectorization per default according to the optimization level + // selected. For optimization levels that want vectorization we use the alias + // option to simplify the hasFlag logic. + bool enableVec = shouldEnableVectorizerAtOLevel(Args, false); + OptSpecifier vectorizeAliasOption = + enableVec ? options::OPT_O_Group : options::OPT_fvectorize; + if (Args.hasFlag(options::OPT_fvectorize, vectorizeAliasOption, + options::OPT_fno_vectorize, enableVec)) + CmdArgs.push_back("-vectorize-loops"); + if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 77833f5d1defb..e1dc728558def 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -720,7 +720,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_nested_namespace_definitions", "201411L"); Builder.defineMacro("__cpp_variadic_using", "201611L"); Builder.defineMacro("__cpp_aggregate_bases", "201603L"); - Builder.defineMacro("__cpp_structured_bindings", "202403L"); + Builder.defineMacro("__cpp_structured_bindings", "202411L"); Builder.defineMacro("__cpp_nontype_template_args", "201411L"); // (not latest) Builder.defineMacro("__cpp_fold_expressions", "201603L"); diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index 11c87e85cd497..efdc3d94ac0b3 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -150,35 +150,33 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t __idx, double __x, __builtin_bit_cast(uint64_t, __x), __width)); } -// Gets the sum of all lanes inside the warp or wavefront. 
-#define __DO_LANE_SUM(__type, __suffix) \ - _DEFAULT_FN_ATTRS static __inline__ __type __gpu_lane_sum_##__suffix( \ - uint64_t __lane_mask, __type __x) { \ - for (uint32_t __step = __gpu_num_lanes() / 2; __step > 0; __step /= 2) { \ - uint32_t __index = __step + __gpu_lane_id(); \ - __x += __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x, \ - __gpu_num_lanes()); \ - } \ - return __gpu_read_first_lane_##__suffix(__lane_mask, __x); \ - } -__DO_LANE_SUM(uint32_t, u32); // uint32_t __gpu_lane_sum_u32(m, x) -__DO_LANE_SUM(uint64_t, u64); // uint64_t __gpu_lane_sum_u64(m, x) -__DO_LANE_SUM(float, f32); // float __gpu_lane_sum_f32(m, x) -__DO_LANE_SUM(double, f64); // double __gpu_lane_sum_f64(m, x) -#undef __DO_LANE_SUM - // Gets the accumulator scan of the threads in the warp or wavefront. #define __DO_LANE_SCAN(__type, __bitmask_type, __suffix) \ _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_scan_##__suffix( \ uint64_t __lane_mask, uint32_t __x) { \ - for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \ - uint32_t __index = __gpu_lane_id() - __step; \ - __bitmask_type bitmask = __gpu_lane_id() >= __step; \ - __x += __builtin_bit_cast( \ - __type, -bitmask & __builtin_bit_cast(__bitmask_type, \ - __gpu_shuffle_idx_##__suffix( \ - __lane_mask, __index, __x, \ - __gpu_num_lanes()))); \ + uint64_t __first = __lane_mask >> __builtin_ctzll(__lane_mask); \ + bool __divergent = __gpu_read_first_lane_##__suffix( \ + __lane_mask, __first & (__first + 1)); \ + if (__divergent) { \ + __type __accum = 0; \ + for (uint64_t __mask = __lane_mask; __mask; __mask &= __mask - 1) { \ + __type __index = __builtin_ctzll(__mask); \ + __type __tmp = __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x, \ + __gpu_num_lanes()); \ + __x = __gpu_lane_id() == __index ? __accum + __tmp : __x; \ + __accum += __tmp; \ + } \ + } else { \ + for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \ + uint32_t __index = __gpu_lane_id() - __step; \ + __bitmask_type bitmask = __gpu_lane_id() >= __step; \ + __x += __builtin_bit_cast( \ + __type, \ + -bitmask & __builtin_bit_cast(__bitmask_type, \ + __gpu_shuffle_idx_##__suffix( \ + __lane_mask, __index, __x, \ + __gpu_num_lanes()))); \ + } \ } \ return __x; \ } @@ -188,6 +186,32 @@ __DO_LANE_SCAN(float, uint32_t, f32); // float __gpu_lane_scan_f32(m, x) __DO_LANE_SCAN(double, uint64_t, f64); // double __gpu_lane_scan_f64(m, x) #undef __DO_LANE_SCAN +// Gets the sum of all lanes inside the warp or wavefront. 
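+// A lane mask is "divergent" when its set bits are not contiguous from the
+// lowest active lane. The convergent case keeps the cheap butterfly
+// reduction; the divergent case reuses the prefix scan and broadcasts the
+// value held by the highest active lane, which is the total.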
+#define __DO_LANE_SUM(__type, __suffix) \ + _DEFAULT_FN_ATTRS static __inline__ __type __gpu_lane_sum_##__suffix( \ + uint64_t __lane_mask, __type __x) { \ + uint64_t __first = __lane_mask >> __builtin_ctzll(__lane_mask); \ + bool __divergent = __gpu_read_first_lane_##__suffix( \ + __lane_mask, __first & (__first + 1)); \ + if (__divergent) { \ + return __gpu_shuffle_idx_##__suffix( \ + __lane_mask, 63 - __builtin_clzll(__lane_mask), \ + __gpu_lane_scan_##__suffix(__lane_mask, __x), __gpu_num_lanes()); \ + } else { \ + for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \ + uint32_t __index = __step + __gpu_lane_id(); \ + __x += __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x, \ + __gpu_num_lanes()); \ + } \ + return __gpu_read_first_lane_##__suffix(__lane_mask, __x); \ + } \ + } +__DO_LANE_SUM(uint32_t, u32); // uint32_t __gpu_lane_sum_u32(m, x) +__DO_LANE_SUM(uint64_t, u64); // uint64_t __gpu_lane_sum_u64(m, x) +__DO_LANE_SUM(float, f32); // float __gpu_lane_sum_f32(m, x) +__DO_LANE_SUM(double, f64); // double __gpu_lane_sum_f64(m, x) +#undef __DO_LANE_SUM + _Pragma("omp end declare variant"); _Pragma("omp end declare target"); diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index d1f5fdff8b600..f03b620eee142 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -249,6 +249,28 @@ bool all(double3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_all) bool all(double4); +//===----------------------------------------------------------------------===// +// and builtins +//===----------------------------------------------------------------------===// + +/// \fn bool and(bool x, bool y) +/// \brief Logically ands two boolean vectors elementwise and produces a bool +/// vector output. + +// TODO: Clean up clang-format marker once we've resolved +// https://github.com/llvm/llvm-project/issues/127851 +// +// clang-format off +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) +bool and(bool x, bool y); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) +bool2 and(bool2 x, bool2 y); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) +bool3 and(bool3 x, bool3 y); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_and) +bool4 and(bool4 x, bool4 y); +// clang-format on + //===----------------------------------------------------------------------===// // any builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index f857a87b5f4c7..29d0adcabc82f 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -155,8 +155,11 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) { _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, uint32_t __width) { + // Mask out inactive lanes to match AMDGPU behavior. 
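+  // PTX shfl.sync yields an undefined value when the source lane is not in
+  // the member mask, so the result is forced to zero below (-__bitmask is
+  // all-ones for an active source lane and zero otherwise).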
uint32_t __mask = (uint32_t)__lane_mask; - return __nvvm_shfl_sync_idx_i32(__mask, __x, __idx, + bool __bitmask = (1ull << __idx) & __lane_mask; + return -__bitmask & + __nvvm_shfl_sync_idx_i32(__mask, __x, __idx, ((__gpu_num_lanes() - __width) << 8u) | 0x1f); } diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index afd1d7a4e36c1..145cda6c46b9b 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1655,11 +1655,20 @@ void Sema::EmitDiagnostic(unsigned DiagID, const DiagnosticBuilder &DB) { } case DiagnosticIDs::SFINAE_Suppress: + if (DiagnosticsEngine::Level Level = getDiagnostics().getDiagnosticLevel( + DiagInfo.getID(), DiagInfo.getLocation()); + Level == DiagnosticsEngine::Ignored) + return; // Make a copy of this suppressed diagnostic and store it with the // template-deduction information; if (*Info) { - (*Info)->addSuppressedDiagnostic(DiagInfo.getLocation(), - PartialDiagnostic(DiagInfo, Context.getDiagAllocator())); + (*Info)->addSuppressedDiagnostic( + DiagInfo.getLocation(), + PartialDiagnostic(DiagInfo, Context.getDiagAllocator())); + if (!Diags.getDiagnosticIDs()->isNote(DiagID)) + PrintContextStack([Info](SourceLocation Loc, PartialDiagnostic PD) { + (*Info)->addSuppressedDiagnostic(Loc, std::move(PD)); + }); } // Suppress this diagnostic. diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index 37c0546e321d9..3c2b63a70345b 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -1221,10 +1221,11 @@ void Sema::AddPragmaAttributes(Scope *S, Decl *D) { } } -void Sema::PrintPragmaAttributeInstantiationPoint() { +void Sema::PrintPragmaAttributeInstantiationPoint( + InstantiationContextDiagFuncRef DiagFunc) { assert(PragmaAttributeCurrentTargetDecl && "Expected an active declaration"); - Diags.Report(PragmaAttributeCurrentTargetDecl->getBeginLoc(), - diag::note_pragma_attribute_applied_decl_here); + DiagFunc(PragmaAttributeCurrentTargetDecl->getBeginLoc(), + PDiag(diag::note_pragma_attribute_applied_decl_here)); } void Sema::DiagnosePrecisionLossInComplexDivision() { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index fad15bf95c415..16226f1ae6550 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -225,9 +225,10 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, // emit them now. auto Pos = SuppressedDiagnostics.find(D->getCanonicalDecl()); if (Pos != SuppressedDiagnostics.end()) { - for (const PartialDiagnosticAt &Suppressed : Pos->second) - Diag(Suppressed.first, Suppressed.second); - + for (const auto &[DiagLoc, PD] : Pos->second) { + DiagnosticBuilder Builder(Diags.Report(DiagLoc, PD.getDiagID())); + PD.Emit(Builder); + } // Clear out the list of suppressed diagnostics, so that we don't emit // them again for this specialization. 
However, we don't obsolete this // entry from the table, because we want to avoid ever emitting these diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 957c3a0888438..20275ded8a561 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2245,6 +2245,20 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { break; } + case Builtin::BI__builtin_hlsl_and: { + if (SemaRef.checkArgCount(TheCall, 2)) + return true; + if (CheckVectorElementCallArgs(&SemaRef, TheCall)) + return true; + if (CheckScalarOrVector(&SemaRef, TheCall, getASTContext().BoolTy, 0)) + return true; + + ExprResult A = TheCall->getArg(0); + QualType ArgTyA = A.get()->getType(); + // return type is the same as the input type + TheCall->setType(ArgTyA); + break; + } case Builtin::BI__builtin_hlsl_all: case Builtin::BI__builtin_hlsl_any: { if (SemaRef.checkArgCount(TheCall, 1)) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index a34005bf376aa..340e51adf190d 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -4577,7 +4577,6 @@ static void TryConstructorInitialization(Sema &S, if (!IsListInit && (Kind.getKind() == InitializationKind::IK_Default || Kind.getKind() == InitializationKind::IK_Direct) && - DestRecordDecl != nullptr && !(CtorDecl->isCopyOrMoveConstructor() && CtorDecl->isImplicit()) && DestRecordDecl->isAggregate() && DestRecordDecl->hasUninitializedExplicitInitFields()) { diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 121da4916ed43..fcb7671ed92f0 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -903,7 +903,7 @@ bool Sema::InstantiatingTemplate::CheckInstantiationDepth( return true; } -void Sema::PrintInstantiationStack() { +void Sema::PrintInstantiationStack(InstantiationContextDiagFuncRef DiagFunc) { // Determine which template instantiations to skip, if any. unsigned SkipStart = CodeSynthesisContexts.size(), SkipEnd = SkipStart; unsigned Limit = Diags.getTemplateBacktraceLimit(); @@ -923,9 +923,9 @@ void Sema::PrintInstantiationStack() { if (InstantiationIdx >= SkipStart && InstantiationIdx < SkipEnd) { if (InstantiationIdx == SkipStart) { // Note that we're skipping instantiations. 
- Diags.Report(Active->PointOfInstantiation, - diag::note_instantiation_contexts_suppressed) - << unsigned(CodeSynthesisContexts.size() - Limit); + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_instantiation_contexts_suppressed) + << unsigned(CodeSynthesisContexts.size() - Limit)); } continue; } @@ -937,37 +937,34 @@ void Sema::PrintInstantiationStack() { unsigned DiagID = diag::note_template_member_class_here; if (isa(Record)) DiagID = diag::note_template_class_instantiation_here; - Diags.Report(Active->PointOfInstantiation, DiagID) - << Record << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(DiagID) << Record << Active->InstantiationRange); } else if (FunctionDecl *Function = dyn_cast(D)) { unsigned DiagID; if (Function->getPrimaryTemplate()) DiagID = diag::note_function_template_spec_here; else DiagID = diag::note_template_member_function_here; - Diags.Report(Active->PointOfInstantiation, DiagID) - << Function - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(DiagID) << Function << Active->InstantiationRange); } else if (VarDecl *VD = dyn_cast(D)) { - Diags.Report(Active->PointOfInstantiation, - VD->isStaticDataMember()? - diag::note_template_static_data_member_def_here - : diag::note_template_variable_def_here) - << VD - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(VD->isStaticDataMember() + ? diag::note_template_static_data_member_def_here + : diag::note_template_variable_def_here) + << VD << Active->InstantiationRange); } else if (EnumDecl *ED = dyn_cast(D)) { - Diags.Report(Active->PointOfInstantiation, - diag::note_template_enum_def_here) - << ED - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_template_enum_def_here) + << ED << Active->InstantiationRange); } else if (FieldDecl *FD = dyn_cast(D)) { - Diags.Report(Active->PointOfInstantiation, - diag::note_template_nsdmi_here) - << FD << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_template_nsdmi_here) + << FD << Active->InstantiationRange); } else if (ClassTemplateDecl *CTD = dyn_cast(D)) { - Diags.Report(Active->PointOfInstantiation, - diag::note_template_class_instantiation_here) - << CTD << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_template_class_instantiation_here) + << CTD << Active->InstantiationRange); } break; } @@ -979,35 +976,35 @@ void Sema::PrintInstantiationStack() { Template->printName(OS, getPrintingPolicy()); printTemplateArgumentList(OS, Active->template_arguments(), getPrintingPolicy()); - Diags.Report(Active->PointOfInstantiation, - diag::note_default_arg_instantiation_here) - << OS.str() - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_default_arg_instantiation_here) + << OS.str() << Active->InstantiationRange); break; } case CodeSynthesisContext::ExplicitTemplateArgumentSubstitution: { FunctionTemplateDecl *FnTmpl = cast(Active->Entity); - Diags.Report(Active->PointOfInstantiation, - diag::note_explicit_template_arg_substitution_here) - << FnTmpl - << getTemplateArgumentBindingsText(FnTmpl->getTemplateParameters(), - Active->TemplateArgs, - Active->NumTemplateArgs) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_explicit_template_arg_substitution_here) + << FnTmpl + << getTemplateArgumentBindingsText( + FnTmpl->getTemplateParameters(), Active->TemplateArgs, + Active->NumTemplateArgs) + 
<< Active->InstantiationRange); break; } case CodeSynthesisContext::DeducedTemplateArgumentSubstitution: { if (FunctionTemplateDecl *FnTmpl = dyn_cast<FunctionTemplateDecl>(Active->Entity)) { - Diags.Report(Active->PointOfInstantiation, - diag::note_function_template_deduction_instantiation_here) - << FnTmpl - << getTemplateArgumentBindingsText(FnTmpl->getTemplateParameters(), - Active->TemplateArgs, - Active->NumTemplateArgs) - << Active->InstantiationRange; + DiagFunc( + Active->PointOfInstantiation, + PDiag(diag::note_function_template_deduction_instantiation_here) + << FnTmpl + << getTemplateArgumentBindingsText( + FnTmpl->getTemplateParameters(), Active->TemplateArgs, + Active->NumTemplateArgs) + << Active->InstantiationRange); } else { bool IsVar = isa<VarTemplateDecl>(Active->Entity) || isa<VarTemplateSpecializationDecl>(Active->Entity); @@ -1026,12 +1023,13 @@ void Sema::PrintInstantiationStack() { llvm_unreachable("unexpected template kind"); } - Diags.Report(Active->PointOfInstantiation, - diag::note_deduced_template_arg_substitution_here) - << IsVar << IsTemplate << cast<NamedDecl>(Active->Entity) - << getTemplateArgumentBindingsText(Params, Active->TemplateArgs, - Active->NumTemplateArgs) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_deduced_template_arg_substitution_here) + << IsVar << IsTemplate << cast<NamedDecl>(Active->Entity) + << getTemplateArgumentBindingsText(Params, + Active->TemplateArgs, + Active->NumTemplateArgs) + << Active->InstantiationRange); } break; } @@ -1045,10 +1043,9 @@ void Sema::PrintInstantiationStack() { FD->printName(OS, getPrintingPolicy()); printTemplateArgumentList(OS, Active->template_arguments(), getPrintingPolicy()); - Diags.Report(Active->PointOfInstantiation, - diag::note_default_function_arg_instantiation_here) - << OS.str() - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_default_function_arg_instantiation_here) + << OS.str() << Active->InstantiationRange); break; } @@ -1065,14 +1062,13 @@ void Sema::PrintInstantiationStack() { TemplateParams = cast<ClassTemplatePartialSpecializationDecl>(Active->Template) ->getTemplateParameters(); - Diags.Report(Active->PointOfInstantiation, - diag::note_prior_template_arg_substitution) - << isa<TemplateTemplateParmDecl>(Parm) - << Name - << getTemplateArgumentBindingsText(TemplateParams, - Active->TemplateArgs, - Active->NumTemplateArgs) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_prior_template_arg_substitution) + << isa<TemplateTemplateParmDecl>(Parm) << Name + << getTemplateArgumentBindingsText(TemplateParams, + Active->TemplateArgs, + Active->NumTemplateArgs) + << Active->InstantiationRange); break; } @@ -1085,55 +1081,56 @@ void Sema::PrintInstantiationStack() { cast<ClassTemplatePartialSpecializationDecl>(Active->Template) ->getTemplateParameters(); - Diags.Report(Active->PointOfInstantiation, - diag::note_template_default_arg_checking) - << getTemplateArgumentBindingsText(TemplateParams, - Active->TemplateArgs, - Active->NumTemplateArgs) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_template_default_arg_checking) + << getTemplateArgumentBindingsText(TemplateParams, + Active->TemplateArgs, + Active->NumTemplateArgs) + << Active->InstantiationRange); break; } case CodeSynthesisContext::ExceptionSpecEvaluation: - Diags.Report(Active->PointOfInstantiation, - diag::note_evaluating_exception_spec_here) - << cast<FunctionDecl>(Active->Entity); + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_evaluating_exception_spec_here) + << cast<FunctionDecl>(Active->Entity)); break; case CodeSynthesisContext::ExceptionSpecInstantiation: -
Diags.Report(Active->PointOfInstantiation, - diag::note_template_exception_spec_instantiation_here) - << cast<FunctionDecl>(Active->Entity) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_template_exception_spec_instantiation_here) + << cast<FunctionDecl>(Active->Entity) + << Active->InstantiationRange); break; case CodeSynthesisContext::RequirementInstantiation: - Diags.Report(Active->PointOfInstantiation, - diag::note_template_requirement_instantiation_here) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_template_requirement_instantiation_here) + << Active->InstantiationRange); break; case CodeSynthesisContext::RequirementParameterInstantiation: - Diags.Report(Active->PointOfInstantiation, - diag::note_template_requirement_params_instantiation_here) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_template_requirement_params_instantiation_here) + << Active->InstantiationRange); break; case CodeSynthesisContext::NestedRequirementConstraintsCheck: - Diags.Report(Active->PointOfInstantiation, - diag::note_nested_requirement_here) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_nested_requirement_here) + << Active->InstantiationRange); break; case CodeSynthesisContext::DeclaringSpecialMember: - Diags.Report(Active->PointOfInstantiation, - diag::note_in_declaration_of_implicit_special_member) - << cast<CXXRecordDecl>(Active->Entity) - << llvm::to_underlying(Active->SpecialMember); + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_in_declaration_of_implicit_special_member) + << cast<CXXRecordDecl>(Active->Entity) + << llvm::to_underlying(Active->SpecialMember)); break; case CodeSynthesisContext::DeclaringImplicitEqualityComparison: - Diags.Report(Active->Entity->getLocation(), - diag::note_in_declaration_of_implicit_equality_comparison); + DiagFunc( + Active->Entity->getLocation(), + PDiag(diag::note_in_declaration_of_implicit_equality_comparison)); break; case CodeSynthesisContext::DefiningSynthesizedFunction: { @@ -1144,60 +1141,62 @@ void Sema::PrintInstantiationStack() { FD ?
getDefaultedFunctionKind(FD) : DefaultedFunctionKind(); if (DFK.isSpecialMember()) { auto *MD = cast<CXXMethodDecl>(FD); - Diags.Report(Active->PointOfInstantiation, - diag::note_member_synthesized_at) - << MD->isExplicitlyDefaulted() - << llvm::to_underlying(DFK.asSpecialMember()) - << Context.getTagDeclType(MD->getParent()); + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_member_synthesized_at) + << MD->isExplicitlyDefaulted() + << llvm::to_underlying(DFK.asSpecialMember()) + << Context.getTagDeclType(MD->getParent())); } else if (DFK.isComparison()) { QualType RecordType = FD->getParamDecl(0) ->getType() .getNonReferenceType() .getUnqualifiedType(); - Diags.Report(Active->PointOfInstantiation, - diag::note_comparison_synthesized_at) - << (int)DFK.asComparison() << RecordType; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_comparison_synthesized_at) + << (int)DFK.asComparison() << RecordType); } break; } case CodeSynthesisContext::RewritingOperatorAsSpaceship: - Diags.Report(Active->Entity->getLocation(), - diag::note_rewriting_operator_as_spaceship); + DiagFunc(Active->Entity->getLocation(), + PDiag(diag::note_rewriting_operator_as_spaceship)); break; case CodeSynthesisContext::InitializingStructuredBinding: - Diags.Report(Active->PointOfInstantiation, - diag::note_in_binding_decl_init) - << cast<BindingDecl>(Active->Entity); + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_in_binding_decl_init) + << cast<BindingDecl>(Active->Entity)); break; case CodeSynthesisContext::MarkingClassDllexported: - Diags.Report(Active->PointOfInstantiation, - diag::note_due_to_dllexported_class) - << cast<CXXRecordDecl>(Active->Entity) << !getLangOpts().CPlusPlus11; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_due_to_dllexported_class) + << cast<CXXRecordDecl>(Active->Entity) + << !getLangOpts().CPlusPlus11); break; case CodeSynthesisContext::BuildingBuiltinDumpStructCall: - Diags.Report(Active->PointOfInstantiation, - diag::note_building_builtin_dump_struct_call) - << convertCallArgsToString( - *this, llvm::ArrayRef(Active->CallArgs, Active->NumCallArgs)); + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_building_builtin_dump_struct_call) + << convertCallArgsToString( + *this, llvm::ArrayRef(Active->CallArgs, + Active->NumCallArgs))); break; case CodeSynthesisContext::Memoization: break; case CodeSynthesisContext::LambdaExpressionSubstitution: - Diags.Report(Active->PointOfInstantiation, - diag::note_lambda_substitution_here); + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_lambda_substitution_here)); break; case CodeSynthesisContext::ConstraintsCheck: { unsigned DiagID = 0; if (!Active->Entity) { - Diags.Report(Active->PointOfInstantiation, - diag::note_nested_requirement_here) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_nested_requirement_here) + << Active->InstantiationRange); break; } if (isa<ConceptDecl>(Active->Entity)) @@ -1219,42 +1218,44 @@ void Sema::PrintInstantiationStack() { printTemplateArgumentList(OS, Active->template_arguments(), getPrintingPolicy()); } - Diags.Report(Active->PointOfInstantiation, DiagID) << OS.str() - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(DiagID) << OS.str() << Active->InstantiationRange); break; } case CodeSynthesisContext::ConstraintSubstitution: - Diags.Report(Active->PointOfInstantiation, - diag::note_constraint_substitution_here) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_constraint_substitution_here) + << Active->InstantiationRange);
break; case CodeSynthesisContext::ConstraintNormalization: - Diags.Report(Active->PointOfInstantiation, - diag::note_constraint_normalization_here) - << cast<NamedDecl>(Active->Entity) << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_constraint_normalization_here) + << cast<NamedDecl>(Active->Entity) + << Active->InstantiationRange); break; case CodeSynthesisContext::ParameterMappingSubstitution: - Diags.Report(Active->PointOfInstantiation, - diag::note_parameter_mapping_substitution_here) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_parameter_mapping_substitution_here) + << Active->InstantiationRange); break; case CodeSynthesisContext::BuildingDeductionGuides: - Diags.Report(Active->PointOfInstantiation, - diag::note_building_deduction_guide_here); + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_building_deduction_guide_here)); break; case CodeSynthesisContext::TypeAliasTemplateInstantiation: - Diags.Report(Active->PointOfInstantiation, - diag::note_template_type_alias_instantiation_here) - << cast<TypeAliasTemplateDecl>(Active->Entity) - << Active->InstantiationRange; + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_template_type_alias_instantiation_here) + << cast<TypeAliasTemplateDecl>(Active->Entity) + << Active->InstantiationRange); break; case CodeSynthesisContext::PartialOrderingTTP: - Diags.Report(Active->PointOfInstantiation, - diag::note_template_arg_template_params_mismatch); + DiagFunc(Active->PointOfInstantiation, + PDiag(diag::note_template_arg_template_params_mismatch)); if (SourceLocation ParamLoc = Active->Entity->getLocation(); ParamLoc.isValid()) - Diags.Report(ParamLoc, diag::note_template_prev_declaration) - << /*isTemplateTemplateParam=*/true << Active->InstantiationRange; + DiagFunc(ParamLoc, PDiag(diag::note_template_prev_declaration) + << /*isTemplateTemplateParam=*/true + << Active->InstantiationRange); break; } }
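The Sema change above is mechanical: every `Diags.Report(Loc, ID) << Args;` becomes `DiagFunc(Loc, PDiag(ID) << Args);`, so the instantiation stack is rendered through a caller-supplied callback instead of being emitted directly. A minimal standalone sketch of that pattern (simplified stand-in types, not the actual Sema/PartialDiagnostic interfaces):

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    // Stand-ins for SourceLocation and PartialDiagnostic.
    struct Loc { int Line; };
    struct Diag {
      int ID;
      std::vector<std::string> Args;
      Diag &operator<<(std::string A) {
        Args.push_back(std::move(A));
        return *this;
      }
    };

    // The printer takes a callback instead of a diagnostics engine, so a
    // caller can emit the notes immediately, buffer them, or reattach them
    // to a different primary diagnostic.
    using DiagFuncTy = std::function<void(Loc, const Diag &)>;

    void printInstantiationStack(const DiagFuncTy &DiagFunc) {
      Diag D{/*ID=*/42, {}};
      DiagFunc(Loc{10}, D << "while substituting template arguments");
    }

    int main() {
      printInstantiationStack([](Loc L, const Diag &D) {
        std::cout << "note:" << L.Line << ": id " << D.ID << '\n';
      });
    }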
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefMemberChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefMemberChecker.cpp index 79f88553feb95..963f59831c8ed 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefMemberChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefMemberChecker.cpp @@ -61,6 +61,11 @@ class RawPtrRefMemberChecker Checker->visitRecordDecl(RD); return true; } + + bool VisitObjCContainerDecl(const ObjCContainerDecl *CD) override { + Checker->visitObjCDecl(CD); + return true; + } }; LocalVisitor visitor(this); @@ -87,6 +92,31 @@ class RawPtrRefMemberChecker } } + void visitObjCDecl(const ObjCContainerDecl *CD) const { + if (auto *ID = dyn_cast<ObjCInterfaceDecl>(CD)) { + for (auto *Ivar : ID->ivars()) + visitIvarDecl(CD, Ivar); + return; + } + if (auto *ID = dyn_cast<ObjCImplementationDecl>(CD)) { + for (auto *Ivar : ID->ivars()) + visitIvarDecl(CD, Ivar); + return; + } + } + + void visitIvarDecl(const ObjCContainerDecl *CD, + const ObjCIvarDecl *Ivar) const { + const Type *IvarType = Ivar->getType().getTypePtrOrNull(); + if (!IvarType) + return; + if (auto *IvarCXXRD = IvarType->getPointeeCXXRecordDecl()) { + std::optional<bool> IsCompatible = isPtrCompatible(IvarCXXRD); + if (IsCompatible && *IsCompatible) + reportBug(Ivar, IvarType, IvarCXXRD, CD); + } + } + bool shouldSkipDecl(const RecordDecl *RD) const { if (!RD->isThisDeclarationADefinition()) return true; @@ -121,9 +151,10 @@ class RawPtrRefMemberChecker return false; } - void reportBug(const FieldDecl *Member, const Type *MemberType, + template <typename DeclType, typename ParentDeclType> + void reportBug(const DeclType *Member, const Type *MemberType, const CXXRecordDecl *MemberCXXRD, - const RecordDecl *ClassCXXRD) const { + const ParentDeclType *ClassCXXRD) const { assert(Member); assert(MemberType); assert(MemberCXXRD); @@ -131,7 +162,10 @@ class RawPtrRefMemberChecker SmallString<100> Buf; llvm::raw_svector_ostream Os(Buf); - Os << "Member variable "; + if (isa<ObjCContainerDecl>(ClassCXXRD)) + Os << "Instance variable "; + else + Os << "Member variable "; printQuotedName(Os, Member); Os << " in "; printQuotedQualifiedName(Os, ClassCXXRD); diff --git a/clang/test/Analysis/Checkers/WebKit/unchecked-members-objc.mm b/clang/test/Analysis/Checkers/WebKit/unchecked-members-objc.mm new file mode 100644 index 0000000000000..a9a9a367fb9f4 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/unchecked-members-objc.mm @@ -0,0 +1,35 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.NoUncheckedPtrMemberChecker -verify %s + +#include "mock-types.h" + +__attribute__((objc_root_class)) +@interface NSObject ++ (instancetype) alloc; +- (instancetype) init; +- (instancetype)retain; +- (void)release; +@end + +void doSomeWork(); + +@interface SomeObjC : NSObject { + CheckedObj* _unchecked1; +// expected-warning@-1{{Instance variable '_unchecked1' in 'SomeObjC' is a raw pointer to CheckedPtr capable type 'CheckedObj'}} + CheckedPtr<CheckedObj> _counted1; + [[clang::suppress]] CheckedObj* _unchecked2; +} +- (void)doWork; +@end + +@implementation SomeObjC { + CheckedObj* _unchecked3; +// expected-warning@-1{{Instance variable '_unchecked3' in 'SomeObjC' is a raw pointer to CheckedPtr capable type 'CheckedObj'}} + CheckedPtr<CheckedObj> _counted2; + [[clang::suppress]] CheckedObj* _unchecked4; +} + +- (void)doWork { + doSomeWork(); +} + +@end diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-members-objc.mm b/clang/test/Analysis/Checkers/WebKit/uncounted-members-objc.mm new file mode 100644 index 0000000000000..83b08a6841d26 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-members-objc.mm @@ -0,0 +1,35 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=webkit.NoUncountedMemberChecker -verify %s + +#include "mock-types.h" + +__attribute__((objc_root_class)) +@interface NSObject ++ (instancetype) alloc; +- (instancetype) init; +- (instancetype)retain; +- (void)release; +@end + +void doSomeWork(); + +@interface SomeObjC : NSObject { + RefCountable* _uncounted1; +// expected-warning@-1{{Instance variable '_uncounted1' in 'SomeObjC' is a raw pointer to ref-countable type 'RefCountable'}} + RefPtr<RefCountable> _counted1; + [[clang::suppress]] RefCountable* _uncounted2; +} +- (void)doWork; +@end + +@implementation SomeObjC { + RefCountable* _uncounted3; +// expected-warning@-1{{Instance variable '_uncounted3' in 'SomeObjC' is a raw pointer to ref-countable type 'RefCountable'}} + RefPtr<RefCountable> _counted2; + [[clang::suppress]] RefCountable* _uncounted4; +} + +- (void)doWork { + doSomeWork(); +} + +@end
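Both new tests exercise the same shape of code. A reduced C++ analogue of what the checker accepts and flags, now applied to Objective-C ivars as well (RefPtr/RefCountable are sketched here by hand; the tests get them from mock-types.h):

    // Minimal WebKit-style smart pointer and ref-countable type, for
    // illustration only.
    template <typename T> class RefPtr {
      T *Ptr = nullptr;

    public:
      RefPtr() = default;
      explicit RefPtr(T *P) : Ptr(P) {
        if (Ptr)
          Ptr->ref();
      }
      ~RefPtr() {
        if (Ptr)
          Ptr->deref();
      }
      T *get() const { return Ptr; }
    };

    struct RefCountable {
      void ref();
      void deref();
    };

    struct GoodOwner {
      RefPtr<RefCountable> Counted; // fine: lifetime is managed
    };

    struct BadOwner {
      RefCountable *Uncounted; // flagged: raw pointer to a ref-countable type
    };

With this patch the same rule covers ivars declared in @interface and @implementation blocks, and the diagnostic then says "Instance variable" rather than "Member variable".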
diff --git a/clang/test/CIR/emit-actions.cpp b/clang/test/CIR/emit-actions.cpp index 94ddf23b34753..3cfb3d32e8e2c 100644 --- a/clang/test/CIR/emit-actions.cpp +++ b/clang/test/CIR/emit-actions.cpp @@ -11,7 +11,7 @@ int x = 1; -// BC: @x = dso_local global i32 1 +// BC: @x = {{(dso_local )?}}global i32 1 // ASM: x: // ASM: .long 1 diff --git a/clang/test/CIR/func-simple.cpp b/clang/test/CIR/func-simple.cpp new file mode 100644 index 0000000000000..10c49bc506c87 --- /dev/null +++ b/clang/test/CIR/func-simple.cpp @@ -0,0 +1,53 @@ +// Simple functions +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +void empty() { } +// CHECK: cir.func @empty() -> !cir.void { +// CHECK: cir.return +// CHECK: } + +void voidret() { return; } +// CHECK: cir.func @voidret() -> !cir.void { +// CHECK: cir.return +// CHECK: } + +int intfunc() { return 42; } +// CHECK: cir.func @intfunc() -> !cir.int<s, 32> { +// CHECK: %0 = cir.const #cir.int<42> : !cir.int<s, 32> +// CHECK: cir.return %0 : !cir.int<s, 32> +// CHECK: } + +int scopes() { + { + { + return 99; + } + } +} +// CHECK: cir.func @scopes() -> !cir.int<s, 32> { +// CHECK: cir.scope { +// CHECK: cir.scope { +// CHECK: %0 = cir.const #cir.int<99> : !cir.int<s, 32> +// CHECK: cir.return %0 : !cir.int<s, 32> +// CHECK: } +// CHECK: } +// CHECK: cir.trap +// CHECK: } + +long longfunc() { return 42l; } +// CHECK: cir.func @longfunc() -> !cir.int<s, 64> +// CHECK: %0 = cir.const #cir.int<42> : !cir.int<s, 64> +// CHECK: cir.return %0 : !cir.int<s, 64> +// CHECK: } + +unsigned unsignedfunc() { return 42u; } +// CHECK: cir.func @unsignedfunc() -> !cir.int<u, 32> +// CHECK: %0 = cir.const #cir.int<42> : !cir.int<u, 32> +// CHECK: cir.return %0 : !cir.int<u, 32> +// CHECK: } + +unsigned long long ullfunc() { return 42ull; } +// CHECK: cir.func @ullfunc() -> !cir.int<u, 64> +// CHECK: %0 = cir.const #cir.int<42> : !cir.int<u, 64> +// CHECK: cir.return %0 : !cir.int<u, 64> +// CHECK: } diff --git a/clang/test/CIR/global-var-simple.cpp b/clang/test/CIR/global-var-simple.cpp index ffcc3ef71a6c7..237070a5b7564 100644 --- a/clang/test/CIR/global-var-simple.cpp +++ b/clang/test/CIR/global-var-simple.cpp @@ -1,4 +1,4 @@ -// Global variables of intergal types +// Global variables of scalar types with initial values // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s char c; diff --git a/clang/test/CIR/hello.c b/clang/test/CIR/hello.c deleted file mode 100644 index 4b07c04994aa8..0000000000000 --- a/clang/test/CIR/hello.c +++ /dev/null @@ -1,5 +0,0 @@ -// Smoke test for ClangIR code generation -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s - -void foo() {} -// CHECK: cir.func @foo diff --git a/clang/test/CXX/drs/cwg0xx.cpp b/clang/test/CXX/drs/cwg0xx.cpp index 44a0eb520af22..282e71bbf3bda 100644 --- a/clang/test/CXX/drs/cwg0xx.cpp +++ b/clang/test/CXX/drs/cwg0xx.cpp @@ -1041,12 +1041,15 @@ namespace cwg62 { // cwg62: 2.9 NoNameForLinkagePtr p1 = get(); // cxx98-error@-1 {{template argument uses unnamed type}} // cxx98-note@#cwg62-unnamed {{unnamed type used in template argument was declared here}} + // cxx98-note@-3 {{while substituting explicitly-specified template arguments}} NoNameForLinkagePtr p2 = get(); // cxx98-error@-1 {{template argument uses unnamed type}} // cxx98-note@#cwg62-unnamed {{unnamed type used in template argument was declared here}} + // cxx98-note@-3 {{while substituting explicitly-specified template arguments}} int n1 = take(noNameForLinkagePtr); // cxx98-error@-1 {{template argument uses unnamed type}} // cxx98-note@#cwg62-unnamed {{unnamed type used in template argument was declared here}} + // cxx98-note@-3 {{while substituting deduced template arguments}} X x4; @@ -1058,8 +1061,10 @@ namespace cwg62 { // cwg62: 2.9 // cxx98-error@-1 {{template argument uses local type }} get(); // cxx98-error@-1 {{template argument uses local type }} + // cxx98-note@-2 {{while substituting explicitly-specified template arguments}} get(); // cxx98-error@-1 {{template argument uses local type }} + // cxx98-note@-2 {{while substituting explicitly-specified template arguments}} X c; // cxx98-error@-1 {{template argument uses local type }} X d;
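The extra `cxx98-note` lines in these DR tests come from the new "while substituting ... template arguments" notes that the instantiation stack now prints. A minimal reproducer (illustrative; the note wording is abbreviated in the test expectations):

    // Compiled as C++98 (-std=c++98 -pedantic-errors), a local type is not a
    // valid template argument; the error is now followed by a note pointing
    // at the call that drove the substitution.
    template <typename T> void f(T) {}

    void g() {
      struct Local {} x;
      f(x); // error: template argument uses local type 'Local'
            // note: while substituting deduced template arguments into
            // function template 'f'
    }

When the offending type is written in an explicit template argument list instead (as in the `get<...>()` calls in cwg62 above), the "explicitly-specified" flavour of the note is emitted.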
diff --git a/clang/test/CXX/drs/cwg158.cpp b/clang/test/CXX/drs/cwg158.cpp index 1f5c319e6bd25..a083ded82bb2e 100644 --- a/clang/test/CXX/drs/cwg158.cpp +++ b/clang/test/CXX/drs/cwg158.cpp @@ -42,5 +42,6 @@ const int * h(const int * (*p)[10], int *(*q)[9]) { } // POINTER-TBAA: [[PTRARRAY_TBAA]] = !{[[PTRARRAY_TY:!.+]], [[PTRARRAY_TY]], i64 0} -// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYPTR:!.+]], i64 0} +// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYP2PTR:!.+]], i64 0} +// POINTER-TBAA: [[ANYP2PTR]] = !{!"any p2 pointer", [[ANYPTR:!.+]], // POINTER-TBAA: [[ANYPTR]] = !{!"any pointer" diff --git a/clang/test/CXX/drs/cwg4xx.cpp b/clang/test/CXX/drs/cwg4xx.cpp index bcaf7db04ad3b..0debc104ac45b 100644 --- a/clang/test/CXX/drs/cwg4xx.cpp +++ b/clang/test/CXX/drs/cwg4xx.cpp @@ -20,7 +20,7 @@ namespace cwg400 { // cwg400: 2.7 struct A { int a; struct a {}; }; // #cwg400-A struct B { int a; struct a {}; }; // #cwg400-B struct C : A, B { using A::a; struct a b; }; - struct D : A, B { + struct D : A, B { using A::a; // FIXME: we should issue a single diagnostic using B::a; // #cwg400-using-B-a @@ -1386,6 +1386,7 @@ namespace cwg488 { // cwg488: 2.9 c++11 enum E { e }; f(e); // cxx98-error@-1 {{template argument uses local type 'E'}} + // cxx98-note@-2 {{while substituting deduced template arguments}} } } // namespace cwg488 diff --git a/clang/test/CXX/temp/temp.arg/temp.arg.type/p2.cpp b/clang/test/CXX/temp/temp.arg/temp.arg.type/p2.cpp index 1e314da313990..650f8585b115a 100644 --- a/clang/test/CXX/temp/temp.arg/temp.arg.type/p2.cpp +++ b/clang/test/CXX/temp/temp.arg/temp.arg.type/p2.cpp @@ -15,14 +15,13 @@ B b; // expected-note{{instantiation of}} template<typename T> int f0(void *, const T&); // expected-note{{candidate template ignored: substitution failure}} enum {e}; -#if __cplusplus <= 199711L -// expected-note@-2 {{unnamed type used in template argument was declared here}} -#endif +// expected-note@-1 {{unnamed type used in template argument was declared here}} void test_f0(int n) { // #here int i = f0(0, e); #if __cplusplus <= 199711L // expected-warning@-2 {{template argument uses unnamed type}} + // expected-note@-3 {{while substituting deduced template arguments}} #endif int vla[n]; // expected-warning {{variable length arrays in C++ are a Clang extension}} @@ -59,21 +58,25 @@ namespace N0 { f0( #if __cplusplus <= 199711L // expected-warning@-2 {{template argument uses unnamed type}} + // expected-note@-3 {{while substituting deduced template arguments}} #endif &f1<__typeof__(e1)>); #if __cplusplus <= 199711L // expected-warning@-2 {{template argument uses unnamed type}} + // expected-note@-3 {{while substituting explicitly-specified template arguments}} #endif int (*fp1)(int, __typeof__(e2)) = f1; #if __cplusplus <= 199711L // expected-warning@-2 {{template argument uses unnamed type}} + // expected-note@-3 {{while substituting deduced template arguments}} #endif f1(e2); #if __cplusplus <= 199711L // expected-warning@-2 {{template argument uses unnamed type}} + // expected-note@-3 {{while substituting deduced template arguments}} #endif f1(e2); diff --git a/clang/test/CodeGen/X86/math-builtins.c b/clang/test/CodeGen/X86/math-builtins.c index d5301b7bafd9c..d7bf7d57fba26 100644 --- a/clang/test/CodeGen/X86/math-builtins.c +++ b/clang/test/CodeGen/X86/math-builtins.c @@ -38,24 +38,6 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { // NO__ERRNO-NEXT: [[FREXP_F128_0:%.+]] = extractvalue { fp128, i32 } [[FREXP_F128]], 0 -// NO__ERRNO: [[MODF_F64:%.+]] = call { double, double }
@llvm.modf.f64(double %{{.+}}) -// NO__ERRNO-NEXT: [[MODF_F64_FP:%.+]] = extractvalue { double, double } [[MODF_F64]], 0 -// NO__ERRNO-NEXT: [[MODF_F64_IP:%.+]] = extractvalue { double, double } [[MODF_F64]], 1 -// NO__ERRNO-NEXT: store double [[MODF_F64_IP]], ptr %{{.+}}, align 8 - -// NO__ERRNO: [[MODF_F32:%.+]] = call { float, float } @llvm.modf.f32(float %{{.+}}) -// NO__ERRNO-NEXT: [[MODF_F32_FP:%.+]] = extractvalue { float, float } [[MODF_F32]], 0 -// NO__ERRNO-NEXT: [[MODF_F32_IP:%.+]] = extractvalue { float, float } [[MODF_F32]], 1 -// NO__ERRNO-NEXT: store float [[MODF_F32_IP]], ptr %{{.+}}, align 4 - -// NO__ERRNO: [[MODF_F80:%.+]] = call { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80 %{{.+}}) -// NO__ERRNO-NEXT: [[MODF_F80_FP:%.+]] = extractvalue { x86_fp80, x86_fp80 } [[MODF_F80]], 0 -// NO__ERRNO-NEXT: [[MODF_F80_IP:%.+]] = extractvalue { x86_fp80, x86_fp80 } [[MODF_F80]], 1 -// NO__ERRNO-NEXT: store x86_fp80 [[MODF_F80_IP]], ptr %{{.+}}, align 16 - -// NO__ERRNO: call fp128 @modff128(fp128 noundef %{{.+}}, ptr noundef %{{.+}}) - - // NO__ERRNO: [[SINCOS_F64:%.+]] = call { double, double } @llvm.sincos.f64(double %{{.+}}) // NO__ERRNO-NEXT: [[SINCOS_F64_0:%.+]] = extractvalue { double, double } [[SINCOS_F64]], 0 // NO__ERRNO-NEXT: [[SINCOS_F64_1:%.+]] = extractvalue { double, double } [[SINCOS_F64]], 1 @@ -157,13 +139,13 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { __builtin_modf(f,d); __builtin_modff(f,fp); __builtin_modfl(f,l); __builtin_modff128(f,l); -// NO__ERRNO: declare { double, double } @llvm.modf.f64(double) [[READNONE_INTRINSIC]] -// NO__ERRNO: declare { float, float } @llvm.modf.f32(float) [[READNONE_INTRINSIC]] -// NO__ERRNO: declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80) [[READNONE_INTRINSIC]] -// NO__ERRNO: declare fp128 @modff128(fp128 noundef, ptr noundef) [[NOT_READNONE:#[0-9]+]] -// HAS_ERRNO: declare { double, double } @llvm.modf.f64(double) [[READNONE_INTRINSIC]] -// HAS_ERRNO: declare { float, float } @llvm.modf.f32(float) [[READNONE_INTRINSIC]] -// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80) [[READNONE_INTRINSIC]] +// NO__ERRNO: declare double @modf(double noundef, ptr noundef) [[NOT_READNONE:#[0-9]+]] +// NO__ERRNO: declare float @modff(float noundef, ptr noundef) [[NOT_READNONE]] +// NO__ERRNO: declare x86_fp80 @modfl(x86_fp80 noundef, ptr noundef) [[NOT_READNONE]] +// NO__ERRNO: declare fp128 @modff128(fp128 noundef, ptr noundef) [[NOT_READNONE]] +// HAS_ERRNO: declare double @modf(double noundef, ptr noundef) [[NOT_READNONE]] +// HAS_ERRNO: declare float @modff(float noundef, ptr noundef) [[NOT_READNONE]] +// HAS_ERRNO: declare x86_fp80 @modfl(x86_fp80 noundef, ptr noundef) [[NOT_READNONE]] // HAS_ERRNO: declare fp128 @modff128(fp128 noundef, ptr noundef) [[NOT_READNONE]] __builtin_nan(c); __builtin_nanf(c); __builtin_nanl(c); __builtin_nanf128(c); diff --git a/clang/test/CodeGen/aix-builtin-mapping.c b/clang/test/CodeGen/aix-builtin-mapping.c index cc1cc1a44f32c..a79218c6f1d8b 100644 --- a/clang/test/CodeGen/aix-builtin-mapping.c +++ b/clang/test/CodeGen/aix-builtin-mapping.c @@ -17,6 +17,6 @@ int main() returnValue = __builtin_ldexpl(1.0L, 1); } -// CHECK: %{{.+}} = call { double, double } @llvm.modf.f64(double 1.000000e+00) +// CHECK: %call = call double @modf(double noundef 1.000000e+00, ptr noundef %DummyLongDouble) #3 // CHECK: %{{.+}} = call { double, i32 } @llvm.frexp.f64.i32(double 0.000000e+00) // CHECK: %{{.+}} = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 
1) diff --git a/clang/test/CodeGen/builtin-attributes.c b/clang/test/CodeGen/builtin-attributes.c index 506b165fcf36e..e5b0faccfd23f 100644 --- a/clang/test/CodeGen/builtin-attributes.c +++ b/clang/test/CodeGen/builtin-attributes.c @@ -24,11 +24,6 @@ char* f2(char* a, char* b) { return __builtin_strstr(a, b); } -// Note: Use asm label to disable intrinsic lowering of modf. -double modf(double x, double*) asm("modf"); -float modff(float x, float*) asm("modff"); -long double modfl(long double x, long double*) asm("modfl"); - // frexp is NOT readnone. It writes to its pointer argument. // // CHECK: f3 @@ -60,9 +55,9 @@ int f3(double x) { frexp(x, &e); frexpf(x, &e); frexpl(x, &e); - modf(x, &e); - modff(x, &e); - modfl(x, &e); + __builtin_modf(x, &e); + __builtin_modff(x, &e); + __builtin_modfl(x, &e); __builtin_remquo(x, x, &e); __builtin_remquof(x, x, &e); __builtin_remquol(x, x, &e); diff --git a/clang/test/Analysis/builtin_signbit.cpp b/clang/test/CodeGen/builtin_signbit.cpp similarity index 100% rename from clang/test/Analysis/builtin_signbit.cpp rename to clang/test/CodeGen/builtin_signbit.cpp diff --git a/clang/test/CodeGen/math-builtins-long.c b/clang/test/CodeGen/math-builtins-long.c index 87e64a2eaa1c3..183349e0f0173 100644 --- a/clang/test/CodeGen/math-builtins-long.c +++ b/clang/test/CodeGen/math-builtins-long.c @@ -58,9 +58,9 @@ void foo(long double f, long double *l, int *i, const char *c) { // PPCF128: call fp128 @ldexpf128(fp128 noundef %{{.+}}, {{(signext)?.+}}) __builtin_ldexpl(f,f); - // F80: call { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80 %{{.+}}) - // PPC: call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %{{.+}}) - // X86F128: call { fp128, fp128 } @llvm.modf.f128(fp128 %{{.+}}) + // F80: call x86_fp80 @modfl(x86_fp80 noundef %{{.+}}, ptr noundef %{{.+}}) + // PPC: call ppc_fp128 @modfl(ppc_fp128 noundef %{{.+}}, ptr noundef %{{.+}}) + // X86F128: call fp128 @modfl(fp128 noundef %{{.+}}, ptr noundef %{{.+}}) // PPCF128: call fp128 @modff128(fp128 noundef %{{.+}}, ptr noundef %{{.+}}) __builtin_modfl(f,l); diff --git a/clang/test/CodeGen/math-libcalls.c b/clang/test/CodeGen/math-libcalls.c index ad297828f48ed..14fdee77f4d78 100644 --- a/clang/test/CodeGen/math-libcalls.c +++ b/clang/test/CodeGen/math-libcalls.c @@ -83,12 +83,12 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { modf(f,d); modff(f,fp); modfl(f,l); - // NO__ERRNO: declare { double, double } @llvm.modf.f64(double) [[READNONE_INTRINSIC]] - // NO__ERRNO: declare { float, float } @llvm.modf.f32(float) [[READNONE_INTRINSIC]] - // NO__ERRNO: declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80) [[READNONE_INTRINSIC]] - // HAS_ERRNO: declare { double, double } @llvm.modf.f64(double) [[READNONE_INTRINSIC]] - // HAS_ERRNO: declare { float, float } @llvm.modf.f32(float) [[READNONE_INTRINSIC]] - // HAS_ERRNO: declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80) [[READNONE_INTRINSIC]] + // NO__ERRNO: declare double @modf(double noundef, ptr noundef) [[NOT_READNONE]] + // NO__ERRNO: declare float @modff(float noundef, ptr noundef) [[NOT_READNONE]] + // NO__ERRNO: declare x86_fp80 @modfl(x86_fp80 noundef, ptr noundef) [[NOT_READNONE]] + // HAS_ERRNO: declare double @modf(double noundef, ptr noundef) [[NOT_READNONE]] + // HAS_ERRNO: declare float @modff(float noundef, ptr noundef) [[NOT_READNONE]] + // HAS_ERRNO: declare x86_fp80 @modfl(x86_fp80 noundef, ptr noundef) [[NOT_READNONE]] // HAS_MAYTRAP: declare double @modf(double noundef, ptr noundef) [[NOT_READNONE]] // 
HAS_MAYTRAP: declare float @modff(float noundef, ptr noundef) [[NOT_READNONE]] // HAS_MAYTRAP: declare x86_fp80 @modfl(x86_fp80 noundef, ptr noundef) [[NOT_READNONE]] diff --git a/clang/test/CodeGen/tbaa-pointers.c b/clang/test/CodeGen/tbaa-pointers.c index 48adac503357f..9cfaa0a47af6e 100644 --- a/clang/test/CodeGen/tbaa-pointers.c +++ b/clang/test/CodeGen/tbaa-pointers.c @@ -208,8 +208,10 @@ int void_ptrs(void **ptr) { // COMMON-LABEL: define i32 @void_ptrs( // COMMON-SAME: ptr noundef [[PTRA:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 -// COMMON-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] -// COMMON-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] +// DISABLE-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] +// DEFAULT-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2:!.+]] +// DISABLE-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] +// DEFAULT-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2]] // COMMON-NEXT: [[L1:%.+]] = load ptr, ptr [[L0]], align 8, !tbaa [[ANYPTR]] // COMMON-NEXT: [[BOOL:%.+]] = icmp ne ptr [[L1]], null // COMMON-NEXT: [[BOOL_EXT:%.+]] = zext i1 [[BOOL]] to i64 @@ -220,7 +222,8 @@ int void_ptrs(void **ptr) { } // DEFAULT: [[P2INT_0]] = !{[[P2INT:!.+]], [[P2INT]], i64 0} -// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_POINTER:!.+]], i64 0} +// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_P2_POINTER:!.+]], i64 0} +// DEFAULT: [[ANY_P2_POINTER]] = !{!"any p2 pointer", [[ANY_POINTER:!.+]], i64 0} // DISABLE: [[ANYPTR]] = !{[[ANY_POINTER:!.+]], [[ANY_POINTER]], i64 0} // COMMON: [[ANY_POINTER]] = !{!"any pointer", [[CHAR:!.+]], i64 0} // COMMON: [[CHAR]] = !{!"omnipotent char", [[TBAA_ROOT:!.+]], i64 0} @@ -228,17 +231,19 @@ int void_ptrs(void **ptr) { // DEFAULT: [[P1INT_0]] = !{[[P1INT:!.+]], [[P1INT]], i64 0} // DEFAULT: [[P1INT]] = !{!"p1 int", [[ANY_POINTER]], i64 0} // DEFAULT: [[P3INT_0]] = !{[[P3INT:!.+]], [[P3INT]], i64 0} -// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_P3_POINTER:!.+]], i64 0} +// DEFAULT: [[ANY_P3_POINTER]] = !{!"any p3 pointer", [[ANY_P2_POINTER]], i64 0} // DEFAULT: [[P4CHAR_0]] = !{[[P4CHAR:!.+]], [[P4CHAR]], i64 0} -// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_P4_POINTER:!.*]], i64 0} +// DEFAULT: [[ANY_P4_POINTER]] = !{!"any p4 pointer", [[ANY_P3_POINTER]], i64 0} // DEFAULT: [[P3CHAR_0]] = !{[[P3CHAR:!.+]], [[P3CHAR]], i64 0} -// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_P3_POINTER]], i64 0} // DEFAULT: [[P2CHAR_0]] = !{[[P2CHAR:!.+]], [[P2CHAR]], i64 0} -// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_P2_POINTER]], i64 0} // DEFAULT: [[P1CHAR_0]] = !{[[P1CHAR:!.+]], [[P1CHAR]], i64 0} // DEFAULT: [[P1CHAR]] = !{!"p1 omnipotent char", [[ANY_POINTER]], i64 0} // DEFAULT: [[P2S1_TAG]] = !{[[P2S1:!.+]], [[P2S1]], i64 0} -// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_P2_POINTER]], i64 0} // DEFAULT: [[P1S1_TAG:!.+]] = !{[[P1S1:!.+]], [[P1S1]], i64 0} // DEFAULT: [[P1S1]] = !{!"p1 _ZTS2S1", [[ANY_POINTER]], i64 0} // DEFAULT: [[P1S2_TAG]] = !{[[P1S2:!.+]], [[P1S2]], i64 0} @@ -251,3 +256,4 @@ int void_ptrs(void **ptr) { // COMMON: 
[[INT_TAG]] = !{[[INT_TY:!.+]], [[INT_TY]], i64 0} // COMMON: [[INT_TY]] = !{!"int", [[CHAR]], i64 0} // DEFAULT: [[ANYPTR]] = !{[[ANY_POINTER]], [[ANY_POINTER]], i64 0} +// DEFAULT: [[ANYP2]] = !{[[ANY_P2_POINTER]], [[ANY_P2_POINTER]], i64 0} diff --git a/clang/test/CodeGenCUDA/increment-index-for-thunks.cu b/clang/test/CodeGenCUDA/increment-index-for-thunks.cu new file mode 100644 index 0000000000000..48dbf6ef82b7d --- /dev/null +++ b/clang/test/CodeGenCUDA/increment-index-for-thunks.cu @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx942 \ +// RUN: -emit-llvm -xhip %s -o - | FileCheck %s --check-prefix=GCN +// RUN: %clang_cc1 -fcuda-is-device -triple spirv64-amd-amdhsa \ +// RUN: -emit-llvm -xhip %s -o - | FileCheck %s --check-prefix=SPIRV + +// GCN: @_ZTV1C = linkonce_odr unnamed_addr addrspace(1) constant { [5 x ptr addrspace(1)], [4 x ptr addrspace(1)] } { [5 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr @_ZN1B2f2Ev to ptr addrspace(1)), ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr @_ZN1C2f1Ev to ptr addrspace(1))], [4 x ptr addrspace(1)] [ptr addrspace(1) inttoptr (i64 -8 to ptr addrspace(1)), ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr @_ZThn8_N1C2f1Ev to ptr addrspace(1))] }, comdat, align 8 +// GCN: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr @_ZN1B2f2Ev to ptr addrspace(1))] }, comdat, align 8 +// GCN: @_ZTV1A = linkonce_odr unnamed_addr addrspace(1) constant { [4 x ptr addrspace(1)] } { [4 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr @__cxa_pure_virtual to ptr addrspace(1))] }, comdat, align 8 +// SPIRV: @_ZTV1C = linkonce_odr unnamed_addr addrspace(1) constant { [5 x ptr addrspace(1)], [4 x ptr addrspace(1)] } { [5 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1B2f2Ev to ptr addrspace(1)), ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1C2f1Ev to ptr addrspace(1))], [4 x ptr addrspace(1)] [ptr addrspace(1) inttoptr (i64 -8 to ptr addrspace(1)), ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZThn8_N1C2f1Ev to ptr addrspace(1))] }, comdat, align 8 +// SPIRV: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1B2f2Ev to ptr addrspace(1))] }, comdat, align 8 +// SPIRV: @_ZTV1A = linkonce_odr unnamed_addr addrspace(1) constant { [4 x ptr addrspace(1)] } { [4 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) null, ptr addrspace(1) addrspacecast (ptr addrspace(4) @__cxa_pure_virtual to ptr addrspace(1))] }, comdat, align 8 + +struct A { + __attribute__((device)) A() { } + virtual void neither_device_nor_host_f() = 0 ; + __attribute__((device)) virtual void f1() = 0; + +}; + +struct B { + __attribute__((device)) B() { } + __attribute__((device)) virtual void f2() { }; +}; + +struct C : public B, public A { + __attribute__((device)) C() : B(), A() { } + + virtual void neither_device_nor_host_f() override { } + __attribute__((device)) virtual void f1() override { } + 
+}; + +__attribute__((device)) void test() { + C obj; +} diff --git a/clang/test/CodeGenHLSL/builtins/and.hlsl b/clang/test/CodeGenHLSL/builtins/and.hlsl new file mode 100644 index 0000000000000..b77889cd9ae70 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/and.hlsl @@ -0,0 +1,68 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -O1 -o - | FileCheck %s + +// CHECK-LABEL: define noundef i1 @_Z15test_and_scalarbb( +// CHECK-SAME: i1 noundef [[X:%.*]], i1 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[HLSL_AND:%.*]] = and i1 [[X]], [[Y]] +// CHECK-NEXT: ret i1 [[HLSL_AND]] +// +bool test_and_scalar(bool x, bool y) { + return and(x, y); +} + +// CHECK-LABEL: define noundef <2 x i1> @_Z14test_and_bool2Dv2_bS_( +// CHECK-SAME: <2 x i1> noundef [[X:%.*]], <2 x i1> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[HLSL_AND:%.*]] = and <2 x i1> [[X]], [[Y]] +// CHECK-NEXT: ret <2 x i1> [[HLSL_AND]] +// +bool2 test_and_bool2(bool2 x, bool2 y) { + return and(x, y); +} + +// CHECK-LABEL: define noundef <3 x i1> @_Z14test_and_bool3Dv3_bS_( +// CHECK-SAME: <3 x i1> noundef [[X:%.*]], <3 x i1> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[HLSL_AND:%.*]] = and <3 x i1> [[X]], [[Y]] +// CHECK-NEXT: ret <3 x i1> [[HLSL_AND]] +// +bool3 test_and_bool3(bool3 x, bool3 y) { + return and(x, y); +} + +// CHECK-LABEL: define noundef <4 x i1> @_Z14test_and_bool4Dv4_bS_( +// CHECK-SAME: <4 x i1> noundef [[X:%.*]], <4 x i1> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[HLSL_AND:%.*]] = and <4 x i1> [[X]], [[Y]] +// CHECK-NEXT: ret <4 x i1> [[HLSL_AND]] +// +bool4 test_and_bool4(bool4 x, bool4 y) { + return and(x, y); +} + +// CHECK-LABEL: define noundef <4 x i1> @_Z13test_and_int4Dv4_iS_( +// CHECK-SAME: <4 x i32> noundef [[X:%.*]], <4 x i32> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne <4 x i32> [[X]], zeroinitializer +// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne <4 x i32> [[Y]], zeroinitializer +// CHECK-NEXT: [[HLSL_AND:%.*]] = and <4 x i1> [[TOBOOL]], [[TOBOOL1]] +// CHECK-NEXT: ret <4 x i1> [[HLSL_AND]] +// +bool4 test_and_int4(int4 x, int4 y) { + return and(x, y); +} + +// CHECK-LABEL: define noundef <4 x i1> @_Z15test_and_float4Dv4_fS_( +// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[X:%.*]], <4 x float> noundef nofpclass(nan inf) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TOBOOL:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <4 x float> [[X]], zeroinitializer +// CHECK-NEXT: [[TOBOOL1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <4 x float> [[Y]], zeroinitializer +// CHECK-NEXT: [[HLSL_AND:%.*]] = and <4 x i1> [[TOBOOL]], [[TOBOOL1]] +// CHECK-NEXT: ret <4 x i1> [[HLSL_AND]] +// +bool4 test_and_float4(float4 x, float4 y) { + return and(x, y); +} diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index b423e94b26aca..ff9a1a1210c44 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -229,7 +229,7 @@ #error "wrong value for __cpp_aggregate_bases" #endif -#if check(structured_bindings, 0, 0, 0, 202403L, 202403L, 202403L, 
202403L) +#if check(structured_bindings, 0, 0, 0, 202411L, 202411L, 202411L, 202411L) #error "wrong value for __cpp_structured_bindings" #endif
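The `__cpp_structured_bindings` bump from 202403 to 202411 tracks P1061R10 ("Structured Bindings can introduce a Pack"), which the cxx_status.html change later in this patch marks as implemented in Clang 21. An illustrative use of the feature (sketch; requires a C++2c mode that implements P1061):

    #include <cstdio>
    #include <tuple>

    // A structured binding may now introduce a pack: split off the first
    // element and capture the rest as `tail`.
    template <typename... Ts>
    int sum_tail(std::tuple<Ts...> t) {
      auto [head, ...tail] = t; // binding pack
      (void)head;
      return (0 + ... + tail); // fold over the remaining elements
    }

    int main() {
      std::printf("%d\n", sum_tail(std::make_tuple(1, 2, 3))); // prints 5
    }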
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c index 06ef72878340f..6675a1ecc34ba 100644 --- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c +++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c @@ -28,6 +28,10 @@ // CHECK-SAME: {{^}}, sm_90a // CHECK-SAME: {{^}}, sm_100 // CHECK-SAME: {{^}}, sm_100a +// CHECK-SAME: {{^}}, sm_101 +// CHECK-SAME: {{^}}, sm_101a +// CHECK-SAME: {{^}}, sm_120 +// CHECK-SAME: {{^}}, sm_120a // CHECK-SAME: {{^}}, gfx600 // CHECK-SAME: {{^}}, gfx601 // CHECK-SAME: {{^}}, gfx602 diff --git a/clang/test/SemaCXX/anonymous-struct.cpp b/clang/test/SemaCXX/anonymous-struct.cpp index e1db98d2b2f50..75309821998eb 100644 --- a/clang/test/SemaCXX/anonymous-struct.cpp +++ b/clang/test/SemaCXX/anonymous-struct.cpp @@ -29,14 +29,13 @@ struct E { template <typename T> void foo(T); typedef struct { // expected-error {{anonymous non-C-compatible type given name for linkage purposes by typedef declaration after its linkage was computed; add a tag name here to establish linkage prior to definition}} -#if __cplusplus <= 199711L -// expected-note@-2 {{declared here}} -#endif +// expected-note@-1 {{unnamed type used in template argument was declared here}} void test() { // expected-note {{type is not C-compatible due to this member declaration}} foo(this); #if __cplusplus <= 199711L // expected-warning@-2 {{template argument uses unnamed type}} + // expected-note@-3 {{while substituting deduced template arguments}} #endif } } A; // expected-note {{type is given name 'A' for linkage purposes by this typedef declaration}} diff --git a/clang/test/SemaCXX/bool-increment-SFINAE.cpp b/clang/test/SemaCXX/bool-increment-SFINAE.cpp index d3889293fc0b6..3a465fa5a3d5a 100644 --- a/clang/test/SemaCXX/bool-increment-SFINAE.cpp +++ b/clang/test/SemaCXX/bool-increment-SFINAE.cpp @@ -7,7 +7,7 @@ template <typename T> auto f(T t) -> decltype(++t); // precxx17-warning {{incrementing expression of type bool is deprecated}} auto f(...) -> void; -void g() { f(true); } +void g() { f(true); } // precxx17-note {{while substituting deduced template arguments}} #ifdef FAILED_CXX17 @@ -30,7 +30,7 @@ void f() { int main() { f(); // cxx20-note {{in instantiation of function template specialization 'f' requested here}} - static_assert(!can_increment); + static_assert(!can_increment); return 0; } diff --git a/clang/test/SemaCXX/cxx98-compat-flags.cpp b/clang/test/SemaCXX/cxx98-compat-flags.cpp index 1fdb50c7fb287..6ffb3a5884d17 100644 --- a/clang/test/SemaCXX/cxx98-compat-flags.cpp +++ b/clang/test/SemaCXX/cxx98-compat-flags.cpp @@ -5,9 +5,11 @@ template<typename T> int TemplateFn(T) { return 0; } void LocalTemplateArg() { struct S {}; TemplateFn(S()); // expected-warning {{local type 'S' as template argument is incompatible with C++98}} + // expected-note@-1 {{while substituting deduced template arguments}} } struct {} obj_of_unnamed_type; // expected-note {{here}} int UnnamedTemplateArg = TemplateFn(obj_of_unnamed_type); // expected-warning {{unnamed type as template argument is incompatible with C++98}} + // expected-note@-1 {{while substituting deduced template arguments}} namespace CopyCtorIssues { struct Private { diff --git a/clang/test/SemaCXX/cxx98-compat.cpp b/clang/test/SemaCXX/cxx98-compat.cpp index 28547d42c6490..d31d95a9995f1 100644 --- a/clang/test/SemaCXX/cxx98-compat.cpp +++ b/clang/test/SemaCXX/cxx98-compat.cpp @@ -177,9 +177,11 @@ template<typename T> int TemplateFn(T) { return 0; } void LocalTemplateArg() { struct S {}; TemplateFn(S()); // expected-warning {{local type 'S' as template argument is incompatible with C++98}} + // expected-note@-1 {{while substituting deduced template arguments}} } struct {} obj_of_unnamed_type; // expected-note {{here}} int UnnamedTemplateArg = TemplateFn(obj_of_unnamed_type); // expected-warning {{unnamed type as template argument is incompatible with C++98}} + // expected-note@-1 {{while substituting deduced template arguments}} // FIXME: We do not implement C++98 compatibility warnings for the C++17 // template argument evaluation rules. diff --git a/clang/test/SemaCXX/deprecated.cpp b/clang/test/SemaCXX/deprecated.cpp index 4282239af81b4..a24b40d8e622a 100644 --- a/clang/test/SemaCXX/deprecated.cpp +++ b/clang/test/SemaCXX/deprecated.cpp @@ -216,7 +216,7 @@ namespace DeprecatedVolatile { #endif template<typename T> T f(T v); // cxx20-warning 2{{deprecated}} - int use_f = f(0); // FIXME: Missing "in instantiation of" note. + int use_f = f(0); // cxx20-note {{while substituting deduced template arguments}} // OK, only the built-in operators are deprecated.
struct UDT { @@ -247,7 +247,7 @@ namespace ArithConv { namespace ArrayComp { int arr1[3], arr2[4]; bool b1 = arr1 == arr2; // not-cxx20-warning {{comparison between two arrays compare their addresses}} cxx20-warning {{comparison between two arrays is deprecated}} - // expected-warning@-1 {{array comparison always evaluates to false}} + // expected-warning@-1 {{array comparison always evaluates to false}} bool b2 = arr1 < arr2; // not-cxx20-warning {{comparison between two arrays compare their addresses}} cxx20-warning {{comparison between two arrays is deprecated}} // expected-warning@-1 {{array comparison always evaluates to a constant}} __attribute__((weak)) int arr3[3]; diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp index f3deb6ee3f424..2d2dde82a28e6 100644 --- a/clang/test/SemaCXX/lambda-expressions.cpp +++ b/clang/test/SemaCXX/lambda-expressions.cpp @@ -452,6 +452,7 @@ void g(F f) { void f() { g([] {}); // cxx03-warning {{template argument uses local type}} // expected-note-re@-1 {{in instantiation of function template specialization 'PR20731::g<(lambda at {{.*}}>' requested here}} + // cxx03-note@-2 {{while substituting deduced template arguments}} } template struct function { @@ -503,6 +504,7 @@ namespace PR21857 { }; template fun wrap(Fn fn); // cxx03-warning {{template argument uses unnamed type}} auto x = wrap([](){}); // cxx03-warning {{template argument uses unnamed type}} cxx03-note 2 {{unnamed type used in template argument was declared here}} + // cxx03-note@-1 {{while substituting deduced template arguments into function template}} } namespace PR13987 { diff --git a/clang/test/SemaCXX/undefined-internal.cpp b/clang/test/SemaCXX/undefined-internal.cpp index 054e71b92f93d..9745f097c76b7 100644 --- a/clang/test/SemaCXX/undefined-internal.cpp +++ b/clang/test/SemaCXX/undefined-internal.cpp @@ -133,7 +133,7 @@ namespace PR9323 { } void f(const Uncopyable&) {} void test() { - f(Uncopyable()); + f(Uncopyable()); #if __cplusplus <= 199711L // C++03 or earlier modes // expected-warning@-2 {{C++98 requires an accessible copy constructor}} #else @@ -209,7 +209,9 @@ namespace OverloadUse { t(&n, &n); // expected-note {{used here}} #if __cplusplus < 201103L // expected-warning@-3 {{non-type template argument referring to function 'f' with internal linkage}} - // expected-warning@-3 {{non-type template argument referring to function 'f' with internal linkage}} + // expected-note@-4 {{while substituting explicitly-specified template arguments}} + // expected-warning@-4 {{non-type template argument referring to function 'f' with internal linkage}} + // expected-note@-5 {{while substituting explicitly-specified template arguments}} #endif } } diff --git a/clang/test/SemaHLSL/BuiltIns/and-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/and-errors.hlsl new file mode 100644 index 0000000000000..0a99feb023d73 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/and-errors.hlsl @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -O1 -verify + +bool test_too_few_arg(bool a) { + return __builtin_hlsl_and(a); + // expected-error@-1 {{too few arguments to function call, expected 2, have 1}} +} + +bool test_too_many_arg(bool a) { + return __builtin_hlsl_and(a, a, a); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} +} + +bool2 test_mismatched_args(bool2 a, bool3 b) { + return __builtin_hlsl_and(a, b); + // expected-error@-1 {{all arguments 
to '__builtin_hlsl_and' must have the same type}} +} + +bool test_incorrect_type(int a) { + return __builtin_hlsl_and(a, a); + // expected-error@-1{{invalid operand of type 'int' where 'bool' or a vector of such type is required}} +} diff --git a/clang/test/SemaTemplate/recovery-crash.cpp b/clang/test/SemaTemplate/recovery-crash.cpp index 88e989aeb4064..ac8053da101ab 100644 --- a/clang/test/SemaTemplate/recovery-crash.cpp +++ b/clang/test/SemaTemplate/recovery-crash.cpp @@ -32,6 +32,7 @@ namespace PR16225 { f(); #if __cplusplus <= 199711L // expected-warning@-2 {{template argument uses local type 'LocalStruct'}} + // expected-note@-3 {{while substituting explicitly-specified template arguments}} #endif struct LocalStruct2 : UnknownBase { }; // expected-error {{no template named 'UnknownBase'}} } diff --git a/clang/test/SemaTemplate/temp_arg_nontype.cpp b/clang/test/SemaTemplate/temp_arg_nontype.cpp index 8b270b22a12b4..2a1c059df002e 100644 --- a/clang/test/SemaTemplate/temp_arg_nontype.cpp +++ b/clang/test/SemaTemplate/temp_arg_nontype.cpp @@ -255,6 +255,7 @@ namespace test8 { namespace PR8372 { template void foo() { } // expected-note{{template parameter is declared here}} void bar() { foo <0x80000000> (); } // expected-warning{{non-type template argument value '2147483648' truncated to '-2147483648' for template parameter of type 'int'}} + // expected-note@-1 {{while substituting explicitly-specified template arguments}} } namespace PR9227 { diff --git a/clang/tools/libclang/CXString.cpp b/clang/tools/libclang/CXString.cpp index 5e427957a1092..aaa8f8eeb67a1 100644 --- a/clang/tools/libclang/CXString.cpp +++ b/clang/tools/libclang/CXString.cpp @@ -87,19 +87,7 @@ CXString createRef(StringRef String) { if (String.empty()) return createEmpty(); - // If the string is not nul-terminated, we have to make a copy. - - // FIXME: This is doing a one past end read, and should be removed! For memory - // we don't manage, the API string can become unterminated at any time outside - // our control. 
- - if (String.data()[String.size()] != 0) - return createDup(String); - - CXString Result; - Result.data = String.data(); - Result.private_flags = (unsigned) CXS_Unmanaged; - return Result; + return createDup(String); } CXString createDup(StringRef String) { diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt index 4aed086563ee9..1d7bb788a15ed 100644 --- a/clang/utils/perf-training/CMakeLists.txt +++ b/clang/utils/perf-training/CMakeLists.txt @@ -6,6 +6,12 @@ set(CLANG_PGO_TRAINING_DATA "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH set(CLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source directory containing cmake project with source files to use for generating pgo data") set(CLANG_PGO_TRAINING_DEPS "" CACHE STRING "Extra dependencies needed to build the PGO training data.") +option(CLANG_PGO_TRAINING_USE_LLVM_BUILD "Use LLVM build for generating PGO data" ON) + +llvm_canonicalize_cmake_booleans( + CLANG_PGO_TRAINING_USE_LLVM_BUILD +) + if(LLVM_BUILD_INSTRUMENTED) configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in diff --git a/clang/utils/perf-training/lit.cfg b/clang/utils/perf-training/lit.cfg index adefc7893ac44..3f6089b7139a7 100644 --- a/clang/utils/perf-training/lit.cfg +++ b/clang/utils/perf-training/lit.cfg @@ -27,6 +27,9 @@ config.clang = lit.util.which('clang', config.clang_tools_dir).replace('\\', '/' config.name = 'Clang Perf Training' config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test'] +if not config.use_llvm_build: + config.excludes = ['llvm-support'] + cc1_wrapper = '%s %s/perf-helper.py cc1' % (config.python_exe, config.perf_helper_dir) use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") diff --git a/clang/utils/perf-training/lit.site.cfg.in b/clang/utils/perf-training/lit.site.cfg.in index 9d279d552919a..da81ec21a28a6 100644 --- a/clang/utils/perf-training/lit.site.cfg.in +++ b/clang/utils/perf-training/lit.site.cfg.in @@ -11,6 +11,7 @@ config.python_exe = "@Python3_EXECUTABLE@" config.cmake_exe = "@CMAKE_COMMAND@" config.llvm_src_dir ="@CMAKE_SOURCE_DIR@" config.cmake_generator ="@CMAKE_GENERATOR@" +config.use_llvm_build = @CLANG_PGO_TRAINING_USE_LLVM_BUILD@ # Let the main config do the real work. lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/lit.cfg") diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 2d5b96b47fe2d..2e2fecc418504 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -244,7 +244,7 @@

C++2c implementation status

Structured Bindings can introduce a Pack P1061R10 - No + Clang 21 The Oxford variadic comma diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index deb8d1aede518..44cb5a2cdd497 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the ///< compile step. CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) +CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization. CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f3d9432c62d3b..724316f1b30c7 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -23,6 +23,7 @@ #include "clang/Basic/AllDiagnostics.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/DiagnosticOptions.h" +#include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/OptionUtils.h" #include "clang/Driver/Options.h" @@ -242,6 +243,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, clang::driver::options::OPT_fno_stack_arrays, false)) opts.StackArrays = 1; + if (args.getLastArg(clang::driver::options::OPT_vectorize_loops)) + opts.VectorizeLoop = 1; + if (args.hasFlag(clang::driver::options::OPT_floop_versioning, clang::driver::options::OPT_fno_loop_versioning, false)) opts.LoopVersioning = 1; diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 763c810ace0eb..76d329d043731 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -1037,6 +1037,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { si.getTimePasses().setOutStream(ci.getTimingStreamLLVM()); pto.LoopUnrolling = opts.UnrollLoops; pto.LoopInterleaving = opts.UnrollLoops; + pto.LoopVectorization = opts.VectorizeLoop; + llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic); // Attempt to load pass plugins and register their callbacks with PB. diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index d13f101f516e7..d725dfd3e94f3 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -508,6 +508,8 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); + hlfir::Entity entity{hsb.getAddr()}; + bool cannotHaveNonDefaultLowerBounds = !entity.mayHaveNonDefaultLowerBounds(); mlir::Location symLoc = hsb.getAddr().getLoc(); std::string privatizerName = sym->name().ToString() + ".privatizer"; @@ -528,7 +530,6 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, // an alloca for a fir.array type there. Get around this by boxing all // arrays. 
if (mlir::isa(allocType)) { - hlfir::Entity entity{hsb.getAddr()}; entity = genVariableBox(symLoc, firOpBuilder, entity); privVal = entity.getBase(); allocType = privVal.getType(); @@ -590,7 +591,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, result.getDeallocRegion(), isFirstPrivate ? DeclOperationKind::FirstPrivate : DeclOperationKind::Private, - sym); + sym, cannotHaveNonDefaultLowerBounds); // TODO: currently there are false positives from dead uses of the mold // arg if (!result.getInitMoldArg().getUses().empty()) diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp index 22cd0679050db..21ade77d82d37 100644 --- a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp +++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp @@ -122,25 +122,40 @@ static void createCleanupRegion(Fortran::lower::AbstractConverter &converter, typeError(); } -fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder, - mlir::Location loc, - mlir::Value box) { +fir::ShapeShiftOp +Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Value box, + bool cannotHaveNonDefaultLowerBounds) { fir::SequenceType sequenceType = mlir::cast( hlfir::getFortranElementOrSequenceType(box.getType())); const unsigned rank = sequenceType.getDimension(); + llvm::SmallVector lbAndExtents; lbAndExtents.reserve(rank * 2); - mlir::Type idxTy = builder.getIndexType(); - for (unsigned i = 0; i < rank; ++i) { - // TODO: ideally we want to hoist box reads out of the critical section. - // We could do this by having box dimensions in block arguments like - // OpenACC does - mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); - auto dimInfo = - builder.create(loc, idxTy, idxTy, idxTy, box, dim); - lbAndExtents.push_back(dimInfo.getLowerBound()); - lbAndExtents.push_back(dimInfo.getExtent()); + + if (cannotHaveNonDefaultLowerBounds && !sequenceType.hasDynamicExtents()) { + // We don't need fir::BoxDimsOp if all of the extents are statically known + // and we can assume default lower bounds. This helps avoids reads from the + // mold arg. + mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); + for (int64_t extent : sequenceType.getShape()) { + assert(extent != sequenceType.getUnknownExtent()); + mlir::Value extentVal = builder.createIntegerConstant(loc, idxTy, extent); + lbAndExtents.push_back(one); + lbAndExtents.push_back(extentVal); + } + } else { + for (unsigned i = 0; i < rank; ++i) { + // TODO: ideally we want to hoist box reads out of the critical section. 
+ // We could do this by having box dimensions in block arguments like + // OpenACC does + mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); + auto dimInfo = + builder.create(loc, idxTy, idxTy, idxTy, box, dim); + lbAndExtents.push_back(dimInfo.getLowerBound()); + lbAndExtents.push_back(dimInfo.getExtent()); + } } auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank); @@ -248,12 +263,13 @@ class PopulateInitAndCleanupRegionsHelper { mlir::Type argType, mlir::Value scalarInitValue, mlir::Value allocatedPrivVarArg, mlir::Value moldArg, mlir::Block *initBlock, mlir::Region &cleanupRegion, - DeclOperationKind kind, const Fortran::semantics::Symbol *sym) + DeclOperationKind kind, const Fortran::semantics::Symbol *sym, + bool cannotHaveLowerBounds) : converter{converter}, builder{converter.getFirOpBuilder()}, loc{loc}, argType{argType}, scalarInitValue{scalarInitValue}, allocatedPrivVarArg{allocatedPrivVarArg}, moldArg{moldArg}, initBlock{initBlock}, cleanupRegion{cleanupRegion}, kind{kind}, - sym{sym} { + sym{sym}, cannotHaveNonDefaultLowerBounds{cannotHaveLowerBounds} { valType = fir::unwrapRefType(argType); } @@ -295,6 +311,10 @@ class PopulateInitAndCleanupRegionsHelper { /// Any length parameters which have been fetched for the type mlir::SmallVector lenParams; + /// If the source variable being privatized definitely can't have non-default + /// lower bounds then we don't need to generate code to read them. + bool cannotHaveNonDefaultLowerBounds; + void createYield(mlir::Value ret) { builder.create(loc, ret); } @@ -432,7 +452,8 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray( // Special case for (possibly allocatable) arrays of polymorphic types // e.g. !fir.class>>> if (source.isPolymorphic()) { - fir::ShapeShiftOp shape = getShapeShift(builder, loc, source); + fir::ShapeShiftOp shape = + getShapeShift(builder, loc, source, cannotHaveNonDefaultLowerBounds); mlir::Type arrayType = source.getElementOrSequenceType(); mlir::Value allocatedArray = builder.create( loc, arrayType, /*typeparams=*/mlir::ValueRange{}, shape.getExtents()); @@ -471,8 +492,8 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray( // Put the temporary inside of a box: // hlfir::genVariableBox doesn't handle non-default lower bounds mlir::Value box; - fir::ShapeShiftOp shapeShift = - getShapeShift(builder, loc, getLoadedMoldArg()); + fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, getLoadedMoldArg(), + cannotHaveNonDefaultLowerBounds); mlir::Type boxType = getLoadedMoldArg().getType(); if (mlir::isa(temp.getType())) // the box created by the declare form createTempFromMold is missing @@ -607,10 +628,10 @@ void Fortran::lower::omp::populateByRefInitAndCleanupRegions( mlir::Type argType, mlir::Value scalarInitValue, mlir::Block *initBlock, mlir::Value allocatedPrivVarArg, mlir::Value moldArg, mlir::Region &cleanupRegion, DeclOperationKind kind, - const Fortran::semantics::Symbol *sym) { + const Fortran::semantics::Symbol *sym, bool cannotHaveLowerBounds) { PopulateInitAndCleanupRegionsHelper helper( converter, loc, argType, scalarInitValue, allocatedPrivVarArg, moldArg, - initBlock, cleanupRegion, kind, sym); + initBlock, cleanupRegion, kind, sym, cannotHaveLowerBounds); helper.populateByRefInitAndCleanupRegions(); // Often we load moldArg to check something (e.g. 
length parameters, shape) diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.h b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h index fcd36392a29e0..0a3513bff19b0 100644 --- a/flang/lib/Lower/OpenMP/PrivateReductionUtils.h +++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h @@ -55,11 +55,13 @@ void populateByRefInitAndCleanupRegions( mlir::Value scalarInitValue, mlir::Block *initBlock, mlir::Value allocatedPrivVarArg, mlir::Value moldArg, mlir::Region &cleanupRegion, DeclOperationKind kind, - const Fortran::semantics::Symbol *sym = nullptr); + const Fortran::semantics::Symbol *sym = nullptr, + bool cannotHaveNonDefaultLowerBounds = false); /// Generate a fir::ShapeShift op describing the provided boxed array. fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Value box); + mlir::Value box, + bool cannotHaveNonDefaultLowerBounds = false); } // namespace omp } // namespace lower diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 49e507feab580..ef7204dcd9196 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1477,7 +1477,20 @@ void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar( } } }, - [&](const parser::Name &) {}, // common block + [&](const parser::Name &name) { + if (name.symbol) { + if (auto *cb{name.symbol->detailsIf()}) { + for (const auto &obj : cb->objects()) { + if (FindEquivalenceSet(*obj)) { + context_.Say(name.source, + "A variable in a %s directive cannot appear in an EQUIVALENCE statement (variable '%s' from common block '/%s/')"_err_en_US, + ContextDirectiveAsFortran(), obj->name(), + name.symbol->name()); + } + } + } + } + }, }, ompObject.u); } diff --git a/flang/test/Driver/optimization-remark.f90 b/flang/test/Driver/optimization-remark.f90 index e90baa892f46a..90e310d36c807 100644 --- a/flang/test/Driver/optimization-remark.f90 +++ b/flang/test/Driver/optimization-remark.f90 @@ -5,33 +5,33 @@ ! DEFINE: %{output} = -emit-llvm -flang-deprecated-no-hlfir -o /dev/null 2>&1 ! Check fc1 can handle -Rpass -! RUN: %flang_fc1 %s -O1 -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS ! Check that we can override -Rpass= with -Rno-pass. -! RUN: %flang_fc1 %s -O1 -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check -Rno-pass, -Rno-pass-analysis, -Rno-pass-missed nothing emitted -! RUN: %flang %s -O1 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check valid -Rpass regex -! RUN: %flang %s -O1 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY +! 
RUN: %flang %s -O2 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY ! Check valid -Rpass-missed regex -! RUN: %flang %s -O1 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY ! Check valid -Rpass-analysis regex -! RUN: %flang %s -O1 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY ! Check full -Rpass message is emitted -! RUN: %flang %s -O1 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS +! RUN: %flang %s -O2 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS ! Check full -Rpass-missed message is emitted -! RUN: %flang %s -O1 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED +! RUN: %flang %s -O2 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED ! Check full -Rpass-analysis message is emitted -! RUN: %flang %s -O1 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS +! RUN: %flang %s -O2 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS ! REMARKS: remark: ! NO-REMARKS-NOT: remark: diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90 index c3fcf1c3a7cf3..debe45e0ec359 100644 --- a/flang/test/Integration/unroll-loops.f90 +++ b/flang/test/Integration/unroll-loops.f90 @@ -1,8 +1,8 @@ ! FIXME: https://github.com/llvm/llvm-project/issues/123668 ! ! DEFINE: %{triple} = -! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL ! ! REDEFINE: %{triple} = aarch64-unknown-linux-gnu ! 
RUN: %if aarch64-registered-target %{ %{check-unroll} %} diff --git a/flang/test/Lower/HLFIR/unroll-loops.fir b/flang/test/Lower/HLFIR/unroll-loops.fir index 6a9dd28a37b6d..1321f39677405 100644 --- a/flang/test/Lower/HLFIR/unroll-loops.fir +++ b/flang/test/Lower/HLFIR/unroll-loops.fir @@ -1,6 +1,6 @@ // DEFINE: %{triple} = -// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL // REDEFINE: %{triple} = aarch64-unknown-linux-gnu // RUN: %if aarch64-registered-target %{ %{check-unroll} %} diff --git a/flang/test/Lower/OpenMP/delayed-privatization-array.f90 b/flang/test/Lower/OpenMP/delayed-privatization-array.f90 index 95fa3f9e03052..c447fa6f27a75 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-array.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-array.f90 @@ -108,15 +108,14 @@ program main ! ONE_DIM_DEFAULT_LB-SAME: @[[PRIVATIZER_SYM:.*]] : [[BOX_TYPE:!fir.box>]] init { ! ONE_DIM_DEFAULT_LB-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE:!fir.ref>>]], %[[PRIV_BOX_ALLOC:.*]]: [[TYPE]]): -! ONE_DIM_DEFAULT_LB-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] ! ONE_DIM_DEFAULT_LB-NEXT: %[[C10:.*]] = arith.constant 10 : index ! ONE_DIM_DEFAULT_LB-NEXT: %[[SHAPE:.*]] = fir.shape %[[C10]] ! ONE_DIM_DEFAULT_LB-NEXT: %[[ARRAY_ALLOC:.*]] = fir.allocmem !fir.array<10xi32> ! ONE_DIM_DEFAULT_LB-NEXT: %[[TRUE:.*]] = arith.constant true ! ONE_DIM_DEFAULT_LB-NEXT: %[[DECL:.*]]:2 = hlfir.declare %[[ARRAY_ALLOC]](%[[SHAPE]]) -! ONE_DIM_DEFAULT_LB-NEXT: %[[C0_0:.*]] = arith.constant 0 -! ONE_DIM_DEFAULT_LB-NEXT: %[[DIMS2:.*]]:3 = fir.box_dims %[[PRIV_ARG_VAL]], %[[C0_0]] -! ONE_DIM_DEFAULT_LB-NEXT: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS2]]#0, %[[DIMS2]]#1 +! ONE_DIM_DEFAULT_LB-NEXT: %[[ONE:.*]] = arith.constant 1 : index +! ONE_DIM_DEFAULT_LB-NEXT: %[[TEN:.*]] = arith.constant 10 : index +! ONE_DIM_DEFAULT_LB-NEXT: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[ONE]], %[[TEN]] ! ONE_DIM_DEFAULT_LB-NEXT: %[[EMBOX:.*]] = fir.embox %[[DECL]]#0(%[[SHAPE_SHIFT]]) ! ONE_DIM_DEFAULT_LB-NEXT: fir.store %[[EMBOX]] to %[[PRIV_BOX_ALLOC]] ! ONE_DIM_DEFAULT_LB-NEXT: omp.yield(%[[PRIV_BOX_ALLOC]] : [[TYPE]]) diff --git a/flang/test/Lower/OpenMP/different_vars_lastprivate_barrier.f90 b/flang/test/Lower/OpenMP/different_vars_lastprivate_barrier.f90 index 0f04977754291..b74e083925aba 100644 --- a/flang/test/Lower/OpenMP/different_vars_lastprivate_barrier.f90 +++ b/flang/test/Lower/OpenMP/different_vars_lastprivate_barrier.f90 @@ -1,8 +1,8 @@ ! RUN: %flang_fc1 -fopenmp -mmlir --openmp-enable-delayed-privatization-staging=true -emit-hlfir %s -o - | FileCheck %s -subroutine first_and_lastprivate +subroutine first_and_lastprivate(var) integer i - integer, dimension(3) :: var + integer, dimension(:) :: var !$omp parallel do lastprivate(i) private(var) do i=1,1 @@ -10,19 +10,20 @@ subroutine first_and_lastprivate !$omp end parallel do end subroutine -! 
CHECK: omp.private {type = private} @[[VAR_PRIVATIZER:.*Evar_private_box_3xi32]] : [[BOX_TYPE:!fir\.box>]] init { +! CHECK: omp.private {type = private} @[[VAR_PRIVATIZER:.*Evar_private_box_Uxi32]] : [[BOX_TYPE:!fir\.box>]] init { ! CHECK-NEXT: ^bb0(%[[ORIG_REF:.*]]: {{.*}}, %[[PRIV_REF:.*]]: {{.*}}): ! CHECK: %[[ORIG_VAL:.*]] = fir.load %[[ORIG_REF]] +! CHECK: %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ORIG_VAL]], %{{.*}} : ([[BOX_TYPE]], index) -> (index, index, index) ! CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ORIG_VAL]], %{{.*}} : ([[BOX_TYPE]], index) -> (index, index, index) ! CHECK: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[BOX_DIMS]]#0, %[[BOX_DIMS]]#1 -! CHECK: %[[EMBOX:.*]] = fir.embox %{{.*}}(%[[SHAPE_SHIFT]]) : {{.*}} -> [[BOX_TYPE]] +! CHECK: %[[EMBOX:.*]] = fir.rebox %{{.*}}(%[[SHAPE_SHIFT]]) : {{.*}} -> [[BOX_TYPE]] ! CHECK: fir.store %[[EMBOX]] to %[[PRIV_REF]] ! CHECK: omp.yield(%[[PRIV_REF]] : !fir.ref<[[BOX_TYPE]]>) ! CHECK: } ! CHECK: omp.private {type = private} @[[I_PRIVATIZER:.*Ei_private_i32]] : i32 -! CHECK: func.func @{{.*}}first_and_lastprivate() +! CHECK: func.func @{{.*}}first_and_lastprivate({{.*}}) ! CHECK: %[[ORIG_I_DECL:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "{{.*}}Ei"} ! CHECK: omp.parallel { ! CHECK-NOT: omp.barrier diff --git a/flang/test/Semantics/OpenMP/threadprivate02.f90 b/flang/test/Semantics/OpenMP/threadprivate02.f90 index 7f6e8dcc8e8ab..9dc031a8ce47e 100644 --- a/flang/test/Semantics/OpenMP/threadprivate02.f90 +++ b/flang/test/Semantics/OpenMP/threadprivate02.f90 @@ -7,6 +7,9 @@ program threadprivate02 integer :: arr1(10) common /blk1/ a1 real, save :: eq_a, eq_b, eq_c, eq_d + integer :: eq_e, eq_f + equivalence(eq_e, eq_f) + common /blk2/ eq_e !$omp threadprivate(arr1) @@ -25,6 +28,9 @@ program threadprivate02 !$omp threadprivate(eq_c) equivalence(eq_c, eq_d) + !ERROR: A variable in a THREADPRIVATE directive cannot appear in an EQUIVALENCE statement (variable 'eq_e' from common block '/blk2/') + !$omp threadprivate(/blk2/) + contains subroutine func() integer :: arr2(10) diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index efb5e1c7bd698..aac4017a0d845 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -329,6 +329,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.geteuid libc.src.unistd.getpid libc.src.unistd.getppid + libc.src.unistd.getsid libc.src.unistd.gettid libc.src.unistd.getuid libc.src.unistd.isatty diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index a9ba0c257755b..6b006f0ecca89 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -326,6 +326,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.geteuid libc.src.unistd.getpid libc.src.unistd.getppid + libc.src.unistd.getsid libc.src.unistd.gettid libc.src.unistd.getuid libc.src.unistd.isatty diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index a4f6671a59789..35661004663c9 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -328,6 +328,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.geteuid libc.src.unistd.getpid libc.src.unistd.getppid + libc.src.unistd.getsid libc.src.unistd.gettid libc.src.unistd.getuid libc.src.unistd.isatty diff --git a/libc/include/unistd.yaml b/libc/include/unistd.yaml index d04d46bd5c002..051e92b006741 100644 --- a/libc/include/unistd.yaml +++ 
b/libc/include/unistd.yaml @@ -161,6 +161,12 @@ functions: return_type: int arguments: - type: void + - name: getsid + standards: + - POSIX + return_type: pid_t + arguments: + - type: pid_t - name: gettid standards: - Linux diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt index fb563ec4ecfd9..b1a1716aa85c6 100644 --- a/libc/src/unistd/CMakeLists.txt +++ b/libc/src/unistd/CMakeLists.txt @@ -125,6 +125,13 @@ add_entrypoint_object( .${LIBC_TARGET_OS}.getppid ) +add_entrypoint_object( + getsid + ALIAS + DEPENDS + .${LIBC_TARGET_OS}.getsid +) + add_entrypoint_object( geteuid ALIAS diff --git a/libc/src/unistd/getsid.h b/libc/src/unistd/getsid.h new file mode 100644 index 0000000000000..e788b5dc4fba0 --- /dev/null +++ b/libc/src/unistd/getsid.h @@ -0,0 +1,21 @@ +//===-- Implementation header for getsid ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_UNISTD_GETSID_H +#define LLVM_LIBC_SRC_UNISTD_GETSID_H + +#include "hdr/types/pid_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +pid_t getsid(pid_t); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_UNISTD_GETSID_H diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index afdc595d0b26f..368593a3bb7b5 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -235,6 +235,19 @@ add_entrypoint_object( libc.src.__support.OSUtil.osutil ) +add_entrypoint_object( + getsid + SRCS + getsid.cpp + HDRS + ../getsid.h + DEPENDS + libc.hdr.types.pid_t + libc.include.sys_syscall + libc.src.__support.OSUtil.osutil + libc.src.errno.errno +) + add_entrypoint_object( getuid SRCS diff --git a/libc/src/unistd/linux/getsid.cpp b/libc/src/unistd/linux/getsid.cpp new file mode 100644 index 0000000000000..5977c5bf10e94 --- /dev/null +++ b/libc/src/unistd/linux/getsid.cpp @@ -0,0 +1,29 @@ +//===-- Linux implementation of getsid-------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/unistd/getsid.h" + +#include "hdr/types/pid_t.h" +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/errno/libc_errno.h" +#include // For syscall numbers. + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(pid_t, getsid, (pid_t pid)) { + pid_t ret = LIBC_NAMESPACE::syscall_impl(SYS_getsid, pid); + if (ret < 0) { + libc_errno = static_cast(-ret); + return -1; + } + return ret; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/integration/src/__support/GPU/scan_reduce.cpp b/libc/test/integration/src/__support/GPU/scan_reduce.cpp index bc621c3300cbe..1d50e1f99bf31 100644 --- a/libc/test/integration/src/__support/GPU/scan_reduce.cpp +++ b/libc/test/integration/src/__support/GPU/scan_reduce.cpp @@ -53,10 +53,59 @@ static void test_scan() { EXPECT_EQ(z, gpu::get_lane_id() % 2 ? 
gpu::get_lane_id() / 2 + 1 : 0); } +static uint32_t random(uint64_t *rand_next) { + uint64_t x = *rand_next; + x ^= x >> 12; + x ^= x << 25; + x ^= x >> 27; + *rand_next = x; + return static_cast((x * 0x2545F4914F6CDD1Dul) >> 32); +} + +// Scan operations can break down under thread divergence, make sure that the +// function works under some random divergence. We do this by trivially +// implementing a scan with shared scratch memory and then comparing the +// results. +static void test_scan_divergent() { + static uint32_t input[64] = {0}; + static uint32_t result[64] = {0}; + uint64_t state = gpu::processor_clock() + __gpu_lane_id(); + + for (int i = 0; i < 64; ++i) { + uint64_t lanemask = gpu::get_lane_mask(); + if (random(&state) & (1ull << gpu::get_lane_id())) { + uint64_t divergent = gpu::get_lane_mask(); + uint32_t value = random(&state) % 256; + input[gpu::get_lane_id()] = value; + + if (gpu::is_first_lane(divergent)) { + uint32_t accumulator = 0; + for (uint32_t lane = 0; lane < gpu::get_lane_size(); ++lane) { + uint32_t tmp = input[lane]; + result[lane] = tmp + accumulator; + accumulator += tmp; + } + } + gpu::sync_lane(divergent); + + uint32_t scan = gpu::scan(divergent, value); + EXPECT_EQ(scan, result[gpu::get_lane_id()]); + } + if (gpu::is_first_lane(lanemask)) + __builtin_memset(input, 0, sizeof(input)); + gpu::sync_lane(lanemask); + } +} + TEST_MAIN(int argc, char **argv, char **envp) { + if (gpu::get_thread_id() >= gpu::get_lane_size()) + return 0; + test_reduce(); test_scan(); + test_scan_divergent(); + return 0; } diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt index 665cb367ba4fd..d1f3050e6cccf 100644 --- a/libc/test/src/unistd/CMakeLists.txt +++ b/libc/test/src/unistd/CMakeLists.txt @@ -394,6 +394,16 @@ add_libc_unittest( libc.src.unistd.getppid ) +add_libc_unittest( + getsid_test + SUITE + libc_unistd_unittests + SRCS + getsid_test.cpp + DEPENDS + libc.src.unistd.getsid +) + add_libc_unittest( getuid_test SUITE diff --git a/libc/test/src/unistd/getsid_test.cpp b/libc/test/src/unistd/getsid_test.cpp new file mode 100644 index 0000000000000..b3e8d54b14dcb --- /dev/null +++ b/libc/test/src/unistd/getsid_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for getsid ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
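As a usage note for the new libc entrypoint: the Linux wrapper above forwards to SYS_getsid and maps a negative kernel return to errno plus -1, which is exactly the POSIX getsid() contract the unit test below exercises. A minimal standalone sketch against the host's POSIX libc (not LLVM-libc), purely for illustration:

```c
/* Demo of the getsid() semantics tested below: getsid(0) reports the
 * calling process's session ID, while a PID that names no process
 * (no process can have PID -1) fails with -1 and errno set. */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int main(void) {
  pid_t sid = getsid(0); /* 0 means "the calling process" */
  if (sid == -1) {
    perror("getsid");
    return 1;
  }
  printf("session id: %d\n", (int)sid);

  errno = 0;
  if (getsid(-1) == -1) /* expected: -1 with errno (typically ESRCH) */
    printf("getsid(-1) failed with errno=%d, as the test expects\n", errno);
  return 0;
}
```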
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/errno/libc_errno.h" +#include "src/unistd/getsid.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcGetPidTest, GetCurrSID) { + pid_t sid = LIBC_NAMESPACE::getsid(0); + ASSERT_NE(sid, -1); + ASSERT_ERRNO_SUCCESS(); + + pid_t nonexist_sid = LIBC_NAMESPACE::getsid(-1); + ASSERT_EQ(nonexist_sid, -1); + ASSERT_ERRNO_FAILURE(); +} diff --git a/libc/utils/hdrgen/header.py b/libc/utils/hdrgen/header.py index 42a075c4b6c89..11e0234eda1cf 100644 --- a/libc/utils/hdrgen/header.py +++ b/libc/utils/hdrgen/header.py @@ -233,3 +233,12 @@ def relpath(file): content.append("__END_C_DECLS") return "\n".join(content) + + def json_data(self): + return { + "name": self.name, + "standards": self.standards, + "includes": [ + str(file) for file in sorted({COMMON_HEADER} | self.includes()) + ], + } diff --git a/libc/utils/hdrgen/main.py b/libc/utils/hdrgen/main.py index 27b21ce8ca44b..d5a1c25e7ce20 100755 --- a/libc/utils/hdrgen/main.py +++ b/libc/utils/hdrgen/main.py @@ -9,6 +9,7 @@ # ==------------------------------------------------------------------------==# import argparse +import json import sys from pathlib import Path @@ -23,7 +24,7 @@ def main(): help="Path to the YAML file containing header specification", metavar="FILE", type=Path, - nargs=1, + nargs="+", ) parser.add_argument( "-o", @@ -32,6 +33,11 @@ def main(): type=Path, required=True, ) + parser.add_argument( + "--json", + help="Write JSON instead of a header, can use multiple YAML files", + action="store_true", + ) parser.add_argument( "--depfile", help="Path to write a depfile", @@ -52,6 +58,11 @@ def main(): ) args = parser.parse_args() + if not args.json and len(args.yaml_file) != 1: + print("Only one YAML file at a time without --json", file=sys.stderr) + parser.print_usage(sys.stderr) + return 2 + files_read = set() def write_depfile(): @@ -66,35 +77,47 @@ def load_yaml(path): files_read.add(path) return load_yaml_file(path, HeaderFile, args.entry_point) - merge_from_files = dict() - - def merge_from(paths): - for path in paths: - # Load each file exactly once, in case of redundant merges. - if path in merge_from_files: - continue - header = load_yaml(path) - merge_from_files[path] = header - merge_from(path.parent / f for f in header.merge_yaml_files) - - # Load the main file first. - [yaml_file] = args.yaml_file - header = load_yaml(yaml_file) - - # Now load all the merge_yaml_files, and any transitive merge_yaml_files. - merge_from(yaml_file.parent / f for f in header.merge_yaml_files) - - # Merge in all those files' contents. - for merge_from_path, merge_from_header in merge_from_files.items(): - if merge_from_header.name is not None: - print(f"{merge_from_path!s}: Merge file cannot have header field", stderr) - return 2 - header.merge(merge_from_header) - - # The header_template path is relative to the containing YAML file. - template = header.template(yaml_file.parent, files_read) - - contents = fill_public_api(header.public_api(), template) + def load_header(yaml_file): + merge_from_files = dict() + + def merge_from(paths): + for path in paths: + # Load each file exactly once, in case of redundant merges. + if path in merge_from_files: + continue + header = load_yaml(path) + merge_from_files[path] = header + merge_from(path.parent / f for f in header.merge_yaml_files) + + # Load the main file first. 
+ header = load_yaml(yaml_file) + + # Now load all the merge_yaml_files, and transitive merge_yaml_files. + merge_from(yaml_file.parent / f for f in header.merge_yaml_files) + + # Merge in all those files' contents. + for merge_from_path, merge_from_header in merge_from_files.items(): + if merge_from_header.name is not None: + print( + f"{merge_from_path!s}: Merge file cannot have header field", + file=sys.stderr, + ) + return 2 + header.merge(merge_from_header) + + return header + + if args.json: + contents = json.dumps( + [load_header(file).json_data() for file in args.yaml_file], + indent=2, + ) + else: + [yaml_file] = args.yaml_file + header = load_header(yaml_file) + # The header_template path is relative to the containing YAML file. + template = header.template(yaml_file.parent, files_read) + contents = fill_public_api(header.public_api(), template) write_depfile() diff --git a/libc/utils/hdrgen/tests/expected_output/test_small.json b/libc/utils/hdrgen/tests/expected_output/test_small.json new file mode 100644 index 0000000000000..9cc73d013a679 --- /dev/null +++ b/libc/utils/hdrgen/tests/expected_output/test_small.json @@ -0,0 +1,14 @@ +[ + { + "name": "test_small.h", + "standards": [], + "includes": [ + "__llvm-libc-common.h", + "llvm-libc-macros/test_more-macros.h", + "llvm-libc-macros/test_small-macros.h", + "llvm-libc-types/float128.h", + "llvm-libc-types/type_a.h", + "llvm-libc-types/type_b.h" + ] + } +] \ No newline at end of file diff --git a/libc/utils/hdrgen/tests/test_integration.py b/libc/utils/hdrgen/tests/test_integration.py index 4f3d2a939520a..bf393d26a8101 100644 --- a/libc/utils/hdrgen/tests/test_integration.py +++ b/libc/utils/hdrgen/tests/test_integration.py @@ -12,14 +12,14 @@ def setUp(self): self.main_script = self.source_dir.parent / "main.py" self.maxDiff = 80 * 100 - def run_script(self, yaml_file, output_file, entry_points=[]): + def run_script(self, yaml_file, output_file, entry_points=[], switches=[]): command = [ "python3", str(self.main_script), str(yaml_file), "--output", str(output_file), - ] + ] + switches for entry_point in entry_points: command.extend(["--entry-point", entry_point]) @@ -59,6 +59,15 @@ def test_generate_subdir_header(self): self.run_script(yaml_file, output_file) self.compare_files(output_file, expected_output_file) + def test_generate_json(self): + yaml_file = self.source_dir / "input/test_small.yaml" + expected_output_file = self.source_dir / "expected_output/test_small.json" + output_file = self.output_dir / "test_small.json" + + self.run_script(yaml_file, output_file, switches=["--json"]) + + self.compare_files(output_file, expected_output_file) + def main(): parser = argparse.ArgumentParser(description="TestHeaderGenIntegration arguments") diff --git a/libclc/clc/include/clc/integer/gentype.inc b/libclc/clc/include/clc/integer/gentype.inc index 98682a6d32c70..408ea821c6e14 100644 --- a/libclc/clc/include/clc/integer/gentype.inc +++ b/libclc/clc/include/clc/integer/gentype.inc @@ -1,5 +1,15 @@ #include #include +#include + +#define __CLC_AS_GENTYPE __CLC_XCONCAT(__clc_as_, __CLC_GENTYPE) +#define __CLC_CONVERT_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_GENTYPE) + +#define __CLC_AS_U_GENTYPE __CLC_XCONCAT(__clc_as_, __CLC_U_GENTYPE) +#define __CLC_CONVERT_U_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_U_GENTYPE) + +#define __CLC_AS_S_GENTYPE __CLC_XCONCAT(__clc_as_, __CLC_S_GENTYPE) +#define __CLC_CONVERT_S_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_S_GENTYPE) // These 2 defines only change when switching between data sizes or 
base types // to keep this file manageable. @@ -532,3 +542,12 @@ #undef __CLC_GENSIZE #undef __CLC_SCALAR_GENTYPE #undef __CLC_BODY + +#undef __CLC_CONVERT_S_GENTYPE +#undef __CLC_AS_S_GENTYPE + +#undef __CLC_CONVERT_U_GENTYPE +#undef __CLC_AS_U_GENTYPE + +#undef __CLC_CONVERT_GENTYPE +#undef __CLC_AS_GENTYPE diff --git a/libclc/clc/include/clc/math/clc_frexp.h b/libclc/clc/include/clc/math/clc_frexp.h new file mode 100644 index 0000000000000..f8a88ce69e154 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_frexp.h @@ -0,0 +1,11 @@ +#ifndef __CLC_MATH_CLC_FREXP_H__ +#define __CLC_MATH_CLC_FREXP_H__ + +#define __CLC_FUNCTION __clc_frexp +#define __CLC_BODY +#include + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_FREXP_H__ diff --git a/libclc/clc/include/clc/math/clc_modf.h b/libclc/clc/include/clc/math/clc_modf.h new file mode 100644 index 0000000000000..45484b09628a4 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_modf.h @@ -0,0 +1,11 @@ +#ifndef __CLC_MATH_CLC_MODF_H__ +#define __CLC_MATH_CLC_MODF_H__ + +#define __CLC_FUNCTION __clc_modf +#define __CLC_BODY +#include + +#undef __CLC_BODY +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_MODF_H__ diff --git a/libclc/clc/include/clc/math/gentype.inc b/libclc/clc/include/clc/math/gentype.inc index 3c80f1c6172ad..c624f4d19bbcf 100644 --- a/libclc/clc/include/clc/math/gentype.inc +++ b/libclc/clc/include/clc/math/gentype.inc @@ -1,70 +1,120 @@ #include #include +#include + +// Define some useful macros for type conversions. +#define __CLC_AS_GENTYPE __CLC_XCONCAT(__clc_as_, __CLC_GENTYPE) +#define __CLC_CONVERT_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_GENTYPE) + +// Define some macros for types matching the same vector size as __CLC_GENTYPE. +#define __CLC_HALFN __CLC_XCONCAT(half, __CLC_VECSIZE) +#define __CLC_FLOATN __CLC_XCONCAT(float, __CLC_VECSIZE) +#define __CLC_DOUBLEN __CLC_XCONCAT(double, __CLC_VECSIZE) + +#define __CLC_CHARN __CLC_XCONCAT(char, __CLC_VECSIZE) +#define __CLC_SHORTN __CLC_XCONCAT(short, __CLC_VECSIZE) +#define __CLC_INTN __CLC_XCONCAT(int, __CLC_VECSIZE) +#define __CLC_LONGN __CLC_XCONCAT(long, __CLC_VECSIZE) + +#define __CLC_UCHARN __CLC_XCONCAT(uchar, __CLC_VECSIZE) +#define __CLC_USHORTN __CLC_XCONCAT(ushort, __CLC_VECSIZE) +#define __CLC_UINTN __CLC_XCONCAT(uint, __CLC_VECSIZE) +#define __CLC_ULONGN __CLC_XCONCAT(ulong, __CLC_VECSIZE) + +#define __CLC_AS_CHARN __CLC_XCONCAT(__clc_as_, __CLC_CHARN) +#define __CLC_AS_SHORTN __CLC_XCONCAT(__clc_as_, __CLC_SHORTN) +#define __CLC_AS_INTN __CLC_XCONCAT(__clc_as_, __CLC_INTN) +#define __CLC_AS_LONGN __CLC_XCONCAT(__clc_as_, __CLC_LONGN) + +#define __CLC_AS_UCHARN __CLC_XCONCAT(__clc_as_, __CLC_UCHARN) +#define __CLC_AS_USHORTN __CLC_XCONCAT(__clc_as_, __CLC_USHORTN) +#define __CLC_AS_UINTN __CLC_XCONCAT(__clc_as_, __CLC_UINTN) +#define __CLC_AS_ULONGN __CLC_XCONCAT(__clc_as_, __CLC_ULONGN) + +#define __CLC_CONVERT_HALFN __CLC_XCONCAT(__clc_convert_half, __CLC_VECSIZE) +#define __CLC_CONVERT_FLOATN __CLC_XCONCAT(__clc_convert_float, __CLC_VECSIZE) +#define __CLC_CONVERT_DOUBLEN __CLC_XCONCAT(__clc_convert_double, __CLC_VECSIZE) + +#define __CLC_CONVERT_CHARN __CLC_XCONCAT(__clc_convert_, __CLC_CHARN) +#define __CLC_CONVERT_SHORTN __CLC_XCONCAT(__clc_convert_, __CLC_SHORTN) +#define __CLC_CONVERT_INTN __CLC_XCONCAT(__clc_convert_, __CLC_INTN) +#define __CLC_CONVERT_LONGN __CLC_XCONCAT(__clc_convert_, __CLC_LONGN) + +#define __CLC_CONVERT_UCHARN __CLC_XCONCAT(__clc_convert_, __CLC_UCHARN) +#define __CLC_CONVERT_USHORTN 
__CLC_XCONCAT(__clc_convert_, __CLC_USHORTN) +#define __CLC_CONVERT_UINTN __CLC_XCONCAT(__clc_convert_, __CLC_UINTN) +#define __CLC_CONVERT_ULONGN __CLC_XCONCAT(__clc_convert_, __CLC_ULONGN) + +// See definitions of __CLC_S_GENTYPE/__CLC_U_GENTYPE below, which depend on the +// specific size of floating-point type. These are the signed and unsigned +// integers of the same bitwidth and element count as the GENTYPE. They match +// the naming conventions in the integer version gentype.inc, for +// convenience. +#define __CLC_AS_S_GENTYPE __CLC_XCONCAT(__clc_as_, __CLC_S_GENTYPE) +#define __CLC_AS_U_GENTYPE __CLC_XCONCAT(__clc_as_, __CLC_U_GENTYPE) + +#define __CLC_CONVERT_S_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_S_GENTYPE) +#define __CLC_CONVERT_U_GENTYPE __CLC_XCONCAT(__clc_convert_, __CLC_U_GENTYPE) #define __CLC_SCALAR_GENTYPE float #define __CLC_FPSIZE 32 #define __CLC_FP_LIT(x) x##F +#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE) +#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE) + #define __CLC_GENTYPE float -#define __CLC_INTN int #define __CLC_BIT_INTN int #define __CLC_SCALAR +#define __CLC_VECSIZE #include __CLC_BODY +#undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #undef __CLC_SCALAR #define __CLC_GENTYPE float2 -#define __CLC_INTN int2 #define __CLC_BIT_INTN int2 #define __CLC_VECSIZE 2 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE float3 -#define __CLC_INTN int3 #define __CLC_BIT_INTN int3 #define __CLC_VECSIZE 3 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE float4 -#define __CLC_INTN int4 #define __CLC_BIT_INTN int4 #define __CLC_VECSIZE 4 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE float8 -#define __CLC_INTN int8 #define __CLC_BIT_INTN int8 #define __CLC_VECSIZE 8 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE float16 -#define __CLC_INTN int16 #define __CLC_BIT_INTN int16 #define __CLC_VECSIZE 16 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE #undef __CLC_FP_LIT #undef __CLC_FPSIZE #undef __CLC_SCALAR_GENTYPE @@ -77,66 +127,61 @@ #define __CLC_FPSIZE 64 #define __CLC_FP_LIT(x) (x) +#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE) +#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE) + #define __CLC_SCALAR +#define __CLC_VECSIZE #define __CLC_GENTYPE double -#define __CLC_INTN int #define __CLC_BIT_INTN long #include __CLC_BODY #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN +#undef __CLC_VECSIZE #undef __CLC_SCALAR #define __CLC_GENTYPE double2 -#define __CLC_INTN int2 #define __CLC_BIT_INTN long2 #define __CLC_VECSIZE 2 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE double3 -#define __CLC_INTN int3 #define __CLC_BIT_INTN long3 #define __CLC_VECSIZE 3 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE double4 -#define __CLC_INTN int4 #define __CLC_BIT_INTN long4 #define __CLC_VECSIZE 4 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE double8 -#define 
__CLC_INTN int8 #define __CLC_BIT_INTN long8 #define __CLC_VECSIZE 8 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE double16 -#define __CLC_INTN int16 #define __CLC_BIT_INTN long16 #define __CLC_VECSIZE 16 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE #undef __CLC_FP_LIT #undef __CLC_FPSIZE #undef __CLC_SCALAR_GENTYPE @@ -151,66 +196,61 @@ #define __CLC_FPSIZE 16 #define __CLC_FP_LIT(x) x##H +#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE) +#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE) + #define __CLC_SCALAR +#define __CLC_VECSIZE #define __CLC_GENTYPE half -#define __CLC_INTN int #define __CLC_BIT_INTN short #include __CLC_BODY #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN +#undef __CLC_VECSIZE #undef __CLC_SCALAR #define __CLC_GENTYPE half2 -#define __CLC_INTN int2 #define __CLC_BIT_INTN short2 #define __CLC_VECSIZE 2 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE half3 -#define __CLC_INTN int3 #define __CLC_BIT_INTN short3 #define __CLC_VECSIZE 3 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE half4 -#define __CLC_INTN int4 #define __CLC_BIT_INTN short4 #define __CLC_VECSIZE 4 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE half8 -#define __CLC_INTN int8 #define __CLC_BIT_INTN short8 #define __CLC_VECSIZE 8 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN #define __CLC_GENTYPE half16 -#define __CLC_INTN int16 #define __CLC_BIT_INTN short16 #define __CLC_VECSIZE 16 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN -#undef __CLC_INTN +#undef __CLC_U_GENTYPE +#undef __CLC_S_GENTYPE #undef __CLC_FP_LIT #undef __CLC_FPSIZE #undef __CLC_SCALAR_GENTYPE @@ -218,3 +258,50 @@ #endif #undef __CLC_BODY + +#undef __CLC_AS_U_GENTYPE +#undef __CLC_AS_S_GENTYPE + +#undef __CLC_CONVERT_U_GENTYPE +#undef __CLC_CONVERT_S_GENTYPE + +#undef __CLC_AS_CHARN +#undef __CLC_AS_SHORTN +#undef __CLC_AS_INTN +#undef __CLC_AS_LONGN + +#undef __CLC_AS_UCHARN +#undef __CLC_AS_USHORTN +#undef __CLC_AS_UINTN +#undef __CLC_AS_ULONGN + +#undef __CLC_CONVERT_HALFN +#undef __CLC_CONVERT_FLOATN +#undef __CLC_CONVERT_DOUBLEN + +#undef __CLC_CONVERT_CHARN +#undef __CLC_CONVERT_SHORTN +#undef __CLC_CONVERT_INTN +#undef __CLC_CONVERT_LONGN + +#undef __CLC_CONVERT_UCHARN +#undef __CLC_CONVERT_USHORTN +#undef __CLC_CONVERT_UINTN +#undef __CLC_CONVERT_ULONGN + +#undef __CLC_ULONGN +#undef __CLC_UINTN +#undef __CLC_USHORTN +#undef __CLC_UCHARN + +#undef __CLC_LONGN +#undef __CLC_INTN +#undef __CLC_SHORTN +#undef __CLC_CHARN + +#undef __CLC_DOUBLEN +#undef __CLC_FLOATN +#undef __CLC_HALFN + +#undef __CLC_AS_GENTYPE +#undef __CLC_CONVERT_GENTYPE diff --git a/libclc/clc/include/clc/math/unary_decl_with_int_ptr.inc b/libclc/clc/include/clc/math/unary_decl_with_int_ptr.inc new file mode 100644 index 0000000000000..088e3bf122ee5 --- /dev/null +++ b/libclc/clc/include/clc/math/unary_decl_with_int_ptr.inc @@ -0,0 +1,6 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x, + global __CLC_INTN *iptr); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x, + local 
__CLC_INTN *iptr); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x, + private __CLC_INTN *iptr); diff --git a/libclc/clc/include/clc/math/unary_decl_with_ptr.inc b/libclc/clc/include/clc/math/unary_decl_with_ptr.inc new file mode 100644 index 0000000000000..04122108bc1f7 --- /dev/null +++ b/libclc/clc/include/clc/math/unary_decl_with_ptr.inc @@ -0,0 +1,6 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x, + global __CLC_GENTYPE *ptr); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x, + local __CLC_GENTYPE *ptr); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +__CLC_FUNCTION(__CLC_GENTYPE x, private __CLC_GENTYPE *ptr); diff --git a/libclc/clc/include/clc/math/unary_def_with_int_ptr.inc b/libclc/clc/include/clc/math/unary_def_with_int_ptr.inc new file mode 100644 index 0000000000000..95f50c27bc34b --- /dev/null +++ b/libclc/clc/include/clc/math/unary_def_with_int_ptr.inc @@ -0,0 +1,20 @@ +#include + +#ifndef __CLC_FUNCTION +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, + private __CLC_INTN *iptr) { + return __CLC_FUNCTION(FUNCTION)(x, iptr); +} + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, + global __CLC_INTN *iptr) { + return __CLC_FUNCTION(FUNCTION)(x, iptr); +} + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, + local __CLC_INTN *iptr) { + return __CLC_FUNCTION(FUNCTION)(x, iptr); +} diff --git a/libclc/clc/include/clc/math/unary_def_with_ptr.inc b/libclc/clc/include/clc/math/unary_def_with_ptr.inc new file mode 100644 index 0000000000000..de7c9af756980 --- /dev/null +++ b/libclc/clc/include/clc/math/unary_def_with_ptr.inc @@ -0,0 +1,20 @@ +#include + +#ifndef __CLC_FUNCTION +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, + private __CLC_GENTYPE *ptr) { + return __CLC_FUNCTION(FUNCTION)(x, ptr); +} + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, + global __CLC_GENTYPE *ptr) { + return __CLC_FUNCTION(FUNCTION)(x, ptr); +} + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, + local __CLC_GENTYPE *ptr) { + return __CLC_FUNCTION(FUNCTION)(x, ptr); +} diff --git a/libclc/clc/include/clc/relational/clc_select.h b/libclc/clc/include/clc/relational/clc_select.h index a92f2051b577d..480a648c2efc0 100644 --- a/libclc/clc/include/clc/relational/clc_select.h +++ b/libclc/clc/include/clc/relational/clc_select.h @@ -1,9 +1,7 @@ #ifndef __CLC_RELATIONAL_CLC_SELECT_H__ #define __CLC_RELATIONAL_CLC_SELECT_H__ -/* Duplciate these so we don't have to distribute utils.h */ -#define __CLC_CONCAT(x, y) x##y -#define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y) +#include #define __CLC_SELECT_FN __clc_select @@ -13,7 +11,5 @@ #include #undef __CLC_SELECT_FN -#undef __CLC_CONCAT -#undef __CLC_XCONCAT #endif // __CLC_RELATIONAL_CLC_SELECT_H__ diff --git a/libclc/clc/include/clc/relational/clc_select_decl.inc b/libclc/clc/include/clc/relational/clc_select_decl.inc index 3a4f2dcb75170..6e94aab19b82d 100644 --- a/libclc/clc/include/clc/relational/clc_select_decl.inc +++ b/libclc/clc/include/clc/relational/clc_select_decl.inc @@ -1,29 +1,6 @@ -#ifdef __CLC_SCALAR -#define __CLC_VECSIZE -#endif - -#if __CLC_FPSIZE == 64 -#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE) -#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 32 -#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE) -#define __CLC_U_GENTYPE 
__CLC_XCONCAT(uint, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 16 -#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE) -#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE) -#endif - _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_SELECT_FN(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_S_GENTYPE z); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_SELECT_FN(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_U_GENTYPE z); - -#ifdef __CLC_FPSIZE -#undef __CLC_S_GENTYPE -#undef __CLC_U_GENTYPE -#endif -#ifdef __CLC_SCALAR -#undef __CLC_VECSIZE -#endif diff --git a/libclc/clc/include/clc/relational/clc_select_impl.inc b/libclc/clc/include/clc/relational/clc_select_impl.inc index ad53e822179fb..bc6f3d69caa38 100644 --- a/libclc/clc/include/clc/relational/clc_select_impl.inc +++ b/libclc/clc/include/clc/relational/clc_select_impl.inc @@ -1,18 +1,3 @@ -#ifdef __CLC_SCALAR -#define __CLC_VECSIZE -#endif - -#if __CLC_FPSIZE == 64 -#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE) -#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 32 -#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE) -#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 16 -#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE) -#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE) -#endif - _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_SELECT_FN(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_S_GENTYPE z) { @@ -24,12 +9,3 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_SELECT_FN(__CLC_GENTYPE x, __CLC_U_GENTYPE z) { __CLC_SELECT_DEF(x, y, z); } - -#ifdef __CLC_FPSIZE -#undef __CLC_S_GENTYPE -#undef __CLC_U_GENTYPE -#endif - -#ifdef __CLC_SCALAR -#undef __CLC_VECSIZE -#endif diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index b0eaf84c41438..f7fdba0a341ed 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -21,7 +21,9 @@ math/clc_ceil.cl math/clc_copysign.cl math/clc_fabs.cl math/clc_floor.cl +math/clc_frexp.cl math/clc_mad.cl +math/clc_modf.cl math/clc_nextafter.cl math/clc_rint.cl math/clc_trunc.cl diff --git a/libclc/clc/lib/generic/integer/clc_rotate.inc b/libclc/clc/lib/generic/integer/clc_rotate.inc index f144553eabd52..3dcb30b25a6fe 100644 --- a/libclc/clc/lib/generic/integer/clc_rotate.inc +++ b/libclc/clc/lib/generic/integer/clc_rotate.inc @@ -1,6 +1,3 @@ -#define __CLC_AS_GENTYPE(x) __CLC_XCONCAT(__clc_as_, __CLC_GENTYPE)(x) -#define __CLC_AS_U_GENTYPE(x) __CLC_XCONCAT(__clc_as_, __CLC_U_GENTYPE)(x) - // The rotate(A, B) builtin left-shifts corresponding to the usual OpenCL shift // modulo rules. These rules state that A is left-shifted by the log2(N) least // significant bits in B when viewed as an unsigned integer value. Thus we don't diff --git a/libclc/clc/lib/generic/math/clc_frexp.cl b/libclc/clc/lib/generic/math/clc_frexp.cl new file mode 100644 index 0000000000000..ecc3eb6281b1e --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_frexp.cl @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE private +#include +#undef __CLC_ADDRESS_SPACE + +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE global +#include +#undef __CLC_ADDRESS_SPACE + +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE local +#include +#undef __CLC_ADDRESS_SPACE diff --git a/libclc/clc/lib/generic/math/clc_frexp.inc b/libclc/clc/lib/generic/math/clc_frexp.inc new file mode 100644 index 0000000000000..22966f08579a9 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_frexp.inc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * Copyright (c) 2016 Aaron Watry + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include + +#if __CLC_FPSIZE == 32 +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE +__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) { + __CLC_INTN i = __CLC_AS_INTN(x); + __CLC_INTN ai = i & 0x7fffffff; + __CLC_INTN d = ai > 0 & ai < 0x00800000; + /* scale subnormal by 2^26 without multiplying */ + __CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0d800000) - 0x1.0p-100f; + ai = __clc_select(ai, __CLC_AS_INTN(s), d); + __CLC_INTN e = + (ai >> 23) - 126 - __clc_select((__CLC_INTN)0, (__CLC_INTN)26, d); + __CLC_INTN t = ai == (__CLC_INTN)0 | e == (__CLC_INTN)129; + i = (i & (__CLC_INTN)0x80000000) | (__CLC_INTN)0x3f000000 | (ai & 0x007fffff); + *ep = __clc_select(e, (__CLC_INTN)0, t); + return __clc_select(__CLC_AS_GENTYPE(i), x, t); +} +#endif + +#if __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE +__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) { + return __CLC_CONVERT_HALFN(__clc_frexp(__CLC_CONVERT_FLOATN(x), ep)); +} + +#endif + +#if __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE +__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) { + __CLC_LONGN i = __CLC_AS_LONGN(x); + __CLC_LONGN ai = i & 0x7fffffffffffffffL; + __CLC_LONGN d = ai > 0 & ai < 0x0010000000000000L; + // scale subnormal by 2^54 without multiplying + __CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0370000000000000L) - 0x1.0p-968; + ai = __clc_select(ai, __CLC_AS_LONGN(s), d); + __CLC_LONGN e = (ai >> 52) - (__CLC_LONGN)1022 - + __clc_select((__CLC_LONGN)0, (__CLC_LONGN)54, d); + __CLC_LONGN t = ai == 0 | e == 1025; + i = (i & (__CLC_LONGN)0x8000000000000000L) | + (__CLC_LONGN)0x3fe0000000000000L | + (ai & (__CLC_LONGN)0x000fffffffffffffL); + *ep = __CLC_CONVERT_INTN(__clc_select(e, 0L, t)); + return __clc_select(__CLC_AS_GENTYPE(i), x, t); +} + +#endif diff --git a/libclc/generic/include/clc/math/modf.inc b/libclc/clc/lib/generic/math/clc_modf.cl similarity index 76% rename from libclc/generic/include/clc/math/modf.inc rename to libclc/clc/lib/generic/math/clc_modf.cl index 42bcf625686d2..27d2a08515257 100644 --- a/libclc/generic/include/clc/math/modf.inc +++ b/libclc/clc/lib/generic/math/clc_modf.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. + * Copyright (c) 2015 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -20,6 +20,11 @@ * THE SOFTWARE. */ -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, global __CLC_GENTYPE *iptr); -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, local __CLC_GENTYPE *iptr); -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr); +#include +#include +#include +#include +#include + +#define __CLC_BODY +#include diff --git a/libclc/generic/lib/math/modf.inc b/libclc/clc/lib/generic/math/clc_modf.inc similarity index 68% rename from libclc/generic/lib/math/modf.inc rename to libclc/clc/lib/generic/math/clc_modf.inc index ff7ef30dd42f8..8242291c98d4e 100644 --- a/libclc/generic/lib/math/modf.inc +++ b/libclc/clc/lib/generic/math/clc_modf.inc @@ -19,31 +19,22 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
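As an aside, the fp32 branch of __clc_frexp above reduces to the following scalar C sketch, assuming IEEE-754 binary32 floats and using memcpy for the bit-casts that __CLC_AS_INTN/__CLC_AS_GENTYPE perform in the OpenCL source. The subnormal path scales by 2^26 without a multiply, exactly as the comment describes:

```c
/* Scalar sketch (assumption: IEEE-754 binary32) of the vector code above. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static float frexp_sketch(float x, int *ep) {
  uint32_t i, ai;
  memcpy(&i, &x, sizeof i);            /* bit-cast float -> uint32 */
  ai = i & 0x7fffffffu;                /* drop the sign bit */
  int d = ai > 0 && ai < 0x00800000u;  /* nonzero subnormal? */
  if (d) {                             /* renormalize: scale by 2^26 */
    uint32_t sb = ai | 0x0d800000u;    /* graft on a 2^-100 exponent */
    float s;
    memcpy(&s, &sb, sizeof s);
    s -= 0x1.0p-100f;                  /* strip the implicit leading 1 */
    memcpy(&ai, &s, sizeof ai);
  }
  int e = (int)(ai >> 23) - 126 - (d ? 26 : 0);
  int t = ai == 0 || e == 129;         /* zero, inf, or NaN: pass through */
  i = (i & 0x80000000u) | 0x3f000000u | (ai & 0x007fffffu);
  *ep = t ? 0 : e;
  if (!t)
    memcpy(&x, &i, sizeof x);
  return x;                            /* magnitude in [0.5, 1) if finite */
}

int main(void) {
  int e;
  float m = frexp_sketch(48.0f, &e);
  printf("48 = %g * 2^%d\n", (double)m, e); /* 48 = 0.75 * 2^6 */
  return 0;
}
```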
*/ - -#if __CLC_FPSIZE == 64 -#define ZERO 0.0 -#elif __CLC_FPSIZE == 32 -#define ZERO 0.0f -#elif __CLC_FPSIZE == 16 -#define ZERO 0.0h -#endif - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, - private __CLC_GENTYPE *iptr) { - *iptr = trunc(x); - return copysign(isinf(x) ? ZERO : x - *iptr, x); +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_modf(__CLC_GENTYPE x, + private __CLC_GENTYPE *iptr) { + *iptr = __clc_trunc(x); + return __clc_copysign(__clc_isinf(x) ? __CLC_FP_LIT(0.0) : x - *iptr, x); } -#define MODF_DEF(addrspace) \ - _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, \ - addrspace __CLC_GENTYPE *iptr) { \ +#define CLC_MODF_DEF(addrspace) \ + _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_modf( \ + __CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ __CLC_GENTYPE private_iptr; \ - __CLC_GENTYPE ret = modf(x, &private_iptr); \ + __CLC_GENTYPE ret = __clc_modf(x, &private_iptr); \ *iptr = private_iptr; \ return ret; \ } -MODF_DEF(local); -MODF_DEF(global); +CLC_MODF_DEF(local); +CLC_MODF_DEF(global); -#undef ZERO +#undef CLC_MODF_DEF diff --git a/libclc/clspv/lib/shared/vstore_half.inc b/libclc/clspv/lib/shared/vstore_half.inc index 83704cca3a010..5559a56603598 100644 --- a/libclc/clspv/lib/shared/vstore_half.inc +++ b/libclc/clspv/lib/shared/vstore_half.inc @@ -1,6 +1,6 @@ // This does exist only for fp32 #if __CLC_FPSIZE == 32 -#ifdef __CLC_VECSIZE +#ifndef __CLC_SCALAR FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private); FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local); diff --git a/libclc/generic/include/clc/math/modf.h b/libclc/generic/include/clc/math/modf.h index f0fb6ca81920a..76eb1284432e4 100644 --- a/libclc/generic/include/clc/math/modf.h +++ b/libclc/generic/include/clc/math/modf.h @@ -20,5 +20,8 @@ * THE SOFTWARE. 
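The relocated __clc_modf boils down to this scalar C sketch, with host libm calls standing in for __clc_trunc, __clc_isinf, and __clc_copysign. The copysign keeps a correctly signed (possibly negative-zero) fraction, and the isinf guard avoids computing inf - inf = NaN:

```c
/* Scalar sketch of the __clc_modf definition above, using host libm. */
#include <math.h>
#include <stdio.h>

static float modf_sketch(float x, float *iptr) {
  *iptr = truncf(x);                            /* integral part */
  return copysignf(isinf(x) ? 0.0f : x - *iptr, /* fractional part */
                   x);
}

int main(void) {
  float ip;
  float fp = modf_sketch(-2.25f, &ip);
  printf("-2.25 -> integral %g, fractional %g\n", (double)ip, (double)fp);
  /* prints: -2.25 -> integral -2, fractional -0.25 */
  return 0;
}
```

The CLC_MODF_DEF wrappers above get the local and global overloads for free by round-tripping through a private temporary, so only the private-pointer overload carries real logic.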
*/ -#define __CLC_BODY +#define __CLC_FUNCTION modf +#define __CLC_BODY #include + +#undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/math/nan.h b/libclc/generic/include/clc/math/nan.h index 090749307f9ee..af48855739545 100644 --- a/libclc/generic/include/clc/math/nan.h +++ b/libclc/generic/include/clc/math/nan.h @@ -1,8 +1,2 @@ -#define __CLC_CONCAT(x, y) x ## y -#define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y) - #define __CLC_BODY #include - -#undef __CLC_XCONCAT -#undef __CLC_CONCAT diff --git a/libclc/generic/include/clc/math/nan.inc b/libclc/generic/include/clc/math/nan.inc index 970dca7dae84b..d37aef726556f 100644 --- a/libclc/generic/include/clc/math/nan.inc +++ b/libclc/generic/include/clc/math/nan.inc @@ -1,18 +1 @@ -#ifdef __CLC_SCALAR -#define __CLC_VECSIZE -#endif - -#if __CLC_FPSIZE == 64 -#define __CLC_NATN __CLC_XCONCAT(ulong, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 32 -#define __CLC_NATN __CLC_XCONCAT(uint, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 16 -#define __CLC_NATN __CLC_XCONCAT(ushort, __CLC_VECSIZE) -#endif - -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE nan(__CLC_NATN code); - -#undef __CLC_NATN -#ifdef __CLC_SCALAR -#undef __CLC_VECSIZE -#endif +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE nan(__CLC_U_GENTYPE code); diff --git a/libclc/generic/include/clc/relational/select.h b/libclc/generic/include/clc/relational/select.h index 847884a07b7f2..c138060d1bd9e 100644 --- a/libclc/generic/include/clc/relational/select.h +++ b/libclc/generic/include/clc/relational/select.h @@ -1,7 +1,3 @@ -/* Duplciate these so we don't have to distribute utils.h */ -#define __CLC_CONCAT(x, y) x ## y -#define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y) - #define __CLC_SELECT_FN select #define __CLC_BODY @@ -10,5 +6,3 @@ #include #undef __CLC_SELECT_FN -#undef __CLC_CONCAT -#undef __CLC_XCONCAT diff --git a/libclc/generic/lib/math/frexp.cl b/libclc/generic/lib/math/frexp.cl index 75a9158ff318f..fa6613ac27459 100644 --- a/libclc/generic/lib/math/frexp.cl +++ b/libclc/generic/lib/math/frexp.cl @@ -1,17 +1,6 @@ #include -#include +#include -#define __CLC_BODY -#define __CLC_ADDRESS_SPACE private +#define FUNCTION frexp +#define __CLC_BODY #include -#undef __CLC_ADDRESS_SPACE - -#define __CLC_BODY -#define __CLC_ADDRESS_SPACE global -#include -#undef __CLC_ADDRESS_SPACE - -#define __CLC_BODY -#define __CLC_ADDRESS_SPACE local -#include -#undef __CLC_ADDRESS_SPACE diff --git a/libclc/generic/lib/math/frexp.inc b/libclc/generic/lib/math/frexp.inc deleted file mode 100644 index 0d938d23c26a1..0000000000000 --- a/libclc/generic/lib/math/frexp.inc +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2014 Advanced Micro Devices, Inc. - * Copyright (c) 2016 Aaron Watry - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include - -#define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE) -#define __CLC_AS_INTN __CLC_XCONCAT(as_, __CLC_INTN) - -#if __CLC_FPSIZE == 32 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) { - __CLC_INTN i = __CLC_AS_INTN(x); - __CLC_INTN ai = i & 0x7fffffff; - __CLC_INTN d = ai > 0 & ai < 0x00800000; - /* scale subnormal by 2^26 without multiplying */ - __CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0d800000) - 0x1.0p-100f; - ai = select(ai, __CLC_AS_INTN(s), d); - __CLC_INTN e = (ai >> 23) - 126 - select((__CLC_INTN)0, (__CLC_INTN)26, d); - __CLC_INTN t = ai == (__CLC_INTN)0 | e == (__CLC_INTN)129; - i = (i & (__CLC_INTN)0x80000000) | (__CLC_INTN)0x3f000000 | (ai & 0x007fffff); - *ep = select(e, (__CLC_INTN)0, t); - return select(__CLC_AS_GENTYPE(i), x, t); -} -#endif - -#if __CLC_FPSIZE == 16 -#ifdef __CLC_SCALAR -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, - __CLC_ADDRESS_SPACE __CLC_INTN *ep) { - return (__CLC_GENTYPE)frexp((float)x, ep); -} -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, __CLC_GENTYPE, frexp, - __CLC_GENTYPE, __CLC_ADDRESS_SPACE, __CLC_INTN); -#endif -#endif - -#if __CLC_FPSIZE == 64 -#ifdef __CLC_SCALAR -#define __CLC_AS_LONGN as_long -#define __CLC_LONGN long -#define __CLC_CONVERT_INTN convert_int -#else -#define __CLC_AS_LONGN __CLC_XCONCAT(as_long, __CLC_VECSIZE) -#define __CLC_LONGN __CLC_XCONCAT(long, __CLC_VECSIZE) -#define __CLC_CONVERT_INTN __CLC_XCONCAT(convert_int, __CLC_VECSIZE) -#endif - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) { - __CLC_LONGN i = __CLC_AS_LONGN(x); - __CLC_LONGN ai = i & 0x7fffffffffffffffL; - __CLC_LONGN d = ai > 0 & ai < 0x0010000000000000L; - // scale subnormal by 2^54 without multiplying - __CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0370000000000000L) - 0x1.0p-968; - ai = select(ai, __CLC_AS_LONGN(s), d); - __CLC_LONGN e = (ai >> 52) - (__CLC_LONGN)1022 - select((__CLC_LONGN)0, (__CLC_LONGN)54, d); - __CLC_LONGN t = ai == 0 | e == 1025; - i = (i & (__CLC_LONGN)0x8000000000000000L) | (__CLC_LONGN)0x3fe0000000000000L | (ai & (__CLC_LONGN)0x000fffffffffffffL); - *ep = __CLC_CONVERT_INTN(select(e, 0L, t)); - return select(__CLC_AS_GENTYPE(i), x, t); -} - -#undef __CLC_AS_LONGN -#undef __CLC_LONGN -#undef __CLC_CONVERT_INTN -#endif - -#undef __CLC_AS_GENTYPE -#undef __CLC_AS_INTN diff --git a/libclc/generic/lib/math/maxmag.inc b/libclc/generic/lib/math/maxmag.inc index 226316ab39879..5ac2134baa8ce 100644 --- a/libclc/generic/lib/math/maxmag.inc +++ b/libclc/generic/lib/math/maxmag.inc @@ -1,7 +1,3 @@ -#ifdef __CLC_SCALAR -#define __CLC_VECSIZE -#endif - #if __CLC_FPSIZE == 64 #define __CLC_CONVERT_NATN __CLC_XCONCAT(convert_long, __CLC_VECSIZE) #elif __CLC_FPSIZE == 32 @@ -16,7 +12,3 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE maxmag(__CLC_GENTYPE x, __CLC_GENTYPE y) { } #undef __CLC_CONVERT_NATN - -#ifdef __CLC_SCALAR -#undef __CLC_VECSIZE -#endif diff --git a/libclc/generic/lib/math/modf.cl b/libclc/generic/lib/math/modf.cl index 5098a41d079c5..5a01a316132e2 100644 --- a/libclc/generic/lib/math/modf.cl +++ b/libclc/generic/lib/math/modf.cl @@ -21,7 +21,8 @@ */ #include -#include +#include -#define __CLC_BODY +#define FUNCTION modf +#define 
__CLC_BODY #include diff --git a/libclc/generic/lib/math/nan.cl b/libclc/generic/lib/math/nan.cl index 8f89e8e760de8..5ec60d4521fc0 100644 --- a/libclc/generic/lib/math/nan.cl +++ b/libclc/generic/lib/math/nan.cl @@ -1,6 +1,6 @@ #include #include -#define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE) +#define __OPENCL_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE) #define __CLC_BODY #include diff --git a/libclc/generic/lib/math/nan.inc b/libclc/generic/lib/math/nan.inc index f6508c3ab521c..ded7f45072a93 100644 --- a/libclc/generic/lib/math/nan.inc +++ b/libclc/generic/lib/math/nan.inc @@ -1,27 +1,15 @@ -#ifdef __CLC_SCALAR -#define __CLC_VECSIZE -#endif - #if __CLC_FPSIZE == 64 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ulong, __CLC_VECSIZE) code) -{ - return __CLC_AS_GENTYPE(code | 0x7ff0000000000000ul); -} +#define NAN_MASK 0x7ff0000000000000ul #elif __CLC_FPSIZE == 32 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(uint, __CLC_VECSIZE) code) -{ - return __CLC_AS_GENTYPE(code | 0x7fc00000); -} +#define NAN_MASK 0x7fc00000 #elif __CLC_FPSIZE == 16 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ushort, __CLC_VECSIZE) code) -{ - const ushort mask = 0x7e00; - const __CLC_XCONCAT(ushort, __CLC_VECSIZE) res = code | mask; - return __CLC_AS_GENTYPE(res); -} +#define NAN_MASK 0x7e00 #endif +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_U_GENTYPE code) { + const __CLC_U_GENTYPE mask = NAN_MASK; + const __CLC_U_GENTYPE res = code | mask; + return __OPENCL_AS_GENTYPE(res); +} -#ifdef __CLC_SCALAR -#undef __CLC_VECSIZE -#endif +#undef NAN_MASK diff --git a/libclc/generic/lib/shared/vload_half.inc b/libclc/generic/lib/shared/vload_half.inc index 11b2bf7e6d6e6..5e60e3cb3658a 100644 --- a/libclc/generic/lib/shared/vload_half.inc +++ b/libclc/generic/lib/shared/vload_half.inc @@ -1,6 +1,6 @@ #if __CLC_FPSIZE == 32 -#ifdef __CLC_VECSIZE +#ifndef __CLC_SCALAR #if __CLC_VECSIZE == 3 # define __CLC_OFFSET 4 diff --git a/libclc/generic/lib/shared/vstore_half.inc b/libclc/generic/lib/shared/vstore_half.inc index 2d8337856236b..b3513a27bef9f 100644 --- a/libclc/generic/lib/shared/vstore_half.inc +++ b/libclc/generic/lib/shared/vstore_half.inc @@ -1,6 +1,6 @@ // This does not exist for fp16 #if __CLC_FPSIZE > 16 -#ifdef __CLC_VECSIZE +#ifndef __CLC_SCALAR #if __CLC_VECSIZE == 3 # define __CLC_OFFSET 4 diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h index bbee9f49867fd..c1e73caeecced 100644 --- a/libcxx/include/__locale_dir/locale_base_api.h +++ b/libcxx/include/__locale_dir/locale_base_api.h @@ -23,12 +23,16 @@ // Variadic functions may be implemented as templates with a parameter pack instead // of C-style variadic functions. // +// Most of these functions are only required when building the library. Functions that are also +// required when merely using the headers are marked as such below. +// // TODO: __localeconv shouldn't take a reference, but the Windows implementation doesn't allow copying __locale_t +// TODO: Eliminate the need for any of these functions from the headers. 
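The block comment above now distinguishes the handful of __locale functions the headers themselves require from those only the compiled library uses. The mechanical pattern applied throughout the platform headers below is plain conditional compilation; a minimal self-contained sketch of that pattern, where the macro is assumed to be pre-defined when compiling src/ and my_locale_t is an illustrative stand-in, not the real type:

#include <cctype>

// Assumption for this sketch: defined when building libc++'s own sources,
// absent when a user merely includes the headers.
#define _LIBCPP_BUILDING_LIBRARY 1

namespace __locale {
using my_locale_t = void*; // illustrative stand-in for the per-platform handle

// Required by the headers: stays visible unconditionally.
inline int my_isdigit(int c, my_locale_t) { return std::isdigit(c); }

#if defined(_LIBCPP_BUILDING_LIBRARY)
// Only the built library calls these, so header-only users never see them
// (and the platform need not provide them in that configuration).
inline int my_islower(int c, my_locale_t) { return std::islower(c); }
inline int my_isupper(int c, my_locale_t) { return std::isupper(c); }
#endif
} // namespace __locale

int main() { return __locale::my_isdigit('7', nullptr) ? 0 : 1; }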
// // Locale management // ----------------- // namespace __locale { -// using __locale_t = implementation-defined; +// using __locale_t = implementation-defined; // required by the headers // using __lconv_t = implementation-defined; // __locale_t __newlocale(int, const char*, __locale_t); // void __freelocale(__locale_t); @@ -36,6 +40,7 @@ // __lconv_t* __localeconv(__locale_t&); // } // +// // required by the headers // #define _LIBCPP_COLLATE_MASK /* implementation-defined */ // #define _LIBCPP_CTYPE_MASK /* implementation-defined */ // #define _LIBCPP_MONETARY_MASK /* implementation-defined */ @@ -48,6 +53,7 @@ // Strtonum functions // ------------------ // namespace __locale { +// // required by the headers // float __strtof(const char*, char**, __locale_t); // double __strtod(const char*, char**, __locale_t); // long double __strtold(const char*, char**, __locale_t); @@ -60,8 +66,8 @@ // namespace __locale { // int __islower(int, __locale_t); // int __isupper(int, __locale_t); -// int __isdigit(int, __locale_t); -// int __isxdigit(int, __locale_t); +// int __isdigit(int, __locale_t); // required by the headers +// int __isxdigit(int, __locale_t); // required by the headers // int __toupper(int, __locale_t); // int __tolower(int, __locale_t); // int __strcoll(const char*, const char*, __locale_t); @@ -99,9 +105,10 @@ // int __mbtowc(wchar_t*, const char*, size_t, __locale_t); // size_t __mbrlen(const char*, size_t, mbstate_t*, __locale_t); // size_t __mbsrtowcs(wchar_t*, const char**, size_t, mbstate_t*, __locale_t); -// int __snprintf(char*, size_t, __locale_t, const char*, ...); -// int __asprintf(char**, __locale_t, const char*, ...); -// int __sscanf(const char*, __locale_t, const char*, ...); +// +// int __snprintf(char*, size_t, __locale_t, const char*, ...); // required by the headers +// int __asprintf(char**, __locale_t, const char*, ...); // required by the headers +// int __sscanf(const char*, __locale_t, const char*, ...); // required by the headers // } #if defined(__APPLE__) @@ -143,8 +150,19 @@ namespace __locale { // // Locale management // +# define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK +# define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK +# define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK +# define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK +# define _LIBCPP_TIME_MASK LC_TIME_MASK +# define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK +# define _LIBCPP_ALL_MASK LC_ALL_MASK +# define _LIBCPP_LC_ALL LC_ALL + using __locale_t _LIBCPP_NODEBUG = locale_t; -using __lconv_t _LIBCPP_NODEBUG = lconv; + +# if defined(_LIBCPP_BUILDING_LIBRARY) +using __lconv_t _LIBCPP_NODEBUG = lconv; inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __name, __locale_t __loc) { return newlocale(__category_mask, __name, __loc); @@ -157,15 +175,7 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __loc inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { freelocale(__loc); } inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { return __libcpp_localeconv_l(__loc); } - -# define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK -# define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK -# define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK -# define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK -# define _LIBCPP_TIME_MASK LC_TIME_MASK -# define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK -# define _LIBCPP_ALL_MASK LC_ALL_MASK -# define _LIBCPP_LC_ALL LC_ALL +# endif // _LIBCPP_BUILDING_LIBRARY // // Strtonum functions @@ -194,10 +204,15 @@ __strtoull(const char* __nptr, 
char** __endptr, int __base, __locale_t __loc) { // // Character manipulation functions // +# if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __islower(int __ch, __locale_t __loc) { return islower_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __ch, __locale_t __loc) { return isupper_l(__ch, __loc); } +# endif + inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __ch, __locale_t __loc) { return isdigit_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __ch, __locale_t __loc) { return isxdigit_l(__ch, __loc); } + +# if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t __loc) { return strcoll_l(__s1, __s2, __loc); } @@ -207,7 +222,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, s inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __ch, __locale_t __loc) { return toupper_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __ch, __locale_t __loc) { return tolower_l(__ch, __loc); } -# if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __s1, const wchar_t* __s2, __locale_t __loc) { return wcscoll_l(__s1, __s2, __loc); } @@ -229,7 +244,7 @@ inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __ch, __locale_t __loc) { ret inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __ch, __locale_t __loc) { return iswxdigit_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __ch, __locale_t __loc) { return towupper_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __ch, __locale_t __loc) { return towlower_l(__ch, __loc); } -# endif +# endif inline _LIBCPP_HIDE_FROM_ABI size_t __strftime(char* __s, size_t __max, const char* __format, const tm* __tm, __locale_t __loc) { @@ -242,7 +257,7 @@ __strftime(char* __s, size_t __max, const char* __format, const tm* __tm, __loca inline _LIBCPP_HIDE_FROM_ABI decltype(__libcpp_mb_cur_max_l(__locale_t())) __mb_len_max(__locale_t __loc) { return __libcpp_mb_cur_max_l(__loc); } -# if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __ch, __locale_t __loc) { return __libcpp_btowc_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __ch, __locale_t __loc) { return __libcpp_wctob_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI size_t @@ -270,7 +285,8 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_t __loc) { return __libcpp_mbsrtowcs_l(__dest, __src, __len, __ps, __loc); } -# endif +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_BUILDING_LIBRARY _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") diff --git a/libcxx/include/__locale_dir/support/bsd_like.h b/libcxx/include/__locale_dir/support/bsd_like.h index c0080b13a08cf..405f1589c8c94 100644 --- a/libcxx/include/__locale_dir/support/bsd_like.h +++ b/libcxx/include/__locale_dir/support/bsd_like.h @@ -46,7 +46,8 @@ namespace __locale { #define _LIBCPP_LC_ALL LC_ALL using __locale_t = ::locale_t; -using __lconv_t = std::lconv; +#if defined(_LIBCPP_BUILDING_LIBRARY) +using __lconv_t = std::lconv; inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __locale, __locale_t __base) { return ::newlocale(__category_mask, __locale, __base); @@ -59,6 +60,7 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __loc } 
inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { return ::localeconv_l(__loc); } +#endif // _LIBCPP_BUILDING_LIBRARY // // Strtonum functions @@ -87,14 +89,17 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { // // Character manipulation functions // +#if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return ::islower_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return ::isupper_l(__c, __loc); } +#endif inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return ::isdigit_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return ::isxdigit_l(__c, __loc); } +#if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return ::toupper_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t __loc) { return ::tolower_l(__c, __loc); } @@ -107,7 +112,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, s return ::strxfrm_l(__dest, __src, __n, __loc); } -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t __loc) { return ::iswctype_l(__c, __type, __loc); } @@ -143,7 +148,7 @@ inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t* inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t __loc) { return ::wcsxfrm_l(__dest, __src, __n, __loc); } -#endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI size_t __strftime(char* __s, size_t __max, const char* __format, const struct tm* __tm, __locale_t __loc) { @@ -155,14 +160,14 @@ __strftime(char* __s, size_t __max, const char* __format, const struct tm* __tm, // inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __mb_len_max(__locale_t __loc) { return MB_CUR_MAX_L(__loc); } -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __c, __locale_t __loc) { return ::btowc_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __c, __locale_t __loc) { return ::wctob_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI size_t __wcsnrtombs(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, __locale_t __loc) { - return ::wcsnrtombs_l(__dest, __src, __nwc, __len, __ps, __loc); + return ::wcsnrtombs_l(__dest, __src, __nwc, __len, __ps, __loc); // wcsnrtombs is a POSIX extension } inline _LIBCPP_HIDE_FROM_ABI size_t __wcrtomb(char* __s, wchar_t __wc, mbstate_t* __ps, __locale_t __loc) { @@ -171,7 +176,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __wcrtomb(char* __s, wchar_t __wc, mbstate_t inline _LIBCPP_HIDE_FROM_ABI size_t __mbsnrtowcs(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, __locale_t __loc) { - return ::mbsnrtowcs_l(__dest, __src, __nms, __len, __ps, __loc); + return ::mbsnrtowcs_l(__dest, __src, __nms, __len, __ps, __loc); // mbsnrtowcs is a POSIX extension } inline _LIBCPP_HIDE_FROM_ABI size_t @@ -191,7 +196,8 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_t __loc) { return ::mbsrtowcs_l(__dest, __src, __len, __ps, __loc); } -#endif +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +#endif // _LIBCPP_BUILDING_LIBRARY 
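A large share of the churn in these locale headers is whitespace only: `#if` becomes `# if` once a guard ends up nested inside the new `_LIBCPP_BUILDING_LIBRARY` block. This follows the directive-indentation convention already used in these files, where the hash stays in column zero and the directive name gains one space of indent per nesting level, so the indent plus the trailing `// _LIBCPP_...` comments make the nesting visible. A tiny self-contained illustration, with macro values assumed for the sketch:

#define _LIBCPP_BUILDING_LIBRARY 1    // assumption: stands in for the build flag
#define _LIBCPP_HAS_WIDE_CHARACTERS 1 // assumption: platform has wchar_t support

#if defined(_LIBCPP_BUILDING_LIBRARY) // outer level: '#' flush left
int only_the_built_library_needs_this;
# if _LIBCPP_HAS_WIDE_CHARACTERS      // nested level: one space after '#'
int and_this_only_when_wide_chars_exist;
# endif // _LIBCPP_HAS_WIDE_CHARACTERS
#endif  // _LIBCPP_BUILDING_LIBRARY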
_LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") @@ -211,7 +217,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __ template _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf( char** __s, __locale_t __loc, const char* __format, _Args&&... __args) { - return ::asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); + return ::asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); // non-standard } template diff --git a/libcxx/include/__locale_dir/support/fuchsia.h b/libcxx/include/__locale_dir/support/fuchsia.h index 237f48562d6e0..fb9de74ab7c7b 100644 --- a/libcxx/include/__locale_dir/support/fuchsia.h +++ b/libcxx/include/__locale_dir/support/fuchsia.h @@ -50,7 +50,9 @@ struct __locale_guard { #define _LIBCPP_LC_ALL LC_ALL using __locale_t = locale_t; -using __lconv_t = std::lconv; + +#if defined(_LIBCPP_BUILDING_LIBRARY) +using __lconv_t = std::lconv; inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __name, __locale_t __loc) { return ::newlocale(__category_mask, __name, __loc); @@ -74,7 +76,7 @@ inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __mb_len_max(__locale_t __loc) __locale_guard __current(__loc); return MB_CUR_MAX; } -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __ch, __locale_t __loc) { __locale_guard __current(__loc); return std::btowc(__ch); @@ -115,7 +117,8 @@ __mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_guard __current(__loc); return ::mbsrtowcs(__dest, __src, __len, __ps); } -#endif +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +#endif // _LIBCPP_BUILDING_LIBRARY _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") diff --git a/libcxx/include/__locale_dir/support/no_locale/characters.h b/libcxx/include/__locale_dir/support/no_locale/characters.h index 20e45fc350e2e..4fb48ed9ceac1 100644 --- a/libcxx/include/__locale_dir/support/no_locale/characters.h +++ b/libcxx/include/__locale_dir/support/no_locale/characters.h @@ -29,14 +29,17 @@ namespace __locale { // // Character manipulation functions // +#if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t) { return std::islower(__c); } inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t) { return std::isupper(__c); } +#endif inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t) { return std::isdigit(__c); } inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t) { return std::isxdigit(__c); } +#if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t) { return std::toupper(__c); } inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t) { return std::tolower(__c); } @@ -49,7 +52,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, s return std::strxfrm(__dest, __src, __n); } -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t) { return std::iswctype(__c, __type); } @@ -85,12 +88,13 @@ inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t* inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t) { return std::wcsxfrm(__dest, __src, __n); } -#endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS inline 
_LIBCPP_HIDE_FROM_ABI size_t __strftime(char* __s, size_t __max, const char* __format, const struct tm* __tm, __locale_t) { return std::strftime(__s, __max, __format, __tm); } +#endif // _LIBCPP_BUILDING_LIBRARY } // namespace __locale _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h index f0f76c527264a..56d34c6f0e6ca 100644 --- a/libcxx/include/__locale_dir/support/windows.h +++ b/libcxx/include/__locale_dir/support/windows.h @@ -153,6 +153,7 @@ class __locale_t { __lconv_storage* __lc_ = nullptr; }; +#if defined(_LIBCPP_BUILDING_LIBRARY) _LIBCPP_EXPORTED_FROM_ABI __locale_t __newlocale(int __mask, const char* __locale, __locale_t __base); inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { ::_free_locale(__loc); } inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __locale) { @@ -162,6 +163,7 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __loc return __new_locale; } _LIBCPP_EXPORTED_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc); +#endif // _LIBCPP_BUILDING_LIBRARY // // Strtonum functions @@ -195,14 +197,17 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { // // Character manipulation functions // +#if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return _islower_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return _isupper_l(__c, __loc); } +#endif inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return _isdigit_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return _isxdigit_l(__c, __loc); } +#if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return ::_toupper_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t __loc) { return ::_tolower_l(__c, __loc); } @@ -215,7 +220,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, s return ::_strxfrm_l(__dest, __src, __n, __loc); } -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t __loc) { return ::_iswctype_l(__c, __type, __loc); } @@ -240,16 +245,16 @@ inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t* inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t __loc) { return ::_wcsxfrm_l(__dest, __src, __n, __loc); } -#endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS -#if defined(__MINGW32__) && __MSVCRT_VERSION__ < 0x0800 +# if defined(__MINGW32__) && __MSVCRT_VERSION__ < 0x0800 _LIBCPP_EXPORTED_FROM_ABI size_t __strftime(char*, size_t, const char*, const struct tm*, __locale_t); -#else +# else inline _LIBCPP_HIDE_FROM_ABI size_t __strftime(char* __ret, size_t __n, const char* __format, const struct tm* __tm, __locale_t __loc) { return ::_strftime_l(__ret, __n, __format, __tm, __loc); } -#endif +# endif // // Other functions @@ -273,6 +278,7 @@ _LIBCPP_EXPORTED_FROM_ABI size_t __mbrlen(const char* __restrict, size_t, mbstat _LIBCPP_EXPORTED_FROM_ABI size_t __mbsrtowcs(wchar_t* __restrict, const char** __restrict, size_t, mbstate_t* __restrict, __locale_t); +#endif // _LIBCPP_BUILDING_LIBRARY _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int 
__snprintf( char* __ret, size_t __n, __locale_t __loc, const char* __format, ...); @@ -297,6 +303,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __s _LIBCPP_DIAGNOSTIC_POP #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT +#if defined(_LIBCPP_BUILDING_LIBRARY) struct __locale_guard { _LIBCPP_HIDE_FROM_ABI __locale_guard(__locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { // Setting the locale can be expensive even when the locale given is @@ -328,6 +335,7 @@ struct __locale_guard { int __status; char* __locale_all = nullptr; }; +#endif // _LIBCPP_BUILDING_LIBRARY } // namespace __locale _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/test/benchmarks/algorithms/copy.bench.cpp b/libcxx/test/benchmarks/algorithms/copy.bench.cpp deleted file mode 100644 index b6f0f15eb7703..0000000000000 --- a/libcxx/test/benchmarks/algorithms/copy.bench.cpp +++ /dev/null @@ -1,89 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 - -#include -#include -#include - -static void bm_ranges_copy_vb(benchmark::State& state, bool aligned) { - auto n = state.range(); - std::vector in(n, true); - std::vector out(aligned ? n : n + 8); - benchmark::DoNotOptimize(&in); - auto dst = aligned ? out.begin() : out.begin() + 4; - for (auto _ : state) { - benchmark::DoNotOptimize(std::ranges::copy(in, dst)); - benchmark::DoNotOptimize(&out); - } -} - -static void bm_ranges_copy_n_vb(benchmark::State& state, bool aligned) { - auto n = state.range(); - std::vector in(n, true); - std::vector out(aligned ? n : n + 8); - benchmark::DoNotOptimize(&in); - auto src = in.begin(); - auto dst = aligned ? out.begin() : out.begin() + 4; - for (auto _ : state) { - benchmark::DoNotOptimize(std::ranges::copy_n(src, n, dst)); - benchmark::DoNotOptimize(&out); - } -} - -static void bm_copy_vb(benchmark::State& state, bool aligned) { - auto n = state.range(); - std::vector in(n, true); - std::vector out(aligned ? n : n + 8); - benchmark::DoNotOptimize(&in); - auto beg = in.begin(); - auto end = in.end(); - auto dst = aligned ? out.begin() : out.begin() + 4; - for (auto _ : state) { - benchmark::DoNotOptimize(std::copy(beg, end, dst)); - benchmark::DoNotOptimize(&out); - } -} - -static void bm_copy_n_vb(benchmark::State& state, bool aligned) { - auto n = state.range(); - std::vector in(n, true); - std::vector out(aligned ? n : n + 8); - benchmark::DoNotOptimize(&in); - auto src = in.begin(); - auto dst = aligned ? 
out.begin() : out.begin() + 4; - for (auto _ : state) { - benchmark::DoNotOptimize(std::copy_n(src, n, dst)); - benchmark::DoNotOptimize(&out); - } -} - -static void bm_ranges_copy_vb_aligned(benchmark::State& state) { bm_ranges_copy_vb(state, true); } -static void bm_ranges_copy_vb_unaligned(benchmark::State& state) { bm_ranges_copy_vb(state, false); } -static void bm_ranges_copy_n_vb_aligned(benchmark::State& state) { bm_ranges_copy_n_vb(state, true); } -static void bm_ranges_copy_n_vb_unaligned(benchmark::State& state) { bm_ranges_copy_n_vb(state, false); } - -static void bm_copy_vb_aligned(benchmark::State& state) { bm_copy_vb(state, true); } -static void bm_copy_vb_unaligned(benchmark::State& state) { bm_copy_vb(state, false); } -static void bm_copy_n_vb_aligned(benchmark::State& state) { bm_copy_n_vb(state, true); } -static void bm_copy_n_vb_unaligned(benchmark::State& state) { bm_copy_n_vb(state, false); } - -// Test std::ranges::copy for vector::iterator -BENCHMARK(bm_ranges_copy_vb_aligned)->Range(8, 1 << 16)->DenseRange(102400, 204800, 4096); -BENCHMARK(bm_ranges_copy_n_vb_aligned)->Range(8, 1 << 20); -BENCHMARK(bm_ranges_copy_vb_unaligned)->Range(8, 1 << 20); -BENCHMARK(bm_ranges_copy_n_vb_unaligned)->Range(8, 1 << 20); - -// Test std::copy for vector::iterator -BENCHMARK(bm_copy_vb_aligned)->Range(8, 1 << 20); -BENCHMARK(bm_copy_n_vb_aligned)->Range(8, 1 << 20); -BENCHMARK(bm_copy_vb_unaligned)->Range(8, 1 << 20); -BENCHMARK(bm_copy_n_vb_unaligned)->Range(8, 1 << 20); - -BENCHMARK_MAIN(); diff --git a/libcxx/test/benchmarks/algorithms/copy_backward.bench.cpp b/libcxx/test/benchmarks/algorithms/copy_backward.bench.cpp deleted file mode 100644 index c943d9a874b49..0000000000000 --- a/libcxx/test/benchmarks/algorithms/copy_backward.bench.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 - -#include -#include -#include - -static void bm_ranges_copy_backward_vb(benchmark::State& state, bool aligned) { - auto n = state.range(); - std::vector in(n, true); - std::vector out(aligned ? n : n + 8); - benchmark::DoNotOptimize(&in); - auto dst = aligned ? out.end() : out.end() - 4; - for (auto _ : state) { - benchmark::DoNotOptimize(std::ranges::copy_backward(in, dst)); - benchmark::DoNotOptimize(&out); - } -} - -static void bm_copy_backward_vb(benchmark::State& state, bool aligned) { - auto n = state.range(); - std::vector in(n, true); - std::vector out(aligned ? n : n + 8); - benchmark::DoNotOptimize(&in); - auto beg = in.begin(); - auto end = in.end(); - auto dst = aligned ? 
out.end() : out.end() - 4; - for (auto _ : state) { - benchmark::DoNotOptimize(std::copy_backward(beg, end, dst)); - benchmark::DoNotOptimize(&out); - } -} - -static void bm_ranges_copy_backward_vb_aligned(benchmark::State& state) { bm_ranges_copy_backward_vb(state, true); } -static void bm_ranges_copy_backward_vb_unaligned(benchmark::State& state) { bm_ranges_copy_backward_vb(state, false); } - -static void bm_copy_backward_vb_aligned(benchmark::State& state) { bm_copy_backward_vb(state, true); } -static void bm_copy_backward_vb_unaligned(benchmark::State& state) { bm_copy_backward_vb(state, false); } - -// Test std::ranges::copy_backward for vector::iterator -BENCHMARK(bm_ranges_copy_backward_vb_aligned)->Range(8, 1 << 16)->DenseRange(102400, 204800, 4096); -BENCHMARK(bm_ranges_copy_backward_vb_unaligned)->Range(8, 1 << 20); - -// Test std::copy_backward for vector::iterator -BENCHMARK(bm_copy_backward_vb_aligned)->Range(8, 1 << 20); -BENCHMARK(bm_copy_backward_vb_unaligned)->Range(8, 1 << 20); - -BENCHMARK_MAIN(); diff --git a/libcxx/test/benchmarks/algorithms/modifying/copy.bench.cpp b/libcxx/test/benchmarks/algorithms/modifying/copy.bench.cpp new file mode 100644 index 0000000000000..3549d918478bd --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/modifying/copy.bench.cpp @@ -0,0 +1,83 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "../../GenerateInput.h" +#include "test_macros.h" + +int main(int argc, char** argv) { + auto std_copy = [](auto first, auto last, auto out) { return std::copy(first, last, out); }; + + // {std,ranges}::copy(normal container) + { + auto bm = [](std::string name, auto copy) { + benchmark::RegisterBenchmark(name, [copy](auto& st) { + std::size_t const n = st.range(0); + using ValueType = typename Container::value_type; + Container c; + std::generate_n(std::back_inserter(c), n, [] { return Generate::random(); }); + + std::vector out(n); + + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(c); + benchmark::DoNotOptimize(out); + auto result = copy(c.begin(), c.end(), out.begin()); + benchmark::DoNotOptimize(result); + } + })->Range(8, 1 << 20); + }; + bm.operator()>("std::copy(vector)", std_copy); + bm.operator()>("std::copy(deque)", std_copy); + bm.operator()>("std::copy(list)", std_copy); + bm.operator()>("rng::copy(vector)", std::ranges::copy); + bm.operator()>("rng::copy(deque)", std::ranges::copy); + bm.operator()>("rng::copy(list)", std::ranges::copy); + } + + // {std,ranges}::copy(vector) + { + auto bm = [](std::string name, auto copy) { + benchmark::RegisterBenchmark(name, [copy](auto& st) { + std::size_t const n = st.range(0); + std::vector in(n, true); + std::vector out(Aligned ? n : n + 8); + auto first = in.begin(); + auto last = in.end(); + auto dst = Aligned ? 
out.begin() : out.begin() + 4;
+        for ([[maybe_unused]] auto _ : st) {
+          benchmark::DoNotOptimize(in);
+          benchmark::DoNotOptimize(out);
+          auto result = copy(first, last, dst);
+          benchmark::DoNotOptimize(result);
+        }
+      })->Range(64, 1 << 20);
+    };
+    bm.operator()("std::copy(vector) (aligned)", std_copy);
+    bm.operator()("std::copy(vector) (unaligned)", std_copy);
+#if TEST_STD_VER >= 23 // vector::iterator is not an output_iterator before C++23
+    bm.operator()("rng::copy(vector) (aligned)", std::ranges::copy);
+    bm.operator()("rng::copy(vector) (unaligned)", std::ranges::copy);
+#endif
+  }
+
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+  return 0;
+}
diff --git a/libcxx/test/benchmarks/algorithms/modifying/copy_backward.bench.cpp b/libcxx/test/benchmarks/algorithms/modifying/copy_backward.bench.cpp
new file mode 100644
index 0000000000000..f97a7a84d5e0a
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/modifying/copy_backward.bench.cpp
@@ -0,0 +1,84 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "benchmark/benchmark.h"
+#include "../../GenerateInput.h"
+#include "test_macros.h"
+
+int main(int argc, char** argv) {
+  auto std_copy_backward = [](auto first, auto last, auto out) { return std::copy_backward(first, last, out); };
+
+  // {std,ranges}::copy_backward(normal container)
+  {
+    auto bm = [](std::string name, auto copy_backward) {
+      benchmark::RegisterBenchmark(name, [copy_backward](auto& st) {
+        std::size_t const n = st.range(0);
+        using ValueType = typename Container::value_type;
+        Container c;
+        std::generate_n(std::back_inserter(c), n, [] { return Generate::random(); });
+
+        std::vector out(n);
+
+        for ([[maybe_unused]] auto _ : st) {
+          benchmark::DoNotOptimize(c);
+          benchmark::DoNotOptimize(out);
+          auto result = copy_backward(c.begin(), c.end(), out.end());
+          benchmark::DoNotOptimize(result);
+        }
+      })->Range(8, 1 << 20);
+    };
+    bm.operator()>("std::copy_backward(vector)", std_copy_backward);
+    bm.operator()>("std::copy_backward(deque)", std_copy_backward);
+    bm.operator()>("std::copy_backward(list)", std_copy_backward);
+    bm.operator()>("rng::copy_backward(vector)", std::ranges::copy_backward);
+    bm.operator()>("rng::copy_backward(deque)", std::ranges::copy_backward);
+    bm.operator()>("rng::copy_backward(list)", std::ranges::copy_backward);
+  }
+
+  // {std,ranges}::copy_backward(vector)
+  {
+    auto bm = [](std::string name, auto copy_backward) {
+      benchmark::RegisterBenchmark(name, [copy_backward](auto& st) {
+        std::size_t const n = st.range(0);
+        std::vector in(n, true);
+        std::vector out(Aligned ? n : n + 8);
+        benchmark::DoNotOptimize(&in);
+        auto first = in.begin();
+        auto last = in.end();
+        auto dst = Aligned ? out.end() : out.end() - 4;
+        for ([[maybe_unused]] auto _ : st) {
+          benchmark::DoNotOptimize(in);
+          benchmark::DoNotOptimize(out);
+          auto result = copy_backward(first, last, dst);
+          benchmark::DoNotOptimize(result);
+        }
+      })->Range(64, 1 << 20);
+    };
+    bm.operator()("std::copy_backward(vector) (aligned)", std_copy_backward);
+    bm.operator()("std::copy_backward(vector) (unaligned)", std_copy_backward);
+#if TEST_STD_VER >= 23 // vector::iterator is not an output_iterator before C++23
+    bm.operator()("rng::copy_backward(vector) (aligned)", std::ranges::copy_backward);
+    bm.operator()("rng::copy_backward(vector) (unaligned)", std::ranges::copy_backward);
+#endif
+  }
+
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+  return 0;
+}
diff --git a/libcxx/test/benchmarks/algorithms/modifying/copy_if.bench.cpp b/libcxx/test/benchmarks/algorithms/modifying/copy_if.bench.cpp
new file mode 100644
index 0000000000000..76c653fc7f941
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/modifying/copy_if.bench.cpp
@@ -0,0 +1,97 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "benchmark/benchmark.h"
+#include "../../GenerateInput.h"
+
+int main(int argc, char** argv) {
+  auto std_copy_if = [](auto first, auto last, auto out, auto pred) { return std::copy_if(first, last, out, pred); };
+
+  // Benchmark {std,ranges}::copy_if where we copy every other element, alternating.
+  // This is essentially the worst case for this algorithm; there are few
+  // optimizations that can be applied here.
+  {
+    auto bm = [](std::string name, auto copy_if) {
+      benchmark::RegisterBenchmark(name, [copy_if](auto& st) {
+        std::size_t const n = st.range(0);
+        using ValueType = typename Container::value_type;
+        Container c;
+        std::generate_n(std::back_inserter(c), n, [] { return Generate::random(); });
+
+        std::vector out(n);
+
+        for ([[maybe_unused]] auto _ : st) {
+          bool do_copy = false;
+          auto pred = [&do_copy](auto& element) {
+            benchmark::DoNotOptimize(element);
+            do_copy = !do_copy;
+            return do_copy;
+          };
+          benchmark::DoNotOptimize(c);
+          benchmark::DoNotOptimize(out);
+          auto result = copy_if(c.begin(), c.end(), out.begin(), pred);
+          benchmark::DoNotOptimize(result);
+        }
+      })->Range(8, 1 << 20);
+    };
+    bm.operator()>("std::copy_if(vector) (every other)", std_copy_if);
+    bm.operator()>("std::copy_if(deque) (every other)", std_copy_if);
+    bm.operator()>("std::copy_if(list) (every other)", std_copy_if);
+
+    bm.operator()>("rng::copy_if(vector) (every other)", std::ranges::copy_if);
+    bm.operator()>("rng::copy_if(deque) (every other)", std::ranges::copy_if);
+    bm.operator()>("rng::copy_if(list) (every other)", std::ranges::copy_if);
+  }
+
+  // Benchmark {std,ranges}::copy_if where we copy the full range.
+ { + auto bm = [](std::string name, auto copy_if) { + benchmark::RegisterBenchmark(name, [copy_if](auto& st) { + std::size_t const n = st.range(0); + using ValueType = typename Container::value_type; + Container c; + std::generate_n(std::back_inserter(c), n, [] { return Generate::random(); }); + + std::vector out(n); + + for ([[maybe_unused]] auto _ : st) { + auto pred = [](auto& element) { + benchmark::DoNotOptimize(element); + return true; + }; + benchmark::DoNotOptimize(c); + benchmark::DoNotOptimize(out); + auto result = copy_if(c.begin(), c.end(), out.begin(), pred); + benchmark::DoNotOptimize(result); + } + })->Range(8, 1 << 20); + }; + bm.operator()>("std::copy_if(vector) (entire range)", std_copy_if); + bm.operator()>("std::copy_if(deque) (entire range)", std_copy_if); + bm.operator()>("std::copy_if(list) (entire range)", std_copy_if); + + bm.operator()>("rng::copy_if(vector) (entire range)", std::ranges::copy_if); + bm.operator()>("rng::copy_if(deque) (entire range)", std::ranges::copy_if); + bm.operator()>("rng::copy_if(list) (entire range)", std::ranges::copy_if); + } + + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); + return 0; +} diff --git a/libcxx/test/benchmarks/algorithms/modifying/copy_n.bench.cpp b/libcxx/test/benchmarks/algorithms/modifying/copy_n.bench.cpp new file mode 100644 index 0000000000000..35d3c5c78df13 --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/modifying/copy_n.bench.cpp @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "../../GenerateInput.h" +#include "test_macros.h" + +int main(int argc, char** argv) { + auto std_copy_n = [](auto first, auto n, auto out) { return std::copy_n(first, n, out); }; + + // {std,ranges}::copy_n(normal container) + { + auto bm = [](std::string name, auto copy_n) { + benchmark::RegisterBenchmark(name, [copy_n](auto& st) { + std::size_t const n = st.range(0); + using ValueType = typename Container::value_type; + Container c; + std::generate_n(std::back_inserter(c), n, [] { return Generate::random(); }); + + std::vector out(n); + + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(c); + benchmark::DoNotOptimize(out); + auto result = copy_n(c.begin(), n, out.begin()); + benchmark::DoNotOptimize(result); + } + })->Range(8, 1 << 20); + }; + bm.operator()>("std::copy_n(vector)", std_copy_n); + bm.operator()>("std::copy_n(deque)", std_copy_n); + bm.operator()>("std::copy_n(list)", std_copy_n); + bm.operator()>("rng::copy_n(vector)", std::ranges::copy_n); + bm.operator()>("rng::copy_n(deque)", std::ranges::copy_n); + bm.operator()>("rng::copy_n(list)", std::ranges::copy_n); + } + + // {std,ranges}::copy_n(vector) + { + auto bm = [](std::string name, auto copy_n) { + benchmark::RegisterBenchmark(name, [copy_n](auto& st) { + std::size_t const n = st.range(0); + std::vector in(n, true); + std::vector out(Aligned ? n : n + 8); + auto first = in.begin(); + auto dst = Aligned ? 
out.begin() : out.begin() + 4; + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(in); + benchmark::DoNotOptimize(out); + auto result = copy_n(first, n, dst); + benchmark::DoNotOptimize(result); + } + })->Range(64, 1 << 20); + }; + bm.operator()("std::copy_n(vector) (aligned)", std_copy_n); + bm.operator()("std::copy_n(vector) (unaligned)", std_copy_n); +#if TEST_STD_VER >= 23 // vector::iterator is not an output_iterator before C++23 + bm.operator()("rng::copy_n(vector) (aligned)", std::ranges::copy_n); + bm.operator()("rng::copy_n(vector) (unaligned)", std::ranges::copy_n); +#endif + } + + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); + return 0; +} diff --git a/libcxx/test/libcxx/xopen_source.gen.py b/libcxx/test/libcxx/xopen_source.gen.py new file mode 100644 index 0000000000000..3f2686483730a --- /dev/null +++ b/libcxx/test/libcxx/xopen_source.gen.py @@ -0,0 +1,53 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +# Make sure that libc++ headers work when defining _XOPEN_SOURCE=500. +# We may not want to guarantee this forever, but since this works today and +# it's something that users rely on, it makes sense to put a test on it. +# +# https://github.com/llvm/llvm-project/issues/117630 + +# RUN: %{python} %s %{libcxx-dir}/utils +# END. + +import sys + +sys.path.append(sys.argv[1]) +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) + +for header in public_headers: + for version in (500, 600, 700): + # TODO: currently uses ::fseeko unguarded, which fails with _XOPEN_SOURCE=500. + if header == "fstream" and version == 500: + continue + + print( + f"""\ +//--- {header}.xopen_source_{version}.compile.pass.cpp + +// Some parts of the code like use non-standard functions in their implementation, +// and these functions are not provided when _XOPEN_SOURCE is set to older values. This +// breaks when building with modules even when we don't use the offending headers directly. +// UNSUPPORTED: clang-modules-build + +// The AIX localization support uses some functions as part of their headers that require a +// recent value of _XOPEN_SOURCE. 
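Each test the generator above prints is compile-only: it pins _XOPEN_SOURCE before including a single public header, and the only assertion is that the translation unit still builds. A hand-expanded sketch of one such instance, with the header and the value picked arbitrarily for illustration:

// What one generated test boils down to: the macro must be defined before
// any include, and the header must still compile with the old value.
#define _XOPEN_SOURCE 500
#include <vector> // any public libc++ header; <vector> chosen arbitrarily

int main(int, char**) { return 0; }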
+// UNSUPPORTED: LIBCXX-AIX-FIXME + +{lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} + +// ADDITIONAL_COMPILE_FLAGS: -D_XOPEN_SOURCE={version} + +#include <{header}> +""" + ) diff --git a/libcxx/test/std/utilities/template.bitset/bitset_test_cases.h b/libcxx/test/std/utilities/template.bitset/bitset_test_cases.h index dada9bcca8f48..b561b01ef19a9 100644 --- a/libcxx/test/std/utilities/template.bitset/bitset_test_cases.h +++ b/libcxx/test/std/utilities/template.bitset/bitset_test_cases.h @@ -20,161 +20,162 @@ TEST_CONSTEXPR_CXX23 std::vector > get_test_cases(); template <> TEST_CONSTEXPR_CXX23 inline std::vector > get_test_cases<0>() { - std::vector > cases; - cases.push_back(std::bitset<0>()); - return cases; + std::vector > cases; + cases.push_back(std::bitset<0>()); + return cases; } template <> TEST_CONSTEXPR_CXX23 inline std::vector > get_test_cases<1>() { - std::vector > cases; - cases.push_back(std::bitset<1>("0")); - cases.push_back(std::bitset<1>("1")); - return cases; + std::vector > cases; + cases.push_back(std::bitset<1>("0")); + cases.push_back(std::bitset<1>("1")); + return cases; } template <> TEST_CONSTEXPR_CXX23 inline std::vector > get_test_cases<2>() { - std::vector > cases; - cases.push_back(std::bitset<2>("00")); - cases.push_back(std::bitset<2>("01")); - cases.push_back(std::bitset<2>("10")); - cases.push_back(std::bitset<2>("11")); - return cases; + std::vector > cases; + cases.push_back(std::bitset<2>("00")); + cases.push_back(std::bitset<2>("01")); + cases.push_back(std::bitset<2>("10")); + cases.push_back(std::bitset<2>("11")); + return cases; } template <> TEST_CONSTEXPR_CXX23 inline std::vector > get_test_cases<31>() { - std::vector > cases; - cases.push_back(std::bitset<31>("0000000000000000000000000000000")); - cases.push_back(std::bitset<31>("0000000000000000000000000000001")); - cases.push_back(std::bitset<31>("1000000000000000000000000000000")); - cases.push_back(std::bitset<31>("1000000000000000000000000000001")); - cases.push_back(std::bitset<31>("1000000000000000000001000000001")); - cases.push_back(std::bitset<31>("0000000000000000111111111111111")); - cases.push_back(std::bitset<31>("1000000000000000111111111111111")); - cases.push_back(std::bitset<31>("1111111111111111000000000000000")); - cases.push_back(std::bitset<31>("1111111111111111000000000000001")); - cases.push_back(std::bitset<31>("1010101010101010101010101010101")); - cases.push_back(std::bitset<31>("0101010101010101010101010101010")); - cases.push_back(std::bitset<31>("1111111111111111111111111111111")); - return cases; + std::vector > cases; + cases.push_back(std::bitset<31>("0000000000000000000000000000000")); + cases.push_back(std::bitset<31>("0000000000000000000000000000001")); + cases.push_back(std::bitset<31>("1000000000000000000000000000000")); + cases.push_back(std::bitset<31>("1000000000000000000000000000001")); + cases.push_back(std::bitset<31>("1000000000000000000001000000001")); + cases.push_back(std::bitset<31>("0000000000000000111111111111111")); + cases.push_back(std::bitset<31>("1000000000000000111111111111111")); + cases.push_back(std::bitset<31>("1111111111111111000000000000000")); + cases.push_back(std::bitset<31>("1111111111111111000000000000001")); + cases.push_back(std::bitset<31>("1010101010101010101010101010101")); + cases.push_back(std::bitset<31>("0101010101010101010101010101010")); + cases.push_back(std::bitset<31>("1111111111111111111111111111111")); + return cases; } template <> TEST_CONSTEXPR_CXX23 inline std::vector > 
get_test_cases<32>() { - std::vector > cases; - cases.push_back(std::bitset<32>("00000000000000000000000000000000")); - cases.push_back(std::bitset<32>("00000000000000000000000000000001")); - cases.push_back(std::bitset<32>("10000000000000000000000000000000")); - cases.push_back(std::bitset<32>("10000000000000000000000000000001")); - cases.push_back(std::bitset<32>("10000000000000000000111000000001")); - cases.push_back(std::bitset<32>("00000000000000001111111111111111")); - cases.push_back(std::bitset<32>("10000000000000001111111111111111")); - cases.push_back(std::bitset<32>("11111111111111110000000000000000")); - cases.push_back(std::bitset<32>("11111111111111110000000000000001")); - cases.push_back(std::bitset<32>("10101010101010101010101010101010")); - cases.push_back(std::bitset<32>("01010101010101010101010101010101")); - cases.push_back(std::bitset<32>("11111111111111111111111111111111")); - return cases; + std::vector > cases; + cases.push_back(std::bitset<32>("00000000000000000000000000000000")); + cases.push_back(std::bitset<32>("00000000000000000000000000000001")); + cases.push_back(std::bitset<32>("10000000000000000000000000000000")); + cases.push_back(std::bitset<32>("10000000000000000000000000000001")); + cases.push_back(std::bitset<32>("10000000000000000000111000000001")); + cases.push_back(std::bitset<32>("00000000000000001111111111111111")); + cases.push_back(std::bitset<32>("10000000000000001111111111111111")); + cases.push_back(std::bitset<32>("11111111111111110000000000000000")); + cases.push_back(std::bitset<32>("11111111111111110000000000000001")); + cases.push_back(std::bitset<32>("10101010101010101010101010101010")); + cases.push_back(std::bitset<32>("01010101010101010101010101010101")); + cases.push_back(std::bitset<32>("11111111111111111111111111111111")); + return cases; } template <> TEST_CONSTEXPR_CXX23 inline std::vector > get_test_cases<33>() { - std::vector > cases; - cases.push_back(std::bitset<33>("000000000000000000000000000000000")); - cases.push_back(std::bitset<33>("000000000000000000000000000000001")); - cases.push_back(std::bitset<33>("100000000000000000000000000000000")); - cases.push_back(std::bitset<33>("100000000000000000000000000000001")); - cases.push_back(std::bitset<33>("100000000000000000001110000000001")); - cases.push_back(std::bitset<33>("000000000000000011111111111111111")); - cases.push_back(std::bitset<33>("100000000000000011111111111111111")); - cases.push_back(std::bitset<33>("111111111111111100000000000000000")); - cases.push_back(std::bitset<33>("111111111111111100000000000000001")); - cases.push_back(std::bitset<33>("101010101010101010101010101010101")); - cases.push_back(std::bitset<33>("010101010101010101010101010101010")); - cases.push_back(std::bitset<33>("111111111111111111111111111111111")); - return cases; + std::vector > cases; + cases.push_back(std::bitset<33>("000000000000000000000000000000000")); + cases.push_back(std::bitset<33>("000000000000000000000000000000001")); + cases.push_back(std::bitset<33>("100000000000000000000000000000000")); + cases.push_back(std::bitset<33>("100000000000000000000000000000001")); + cases.push_back(std::bitset<33>("100000000000000000001110000000001")); + cases.push_back(std::bitset<33>("000000000000000011111111111111111")); + cases.push_back(std::bitset<33>("100000000000000011111111111111111")); + cases.push_back(std::bitset<33>("111111111111111100000000000000000")); + cases.push_back(std::bitset<33>("111111111111111100000000000000001")); + 
cases.push_back(std::bitset<33>("101010101010101010101010101010101")); + cases.push_back(std::bitset<33>("010101010101010101010101010101010")); + cases.push_back(std::bitset<33>("111111111111111111111111111111111")); + return cases; } template <> TEST_CONSTEXPR_CXX23 inline std::vector > get_test_cases<63>() { - std::vector > cases; - cases.push_back(std::bitset<63>("000000000000000000000000000000000000000000000000000000000000000")); - cases.push_back(std::bitset<63>("000000000000000000000000000000000000000000000000000000000000001")); - cases.push_back(std::bitset<63>("100000000000000000000000000000000000000000000000000000000000000")); - cases.push_back(std::bitset<63>("100000000000000000000000000000000000000000000000000000000000001")); - cases.push_back(std::bitset<63>("100000000000000000000000001111100000000000000000000000000000001")); - cases.push_back(std::bitset<63>("000000000000000000000000000000001111111111111111111111111111111")); - cases.push_back(std::bitset<63>("100000000000000000000000000000001111111111111111111111111111111")); - cases.push_back(std::bitset<63>("111111111111111111111111111111110000000000000000000000000000000")); - cases.push_back(std::bitset<63>("111111111111111111111111111111110000000000000000000000000000001")); - cases.push_back(std::bitset<63>("101010101010101010101010101010101010101010101010101010101010101")); - cases.push_back(std::bitset<63>("010101010101010101010101010101010101010101010101010101010101010")); - cases.push_back(std::bitset<63>("111111111111111111111111111111111111111111111111111111111111111")); - return cases; + std::vector > cases; + cases.push_back(std::bitset<63>("000000000000000000000000000000000000000000000000000000000000000")); + cases.push_back(std::bitset<63>("000000000000000000000000000000000000000000000000000000000000001")); + cases.push_back(std::bitset<63>("100000000000000000000000000000000000000000000000000000000000000")); + cases.push_back(std::bitset<63>("100000000000000000000000000000000000000000000000000000000000001")); + cases.push_back(std::bitset<63>("100000000000000000000000001111100000000000000000000000000000001")); + cases.push_back(std::bitset<63>("000000000000000000000000000000001111111111111111111111111111111")); + cases.push_back(std::bitset<63>("100000000000000000000000000000001111111111111111111111111111111")); + cases.push_back(std::bitset<63>("111111111111111111111111111111110000000000000000000000000000000")); + cases.push_back(std::bitset<63>("111111111111111111111111111111110000000000000000000000000000001")); + cases.push_back(std::bitset<63>("101010101010101010101010101010101010101010101010101010101010101")); + cases.push_back(std::bitset<63>("010101010101010101010101010101010101010101010101010101010101010")); + cases.push_back(std::bitset<63>("111111111111111111111111111111111111111111111111111111111111111")); + return cases; } template <> TEST_CONSTEXPR_CXX23 inline std::vector > get_test_cases<64>() { - std::vector > cases; - cases.push_back(std::bitset<64>("0000000000000000000000000000000000000000000000000000000000000000")); - cases.push_back(std::bitset<64>("0000000000000000000000000000000000000000000000000000000000000001")); - cases.push_back(std::bitset<64>("1000000000000000000000000000000000000000000000000000000000000000")); - cases.push_back(std::bitset<64>("1000000000000000000000000000000000000000000000000000000000000001")); - cases.push_back(std::bitset<64>("1000000000000000000000000011111000000000000000000000000000000001")); - 
cases.push_back(std::bitset<64>("0000000000000000000000000000000011111111111111111111111111111111")); - cases.push_back(std::bitset<64>("1000000000000000000000000000000011111111111111111111111111111111")); - cases.push_back(std::bitset<64>("1111111111111111111111111111111100000000000000000000000000000000")); - cases.push_back(std::bitset<64>("1111111111111111111111111111111100000000000000000000000000000001")); - cases.push_back(std::bitset<64>("1010101010101010101010101010101010101010101010101010101010101010")); - cases.push_back(std::bitset<64>("0101010101010101010101010101010101010101010101010101010101010101")); - cases.push_back(std::bitset<64>("1111111111111111111111111111111111111111111111111111111111111111")); - return cases; + std::vector > cases; + cases.push_back(std::bitset<64>("0000000000000000000000000000000000000000000000000000000000000000")); + cases.push_back(std::bitset<64>("0000000000000000000000000000000000000000000000000000000000000001")); + cases.push_back(std::bitset<64>("1000000000000000000000000000000000000000000000000000000000000000")); + cases.push_back(std::bitset<64>("1000000000000000000000000000000000000000000000000000000000000001")); + cases.push_back(std::bitset<64>("1000000000000000000000000011111000000000000000000000000000000001")); + cases.push_back(std::bitset<64>("0000000000000000000000000000000011111111111111111111111111111111")); + cases.push_back(std::bitset<64>("1000000000000000000000000000000011111111111111111111111111111111")); + cases.push_back(std::bitset<64>("1111111111111111111111111111111100000000000000000000000000000000")); + cases.push_back(std::bitset<64>("1111111111111111111111111111111100000000000000000000000000000001")); + cases.push_back(std::bitset<64>("1010101010101010101010101010101010101010101010101010101010101010")); + cases.push_back(std::bitset<64>("0101010101010101010101010101010101010101010101010101010101010101")); + cases.push_back(std::bitset<64>("1111111111111111111111111111111111111111111111111111111111111111")); + return cases; } template <> TEST_CONSTEXPR_CXX23 inline std::vector > get_test_cases<65>() { - std::vector > cases; - cases.push_back(std::bitset<65>("00000000000000000000000000000000000000000000000000000000000000000")); - cases.push_back(std::bitset<65>("00000000000000000000000000000000000000000000000000000000000000001")); - cases.push_back(std::bitset<65>("10000000000000000000000000000000000000000000000000000000000000000")); - cases.push_back(std::bitset<65>("10000000000000000000000000000000000000000000000000000000000000001")); - cases.push_back(std::bitset<65>("10000000000000000000000000011111000000000000000000000000000000001")); - cases.push_back(std::bitset<65>("00000000000000000000000000000000011111111111111111111111111111111")); - cases.push_back(std::bitset<65>("10000000000000000000000000000000011111111111111111111111111111111")); - cases.push_back(std::bitset<65>("11111111111111111111111111111111000000000000000000000000000000000")); - cases.push_back(std::bitset<65>("11111111111111111111111111111111000000000000000000000000000000001")); - cases.push_back(std::bitset<65>("10101010101010101010101010101010101010101010101010101010101010101")); - cases.push_back(std::bitset<65>("01010101010101010101010101010101010101010101010101010101010101010")); - cases.push_back(std::bitset<65>("11111111111111111111111111111111111111111111111111111111111111111")); - return cases; + std::vector > cases; + cases.push_back(std::bitset<65>("00000000000000000000000000000000000000000000000000000000000000000")); + 
cases.push_back(std::bitset<65>("00000000000000000000000000000000000000000000000000000000000000001")); + cases.push_back(std::bitset<65>("10000000000000000000000000000000000000000000000000000000000000000")); + cases.push_back(std::bitset<65>("10000000000000000000000000000000000000000000000000000000000000001")); + cases.push_back(std::bitset<65>("10000000000000000000000000011111000000000000000000000000000000001")); + cases.push_back(std::bitset<65>("00000000000000000000000000000000011111111111111111111111111111111")); + cases.push_back(std::bitset<65>("10000000000000000000000000000000011111111111111111111111111111111")); + cases.push_back(std::bitset<65>("11111111111111111111111111111111000000000000000000000000000000000")); + cases.push_back(std::bitset<65>("11111111111111111111111111111111000000000000000000000000000000001")); + cases.push_back(std::bitset<65>("10101010101010101010101010101010101010101010101010101010101010101")); + cases.push_back(std::bitset<65>("01010101010101010101010101010101010101010101010101010101010101010")); + cases.push_back(std::bitset<65>("11111111111111111111111111111111111111111111111111111111111111111")); + return cases; } TEST_CONSTEXPR_CXX23 inline std::string str_repeat(std::string s, unsigned int n) { - std::string res = s; - for (; n != 0; --n) - res += s; - return res; + std::string res = s; + for (; n != 0; --n) + res += s; + return res; } template <> TEST_CONSTEXPR_CXX23 inline std::vector > get_test_cases<1000>() { - std::vector > cases; - cases.push_back(std::bitset<1000>(std::string(1000, '0'))); - cases.push_back(std::bitset<1000>(std::string(999, '0') + std::string(1, '1'))); - cases.push_back(std::bitset<1000>(std::string(1, '1') + std::string(999, '0'))); - cases.push_back(std::bitset<1000>(std::string(1, '1') + std::string(998, '0') + std::string(1, '1'))); - cases.push_back(std::bitset<1000>(std::string(1, '1') + std::string(400, '0') + std::string(99, '1') + std::string(499, '0') + std::string(1, '1'))); - cases.push_back(std::bitset<1000>(std::string(500, '0') + std::string(500, '1'))); - cases.push_back(std::bitset<1000>(std::string(1, '1') + std::string(499, '0') + std::string(500, '1'))); - cases.push_back(std::bitset<1000>(std::string(500, '1') + std::string(500, '0'))); - cases.push_back(std::bitset<1000>(std::string(500, '1') + std::string(499, '0') + std::string(1, '1'))); - cases.push_back(std::bitset<1000>(str_repeat("10", 500))); - cases.push_back(std::bitset<1000>(str_repeat("01", 500))); - cases.push_back(std::bitset<1000>(std::string(1000, '1'))); - - return cases; + std::vector > cases; + cases.push_back(std::bitset<1000>(std::string(1000, '0'))); + cases.push_back(std::bitset<1000>(std::string(999, '0') + std::string(1, '1'))); + cases.push_back(std::bitset<1000>(std::string(1, '1') + std::string(999, '0'))); + cases.push_back(std::bitset<1000>(std::string(1, '1') + std::string(998, '0') + std::string(1, '1'))); + cases.push_back(std::bitset<1000>(std::string(1, '1') + std::string(400, '0') + std::string(99, '1') + + std::string(499, '0') + std::string(1, '1'))); + cases.push_back(std::bitset<1000>(std::string(500, '0') + std::string(500, '1'))); + cases.push_back(std::bitset<1000>(std::string(1, '1') + std::string(499, '0') + std::string(500, '1'))); + cases.push_back(std::bitset<1000>(std::string(500, '1') + std::string(500, '0'))); + cases.push_back(std::bitset<1000>(std::string(500, '1') + std::string(499, '0') + std::string(1, '1'))); + cases.push_back(std::bitset<1000>(str_repeat("10", 500))); + 
cases.push_back(std::bitset<1000>(str_repeat("01", 500))); + cases.push_back(std::bitset<1000>(std::string(1000, '1'))); + + return cases; } #endif // !LIBCPP_TEST_BITSET_TEST_CASES_H diff --git a/libunwind/src/CMakeLists.txt b/libunwind/src/CMakeLists.txt index ecbd019bb29ea..d69013e5dace1 100644 --- a/libunwind/src/CMakeLists.txt +++ b/libunwind/src/CMakeLists.txt @@ -36,7 +36,6 @@ set(LIBUNWIND_HEADERS AddressSpace.hpp assembly.h CompactUnwinder.hpp - cet_unwind.h config.h dwarf2.h DwarfInstructions.hpp @@ -46,6 +45,7 @@ set(LIBUNWIND_HEADERS libunwind_ext.h Registers.hpp RWMutex.hpp + shadow_stack_unwind.h Unwind-EHABI.h UnwindCursor.hpp ../include/libunwind.h diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index 861e6b5f6f2c5..452f46a0d56ea 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -15,9 +15,9 @@ #include #include -#include "cet_unwind.h" #include "config.h" #include "libunwind.h" +#include "shadow_stack_unwind.h" namespace libunwind { @@ -48,7 +48,7 @@ class _LIBUNWIND_HIDDEN Registers_x86; extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *); #if defined(_LIBUNWIND_USE_CET) -extern "C" void *__libunwind_cet_get_jump_target() { +extern "C" void *__libunwind_shstk_get_jump_target() { return reinterpret_cast(&__libunwind_Registers_x86_jumpto); } #endif @@ -268,7 +268,7 @@ class _LIBUNWIND_HIDDEN Registers_x86_64; extern "C" void __libunwind_Registers_x86_64_jumpto(Registers_x86_64 *); #if defined(_LIBUNWIND_USE_CET) -extern "C" void *__libunwind_cet_get_jump_target() { +extern "C" void *__libunwind_shstk_get_jump_target() { return reinterpret_cast(&__libunwind_Registers_x86_64_jumpto); } #endif @@ -1817,7 +1817,7 @@ class _LIBUNWIND_HIDDEN Registers_arm64; extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); #if defined(_LIBUNWIND_USE_GCS) -extern "C" void *__libunwind_cet_get_jump_target() { +extern "C" void *__libunwind_shstk_get_jump_target() { return reinterpret_cast(&__libunwind_Registers_arm64_jumpto); } #endif diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 0923052b1b588..ca9927edc9990 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -11,7 +11,7 @@ #ifndef __UNWINDCURSOR_HPP__ #define __UNWINDCURSOR_HPP__ -#include "cet_unwind.h" +#include "shadow_stack_unwind.h" #include #include #include @@ -3122,7 +3122,7 @@ bool UnwindCursor::isReadableAddr(const pint_t addr) const { #endif #if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS) -extern "C" void *__libunwind_cet_get_registers(unw_cursor_t *cursor) { +extern "C" void *__libunwind_shstk_get_registers(unw_cursor_t *cursor) { AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->get_registers(); } diff --git a/libunwind/src/UnwindLevel1.c b/libunwind/src/UnwindLevel1.c index 7e785f4d31e71..a258a832a9c31 100644 --- a/libunwind/src/UnwindLevel1.c +++ b/libunwind/src/UnwindLevel1.c @@ -25,10 +25,10 @@ #include #include -#include "cet_unwind.h" #include "config.h" #include "libunwind.h" #include "libunwind_ext.h" +#include "shadow_stack_unwind.h" #include "unwind.h" #if !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ @@ -36,14 +36,17 @@ #ifndef _LIBUNWIND_SUPPORT_SEH_UNWIND -// When CET is enabled, each "call" instruction will push return address to -// CET shadow stack, each "ret" instruction will pop current CET shadow stack -// top and compare it with target address which program will return. 
-// In exception handing, some stack frames will be skipped before jumping to -// landing pad and we must adjust CET shadow stack accordingly. -// _LIBUNWIND_POP_CET_SSP is used to adjust CET shadow stack pointer and we -// directly jump to __libunwind_Registers_x86/x86_64_jumpto instead of using -// a regular function call to avoid pushing to CET shadow stack again. +// When the shadow stack is enabled, a separate stack containing only return +// addresses is maintained. On function return, the return address is compared +// against the address popped from the shadow stack to ensure the return +// target has not been tampered with. When unwinding, we skip the normal +// return procedure for multiple frames and thus need to pop the return +// addresses of the skipped frames from the shadow stack to avoid triggering a +// fault (using `_LIBUNWIND_POP_SHSTK_SSP()`). Also, some architectures, like +// x86-family CET, push return addresses onto the shadow stack with ordinary +// call instructions, so for these architectures a normal function call must +// be avoided when invoking the `jumpto()` function. To do this, we use inline +// assembly to "goto" the `jumpto()` for these architectures. #if !defined(_LIBUNWIND_USE_CET) && !defined(_LIBUNWIND_USE_GCS) #define __unw_phase2_resume(cursor, fn) \ do { \ @@ -51,38 +54,38 @@ __unw_resume((cursor)); \ } while (0) #elif defined(_LIBUNWIND_TARGET_I386) -#define __cet_ss_step_size 4 +#define __shstk_step_size (4) #define __unw_phase2_resume(cursor, fn) \ do { \ - _LIBUNWIND_POP_CET_SSP((fn)); \ - void *cetRegContext = __libunwind_cet_get_registers((cursor)); \ - void *cetJumpAddress = __libunwind_cet_get_jump_target(); \ + _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ + void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("push %%edi\n\t" \ "sub $4, %%esp\n\t" \ - "jmp *%%edx\n\t" :: "D"(cetRegContext), \ - "d"(cetJumpAddress)); \ + "jmp *%%edx\n\t" ::"D"(shstkRegContext), \ + "d"(shstkJumpAddress)); \ } while (0) #elif defined(_LIBUNWIND_TARGET_X86_64) -#define __cet_ss_step_size 8 +#define __shstk_step_size (8) #define __unw_phase2_resume(cursor, fn) \ do { \ - _LIBUNWIND_POP_CET_SSP((fn)); \ - void *cetRegContext = __libunwind_cet_get_registers((cursor)); \ - void *cetJumpAddress = __libunwind_cet_get_jump_target(); \ - __asm__ volatile("jmpq *%%rdx\n\t" :: "D"(cetRegContext), \ - "d"(cetJumpAddress)); \ + _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ + void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ + __asm__ volatile("jmpq *%%rdx\n\t" ::"D"(shstkRegContext), \ + "d"(shstkJumpAddress)); \ } while (0) #elif defined(_LIBUNWIND_TARGET_AARCH64) -#define __cet_ss_step_size 8 +#define __shstk_step_size (8) #define __unw_phase2_resume(cursor, fn) \ do { \ - _LIBUNWIND_POP_CET_SSP((fn)); \ - void *cetRegContext = __libunwind_cet_get_registers((cursor)); \ - void *cetJumpAddress = __libunwind_cet_get_jump_target(); \ + _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ + void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("mov x0, %0\n\t" \ "br %1\n\t" \ : \ - : "r"(cetRegContext), "r"(cetJumpAddress) \ + : "r"(shstkRegContext), "r"(shstkJumpAddress) \ : "x0"); \ } while (0) #endif @@ -255,16 +258,16 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except } #endif
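For illustration, the x86 shape of `_LIBUNWIND_POP_SHSTK_SSP` (defined in shadow_stack_unwind.h below) boils down to roughly the following sketch. This is illustrative only: it assumes the CET shadow-stack intrinsics from <immintrin.h> and compilation with -mshstk, and pop_shadow_stack_frames is a hypothetical standalone rendering of the macro, not code from this patch.

#include <immintrin.h>

// Drop `frames` return addresses from the shadow stack after skipping that
// many frames during phase-2 unwinding.
static void pop_shadow_stack_frames(unsigned frames) {
  unsigned long ssp = _get_ssp(); // reads as 0 when no shadow stack is active
  if (ssp != 0) {
    // INCSSP consumes only the low 8 bits of its operand, so pop the
    // entries (one return address each) in chunks of at most 255.
    while (frames > 255) {
      _inc_ssp(255);
      frames -= 255;
    }
    _inc_ssp(frames);
  }
}

-// In CET 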
enabled environment, we check return address stored in normal stack -against return address stored in CET shadow stack, if the 2 addresses don't +// In a shadow stack enabled environment, we check the return address stored +// in the normal stack against the one in the shadow stack; if the two addresses don't // match, it means return address in normal stack has been corrupted, we return // _URC_FATAL_PHASE2_ERROR. #if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS) if (shadowStackTop != 0) { unw_word_t retInNormalStack; __unw_get_reg(cursor, UNW_REG_IP, &retInNormalStack); - unsigned long retInShadowStack = *( - unsigned long *)(shadowStackTop + __cet_ss_step_size * framesWalked); + unsigned long retInShadowStack = + *(unsigned long *)(shadowStackTop + __shstk_step_size * framesWalked); if (retInNormalStack != retInShadowStack) return _URC_FATAL_PHASE2_ERROR; } diff --git a/libunwind/src/cet_unwind.h b/libunwind/src/shadow_stack_unwind.h similarity index 88% rename from libunwind/src/cet_unwind.h rename to libunwind/src/shadow_stack_unwind.h index 47d7616a7322c..1f229d8317116 100644 --- a/libunwind/src/cet_unwind.h +++ b/libunwind/src/shadow_stack_unwind.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LIBUNWIND_CET_UNWIND_H -#define LIBUNWIND_CET_UNWIND_H +#ifndef LIBUNWIND_SHADOW_STACK_UNWIND_H +#define LIBUNWIND_SHADOW_STACK_UNWIND_H #include "libunwind.h" @@ -21,7 +21,7 @@ #include #include -#define _LIBUNWIND_POP_CET_SSP(x) \ +#define _LIBUNWIND_POP_SHSTK_SSP(x) \ do { \ unsigned long ssp = _get_ssp(); \ if (ssp != 0) { \ @@ -46,7 +46,7 @@ #define _LIBUNWIND_USE_GCS 1 #endif -#define _LIBUNWIND_POP_CET_SSP(x) \ +#define _LIBUNWIND_POP_SHSTK_SSP(x) \ do { \ if (__chkfeat(_CHKFEAT_GCS)) { \ unsigned tmp = (x); \ @@ -57,7 +57,7 @@ #endif -extern void *__libunwind_cet_get_registers(unw_cursor_t *); -extern void *__libunwind_cet_get_jump_target(void); +extern void *__libunwind_shstk_get_registers(unw_cursor_t *); +extern void *__libunwind_shstk_get_jump_target(void); #endif diff --git a/lldb/include/lldb/API/SBCommandReturnObject.h b/lldb/include/lldb/API/SBCommandReturnObject.h index 9a63c1f96aa70..6386bd250afa5 100644 --- a/lldb/include/lldb/API/SBCommandReturnObject.h +++ b/lldb/include/lldb/API/SBCommandReturnObject.h @@ -136,6 +136,8 @@ jobs: arches=arm64 class LLDB_API SBCommandReturnObject { void SetError(const char *error_cstr); + lldb::SBValueList GetValues(lldb::DynamicValueType use_dynamic); + protected: friend class SBCommandInterpreter; friend class SBOptions; diff --git a/lldb/include/lldb/API/SBValue.h b/lldb/include/lldb/API/SBValue.h index 46ef6daa95264..75d20a4378f09 100644 --- a/lldb/include/lldb/API/SBValue.h +++ b/lldb/include/lldb/API/SBValue.h @@ -442,6 +442,7 @@ class LLDB_API SBValue { protected: friend class SBBlock; + friend class SBCommandReturnObject; friend class SBFrame; friend class SBModule; friend class SBTarget; diff --git a/lldb/include/lldb/Core/Debugger.h b/lldb/include/lldb/Core/Debugger.h index 7f08f3dd26106..9c8a9623fe689 100644 --- a/lldb/include/lldb/Core/Debugger.h +++ b/lldb/include/lldb/Core/Debugger.h @@ -131,13 +131,16 @@ class Debugger : public std::enable_shared_from_this<Debugger>, void SetAsyncExecution(bool async); - File &GetInputFile() { return *m_input_file_sp; } - lldb::FileSP GetInputFileSP() { return m_input_file_sp; } + File &GetInputFile() { return *m_input_file_sp; } - lldb::FileSP GetOutputFileSP() { return m_output_stream_sp->GetFileSP(); } + lldb::FileSP GetOutputFileSP() { + 
return m_output_stream_sp->GetUnlockedFileSP(); + } - lldb::FileSP GetErrorFileSP() { return m_error_stream_sp->GetFileSP(); } + lldb::FileSP GetErrorFileSP() { + return m_error_stream_sp->GetUnlockedFileSP(); + } repro::DataRecorder *GetInputRecorder(); @@ -198,8 +201,8 @@ class Debugger : public std::enable_shared_from_this, // If any of the streams are not set, set them to the in/out/err stream of // the top most input reader to ensure they at least have something void AdoptTopIOHandlerFilesIfInvalid(lldb::FileSP &in, - lldb::StreamFileSP &out, - lldb::StreamFileSP &err); + lldb::LockableStreamFileSP &out, + lldb::LockableStreamFileSP &err); /// Run the given IO handler and return immediately. void RunIOHandlerAsync(const lldb::IOHandlerSP &reader_sp, @@ -649,8 +652,8 @@ class Debugger : public std::enable_shared_from_this, /// should not be used directly. Use GetAsyncOutputStream and /// GetAsyncErrorStream instead. /// @{ - lldb::StreamFileSP GetOutputStreamSP() { return m_output_stream_sp; } - lldb::StreamFileSP GetErrorStreamSP() { return m_error_stream_sp; } + lldb::LockableStreamFileSP GetOutputStreamSP() { return m_output_stream_sp; } + lldb::LockableStreamFileSP GetErrorStreamSP() { return m_error_stream_sp; } /// @} void PushIOHandler(const lldb::IOHandlerSP &reader_sp, @@ -693,8 +696,9 @@ class Debugger : public std::enable_shared_from_this, // these should never be NULL lldb::FileSP m_input_file_sp; - lldb::StreamFileSP m_output_stream_sp; - lldb::StreamFileSP m_error_stream_sp; + lldb::LockableStreamFileSP m_output_stream_sp; + lldb::LockableStreamFileSP m_error_stream_sp; + LockableStreamFile::Mutex m_output_mutex; /// Used for shadowing the input file when capturing a reproducer. repro::DataRecorder *m_input_recorder; diff --git a/lldb/include/lldb/Core/IOHandler.h b/lldb/include/lldb/Core/IOHandler.h index d6ac1cc8b5a14..fc0c676883b4a 100644 --- a/lldb/include/lldb/Core/IOHandler.h +++ b/lldb/include/lldb/Core/IOHandler.h @@ -53,8 +53,9 @@ class IOHandler { IOHandler(Debugger &debugger, IOHandler::Type type); IOHandler(Debugger &debugger, IOHandler::Type type, - const lldb::FileSP &input_sp, const lldb::StreamFileSP &output_sp, - const lldb::StreamFileSP &error_sp, uint32_t flags); + const lldb::FileSP &input_sp, + const lldb::LockableStreamFileSP &output_sp, + const lldb::LockableStreamFileSP &error_sp, uint32_t flags); virtual ~IOHandler(); @@ -112,17 +113,11 @@ class IOHandler { int GetErrorFD(); - FILE *GetInputFILE(); - - FILE *GetOutputFILE(); - - FILE *GetErrorFILE(); - lldb::FileSP GetInputFileSP(); - lldb::StreamFileSP GetOutputStreamFileSP(); + lldb::LockableStreamFileSP GetOutputStreamFileSP(); - lldb::StreamFileSP GetErrorStreamFileSP(); + lldb::LockableStreamFileSP GetErrorStreamFileSP(); Debugger &GetDebugger() { return m_debugger; } @@ -155,14 +150,11 @@ class IOHandler { virtual void PrintAsync(const char *s, size_t len, bool is_stdout); - std::recursive_mutex &GetOutputMutex() { return m_output_mutex; } - protected: Debugger &m_debugger; lldb::FileSP m_input_sp; - lldb::StreamFileSP m_output_sp; - lldb::StreamFileSP m_error_sp; - std::recursive_mutex m_output_mutex; + lldb::LockableStreamFileSP m_output_sp; + lldb::LockableStreamFileSP m_error_sp; Predicate m_popped; Flags m_flags; Type m_type; @@ -330,8 +322,8 @@ class IOHandlerEditline : public IOHandler { IOHandlerEditline(Debugger &debugger, IOHandler::Type type, const lldb::FileSP &input_sp, - const lldb::StreamFileSP &output_sp, - const lldb::StreamFileSP &error_sp, uint32_t flags, + const 
lldb::LockableStreamFileSP &output_sp, + const lldb::LockableStreamFileSP &error_sp, uint32_t flags, const char *editline_name, // Used for saving history files llvm::StringRef prompt, llvm::StringRef continuation_prompt, bool multi_line, bool color, @@ -345,9 +337,10 @@ class IOHandlerEditline : public IOHandler { IOHandlerDelegate &) = delete; IOHandlerEditline(Debugger &, IOHandler::Type, const lldb::FileSP &, - const lldb::StreamFileSP &, const lldb::StreamFileSP &, - uint32_t, const char *, const char *, const char *, bool, - bool, uint32_t, IOHandlerDelegate &) = delete; + const lldb::LockableStreamFileSP &, + const lldb::LockableStreamFileSP &, uint32_t, const char *, + const char *, const char *, bool, bool, uint32_t, + IOHandlerDelegate &) = delete; ~IOHandlerEditline() override; diff --git a/lldb/include/lldb/Host/Editline.h b/lldb/include/lldb/Host/Editline.h index 27b863870090c..8964d37be8823 100644 --- a/lldb/include/lldb/Host/Editline.h +++ b/lldb/include/lldb/Host/Editline.h @@ -34,6 +34,7 @@ #include #include +#include "lldb/Host/StreamFile.h" #include "lldb/lldb-private.h" #if !defined(_WIN32) && !defined(__ANDROID__) @@ -151,8 +152,9 @@ using namespace line_editor; /// facility. Both single- and multi-line editing are supported. class Editline { public: - Editline(const char *editor_name, FILE *input_file, FILE *output_file, - FILE *error_file, bool color, std::recursive_mutex &output_mutex); + Editline(const char *editor_name, FILE *input_file, + lldb::LockableStreamFileSP output_stream_sp, + lldb::LockableStreamFileSP error_stream_sp, bool color); ~Editline(); @@ -237,7 +239,8 @@ class Editline { /// Prompts for and reads a multi-line batch of user input. bool GetLines(int first_line_number, StringList &lines, bool &interrupted); - void PrintAsync(Stream *stream, const char *s, size_t len); + void PrintAsync(lldb::LockableStreamFileSP stream_sp, const char *s, + size_t len); /// Convert the current input lines into a UTF8 StringList StringList GetInputAsStringList(int line_count = UINT32_MAX); @@ -392,8 +395,11 @@ class Editline { volatile std::sig_atomic_t m_terminal_size_has_changed = 0; std::string m_editor_name; FILE *m_input_file; - FILE *m_output_file; - FILE *m_error_file; + lldb::LockableStreamFileSP m_output_stream_sp; + lldb::LockableStreamFileSP m_error_stream_sp; + + std::optional m_locked_output; + ConnectionFileDescriptor m_input_connection; IsInputCompleteCallbackType m_is_input_complete_callback; @@ -411,7 +417,6 @@ class Editline { std::string m_suggestion_ansi_suffix; std::size_t m_previous_autosuggestion_size = 0; - std::recursive_mutex &m_output_mutex; }; } diff --git a/lldb/include/lldb/Host/File.h b/lldb/include/lldb/Host/File.h index 5ce53c93b1b91..9e2d0abe0b1af 100644 --- a/lldb/include/lldb/Host/File.h +++ b/lldb/include/lldb/Host/File.h @@ -377,6 +377,11 @@ class File : public IOObject { class NativeFile : public File { public: + enum TransferOwnership : bool { + Owned = true, + Unowned = false, + }; + NativeFile() : m_descriptor(kInvalidDescriptor), m_stream(kInvalidStream) {} NativeFile(FILE *fh, bool transfer_ownership) diff --git a/lldb/include/lldb/Host/StreamFile.h b/lldb/include/lldb/Host/StreamFile.h index 2c96e13565a00..e37661a9938c0 100644 --- a/lldb/include/lldb/Host/StreamFile.h +++ b/lldb/include/lldb/Host/StreamFile.h @@ -13,9 +13,12 @@ #include "lldb/Utility/Stream.h" #include "lldb/lldb-defines.h" #include "lldb/lldb-enumerations.h" +#include "lldb/lldb-forward.h" #include #include +#include +#include namespace lldb_private { @@ 
-52,6 +55,55 @@ class StreamFile : public Stream { const StreamFile &operator=(const StreamFile &) = delete; }; +class LockableStreamFile; +class LockedStreamFile : public StreamFile { +public: + ~LockedStreamFile() { Flush(); } + + LockedStreamFile(LockedStreamFile &&other) + : StreamFile(other.m_file_sp), m_lock(std::move(other.m_lock)) {} + +private: + LockedStreamFile(std::shared_ptr<File> file, std::recursive_mutex &mutex) + : StreamFile(file), m_lock(mutex) {} + + friend class LockableStreamFile; + + std::unique_lock<std::recursive_mutex> m_lock; +}; + +class LockableStreamFile { +public: + using Mutex = std::recursive_mutex; + + LockableStreamFile(std::shared_ptr<StreamFile> stream_file_sp, Mutex &mutex) + : m_file_sp(stream_file_sp->GetFileSP()), m_mutex(mutex) {} + LockableStreamFile(StreamFile &stream_file, Mutex &mutex) + : m_file_sp(stream_file.GetFileSP()), m_mutex(mutex) {} + LockableStreamFile(FILE *fh, bool transfer_ownership, Mutex &mutex) + : m_file_sp(std::make_shared<NativeFile>(fh, transfer_ownership)), + m_mutex(mutex) {} + LockableStreamFile(std::shared_ptr<File> file_sp, Mutex &mutex) + : m_file_sp(file_sp), m_mutex(mutex) {} + + LockedStreamFile Lock() { return LockedStreamFile(m_file_sp, m_mutex); } + + /// Unsafe accessors to get the underlying File without a lock. Exists for + /// legacy reasons. + /// @{ + File &GetUnlockedFile() { return *m_file_sp; } + std::shared_ptr<File> GetUnlockedFileSP() { return m_file_sp; } + /// @} + +protected: + std::shared_ptr<File> m_file_sp; + Mutex &m_mutex; + +private: + LockableStreamFile(const LockableStreamFile &) = delete; + const LockableStreamFile &operator=(const LockableStreamFile &) = delete; +}; + } // namespace lldb_private #endif // LLDB_HOST_STREAMFILE_H diff --git a/lldb/include/lldb/Interpreter/CommandReturnObject.h b/lldb/include/lldb/Interpreter/CommandReturnObject.h index 803bcd76995ed..d53aeb81be2ba 100644 --- a/lldb/include/lldb/Interpreter/CommandReturnObject.h +++ b/lldb/include/lldb/Interpreter/CommandReturnObject.h @@ -14,6 +14,7 @@ #include "lldb/Utility/StreamString.h" #include "lldb/Utility/StreamTee.h" #include "lldb/Utility/StructuredData.h" +#include "lldb/ValueObject/ValueObjectList.h" #include "lldb/lldb-private.h" #include "llvm/ADT/StringRef.h" @@ -134,7 +135,7 @@ class CommandReturnObject { __attribute__((format(printf, 2, 3))); template <typename... Args> - void AppendMessageWithFormatv(const char *format, Args &&... args) { + void AppendMessageWithFormatv(const char *format, Args &&...args) { AppendMessage(llvm::formatv(format, std::forward<Args>(args)...).str()); } @@ -144,12 +145,12 @@ class CommandReturnObject { } template <typename... Args> - void AppendWarningWithFormatv(const char *format, Args &&... args) { + void AppendWarningWithFormatv(const char *format, Args &&...args) { AppendWarning(llvm::formatv(format, std::forward<Args>(args)...).str()); } template <typename... Args> - void AppendErrorWithFormatv(const char *format, Args &&... args) { + void AppendErrorWithFormatv(const char *format, Args &&...args) { AppendError(llvm::formatv(format, std::forward<Args>(args)...).str()); } @@ -165,6 +166,10 @@ class CommandReturnObject { return m_diagnostic_indent; } + const ValueObjectList &GetValueObjectList() const { return m_value_objects; } + + ValueObjectList &GetValueObjectList() { return m_value_objects; } + lldb::ReturnStatus GetStatus() const; void SetStatus(lldb::ReturnStatus status); @@ -197,6 +202,9 @@ class CommandReturnObject { lldb::ReturnStatus m_status = lldb::eReturnStatusStarted; + /// A possibly empty list of values produced by this command. 
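+  /// Commands that produce values (e.g. `expression`, `dwim-print`,
+  /// `frame variable`, and `target variable`) append their results here;
+  /// the list is surfaced to the SB API via SBCommandReturnObject::GetValues().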
+ ValueObjectList m_value_objects; + bool m_did_change_process_state = false; bool m_suppress_immediate_output = false; diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h index 2c2bd6f232e09..c5aa19959aa61 100644 --- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h +++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h @@ -116,8 +116,12 @@ class ScriptInterpreterIORedirect { ~ScriptInterpreterIORedirect(); lldb::FileSP GetInputFile() const { return m_input_file_sp; } - lldb::FileSP GetOutputFile() const { return m_output_file_sp->GetFileSP(); } - lldb::FileSP GetErrorFile() const { return m_error_file_sp->GetFileSP(); } + lldb::FileSP GetOutputFile() const { + return m_output_file_sp->GetUnlockedFileSP(); + } + lldb::FileSP GetErrorFile() const { + return m_error_file_sp->GetUnlockedFileSP(); + } /// Flush our output and error file handles. void Flush(); @@ -128,8 +132,9 @@ class ScriptInterpreterIORedirect { ScriptInterpreterIORedirect(Debugger &debugger, CommandReturnObject *result); lldb::FileSP m_input_file_sp; - lldb::StreamFileSP m_output_file_sp; - lldb::StreamFileSP m_error_file_sp; + lldb::LockableStreamFileSP m_output_file_sp; + lldb::LockableStreamFileSP m_error_file_sp; + LockableStreamFile::Mutex m_output_mutex; ThreadedCommunication m_communication; bool m_disconnect; }; @@ -478,7 +483,7 @@ class ScriptInterpreter : public PluginInterface { dest.clear(); return false; } - + virtual StructuredData::ObjectSP GetOptionsForCommandObject(StructuredData::GenericSP cmd_obj_sp) { return {}; @@ -488,9 +493,9 @@ class ScriptInterpreter : public PluginInterface { GetArgumentsForCommandObject(StructuredData::GenericSP cmd_obj_sp) { return {}; } - + virtual bool SetOptionValueForCommandObject( - StructuredData::GenericSP cmd_obj_sp, ExecutionContext *exe_ctx, + StructuredData::GenericSP cmd_obj_sp, ExecutionContext *exe_ctx, llvm::StringRef long_option, llvm::StringRef value) { return false; } diff --git a/lldb/include/lldb/Symbol/LineTable.h b/lldb/include/lldb/Symbol/LineTable.h index f66081b6ee110..8dda9c7362f12 100644 --- a/lldb/include/lldb/Symbol/LineTable.h +++ b/lldb/include/lldb/Symbol/LineTable.h @@ -20,25 +20,11 @@ namespace lldb_private { -/// \class LineSequence LineTable.h "lldb/Symbol/LineTable.h" An abstract base -/// class used during symbol table creation. -class LineSequence { -public: - LineSequence(); - - virtual ~LineSequence() = default; - - virtual void Clear() = 0; - -private: - LineSequence(const LineSequence &) = delete; - const LineSequence &operator=(const LineSequence &) = delete; -}; - /// \class LineTable LineTable.h "lldb/Symbol/LineTable.h" /// A line table class. class LineTable { public: + class Sequence; /// Construct with compile unit. /// /// \param[in] comp_unit @@ -49,8 +35,7 @@ class LineTable { /// /// \param[in] sequences /// Unsorted list of line sequences. - LineTable(CompileUnit *comp_unit, - std::vector> &&sequences); + LineTable(CompileUnit *comp_unit, std::vector &&sequences); /// Destructor. ~LineTable(); @@ -73,20 +58,17 @@ class LineTable { bool is_start_of_basic_block, bool is_prologue_end, bool is_epilogue_begin, bool is_terminal_entry); - // Used to instantiate the LineSequence helper class - static std::unique_ptr CreateLineSequenceContainer(); - // Append an entry to a caller-provided collection that will later be // inserted in this line table. 
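// Typical usage (sketch, with a hypothetical local `seq`): append entries
// into a LineTable::Sequence via repeated AppendLineEntryToSequence(seq, ...)
// calls, then hand the whole sequence over with
// InsertSequence(std::move(seq)); moving leaves `seq` empty and reusable.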
- static void AppendLineEntryToSequence(LineSequence *sequence, lldb::addr_t file_addr, - uint32_t line, uint16_t column, - uint16_t file_idx, bool is_start_of_statement, - bool is_start_of_basic_block, - bool is_prologue_end, bool is_epilogue_begin, - bool is_terminal_entry); + static void + AppendLineEntryToSequence(Sequence &sequence, lldb::addr_t file_addr, + uint32_t line, uint16_t column, uint16_t file_idx, + bool is_start_of_statement, + bool is_start_of_basic_block, bool is_prologue_end, + bool is_epilogue_begin, bool is_terminal_entry); // Insert a sequence of entries into this line table. - void InsertSequence(LineSequence *sequence); + void InsertSequence(Sequence sequence); /// Dump all line entries in this line table to the stream \a s. /// @@ -102,18 +84,19 @@ class LineTable { void GetDescription(Stream *s, Target *target, lldb::DescriptionLevel level); - /// Helper function for line table iteration. \c lower_bound returns the index - /// of the first line entry which ends after the given address (i.e., the - /// first entry which contains the given address or it comes after it). - /// \c upper_bound returns the index of the first line entry which begins on - /// or after the given address (i.e., the entry which would come after the - /// entry containing the given address, if such an entry exists). Functions - /// return GetSize() if there is no such entry. The functions are - /// most useful in combination: iterating from lower_bound(a) to - /// upper_bound(b) returns all line tables which intersect the half-open - /// range [a,b). + /// Returns the index of the first line entry which ends after the given + /// address (i.e., the first entry which contains the given address or it + /// comes after it). Returns GetSize() if there is no such entry. uint32_t lower_bound(const Address &so_addr) const; - uint32_t upper_bound(const Address &so_addr) const; + + /// Returns the (half-open) range of line entry indexes which overlap the + /// given address range. Line entries partially overlapping the range (on + /// either side) are included as well. Returns an empty range + /// (first==second) pointing to the "right" place in the list if + /// there are no such line entries. Empty input ranges always result in an + /// empty output range. + std::pair + GetLineEntryIndexRange(const AddressRange &range) const; /// Find a line entry that contains the section offset address \a so_addr. /// @@ -273,17 +256,6 @@ class LineTable { return 0; } - class LessThanBinaryPredicate { - public: - LessThanBinaryPredicate(LineTable *line_table); - bool operator()(const LineTable::Entry &, const LineTable::Entry &) const; - bool operator()(const std::unique_ptr &, - const std::unique_ptr &) const; - - protected: - LineTable *m_line_table; - }; - static bool EntryAddressLessThan(const Entry &lhs, const Entry &rhs) { return lhs.file_addr < rhs.file_addr; } @@ -315,6 +287,35 @@ class LineTable { uint16_t file_idx = 0; }; + class Sequence { + public: + Sequence() = default; + // Moving clears moved-from object so it can be used anew. Copying is + // generally an error. C++ doesn't guarantee that a moved-from vector is + // empty(), so we clear it explicitly. 
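+    // (std::exchange moves m_entries out of rhs and leaves a
+    // value-initialized, i.e. empty, vector behind.)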
+ Sequence(Sequence &&rhs) : m_entries(std::exchange(rhs.m_entries, {})) {} + Sequence &operator=(Sequence &&rhs) { + m_entries = std::exchange(rhs.m_entries, {}); + return *this; + } + Sequence(const Sequence &) = delete; + Sequence &operator=(const Sequence &) = delete; + + private: + std::vector m_entries; + friend class LineTable; + }; + + class LessThanBinaryPredicate { + public: + LessThanBinaryPredicate(LineTable *line_table) : m_line_table(line_table) {} + bool operator()(const LineTable::Entry &, const LineTable::Entry &) const; + bool operator()(const Sequence &, const Sequence &) const; + + protected: + LineTable *m_line_table; + }; + protected: struct EntrySearchInfo { LineTable *line_table; @@ -333,19 +334,6 @@ class LineTable { entry_collection m_entries; ///< The collection of line entries in this line table. - // Helper class - class LineSequenceImpl : public LineSequence { - public: - LineSequenceImpl() = default; - - ~LineSequenceImpl() override = default; - - void Clear() override; - - entry_collection - m_entries; ///< The collection of line entries in this sequence. - }; - bool ConvertEntryAtIndexToLineEntry(uint32_t idx, LineEntry &line_entry); private: diff --git a/lldb/include/lldb/Utility/RangeMap.h b/lldb/include/lldb/Utility/RangeMap.h index 433466eebced8..8af690e813c4a 100644 --- a/lldb/include/lldb/Utility/RangeMap.h +++ b/lldb/include/lldb/Utility/RangeMap.h @@ -493,36 +493,27 @@ class RangeDataVector { #ifdef ASSERT_RANGEMAP_ARE_SORTED assert(IsSorted()); #endif - typename Collection::iterator pos; - typename Collection::iterator end; - typename Collection::iterator prev; - bool can_combine = false; - // First we determine if we can combine any of the Entry objects so we - // don't end up allocating and making a new collection for no reason - for (pos = m_entries.begin(), end = m_entries.end(), prev = end; pos != end; - prev = pos++) { - if (prev != end && prev->data == pos->data) { - can_combine = true; - break; - } - } + auto first_intersect = std::adjacent_find( + m_entries.begin(), m_entries.end(), [](const Entry &a, const Entry &b) { + return a.DoesAdjoinOrIntersect(b) && a.data == b.data; + }); - // We can combine at least one entry, then we make a new collection and - // populate it accordingly, and then swap it into place. - if (can_combine) { - Collection minimal_ranges; - for (pos = m_entries.begin(), end = m_entries.end(), prev = end; - pos != end; prev = pos++) { - if (prev != end && prev->data == pos->data) - minimal_ranges.back().SetRangeEnd(pos->GetRangeEnd()); - else - minimal_ranges.push_back(*pos); - } - // Use the swap technique in case our new vector is much smaller. We must - // swap when using the STL because std::vector objects never release or - // reduce the memory once it has been allocated/reserved. - m_entries.swap(minimal_ranges); + if (first_intersect == m_entries.end()) + return; + + // We can combine at least one entry. Make a new collection and populate it + // accordingly, and then swap it into place. 
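+  // E.g. entries [0,5)->A, [5,10)->A, [12,20)->B collapse to [0,10)->A,
+  // [12,20)->B.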
+ auto pos = std::next(first_intersect); + Collection minimal_ranges(m_entries.begin(), pos); + for (; pos != m_entries.end(); ++pos) { + Entry &back = minimal_ranges.back(); + if (back.DoesAdjoinOrIntersect(*pos) && back.data == pos->data) + back.SetRangeEnd(std::max(back.GetRangeEnd(), pos->GetRangeEnd())); + else + minimal_ranges.push_back(*pos); } + m_entries.swap(minimal_ranges); + ComputeUpperBounds(0, m_entries.size()); } void Clear() { m_entries.clear(); } diff --git a/lldb/include/lldb/ValueObject/ValueObjectList.h b/lldb/include/lldb/ValueObject/ValueObjectList.h index 5d63c65a96e5f..524b66b209c23 100644 --- a/lldb/include/lldb/ValueObject/ValueObjectList.h +++ b/lldb/include/lldb/ValueObject/ValueObjectList.h @@ -22,8 +22,6 @@ class ValueObject; /// A collection of ValueObject values that. class ValueObjectList { public: - const ValueObjectList &operator=(const ValueObjectList &rhs); - void Append(const lldb::ValueObjectSP &val_obj_sp); void Append(const ValueObjectList &valobj_list); diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index fc7456a4b9a32..cda55ef06e549 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -215,6 +215,7 @@ class StoppointCallbackContext; class Stream; class StreamFile; class StreamString; +class LockableStreamFile; class StringList; class StringTableReader; class StructuredDataImpl; @@ -432,6 +433,7 @@ typedef std::unique_ptr typedef std::shared_ptr StopInfoSP; typedef std::shared_ptr StreamSP; typedef std::shared_ptr StreamFileSP; +typedef std::shared_ptr LockableStreamFileSP; typedef std::shared_ptr StringTypeSummaryImplSP; typedef std::unique_ptr StructuredDataImplUP; diff --git a/lldb/source/API/SBCommandReturnObject.cpp b/lldb/source/API/SBCommandReturnObject.cpp index 6f54581e64ef4..e78e213aa23af 100644 --- a/lldb/source/API/SBCommandReturnObject.cpp +++ b/lldb/source/API/SBCommandReturnObject.cpp @@ -12,11 +12,14 @@ #include "lldb/API/SBFile.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBStructuredData.h" +#include "lldb/API/SBValue.h" +#include "lldb/API/SBValueList.h" #include "lldb/Core/StructuredDataImpl.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Status.h" +#include "lldb/lldb-forward.h" using namespace lldb; using namespace lldb_private; @@ -356,3 +359,18 @@ void SBCommandReturnObject::SetError(const char *error_cstr) { if (error_cstr) ref().AppendError(error_cstr); } + +SBValueList +SBCommandReturnObject::GetValues(lldb::DynamicValueType use_dynamic) { + LLDB_INSTRUMENT_VA(this, use_dynamic); + + SBValueList value_list; + for (ValueObjectSP value_object_sp : + ref().GetValueObjectList().GetObjects()) { + SBValue value_sb; + value_sb.SetSP(value_object_sp, use_dynamic); + value_list.Append(value_sb); + } + + return value_list; +} diff --git a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp index ac2db5973effa..a913ed5fa12b3 100644 --- a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp @@ -193,10 +193,12 @@ are no syntax errors may indicate that a function was declared but never called. 
Options *GetOptions() override { return &m_all_options; } void IOHandlerActivated(IOHandler &io_handler, bool interactive) override { - StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - if (output_sp && interactive) { - output_sp->PutCString(g_reader_instructions); - output_sp->Flush(); + if (interactive) { + if (lldb::LockableStreamFileSP output_sp = + io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream.PutCString(g_reader_instructions); + } } } diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index f069b2feb5cb7..dd841cb5cb4cc 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -11,6 +11,7 @@ #include "CommandObjectRegexCommand.h" #include "lldb/Core/Debugger.h" #include "lldb/Core/IOHandler.h" +#include "lldb/Host/StreamFile.h" #include "lldb/Interpreter/CommandHistory.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" @@ -792,12 +793,15 @@ a number follows 'f':" protected: void IOHandlerActivated(IOHandler &io_handler, bool interactive) override { - StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - if (output_sp && interactive) { - output_sp->PutCString("Enter one or more sed substitution commands in " - "the form: 's///'.\nTerminate the " - "substitution list with an empty line.\n"); - output_sp->Flush(); + if (interactive) { + if (lldb::LockableStreamFileSP output_sp = + io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream.PutCString( + "Enter one or more sed substitution commands in " + "the form: 's///'.\nTerminate the " + "substitution list with an empty line.\n"); + } } } @@ -2377,16 +2381,18 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, }; void IOHandlerActivated(IOHandler &io_handler, bool interactive) override { - StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - if (output_sp && interactive) { - output_sp->PutCString(g_python_command_instructions); - output_sp->Flush(); + if (interactive) { + if (lldb::LockableStreamFileSP output_sp = + io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream.PutCString(g_python_command_instructions); + } } } void IOHandlerInputComplete(IOHandler &io_handler, std::string &data) override { - StreamFileSP error_sp = io_handler.GetErrorStreamFileSP(); + LockableStreamFileSP error_sp = io_handler.GetErrorStreamFileSP(); ScriptInterpreter *interpreter = GetDebugger().GetScriptInterpreter(); if (interpreter) { @@ -2396,9 +2402,10 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, std::string funct_name_str; if (interpreter->GenerateScriptAliasFunction(lines, funct_name_str)) { if (funct_name_str.empty()) { - error_sp->Printf("error: unable to obtain a function name, didn't " - "add python command.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( + "error: unable to obtain a function name, didn't " + "add python command.\n"); } else { // everything should be fine now, let's add this alias @@ -2409,33 +2416,36 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, Status error = m_interpreter.AddUserCommand( m_cmd_name, command_obj_sp, m_overwrite); if (error.Fail()) { - error_sp->Printf("error: unable to add selected command: '%s'", - error.AsCString()); - 
error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( + "error: unable to add selected command: '%s'", + error.AsCString()); } } else { llvm::Error llvm_error = m_container->LoadUserSubcommand( m_cmd_name, command_obj_sp, m_overwrite); if (llvm_error) { - error_sp->Printf("error: unable to add selected command: '%s'", - llvm::toString(std::move(llvm_error)).c_str()); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( + "error: unable to add selected command: '%s'", + llvm::toString(std::move(llvm_error)).c_str()); } } } } else { - error_sp->Printf( + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( "error: unable to create function, didn't add python command\n"); - error_sp->Flush(); } } else { - error_sp->Printf("error: empty function, didn't add python command\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( + "error: empty function, didn't add python command\n"); } } else { - error_sp->Printf( + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( "error: script interpreter missing, didn't add python command\n"); - error_sp->Flush(); } io_handler.SetIsDone(true); diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index d4d038d28f675..04142427717bd 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -205,6 +205,9 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command, ExpressionResults expr_result = target.EvaluateExpression( expr, exe_scope, valobj_sp, eval_options, &fixed_expression); + if (valobj_sp) + result.GetValueObjectList().Append(valobj_sp); + // Record the position of the expression in the command. 
std::optional indent; if (fixed_expression.empty()) { diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index 7e26381c92405..a95dea63720ac 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -12,6 +12,7 @@ #include "lldb/Expression/REPL.h" #include "lldb/Expression/UserExpression.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Host/StreamFile.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" @@ -22,6 +23,7 @@ #include "lldb/Target/Target.h" #include "lldb/Utility/DiagnosticsRendering.h" #include "lldb/lldb-enumerations.h" +#include "lldb/lldb-forward.h" #include "lldb/lldb-private-enumerations.h" using namespace lldb; @@ -434,6 +436,8 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr, } if (result_valobj_sp) { + result.GetValueObjectList().Append(result_valobj_sp); + Format format = m_format_options.GetFormat(); if (result_valobj_sp->GetError().Success()) { @@ -542,11 +546,10 @@ void CommandObjectExpression::GetMultilineExpression() { 1, // Show line numbers starting at 1 *this)); - StreamFileSP output_sp = io_handler_sp->GetOutputStreamFileSP(); - if (output_sp) { - output_sp->PutCString( + if (LockableStreamFileSP output_sp = io_handler_sp->GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream.PutCString( "Enter expressions, then terminate with an empty line to evaluate:\n"); - output_sp->Flush(); } debugger.RunIOHandlerAsync(io_handler_sp); } diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index a5709b36f52ee..7e42ef2615319 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -152,6 +152,7 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed { return; } + result.GetValueObjectList().Append(valobj_sp); DumpValueObjectOptions::DeclPrintingHelper helper = [&valobj_sp](ConstString type, ConstString var, const DumpValueObjectOptions &opts, @@ -317,10 +318,10 @@ class CommandObjectFrameSelect : public CommandObjectParsed { } else if (*m_options.relative_frame_offset > 0) { // I don't want "up 20" where "20" takes you past the top of the stack // to produce an error, but rather to just go to the top. OTOH, start - // by seeing if the requested frame exists, in which case we can avoid + // by seeing if the requested frame exists, in which case we can avoid // counting the stack here... 
- const uint32_t frame_requested = frame_idx - + *m_options.relative_frame_offset; + const uint32_t frame_requested = + frame_idx + *m_options.relative_frame_offset; StackFrameSP frame_sp = thread->GetStackFrameAtIndex(frame_requested); if (frame_sp) frame_idx = frame_requested; @@ -515,8 +516,8 @@ may even involve JITing and running code in the target program.)"); if (error.Fail() && (!variable_list || variable_list->GetSize() == 0)) { result.AppendError(error.AsCString()); - } + ValueObjectSP valobj_sp; TypeSummaryImplSP summary_format_sp; @@ -564,6 +565,8 @@ may even involve JITing and running code in the target program.)"); valobj_sp = frame->GetValueObjectForFrameVariable( var_sp, m_varobj_options.use_dynamic); if (valobj_sp) { + result.GetValueObjectList().Append(valobj_sp); + std::string scope_string; if (m_option_variable.show_scope) scope_string = GetScopeString(var_sp).str(); @@ -604,6 +607,8 @@ may even involve JITing and running code in the target program.)"); entry.ref(), m_varobj_options.use_dynamic, expr_path_options, var_sp, error); if (valobj_sp) { + result.GetValueObjectList().Append(valobj_sp); + std::string scope_string; if (m_option_variable.show_scope) scope_string = GetScopeString(var_sp).str(); @@ -653,6 +658,8 @@ may even involve JITing and running code in the target program.)"); valobj_sp = frame->GetValueObjectForFrameVariable( var_sp, m_varobj_options.use_dynamic); if (valobj_sp) { + result.GetValueObjectList().Append(valobj_sp); + // When dumping all variables, don't print any variables that are // not in scope to avoid extra unneeded output if (valobj_sp->IsInScope()) { @@ -694,6 +701,7 @@ may even involve JITing and running code in the target program.)"); recognized_frame->GetRecognizedArguments(); if (recognized_arg_list) { for (auto &rec_value_sp : recognized_arg_list->GetObjects()) { + result.GetValueObjectList().Append(rec_value_sp); options.SetFormat(m_option_format.GetFormat()); options.SetVariableFormatDisplayLanguage( rec_value_sp->GetPreferredDisplayLanguage()); diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index da50fe04fa2b6..bd9470b804949 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -57,6 +57,7 @@ #include "lldb/Utility/Timer.h" #include "lldb/ValueObject/ValueObjectVariable.h" #include "lldb/lldb-enumerations.h" +#include "lldb/lldb-forward.h" #include "lldb/lldb-private-enumerations.h" #include "clang/Frontend/CompilerInstance.h" @@ -803,7 +804,9 @@ class CommandObjectTargetVariable : public CommandObjectParsed { protected: void DumpGlobalVariableList(const ExecutionContext &exe_ctx, const SymbolContext &sc, - const VariableList &variable_list, Stream &s) { + const VariableList &variable_list, + CommandReturnObject &result) { + Stream &s = result.GetOutputStream(); if (variable_list.Empty()) return; if (sc.module_sp) { @@ -824,15 +827,16 @@ class CommandObjectTargetVariable : public CommandObjectParsed { ValueObjectSP valobj_sp(ValueObjectVariable::Create( exe_ctx.GetBestExecutionContextScope(), var_sp)); - if (valobj_sp) + if (valobj_sp) { + result.GetValueObjectList().Append(valobj_sp); DumpValueObject(s, var_sp, valobj_sp, var_sp->GetName().GetCString()); + } } } void DoExecute(Args &args, CommandReturnObject &result) override { Target *target = m_exe_ctx.GetTargetPtr(); const size_t argc = args.GetArgumentCount(); - Stream &s = result.GetOutputStream(); if (argc > 0) { for (const Args::ArgEntry &arg : args) { @@ 
-874,7 +878,7 @@ class CommandObjectTargetVariable : public CommandObjectParsed { m_exe_ctx.GetBestExecutionContextScope(), var_sp); if (valobj_sp) - DumpValueObject(s, var_sp, valobj_sp, + DumpValueObject(result.GetOutputStream(), var_sp, valobj_sp, use_var_name ? var_sp->GetName().GetCString() : arg.c_str()); } @@ -903,7 +907,8 @@ class CommandObjectTargetVariable : public CommandObjectParsed { if (comp_unit_varlist_sp) { size_t count = comp_unit_varlist_sp->GetSize(); if (count > 0) { - DumpGlobalVariableList(m_exe_ctx, sc, *comp_unit_varlist_sp, s); + DumpGlobalVariableList(m_exe_ctx, sc, *comp_unit_varlist_sp, + result); success = true; } } @@ -964,7 +969,8 @@ class CommandObjectTargetVariable : public CommandObjectParsed { VariableListSP comp_unit_varlist_sp( sc.comp_unit->GetVariableList(can_create)); if (comp_unit_varlist_sp) - DumpGlobalVariableList(m_exe_ctx, sc, *comp_unit_varlist_sp, s); + DumpGlobalVariableList(m_exe_ctx, sc, *comp_unit_varlist_sp, + result); } else if (sc.module_sp) { // Get all global variables for this module lldb_private::RegularExpression all_globals_regex( @@ -972,7 +978,7 @@ class CommandObjectTargetVariable : public CommandObjectParsed { VariableList variable_list; sc.module_sp->FindGlobalVariables(all_globals_regex, UINT32_MAX, variable_list); - DumpGlobalVariableList(m_exe_ctx, sc, variable_list, s); + DumpGlobalVariableList(m_exe_ctx, sc, variable_list, result); } } } @@ -4918,11 +4924,13 @@ Filter Options: protected: void IOHandlerActivated(IOHandler &io_handler, bool interactive) override { - StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - if (output_sp && interactive) { - output_sp->PutCString( - "Enter your stop hook command(s). Type 'DONE' to end.\n"); - output_sp->Flush(); + if (interactive) { + if (lldb::LockableStreamFileSP output_sp = + io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream.PutCString( + "Enter your stop hook command(s). 
Type 'DONE' to end.\n"); + } } } @@ -4930,12 +4938,12 @@ Filter Options: std::string &line) override { if (m_stop_hook_sp) { if (line.empty()) { - StreamFileSP error_sp(io_handler.GetErrorStreamFileSP()); - if (error_sp) { - error_sp->Printf("error: stop hook #%" PRIu64 - " aborted, no commands.\n", - m_stop_hook_sp->GetID()); - error_sp->Flush(); + if (lldb::LockableStreamFileSP error_sp = + io_handler.GetErrorStreamFileSP()) { + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: stop hook #%" PRIu64 + " aborted, no commands.\n", + m_stop_hook_sp->GetID()); } GetTarget().UndoCreateStopHook(m_stop_hook_sp->GetID()); } else { @@ -4944,11 +4952,11 @@ Filter Options: static_cast(m_stop_hook_sp.get()); hook_ptr->SetActionFromString(line); - StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - if (output_sp) { - output_sp->Printf("Stop hook #%" PRIu64 " added.\n", - m_stop_hook_sp->GetID()); - output_sp->Flush(); + if (lldb::LockableStreamFileSP output_sp = + io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream.Printf("Stop hook #%" PRIu64 " added.\n", + m_stop_hook_sp->GetID()); } } m_stop_hook_sp.reset(); diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp index e4c6e374446e8..41630b61c2f0e 100644 --- a/lldb/source/Commands/CommandObjectType.cpp +++ b/lldb/source/Commands/CommandObjectType.cpp @@ -14,6 +14,7 @@ #include "lldb/DataFormatters/FormatClasses.h" #include "lldb/Host/Config.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Host/StreamFile.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandObject.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" @@ -32,6 +33,7 @@ #include "lldb/Utility/ConstString.h" #include "lldb/Utility/RegularExpression.h" #include "lldb/Utility/StringList.h" +#include "lldb/lldb-forward.h" #include "llvm/ADT/STLExtras.h" @@ -167,16 +169,17 @@ class CommandObjectTypeSummaryAdd : public CommandObjectParsed, "for\n" " internal_dict: an LLDB support object not to be used\"\"\"\n"; - StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - if (output_sp && interactive) { - output_sp->PutCString(g_summary_addreader_instructions); - output_sp->Flush(); + if (interactive) { + if (LockableStreamFileSP output_sp = io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream.PutCString(g_summary_addreader_instructions); + } } } void IOHandlerInputComplete(IOHandler &io_handler, std::string &data) override { - StreamFileSP error_sp = io_handler.GetErrorStreamFileSP(); + LockableStreamFileSP error_sp = io_handler.GetErrorStreamFileSP(); #if LLDB_ENABLE_PYTHON ScriptInterpreter *interpreter = GetDebugger().GetScriptInterpreter(); @@ -197,9 +200,10 @@ class CommandObjectTypeSummaryAdd : public CommandObjectParsed, if (interpreter->GenerateTypeScriptFunction(lines, funct_name_str)) { if (funct_name_str.empty()) { - error_sp->Printf("unable to obtain a valid function name from " - "the script interpreter.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( + "unable to obtain a valid function name from " + "the script interpreter.\n"); } else { // now I have a valid function name, let's add this as script // for every type in the list @@ -216,8 +220,8 @@ class CommandObjectTypeSummaryAdd : public CommandObjectParsed, options->m_match_type, options->m_category, &error); if (error.Fail()) 
{ - error_sp->Printf("error: %s", error.AsCString()); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: %s", error.AsCString()); } } @@ -228,41 +232,42 @@ class CommandObjectTypeSummaryAdd : public CommandObjectParsed, CommandObjectTypeSummaryAdd::AddNamedSummary( options->m_name, script_format, &error); if (error.Fail()) { - error_sp->Printf("error: %s", error.AsCString()); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: %s", error.AsCString()); } } else { - error_sp->Printf("error: %s", error.AsCString()); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: %s", error.AsCString()); } } else { if (error.AsCString()) { - error_sp->Printf("error: %s", error.AsCString()); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: %s", error.AsCString()); } } } } else { - error_sp->Printf("error: unable to generate a function.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: unable to generate a function.\n"); } } else { - error_sp->Printf("error: no script interpreter.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: no script interpreter.\n"); } } else { - error_sp->Printf("error: internal synchronization information " - "missing or invalid.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: internal synchronization information " + "missing or invalid.\n"); } } else { - error_sp->Printf("error: empty function, didn't add python command.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( + "error: empty function, didn't add python command.\n"); } } else { - error_sp->Printf( + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( "error: script interpreter missing, didn't add python command.\n"); - error_sp->Flush(); } #endif io_handler.SetIsDone(true); @@ -404,16 +409,17 @@ class CommandObjectTypeSynthAdd : public CommandObjectParsed, } void IOHandlerActivated(IOHandler &io_handler, bool interactive) override { - StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - if (output_sp && interactive) { - output_sp->PutCString(g_synth_addreader_instructions); - output_sp->Flush(); + if (interactive) { + if (LockableStreamFileSP output_sp = io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream.PutCString(g_synth_addreader_instructions); + } } } void IOHandlerInputComplete(IOHandler &io_handler, std::string &data) override { - StreamFileSP error_sp = io_handler.GetErrorStreamFileSP(); + LockableStreamFileSP error_sp = io_handler.GetErrorStreamFileSP(); #if LLDB_ENABLE_PYTHON ScriptInterpreter *interpreter = GetDebugger().GetScriptInterpreter(); @@ -433,9 +439,10 @@ class CommandObjectTypeSynthAdd : public CommandObjectParsed, std::string class_name_str; if (interpreter->GenerateTypeSynthClass(lines, class_name_str)) { if (class_name_str.empty()) { - error_sp->Printf( + + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( "error: unable to obtain a proper name for the class.\n"); - error_sp->Flush(); } else { // everything should be fine now, let's add the synth provider // class @@ -459,37 +466,39 @@ class CommandObjectTypeSynthAdd : public 
CommandObjectParsed, if (AddSynth(ConstString(type_name), synth_provider, options->m_match_type, options->m_category, &error)) { - error_sp->Printf("error: %s\n", error.AsCString()); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: %s\n", error.AsCString()); break; } } else { - error_sp->Printf("error: invalid type name.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: invalid type name.\n"); break; } } } } else { - error_sp->Printf("error: unable to generate a class.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: unable to generate a class.\n"); } } else { - error_sp->Printf("error: no script interpreter.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("error: no script interpreter.\n"); } } else { - error_sp->Printf("error: internal synchronization data missing.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( + "error: internal synchronization data missing.\n"); } } else { - error_sp->Printf("error: empty function, didn't add python command.\n"); - error_sp->Flush(); + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( + "error: empty function, didn't add python command.\n"); } } else { - error_sp->Printf( + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf( "error: script interpreter missing, didn't add python command.\n"); - error_sp->Flush(); } #endif @@ -952,8 +961,8 @@ class CommandObjectTypeFormatterClear : public CommandObjectParsed { class CommandObjectTypeFormatDelete : public CommandObjectTypeFormatterDelete { public: CommandObjectTypeFormatDelete(CommandInterpreter &interpreter) - : CommandObjectTypeFormatterDelete( - interpreter, eFormatCategoryItemFormat) {} + : CommandObjectTypeFormatterDelete(interpreter, + eFormatCategoryItemFormat) {} ~CommandObjectTypeFormatDelete() override = default; }; @@ -1603,8 +1612,8 @@ bool CommandObjectTypeSummaryAdd::AddSummary(ConstString type_name, class CommandObjectTypeSummaryDelete : public CommandObjectTypeFormatterDelete { public: CommandObjectTypeSummaryDelete(CommandInterpreter &interpreter) - : CommandObjectTypeFormatterDelete( - interpreter, eFormatCategoryItemSummary) {} + : CommandObjectTypeFormatterDelete(interpreter, + eFormatCategoryItemSummary) {} ~CommandObjectTypeSummaryDelete() override = default; @@ -2070,8 +2079,8 @@ class CommandObjectTypeSynthList class CommandObjectTypeFilterDelete : public CommandObjectTypeFormatterDelete { public: CommandObjectTypeFilterDelete(CommandInterpreter &interpreter) - : CommandObjectTypeFormatterDelete( - interpreter, eFormatCategoryItemFilter) {} + : CommandObjectTypeFormatterDelete(interpreter, + eFormatCategoryItemFilter) {} ~CommandObjectTypeFilterDelete() override = default; }; @@ -2081,13 +2090,12 @@ class CommandObjectTypeFilterDelete : public CommandObjectTypeFormatterDelete { class CommandObjectTypeSynthDelete : public CommandObjectTypeFormatterDelete { public: CommandObjectTypeSynthDelete(CommandInterpreter &interpreter) - : CommandObjectTypeFormatterDelete( - interpreter, eFormatCategoryItemSynth) {} + : CommandObjectTypeFormatterDelete(interpreter, + eFormatCategoryItemSynth) {} ~CommandObjectTypeSynthDelete() override = default; }; - // CommandObjectTypeFilterClear class CommandObjectTypeFilterClear : public CommandObjectTypeFormatterClear { diff 
--git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp index ab1a2b390936c..507ef3fbe4759 100644 --- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp @@ -14,11 +14,13 @@ #include "lldb/Breakpoint/Watchpoint.h" #include "lldb/Core/IOHandler.h" #include "lldb/Host/OptionParser.h" +#include "lldb/Host/StreamFile.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandOptionArgumentTable.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" #include "lldb/Target/Target.h" +#include "lldb/lldb-forward.h" using namespace lldb; using namespace lldb_private; @@ -170,11 +172,13 @@ are no syntax errors may indicate that a function was declared but never called. Options *GetOptions() override { return &m_options; } void IOHandlerActivated(IOHandler &io_handler, bool interactive) override { - StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - if (output_sp && interactive) { - output_sp->PutCString( - "Enter your debugger command(s). Type 'DONE' to end.\n"); - output_sp->Flush(); + if (interactive) { + if (lldb::LockableStreamFileSP output_sp = + io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream.PutCString( + "Enter your debugger command(s). Type 'DONE' to end.\n"); + } } } diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index 8b7814d434ee9..242ef1c8a4596 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -873,9 +873,11 @@ llvm::StringRef Debugger::GetStaticBroadcasterClass() { Debugger::Debugger(lldb::LogOutputCallback log_callback, void *baton) : UserID(g_unique_id++), Properties(std::make_shared()), - m_input_file_sp(std::make_shared(stdin, false)), - m_output_stream_sp(std::make_shared(stdout, false)), - m_error_stream_sp(std::make_shared(stderr, false)), + m_input_file_sp(std::make_shared(stdin, NativeFile::Unowned)), + m_output_stream_sp(std::make_shared( + stdout, NativeFile::Unowned, m_output_mutex)), + m_error_stream_sp(std::make_shared( + stderr, NativeFile::Unowned, m_output_mutex)), m_input_recorder(nullptr), m_broadcaster_manager_sp(BroadcasterManager::MakeBroadcasterManager()), m_terminal_state(), m_target_list(*this), m_platform_list(), @@ -1083,12 +1085,14 @@ void Debugger::SetInputFile(FileSP file_sp) { void Debugger::SetOutputFile(FileSP file_sp) { assert(file_sp && file_sp->IsValid()); - m_output_stream_sp = std::make_shared(file_sp); + m_output_stream_sp = + std::make_shared(file_sp, m_output_mutex); } void Debugger::SetErrorFile(FileSP file_sp) { assert(file_sp && file_sp->IsValid()); - m_error_stream_sp = std::make_shared(file_sp); + m_error_stream_sp = + std::make_shared(file_sp, m_output_mutex); } void Debugger::SaveInputTerminalState() { @@ -1198,9 +1202,10 @@ bool Debugger::CheckTopIOHandlerTypes(IOHandler::Type top_type, void Debugger::PrintAsync(const char *s, size_t len, bool is_stdout) { bool printed = m_io_handler_stack.PrintAsync(s, len, is_stdout); if (!printed) { - lldb::StreamFileSP stream = + LockableStreamFileSP stream_sp = is_stdout ? 
m_output_stream_sp : m_error_stream_sp; - stream->Write(s, len); + LockedStreamFile locked_stream = stream_sp->Lock(); + locked_stream.Write(s, len); } } @@ -1225,8 +1230,9 @@ void Debugger::RunIOHandlerAsync(const IOHandlerSP &reader_sp, PushIOHandler(reader_sp, cancel_top_handler); } -void Debugger::AdoptTopIOHandlerFilesIfInvalid(FileSP &in, StreamFileSP &out, - StreamFileSP &err) { +void Debugger::AdoptTopIOHandlerFilesIfInvalid(FileSP &in, + LockableStreamFileSP &out, + LockableStreamFileSP &err) { // Before an IOHandler runs, it must have in/out/err streams. This function // is called when one or more of the streams are nullptr. We use the top // input reader's in/out/err streams, or fall back to the debugger file @@ -1242,27 +1248,29 @@ void Debugger::AdoptTopIOHandlerFilesIfInvalid(FileSP &in, StreamFileSP &out, in = GetInputFileSP(); // If there is nothing, use stdin if (!in) - in = std::make_shared<NativeFile>(stdin, false); + in = std::make_shared<NativeFile>(stdin, NativeFile::Unowned); } // If no STDOUT has been set, then set it appropriately - if (!out || !out->GetFile().IsValid()) { + if (!out || !out->GetUnlockedFile().IsValid()) { if (top_reader_sp) out = top_reader_sp->GetOutputStreamFileSP(); else out = GetOutputStreamSP(); // If there is nothing, use stdout if (!out) - out = std::make_shared<StreamFile>(stdout, false); + out = std::make_shared<LockableStreamFile>(stdout, NativeFile::Unowned, + m_output_mutex); } // If no STDERR has been set, then set it appropriately - if (!err || !err->GetFile().IsValid()) { + if (!err || !err->GetUnlockedFile().IsValid()) { if (top_reader_sp) err = top_reader_sp->GetErrorStreamFileSP(); else err = GetErrorStreamSP(); // If there is nothing, use stderr if (!err) - err = std::make_shared<StreamFile>(stderr, false); + err = std::make_shared<LockableStreamFile>(stderr, NativeFile::Unowned, + m_output_mutex); } } diff --git a/lldb/source/Core/IOHandler.cpp b/lldb/source/Core/IOHandler.cpp index ca06b52b874db..98d14758f1987 100644 --- a/lldb/source/Core/IOHandler.cpp +++ b/lldb/source/Core/IOHandler.cpp @@ -54,17 +54,17 @@ using llvm::StringRef; IOHandler::IOHandler(Debugger &debugger, IOHandler::Type type) : IOHandler(debugger, type, - FileSP(), // Adopt STDIN from top input reader - StreamFileSP(), // Adopt STDOUT from top input reader - StreamFileSP(), // Adopt STDERR from top input reader - 0 // Flags + FileSP(), // Adopt STDIN from top input reader + LockableStreamFileSP(), // Adopt STDOUT from top input reader + LockableStreamFileSP(), // Adopt STDERR from top input reader + 0 // Flags ) {} IOHandler::IOHandler(Debugger &debugger, IOHandler::Type type, const lldb::FileSP &input_sp, - const lldb::StreamFileSP &output_sp, - const lldb::StreamFileSP &error_sp, uint32_t flags) + const lldb::LockableStreamFileSP &output_sp, + const lldb::LockableStreamFileSP &error_sp, uint32_t flags) : m_debugger(debugger), m_input_sp(input_sp), m_output_sp(output_sp), m_error_sp(error_sp), m_popped(false), m_flags(flags), m_type(type), m_user_data(nullptr), m_done(false), m_active(false) { @@ -81,30 +81,18 @@ int IOHandler::GetInputFD() { } int IOHandler::GetOutputFD() { - return (m_output_sp ? m_output_sp->GetFile().GetDescriptor() : -1); + return (m_output_sp ? m_output_sp->GetUnlockedFile().GetDescriptor() : -1); } int IOHandler::GetErrorFD() { - return (m_error_sp ? m_error_sp->GetFile().GetDescriptor() : -1); -} - -FILE *IOHandler::GetInputFILE() { - return (m_input_sp ? m_input_sp->GetStream() : nullptr); -} - -FILE *IOHandler::GetOutputFILE() { - return (m_output_sp ? 
m_output_sp->GetFile().GetStream() : nullptr); -} - -FILE *IOHandler::GetErrorFILE() { - return (m_error_sp ? m_error_sp->GetFile().GetStream() : nullptr); + return (m_error_sp ? m_error_sp->GetUnlockedFile().GetDescriptor() : -1); } FileSP IOHandler::GetInputFileSP() { return m_input_sp; } -StreamFileSP IOHandler::GetOutputStreamFileSP() { return m_output_sp; } +LockableStreamFileSP IOHandler::GetOutputStreamFileSP() { return m_output_sp; } -StreamFileSP IOHandler::GetErrorStreamFileSP() { return m_error_sp; } +LockableStreamFileSP IOHandler::GetErrorStreamFileSP() { return m_error_sp; } bool IOHandler::GetIsInteractive() { return GetInputFileSP() ? GetInputFileSP()->GetIsInteractive() : false; @@ -119,10 +107,9 @@ void IOHandler::SetPopped(bool b) { m_popped.SetValue(b, eBroadcastOnChange); } void IOHandler::WaitForPop() { m_popped.WaitForValueEqualTo(true); } void IOHandler::PrintAsync(const char *s, size_t len, bool is_stdout) { - std::lock_guard guard(m_output_mutex); - lldb::StreamFileSP stream = is_stdout ? m_output_sp : m_error_sp; - stream->Write(s, len); - stream->Flush(); + lldb::LockableStreamFileSP stream_sp = is_stdout ? m_output_sp : m_error_sp; + LockedStreamFile locked_Stream = stream_sp->Lock(); + locked_Stream.Write(s, len); } bool IOHandlerStack::PrintAsync(const char *s, size_t len, bool is_stdout) { @@ -228,19 +215,20 @@ IOHandlerEditline::IOHandlerEditline( llvm::StringRef prompt, llvm::StringRef continuation_prompt, bool multi_line, bool color, uint32_t line_number_start, IOHandlerDelegate &delegate) - : IOHandlerEditline(debugger, type, - FileSP(), // Inherit input from top input reader - StreamFileSP(), // Inherit output from top input reader - StreamFileSP(), // Inherit error from top input reader - 0, // Flags - editline_name, // Used for saving history files - prompt, continuation_prompt, multi_line, color, - line_number_start, delegate) {} + : IOHandlerEditline( + debugger, type, + FileSP(), // Inherit input from top input reader + LockableStreamFileSP(), // Inherit output from top input reader + LockableStreamFileSP(), // Inherit error from top input reader + 0, // Flags + editline_name, // Used for saving history files + prompt, continuation_prompt, multi_line, color, line_number_start, + delegate) {} IOHandlerEditline::IOHandlerEditline( Debugger &debugger, IOHandler::Type type, const lldb::FileSP &input_sp, - const lldb::StreamFileSP &output_sp, const lldb::StreamFileSP &error_sp, - uint32_t flags, + const lldb::LockableStreamFileSP &output_sp, + const lldb::LockableStreamFileSP &error_sp, uint32_t flags, const char *editline_name, // Used for saving history files llvm::StringRef prompt, llvm::StringRef continuation_prompt, bool multi_line, bool color, uint32_t line_number_start, @@ -256,15 +244,12 @@ IOHandlerEditline::IOHandlerEditline( SetPrompt(prompt); #if LLDB_ENABLE_LIBEDIT - bool use_editline = false; - - use_editline = GetInputFILE() && GetOutputFILE() && GetErrorFILE() && - m_input_sp && m_input_sp->GetIsRealTerminal(); - + const bool use_editline = m_input_sp && m_output_sp && m_error_sp && + m_input_sp->GetIsRealTerminal(); if (use_editline) { - m_editline_up = std::make_unique(editline_name, GetInputFILE(), - GetOutputFILE(), GetErrorFILE(), - m_color, GetOutputMutex()); + m_editline_up = std::make_unique( + editline_name, m_input_sp ? 
m_input_sp->GetStream() : nullptr, + m_output_sp, m_error_sp, m_color); m_editline_up->SetIsInputCompleteCallback( [this](Editline *editline, StringList &lines) { return this->IsInputCompleteCallback(editline, lines); @@ -366,8 +351,8 @@ bool IOHandlerEditline::GetLine(std::string &line, bool &interrupted) { if (prompt && prompt[0]) { if (m_output_sp) { - m_output_sp->Printf("%s", prompt); - m_output_sp->Flush(); + LockedStreamFile locked_stream = m_output_sp->Lock(); + locked_stream.Printf("%s", prompt); } } } @@ -380,7 +365,7 @@ bool IOHandlerEditline::GetLine(std::string &line, bool &interrupted) { return false; } - FILE *in = GetInputFILE(); + FILE *in = m_input_sp ? m_input_sp->GetStream() : nullptr; char buffer[256]; if (!got_line && !in && m_input_sp) { @@ -545,9 +530,10 @@ bool IOHandlerEditline::GetLines(StringList &lines, bool &interrupted) { std::string line; if (m_base_line_number > 0 && GetIsInteractive()) { if (m_output_sp) { - m_output_sp->Printf("%u%s", - m_base_line_number + (uint32_t)lines.GetSize(), - GetPrompt() == nullptr ? " " : ""); + LockedStreamFile locked_stream = m_output_sp->Lock(); + locked_stream.Printf("%u%s", + m_base_line_number + (uint32_t)lines.GetSize(), + GetPrompt() == nullptr ? " " : ""); } } @@ -630,9 +616,8 @@ void IOHandlerEditline::GotEOF() { void IOHandlerEditline::PrintAsync(const char *s, size_t len, bool is_stdout) { #if LLDB_ENABLE_LIBEDIT if (m_editline_up) { - std::lock_guard guard(m_output_mutex); - lldb::StreamFileSP stream = is_stdout ? m_output_sp : m_error_sp; - m_editline_up->PrintAsync(stream.get(), s, len); + lldb::LockableStreamFileSP stream_sp = is_stdout ? m_output_sp : m_error_sp; + m_editline_up->PrintAsync(stream_sp, s, len); } else #endif { diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index c5eed0c0b4089..ee6e847cdb688 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -7576,7 +7576,9 @@ IOHandlerCursesGUI::IOHandlerCursesGUI(Debugger &debugger) void IOHandlerCursesGUI::Activate() { IOHandler::Activate(); if (!m_app_up) { - m_app_up = std::make_unique(GetInputFILE(), GetOutputFILE()); + m_app_up = std::make_unique( + m_input_sp ? m_input_sp->GetStream() : nullptr, + m_output_sp ? 
m_output_sp->GetUnlockedFile().GetStream() : nullptr); // This is both a window and a menu delegate std::shared_ptr<ApplicationDelegate> app_delegate_sp( diff --git a/lldb/source/Expression/REPL.cpp b/lldb/source/Expression/REPL.cpp index 4b53537e50e62..e5377d3114af3 100644 --- a/lldb/source/Expression/REPL.cpp +++ b/lldb/source/Expression/REPL.cpp @@ -103,8 +103,8 @@ void REPL::IOHandlerActivated(IOHandler &io_handler, bool interactive) { lldb::ProcessSP process_sp = m_target.GetProcessSP(); if (process_sp && process_sp->IsAlive()) return; - lldb::StreamFileSP error_sp(io_handler.GetErrorStreamFileSP()); - error_sp->Printf("REPL requires a running target process.\n"); + LockedStreamFile locked_stream = io_handler.GetErrorStreamFileSP()->Lock(); + locked_stream.Printf("REPL requires a running target process.\n"); io_handler.SetIsDone(true); } @@ -219,8 +219,10 @@ static bool ReadCode(const std::string &path, std::string &code, } void REPL::IOHandlerInputComplete(IOHandler &io_handler, std::string &code) { - lldb::StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - lldb::StreamFileSP error_sp(io_handler.GetErrorStreamFileSP()); + lldb::StreamFileSP output_sp = std::make_shared<StreamFile>( + io_handler.GetOutputStreamFileSP()->GetUnlockedFileSP()); + lldb::StreamFileSP error_sp = std::make_shared<StreamFile>( + io_handler.GetErrorStreamFileSP()->GetUnlockedFileSP()); bool extra_line = false; bool did_quit = false; diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp index 73da1d8481618..5f7a8b0190a1d 100644 --- a/lldb/source/Host/common/Editline.cpp +++ b/lldb/source/Host/common/Editline.cpp @@ -14,6 +14,7 @@ #include "lldb/Host/Editline.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/Host.h" +#include "lldb/Host/StreamFile.h" #include "lldb/Utility/AnsiTerminal.h" #include "lldb/Utility/CompletionRequest.h" #include "lldb/Utility/FileSpec.h" @@ -23,6 +24,7 @@ #include "lldb/Utility/StreamString.h" #include "lldb/Utility/StringList.h" #include "lldb/Utility/Timeout.h" +#include "lldb/lldb-forward.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/FileSystem.h" @@ -77,6 +79,19 @@ using namespace lldb_private::line_editor; #endif // #if LLDB_EDITLINE_USE_WCHAR +template <typename T> class ScopedOptional { +public: + template <typename... Args> + ScopedOptional(std::optional<T> &optional, Args &&...args) + : m_optional(optional) { + m_optional.emplace(std::forward<Args>(args)...); + } + ~ScopedOptional() { m_optional.reset(); } + +private: + std::optional<T> &m_optional; +}; + bool IsOnlySpaces(const EditLineStringType &content) { for (wchar_t ch : content) { if (ch != EditLineCharType(' ')) @@ -389,11 +404,13 @@ void Editline::MoveCursor(CursorLocation from, CursorLocation to) { (int)((info->cursor - info->buffer) + GetPromptWidth()); int editline_cursor_row = editline_cursor_position / m_terminal_width; + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); + // Determine relative starting and ending lines int fromLine = GetLineIndexForLocation(from, editline_cursor_row); int toLine = GetLineIndexForLocation(to, editline_cursor_row); if (toLine != fromLine) { - fprintf(m_output_file, + fprintf(locked_stream.GetFile().GetStream(), (toLine > fromLine) ? 
ANSI_DOWN_N_ROWS : ANSI_UP_N_ROWS, std::abs(toLine - fromLine)); } @@ -409,21 +426,23 @@ void Editline::MoveCursor(CursorLocation from, CursorLocation to) { 80) + 1; } - fprintf(m_output_file, ANSI_SET_COLUMN_N, toColumn); + fprintf(locked_stream.GetFile().GetStream(), ANSI_SET_COLUMN_N, toColumn); } void Editline::DisplayInput(int firstIndex) { - fprintf(m_output_file, ANSI_SET_COLUMN_N ANSI_CLEAR_BELOW, 1); + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); + fprintf(locked_stream.GetFile().GetStream(), + ANSI_SET_COLUMN_N ANSI_CLEAR_BELOW, 1); int line_count = (int)m_input_lines.size(); for (int index = firstIndex; index < line_count; index++) { - fprintf(m_output_file, + fprintf(locked_stream.GetFile().GetStream(), "%s" "%s" "%s" EditLineStringFormatSpec " ", m_prompt_ansi_prefix.c_str(), PromptForIndex(index).c_str(), m_prompt_ansi_suffix.c_str(), m_input_lines[index].c_str()); if (index < line_count - 1) - fprintf(m_output_file, "\n"); + fprintf(locked_stream.GetFile().GetStream(), "\n"); } } @@ -535,8 +554,10 @@ int Editline::GetCharacter(EditLineGetCharType *c) { // Paint a ANSI formatted version of the desired prompt over the version // libedit draws. (will only be requested if colors are supported) if (m_needs_prompt_repaint) { + ScopedOptional scope(m_locked_output, + m_output_stream_sp->Lock()); MoveCursor(CursorLocation::EditingCursor, CursorLocation::EditingPrompt); - fprintf(m_output_file, + fprintf(m_locked_output->GetFile().GetStream(), "%s" "%s" "%s", @@ -574,10 +595,10 @@ int Editline::GetCharacter(EditLineGetCharType *c) { // indefinitely. This gives a chance for someone to interrupt us. After // Read returns, immediately lock the mutex again and check if we were // interrupted. - m_output_mutex.unlock(); + m_locked_output.reset(); int read_count = m_input_connection.Read(&ch, 1, std::nullopt, status, nullptr); - m_output_mutex.lock(); + m_locked_output.emplace(m_output_stream_sp->Lock()); if (m_editor_status == EditorStatus::Interrupted) { while (read_count > 0 && status == lldb::eConnectionStatusSuccess) read_count = @@ -700,12 +721,14 @@ unsigned char Editline::EndOrAddLineCommand(int ch) { } } MoveCursor(CursorLocation::EditingCursor, CursorLocation::BlockEnd); - fprintf(m_output_file, "\n"); + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); + fprintf(locked_stream.GetFile().GetStream(), "\n"); m_editor_status = EditorStatus::Complete; return CC_NEWLINE; } unsigned char Editline::DeleteNextCharCommand(int ch) { + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); LineInfoW *info = const_cast(el_wline(m_editline)); // Just delete the next character normally if possible @@ -719,7 +742,7 @@ unsigned char Editline::DeleteNextCharCommand(int ch) { // line is empty, in which case it is treated as EOF if (m_current_line_index == m_input_lines.size() - 1) { if (ch == 4 && info->buffer == info->lastchar) { - fprintf(m_output_file, "^D\n"); + fprintf(locked_stream.GetFile().GetStream(), "^D\n"); m_editor_status = EditorStatus::EndOfInput; return CC_EOF; } @@ -767,7 +790,8 @@ unsigned char Editline::DeletePreviousCharCommand(int ch) { priorLine + m_input_lines[m_current_line_index]; // Repaint from the new line down - fprintf(m_output_file, ANSI_UP_N_ROWS ANSI_SET_COLUMN_N, + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); + fprintf(locked_stream.GetFile().GetStream(), ANSI_UP_N_ROWS ANSI_SET_COLUMN_N, CountRowsForLine(priorLine), 1); DisplayInput(m_current_line_index); @@ -785,17 +809,19 @@ unsigned char 
Editline::PreviousLineCommand(int ch) { return RecallHistory(HistoryOperation::Older); } + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); + // Start from a known location MoveCursor(CursorLocation::EditingCursor, CursorLocation::EditingPrompt); // Treat moving up from a blank last line as a deletion of that line if (m_current_line_index == m_input_lines.size() - 1 && IsOnlySpaces()) { m_input_lines.erase(m_input_lines.begin() + m_current_line_index); - fprintf(m_output_file, ANSI_CLEAR_BELOW); + fprintf(locked_stream.GetFile().GetStream(), ANSI_CLEAR_BELOW); } SetCurrentLine(m_current_line_index - 1); - fprintf(m_output_file, ANSI_UP_N_ROWS ANSI_SET_COLUMN_N, + fprintf(locked_stream.GetFile().GetStream(), ANSI_UP_N_ROWS ANSI_SET_COLUMN_N, CountRowsForLine(m_input_lines[m_current_line_index]), 1); return CC_NEWLINE; } @@ -829,9 +855,11 @@ unsigned char Editline::NextLineCommand(int ch) { const LineInfoW *info = el_wline(m_editline); int cursor_position = (int)((info->cursor - info->buffer) + GetPromptWidth()); int cursor_row = cursor_position / m_terminal_width; + + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); for (int line_count = 0; line_count < m_current_line_rows - cursor_row; line_count++) { - fprintf(m_output_file, "\n"); + fprintf(locked_stream.GetFile().GetStream(), "\n"); } return CC_NEWLINE; } @@ -1031,7 +1059,9 @@ void Editline::DisplayCompletions( Editline &editline, llvm::ArrayRef results) { assert(!results.empty()); - fprintf(editline.m_output_file, + LockedStreamFile locked_stream = editline.m_output_stream_sp->Lock(); + + fprintf(locked_stream.GetFile().GetStream(), "\n" ANSI_CLEAR_BELOW "Available completions:\n"); /// Account for the current line, the line showing "Available completions" @@ -1049,15 +1079,15 @@ void Editline::DisplayCompletions( size_t cur_pos = 0; while (cur_pos < results.size()) { - cur_pos += - PrintCompletion(editline.m_output_file, results.slice(cur_pos), max_len, - editline.GetTerminalWidth(), - all ? std::nullopt : std::optional(page_size)); + cur_pos += PrintCompletion( + locked_stream.GetFile().GetStream(), results.slice(cur_pos), max_len, + editline.GetTerminalWidth(), + all ? std::nullopt : std::optional(page_size)); if (cur_pos >= results.size()) break; - fprintf(editline.m_output_file, "More (Y/n/a): "); + fprintf(locked_stream.GetFile().GetStream(), "More (Y/n/a): "); // The type for the output and the type for the parameter are different, // to allow interoperability with older versions of libedit. The container // for the reply must be as wide as what our implementation is using, @@ -1069,11 +1099,11 @@ void Editline::DisplayCompletions( // Check for a ^C or other interruption. 
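The Editline hunks above rely on one more idiom: GetCharacter() must give up the output lock while it blocks reading a character, so that PrintAsync from other threads can still draw, and then retake it before touching the terminal again. The patch keeps the lock in a std::optional member (m_locked_output) and adds the small ScopedOptional helper so the lock is dropped on every exit path. A reduced model using only standard-library types; everything except the ScopedOptional shape is a hypothetical stand-in:

#include <cstdio>
#include <mutex>
#include <optional>
#include <utility>

// Emplaces a value into an optional on entry and clears it on scope exit,
// even if the body resets and re-emplaces it in between.
template <typename T> class ScopedOptional {
public:
  template <typename... Args>
  ScopedOptional(std::optional<T> &optional, Args &&...args)
      : m_optional(optional) {
    m_optional.emplace(std::forward<Args>(args)...);
  }
  ~ScopedOptional() { m_optional.reset(); }

private:
  std::optional<T> &m_optional;
};

struct Reader {
  std::recursive_mutex output_mutex;
  // Stands in for Editline's m_locked_output member.
  std::optional<std::unique_lock<std::recursive_mutex>> locked_output;

  int GetChar() {
    // Hold the lock for the whole call by default...
    ScopedOptional<std::unique_lock<std::recursive_mutex>> scope(
        locked_output, output_mutex);
    // ...but drop it across the blocking read, then retake it before
    // echoing or editing.
    locked_output.reset();
    int ch = std::getchar(); // stands in for m_input_connection.Read()
    locked_output.emplace(output_mutex);
    return ch;
  }
};

int main() {
  Reader reader;
  return reader.GetChar() == EOF ? 1 : 0;
}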
if (editline.m_editor_status == EditorStatus::Interrupted) { editline.m_editor_status = EditorStatus::Editing; - fprintf(editline.m_output_file, "^C\n"); + fprintf(locked_stream.GetFile().GetStream(), "^C\n"); break; } - fprintf(editline.m_output_file, "\n"); + fprintf(locked_stream.GetFile().GetStream(), "\n"); if (got_char == -1 || reply == 'n') break; if (reply == 'a') @@ -1182,17 +1212,18 @@ unsigned char Editline::TypedCharacter(int ch) { line_info->lastchar - line_info->buffer); if (std::optional<std::string> to_add = m_suggestion_callback(line)) { + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); std::string to_add_color = m_suggestion_ansi_prefix + to_add.value() + m_suggestion_ansi_suffix; - fputs(typed.c_str(), m_output_file); - fputs(to_add_color.c_str(), m_output_file); + fputs(typed.c_str(), locked_stream.GetFile().GetStream()); + fputs(to_add_color.c_str(), locked_stream.GetFile().GetStream()); size_t new_autosuggestion_size = line.size() + to_add->length(); // Print spaces to hide any remains of a previous longer autosuggestion. if (new_autosuggestion_size < m_previous_autosuggestion_size) { size_t spaces_to_print = m_previous_autosuggestion_size - new_autosuggestion_size; std::string spaces = std::string(spaces_to_print, ' '); - fputs(spaces.c_str(), m_output_file); + fputs(spaces.c_str(), locked_stream.GetFile().GetStream()); } m_previous_autosuggestion_size = new_autosuggestion_size; @@ -1201,7 +1232,7 @@ unsigned char Editline::TypedCharacter(int ch) { int editline_cursor_row = editline_cursor_position / m_terminal_width; int toColumn = editline_cursor_position - (editline_cursor_row * m_terminal_width); - fprintf(m_output_file, ANSI_SET_COLUMN_N, toColumn); + fprintf(locked_stream.GetFile().GetStream(), ANSI_SET_COLUMN_N, toColumn); return CC_REFRESH; } @@ -1236,13 +1267,17 @@ void Editline::ConfigureEditor(bool multiline) { el_end(m_editline); } - m_editline = - el_init(m_editor_name.c_str(), m_input_file, m_output_file, m_error_file); + LockedStreamFile locked_output_stream = m_output_stream_sp->Lock(); + LockedStreamFile locked_error_stream = m_error_stream_sp->Lock(); + m_editline = el_init(m_editor_name.c_str(), m_input_file, + locked_output_stream.GetFile().GetStream(), + locked_error_stream.GetFile().GetStream()); ApplyTerminalSizeChange(); if (m_history_sp && m_history_sp->IsValid()) { if (!m_history_sp->Load()) { - fputs("Could not load history file\n.", m_output_file); + fputs("Could not load history file\n.", + locked_output_stream.GetFile().GetStream()); } el_wset(m_editline, EL_HIST, history, m_history_sp->GetHistoryPtr()); } @@ -1473,12 +1508,12 @@ Editline *Editline::InstanceFor(EditLine *editline) { } Editline::Editline(const char *editline_name, FILE *input_file, - FILE *output_file, FILE *error_file, bool color, - std::recursive_mutex &output_mutex) + lldb::LockableStreamFileSP output_stream_sp, + lldb::LockableStreamFileSP error_stream_sp, bool color) : m_editor_status(EditorStatus::Complete), m_input_file(input_file), - m_output_file(output_file), m_error_file(error_file), - m_input_connection(fileno(input_file), false), m_color(color), - m_output_mutex(output_mutex) { + m_output_stream_sp(output_stream_sp), m_error_stream_sp(error_stream_sp), + m_input_connection(fileno(input_file), false), m_color(color) { + assert(output_stream_sp && error_stream_sp); // Get a shared history instance m_editor_name = (editline_name == nullptr) ? 
"lldb-tmp" : editline_name; m_history_sp = EditlineHistory::GetHistory(m_editor_name); @@ -1552,9 +1587,9 @@ uint32_t Editline::GetCurrentLine() { return m_current_line_index; } bool Editline::Interrupt() { bool result = true; - std::lock_guard guard(m_output_mutex); + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); if (m_editor_status == EditorStatus::Editing) { - fprintf(m_output_file, "^C\n"); + fprintf(locked_stream.GetFile().GetStream(), "^C\n"); result = m_input_connection.InterruptRead(); } m_editor_status = EditorStatus::Interrupted; @@ -1563,10 +1598,10 @@ bool Editline::Interrupt() { bool Editline::Cancel() { bool result = true; - std::lock_guard guard(m_output_mutex); + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); if (m_editor_status == EditorStatus::Editing) { MoveCursor(CursorLocation::EditingCursor, CursorLocation::BlockStart); - fprintf(m_output_file, ANSI_CLEAR_BELOW); + fprintf(locked_stream.GetFile().GetStream(), ANSI_CLEAR_BELOW); result = m_input_connection.InterruptRead(); } m_editor_status = EditorStatus::Interrupted; @@ -1578,7 +1613,8 @@ bool Editline::GetLine(std::string &line, bool &interrupted) { m_input_lines = std::vector(); m_input_lines.insert(m_input_lines.begin(), EditLineConstString("")); - std::lock_guard guard(m_output_mutex); + ScopedOptional scope(m_locked_output, + m_output_stream_sp->Lock()); lldbassert(m_editor_status != EditorStatus::Editing); if (m_editor_status == EditorStatus::Interrupted) { @@ -1598,7 +1634,7 @@ bool Editline::GetLine(std::string &line, bool &interrupted) { interrupted = m_editor_status == EditorStatus::Interrupted; if (!interrupted) { if (input == nullptr) { - fprintf(m_output_file, "\n"); + fprintf(m_locked_output->GetFile().GetStream(), "\n"); m_editor_status = EditorStatus::EndOfInput; } else { m_history_sp->Enter(input); @@ -1623,7 +1659,9 @@ bool Editline::GetLines(int first_line_number, StringList &lines, m_input_lines = std::vector(); m_input_lines.insert(m_input_lines.begin(), EditLineConstString("")); - std::lock_guard guard(m_output_mutex); + ScopedOptional scope(m_locked_output, + m_output_stream_sp->Lock()); + // Begin the line editing loop DisplayInput(); SetCurrentLine(0); @@ -1652,15 +1690,15 @@ bool Editline::GetLines(int first_line_number, StringList &lines, return m_editor_status != EditorStatus::EndOfInput; } -void Editline::PrintAsync(Stream *stream, const char *s, size_t len) { - std::lock_guard guard(m_output_mutex); +void Editline::PrintAsync(lldb::LockableStreamFileSP stream_sp, const char *s, + size_t len) { + LockedStreamFile locked_stream = m_output_stream_sp->Lock(); if (m_editor_status == EditorStatus::Editing) { SaveEditedLine(); MoveCursor(CursorLocation::EditingCursor, CursorLocation::BlockStart); - fprintf(m_output_file, ANSI_CLEAR_BELOW); + fprintf(locked_stream.GetFile().GetStream(), ANSI_CLEAR_BELOW); } - stream->Write(s, len); - stream->Flush(); + locked_stream.Write(s, len); if (m_editor_status == EditorStatus::Editing) { DisplayInput(); MoveCursor(CursorLocation::BlockEnd, CursorLocation::EditingCursor); diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 5346d5a2d162a..c363f20081f9e 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -57,6 +57,7 @@ #include "lldb/Utility/Timer.h" #include "lldb/Host/Config.h" +#include "lldb/lldb-forward.h" #if LLDB_ENABLE_LIBEDIT #include "lldb/Host/Editline.h" #endif @@ -2843,7 +2844,7 @@ void 
CommandInterpreter::HandleCommandsFromFile( // Used for inheriting the right settings when "command source" might // have nested "command source" commands - lldb::StreamFileSP empty_stream_sp; + lldb::LockableStreamFileSP empty_stream_sp; m_command_source_flags.push_back(flags); IOHandlerSP io_handler_sp(new IOHandlerEditline( debugger, IOHandler::Type::CommandInterpreter, input_file_sp, @@ -3100,25 +3101,26 @@ void CommandInterpreter::PrintCommandOutput(IOHandler &io_handler, llvm::StringRef str, bool is_stdout) { - lldb::StreamFileSP stream = is_stdout ? io_handler.GetOutputStreamFileSP() - : io_handler.GetErrorStreamFileSP(); + lldb::LockableStreamFileSP stream = is_stdout + ? io_handler.GetOutputStreamFileSP() + : io_handler.GetErrorStreamFileSP(); // Split the output into lines and poll for interrupt requests bool had_output = !str.empty(); while (!str.empty()) { llvm::StringRef line; std::tie(line, str) = str.split('\n'); { - std::lock_guard guard(io_handler.GetOutputMutex()); - stream->Write(line.data(), line.size()); - stream->Write("\n", 1); + LockedStreamFile stream_file = stream->Lock(); + stream_file.Write(line.data(), line.size()); + stream_file.Write("\n", 1); } } - std::lock_guard guard(io_handler.GetOutputMutex()); + LockedStreamFile stream_file = stream->Lock(); if (had_output && INTERRUPT_REQUESTED(GetDebugger(), "Interrupted dumping command output")) - stream->Printf("\n... Interrupted.\n"); - stream->Flush(); + stream_file.Printf("\n... Interrupted.\n"); + stream_file.Flush(); } bool CommandInterpreter::EchoCommandNonInteractive( @@ -3160,9 +3162,9 @@ void CommandInterpreter::IOHandlerInputComplete(IOHandler &io_handler, // from a file) we need to echo the command out so we don't just see the // command output and no command... if (EchoCommandNonInteractive(line, io_handler.GetFlags())) { - std::lock_guard guard(io_handler.GetOutputMutex()); - io_handler.GetOutputStreamFileSP()->Printf( - "%s%s\n", io_handler.GetPrompt(), line.c_str()); + LockedStreamFile locked_stream = + io_handler.GetOutputStreamFileSP()->Lock(); + locked_stream.Printf("%s%s\n", io_handler.GetPrompt(), line.c_str()); } } diff --git a/lldb/source/Interpreter/ScriptInterpreter.cpp b/lldb/source/Interpreter/ScriptInterpreter.cpp index a392d5777a021..4424b6c894356 100644 --- a/lldb/source/Interpreter/ScriptInterpreter.cpp +++ b/lldb/source/Interpreter/ScriptInterpreter.cpp @@ -206,7 +206,8 @@ ScriptInterpreterIORedirect::Create(bool enable_io, Debugger &debugger, ScriptInterpreterIORedirect::ScriptInterpreterIORedirect( std::unique_ptr input, std::unique_ptr output) : m_input_file_sp(std::move(input)), - m_output_file_sp(std::make_shared(std::move(output))), + m_output_file_sp(std::make_shared(std::move(output), + m_output_mutex)), m_error_file_sp(m_output_file_sp), m_communication("lldb.ScriptInterpreterIORedirect.comm"), m_disconnect(false) {} @@ -240,7 +241,9 @@ ScriptInterpreterIORedirect::ScriptInterpreterIORedirect( m_disconnect = true; FILE *outfile_handle = fdopen(pipe.ReleaseWriteFileDescriptor(), "w"); - m_output_file_sp = std::make_shared(outfile_handle, true); + m_output_file_sp = std::make_shared( + std::make_shared(outfile_handle, NativeFile::Owned), + m_output_mutex); m_error_file_sp = m_output_file_sp; if (outfile_handle) ::setbuf(outfile_handle, nullptr); @@ -257,9 +260,9 @@ ScriptInterpreterIORedirect::ScriptInterpreterIORedirect( void ScriptInterpreterIORedirect::Flush() { if (m_output_file_sp) - m_output_file_sp->Flush(); + m_output_file_sp->Lock().Flush(); if (m_error_file_sp) - 
m_error_file_sp->Flush(); + m_error_file_sp->Lock().Flush(); } ScriptInterpreterIORedirect::~ScriptInterpreterIORedirect() { @@ -273,7 +276,7 @@ ScriptInterpreterIORedirect::~ScriptInterpreterIORedirect() { // Close the write end of the pipe since we are done with our one line // script. This should cause the read thread that output_comm is using to // exit. - m_output_file_sp->GetFile().Close(); + m_output_file_sp->GetUnlockedFile().Close(); // The close above should cause this thread to exit when it gets to the end // of file, so let it get all its data. m_communication.JoinReadThread(); diff --git a/lldb/source/Plugins/ABI/RISCV/ABISysV_riscv.cpp b/lldb/source/Plugins/ABI/RISCV/ABISysV_riscv.cpp index 8412991933d27..c463bd006b3db 100644 --- a/lldb/source/Plugins/ABI/RISCV/ABISysV_riscv.cpp +++ b/lldb/source/Plugins/ABI/RISCV/ABISysV_riscv.cpp @@ -850,8 +850,62 @@ void ABISysV_riscv::AugmentRegisterInfo( it.value().alt_name.SetCString("x3"); else if (it.value().name == "fp") it.value().alt_name.SetCString("s0"); + else if (it.value().name == "tp") + it.value().alt_name.SetCString("x4"); else if (it.value().name == "s0") it.value().alt_name.SetCString("x8"); + else if (it.value().name == "s1") + it.value().alt_name.SetCString("x9"); + else if (it.value().name == "t0") + it.value().alt_name.SetCString("x5"); + else if (it.value().name == "t1") + it.value().alt_name.SetCString("x6"); + else if (it.value().name == "t2") + it.value().alt_name.SetCString("x7"); + else if (it.value().name == "a0") + it.value().alt_name.SetCString("x10"); + else if (it.value().name == "a1") + it.value().alt_name.SetCString("x11"); + else if (it.value().name == "a2") + it.value().alt_name.SetCString("x12"); + else if (it.value().name == "a3") + it.value().alt_name.SetCString("x13"); + else if (it.value().name == "a4") + it.value().alt_name.SetCString("x14"); + else if (it.value().name == "a5") + it.value().alt_name.SetCString("x15"); + else if (it.value().name == "a6") + it.value().alt_name.SetCString("x16"); + else if (it.value().name == "a7") + it.value().alt_name.SetCString("x17"); + else if (it.value().name == "s2") + it.value().alt_name.SetCString("x18"); + else if (it.value().name == "s3") + it.value().alt_name.SetCString("x19"); + else if (it.value().name == "s4") + it.value().alt_name.SetCString("x20"); + else if (it.value().name == "s5") + it.value().alt_name.SetCString("x21"); + else if (it.value().name == "s6") + it.value().alt_name.SetCString("x22"); + else if (it.value().name == "s7") + it.value().alt_name.SetCString("x23"); + else if (it.value().name == "s8") + it.value().alt_name.SetCString("x24"); + else if (it.value().name == "s9") + it.value().alt_name.SetCString("x25"); + else if (it.value().name == "s10") + it.value().alt_name.SetCString("x26"); + else if (it.value().name == "s11") + it.value().alt_name.SetCString("x27"); + else if (it.value().name == "t3") + it.value().alt_name.SetCString("x28"); + else if (it.value().name == "t4") + it.value().alt_name.SetCString("x29"); + else if (it.value().name == "t5") + it.value().alt_name.SetCString("x30"); + else if (it.value().name == "t6") + it.value().alt_name.SetCString("x31"); // Set generic regnum so lldb knows what the PC, etc is it.value().regnum_generic = GetGenericNum(it.value().name.GetStringRef()); diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index 51e9a6d81b839..665500f23e95d 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ 
b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -651,8 +651,8 @@ BreakpointSP PlatformDarwin::SetThreadCreationBreakpoint(Target &target) { "start_wqthread", "_pthread_wqthread", "_pthread_start", }; - static const char *g_bp_modules[] = {"libsystem_c.dylib", - "libSystem.B.dylib"}; + static const char *g_bp_modules[] = {"libsystem_c.dylib", "libSystem.B.dylib", + "libsystem_pthread.dylib"}; FileSpecList bp_modules; for (size_t i = 0; i < std::size(g_bp_modules); i++) { diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp index 6d028e324ee4e..191863ae25d7b 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp @@ -17,6 +17,7 @@ #include "lldb/Utility/Stream.h" #include "lldb/Utility/StringList.h" #include "lldb/Utility/Timer.h" +#include "lldb/lldb-forward.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatAdapters.h" #include @@ -76,8 +77,13 @@ class IOHandlerLuaInterpreter : public IOHandlerDelegate, } if (instructions == nullptr) return; - if (interactive) - *io_handler.GetOutputStreamFileSP() << instructions; + if (interactive) { + if (lldb::LockableStreamFileSP output_sp = + io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = output_sp->Lock(); + locked_stream << instructions; + } + } } bool IOHandlerIsInputComplete(IOHandler &io_handler, @@ -112,8 +118,11 @@ class IOHandlerLuaInterpreter : public IOHandlerDelegate, for (BreakpointOptions &bp_options : *bp_options_vec) { Status error = m_script_interpreter.SetBreakpointCommandCallback( bp_options, data.c_str(), /*is_callback=*/false); - if (error.Fail()) - *io_handler.GetErrorStreamFileSP() << error.AsCString() << '\n'; + if (error.Fail()) { + LockedStreamFile locked_stream = + io_handler.GetErrorStreamFileSP()->Lock(); + locked_stream << error.AsCString() << '\n'; + } } io_handler.SetIsDone(true); } break; @@ -130,8 +139,11 @@ class IOHandlerLuaInterpreter : public IOHandlerDelegate, io_handler.SetIsDone(true); return; } - if (llvm::Error error = m_script_interpreter.GetLua().Run(data)) - *io_handler.GetErrorStreamFileSP() << toString(std::move(error)); + if (llvm::Error error = m_script_interpreter.GetLua().Run(data)) { + LockedStreamFile locked_stream = + io_handler.GetErrorStreamFileSP()->Lock(); + locked_stream << toString(std::move(error)); + } break; } } diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 9ea5b95a3d803..00d01981c64ff 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/Host/Config.h" -#include "lldb/lldb-enumerations.h" #if LLDB_ENABLE_PYTHON @@ -33,6 +32,7 @@ #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Host/Pipe.h" +#include "lldb/Host/StreamFile.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Target/Thread.h" @@ -41,6 +41,8 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Timer.h" #include "lldb/ValueObject/ValueObject.h" +#include "lldb/lldb-enumerations.h" +#include "lldb/lldb-forward.h" #include "llvm/ADT/STLExtras.h" 
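For reference, the long if/else chain in the ABISysV_riscv.cpp hunk above encodes the standard RISC-V psABI aliases for the integer registers; fp is the conventional alias of s0, which is why it maps to "s0" rather than straight to an x-name. The same mapping written as data, purely as an illustration (the patch itself keeps the chain):

#include <cstdio>
#include <map>
#include <string>

// ABI name -> architectural register, per the RISC-V psABI.
static const std::map<std::string, std::string> g_riscv_abi_to_xreg = {
    {"zero", "x0"}, {"ra", "x1"},  {"sp", "x2"},  {"gp", "x3"},
    {"tp", "x4"},   {"t0", "x5"},  {"t1", "x6"},  {"t2", "x7"},
    {"s0", "x8"},   {"fp", "x8"},  {"s1", "x9"},  {"a0", "x10"},
    {"a1", "x11"},  {"a2", "x12"}, {"a3", "x13"}, {"a4", "x14"},
    {"a5", "x15"},  {"a6", "x16"}, {"a7", "x17"}, {"s2", "x18"},
    {"s3", "x19"},  {"s4", "x20"}, {"s5", "x21"}, {"s6", "x22"},
    {"s7", "x23"},  {"s8", "x24"}, {"s9", "x25"}, {"s10", "x26"},
    {"s11", "x27"}, {"t3", "x28"}, {"t4", "x29"}, {"t5", "x30"},
    {"t6", "x31"}};

int main() {
  auto it = g_riscv_abi_to_xreg.find("a0");
  if (it != g_riscv_abi_to_xreg.end())
    std::printf("a0 is %s\n", it->second.c_str()); // prints: a0 is x10
}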
#include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -489,11 +491,11 @@ def function (frame, bp_loc, internal_dict): break; } - if (instructions) { - StreamFileSP output_sp(io_handler.GetOutputStreamFileSP()); - if (output_sp && interactive) { - output_sp->PutCString(instructions); - output_sp->Flush(); + if (instructions && interactive) { + if (LockableStreamFileSP stream_sp = io_handler.GetOutputStreamFileSP()) { + LockedStreamFile locked_stream = stream_sp->Lock(); + locked_stream.PutCString(instructions); + locked_stream.Flush(); } } } @@ -527,10 +529,9 @@ void ScriptInterpreterPythonImpl::IOHandlerInputComplete(IOHandler &io_handler, bp_options.SetCallback( ScriptInterpreterPythonImpl::BreakpointCallbackFunction, baton_sp); } else if (!batch_mode) { - StreamFileSP error_sp = io_handler.GetErrorStreamFileSP(); - if (error_sp) { - error_sp->Printf("Warning: No command attached to breakpoint.\n"); - error_sp->Flush(); + if (LockableStreamFileSP error_sp = io_handler.GetErrorStreamFileSP()) { + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("Warning: No command attached to breakpoint.\n"); } } } @@ -550,10 +551,9 @@ void ScriptInterpreterPythonImpl::IOHandlerInputComplete(IOHandler &io_handler, wp_options->SetCallback( ScriptInterpreterPythonImpl::WatchpointCallbackFunction, baton_sp); } else if (!batch_mode) { - StreamFileSP error_sp = io_handler.GetErrorStreamFileSP(); - if (error_sp) { - error_sp->Printf("Warning: No command attached to breakpoint.\n"); - error_sp->Flush(); + if (LockableStreamFileSP error_sp = io_handler.GetErrorStreamFileSP()) { + LockedStreamFile locked_stream = error_sp->Lock(); + locked_stream.Printf("Warning: No command attached to breakpoint.\n"); } } m_active_io_handler = eIOHandlerNone; @@ -680,7 +680,7 @@ bool ScriptInterpreterPythonImpl::EnterSession(uint16_t on_entry_flags, PythonDictionary &sys_module_dict = GetSysModuleDictionary(); if (sys_module_dict.IsValid()) { lldb::FileSP top_in_sp; - lldb::StreamFileSP top_out_sp, top_err_sp; + lldb::LockableStreamFileSP top_out_sp, top_err_sp; if (!in_sp || !out_sp || !err_sp || !*in_sp || !*out_sp || !*err_sp) m_debugger.AdoptTopIOHandlerFilesIfInvalid(top_in_sp, top_out_sp, top_err_sp); @@ -696,12 +696,14 @@ bool ScriptInterpreterPythonImpl::EnterSession(uint16_t on_entry_flags, if (!SetStdHandle(out_sp, "stdout", m_saved_stdout, "w")) { if (top_out_sp) - SetStdHandle(top_out_sp->GetFileSP(), "stdout", m_saved_stdout, "w"); + SetStdHandle(top_out_sp->GetUnlockedFileSP(), "stdout", m_saved_stdout, + "w"); } if (!SetStdHandle(err_sp, "stderr", m_saved_stderr, "w")) { if (top_err_sp) - SetStdHandle(top_err_sp->GetFileSP(), "stderr", m_saved_stderr, "w"); + SetStdHandle(top_err_sp->GetUnlockedFileSP(), "stderr", m_saved_stderr, + "w"); } } diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index c7229568e1a0c..dee5a7ce2876d 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -837,18 +837,16 @@ void SymbolFileBreakpad::ParseLineTableAndSupportFiles(CompileUnit &cu, "How did we create compile units without a base address?"); SupportFileMap map; - std::vector> sequences; - std::unique_ptr line_seq_up = - LineTable::CreateLineSequenceContainer(); + std::vector sequences; + LineTable::Sequence sequence; std::optional next_addr; auto finish_sequence = [&]() { LineTable::AppendLineEntryToSequence( - 
line_seq_up.get(), *next_addr, /*line=*/0, /*column=*/0, + sequence, *next_addr, /*line=*/0, /*column=*/0, /*file_idx=*/0, /*is_start_of_statement=*/false, /*is_start_of_basic_block=*/false, /*is_prologue_end=*/false, /*is_epilogue_begin=*/false, /*is_terminal_entry=*/true); - sequences.push_back(std::move(line_seq_up)); - line_seq_up = LineTable::CreateLineSequenceContainer(); + sequences.push_back(std::move(sequence)); }; LineIterator It(*m_objfile_sp, Record::Func, data.bookmark), @@ -870,7 +868,7 @@ void SymbolFileBreakpad::ParseLineTableAndSupportFiles(CompileUnit &cu, finish_sequence(); } LineTable::AppendLineEntryToSequence( - line_seq_up.get(), record->Address, record->LineNum, /*column=*/0, + sequence, record->Address, record->LineNum, /*column=*/0, map[record->FileNum], /*is_start_of_statement=*/true, /*is_start_of_basic_block=*/false, /*is_prologue_end=*/false, /*is_epilogue_begin=*/false, /*is_terminal_entry=*/false); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index a96757afabddf..58b544a9a137b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1232,7 +1232,7 @@ bool SymbolFileDWARF::ParseLineTable(CompileUnit &comp_unit) { // FIXME: Rather than parsing the whole line table and then copying it over // into LLDB, we should explore using a callback to populate the line table // while we parse to reduce memory usage. - std::vector> sequences; + std::vector sequences; // The Sequences view contains only valid line sequences. Don't iterate over // the Rows directly. for (const llvm::DWARFDebugLine::Sequence &seq : line_table->Sequences) { @@ -1242,12 +1242,11 @@ bool SymbolFileDWARF::ParseLineTable(CompileUnit &comp_unit) { // m_first_code_address declaration for more details on this. if (seq.LowPC < m_first_code_address) continue; - std::unique_ptr sequence = - LineTable::CreateLineSequenceContainer(); + LineTable::Sequence sequence; for (unsigned idx = seq.FirstRowIndex; idx < seq.LastRowIndex; ++idx) { const llvm::DWARFDebugLine::Row &row = line_table->Rows[idx]; LineTable::AppendLineEntryToSequence( - sequence.get(), row.Address.Address, row.Line, row.Column, row.File, + sequence, row.Address.Address, row.Line, row.Column, row.File, row.IsStmt, row.BasicBlock, row.PrologueEnd, row.EpilogueBegin, row.EndSequence); } diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index 6338f12402b73..4e472d0a0b0f2 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -1310,18 +1310,17 @@ bool SymbolFileNativePDB::ParseLineTable(CompileUnit &comp_unit) { cii->m_global_line_table.Clear(); // Add line entries in line_set to line_table. 
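The Breakpad and DWARF hunks above show the producer side of the LineTable API change: a sequence is now a plain value type, LineTable::Sequence, that owns its entries and is std::move'd into the collection, replacing the heap-allocated LineSequence behind a unique_ptr. A reduced model with simplified stand-in types:

#include <cstdint>
#include <utility>
#include <vector>

struct Entry {
  uint64_t file_addr;
  uint32_t line;
  bool is_terminal_entry;
};

// Was a polymorphic LineSequence behind std::unique_ptr; now a plain value.
struct Sequence {
  std::vector<Entry> m_entries;
};

static void AppendLineEntryToSequence(Sequence &sequence, uint64_t addr,
                                      uint32_t line, bool is_terminal_entry) {
  sequence.m_entries.push_back(Entry{addr, line, is_terminal_entry});
}

int main() {
  std::vector<Sequence> sequences;
  Sequence sequence;
  AppendLineEntryToSequence(sequence, 0x1000, 10, false);
  AppendLineEntryToSequence(sequence, 0x1010, 0, true); // terminal entry
  sequences.push_back(std::move(sequence));
  sequence = Sequence(); // start a fresh sequence for the next run of rows
}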
- auto line_table = std::make_unique(&comp_unit); - std::unique_ptr sequence( - line_table->CreateLineSequenceContainer()); + std::vector sequence(1); for (const auto &line_entry : line_set) { - line_table->AppendLineEntryToSequence( - sequence.get(), line_entry.file_addr, line_entry.line, + LineTable::AppendLineEntryToSequence( + sequence.back(), line_entry.file_addr, line_entry.line, line_entry.column, line_entry.file_idx, line_entry.is_start_of_statement, line_entry.is_start_of_basic_block, line_entry.is_prologue_end, line_entry.is_epilogue_begin, line_entry.is_terminal_entry); } - line_table->InsertSequence(sequence.get()); + auto line_table = + std::make_unique(&comp_unit, std::move(sequence)); if (line_table->GetSize() == 0) return false; diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index 293be12ee6333..352163ceaae9e 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -1761,11 +1761,10 @@ bool SymbolFilePDB::ParseCompileUnitLineTable(CompileUnit &comp_unit, if (!files) return false; - // For each source and header file, create a LineSequence for contributions - // to the compiland from that file, and add the sequence. + // For each source and header file, create a LineTable::Sequence for + // contributions to the compiland from that file, and add the sequence. while (auto file = files->getNext()) { - std::unique_ptr sequence( - line_table->CreateLineSequenceContainer()); + LineTable::Sequence sequence; auto lines = m_session_up->findLineNumbers(*compiland_up, *file); if (!lines) continue; @@ -1794,12 +1793,11 @@ bool SymbolFilePDB::ParseCompileUnitLineTable(CompileUnit &comp_unit, // of the previous entry's address range if the current entry resulted in // a gap from the previous entry. if (is_gap && ShouldAddLine(match_line, prev_line, prev_length)) { - line_table->AppendLineEntryToSequence( - sequence.get(), prev_addr + prev_length, prev_line, 0, - prev_source_idx, false, false, false, false, true); + line_table->AppendLineEntryToSequence(sequence, prev_addr + prev_length, + prev_line, 0, prev_source_idx, + false, false, false, false, true); - line_table->InsertSequence(sequence.get()); - sequence = line_table->CreateLineSequenceContainer(); + line_table->InsertSequence(std::move(sequence)); } if (ShouldAddLine(match_line, lno, length)) { @@ -1818,7 +1816,7 @@ bool SymbolFilePDB::ParseCompileUnitLineTable(CompileUnit &comp_unit, is_epilogue = (addr == epilogue->getVirtualAddress()); } - line_table->AppendLineEntryToSequence(sequence.get(), addr, lno, col, + line_table->AppendLineEntryToSequence(sequence, addr, lno, col, source_idx, is_statement, false, is_prologue, is_epilogue, false); } @@ -1831,12 +1829,12 @@ bool SymbolFilePDB::ParseCompileUnitLineTable(CompileUnit &comp_unit, if (entry_count > 0 && ShouldAddLine(match_line, prev_line, prev_length)) { // The end is always a terminal entry, so insert it regardless. 
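On the consumer side, the new LineTable constructor (see the LineTable.cpp hunk below) takes the finished sequences by value, stable-sorts them by the address of their first entry, and splices all entries into one flat sorted vector. A minimal model of that constructor, assuming every sequence is non-empty, which the producers above guarantee:

#include <algorithm>
#include <cstdint>
#include <vector>

struct Entry { uint64_t file_addr; };
struct Sequence { std::vector<Entry> m_entries; };

class LineTable {
public:
  explicit LineTable(std::vector<Sequence> &&sequences) {
    // Sequences do not interleave, so ordering them by first address and
    // concatenating their entries yields one fully sorted entry list.
    std::stable_sort(sequences.begin(), sequences.end(),
                     [](const Sequence &a, const Sequence &b) {
                       return a.m_entries.front().file_addr <
                              b.m_entries.front().file_addr;
                     });
    for (const Sequence &seq : sequences)
      m_entries.insert(m_entries.end(), seq.m_entries.begin(),
                       seq.m_entries.end());
  }

private:
  std::vector<Entry> m_entries;
};

int main() {
  std::vector<Sequence> sequences;
  sequences.push_back(Sequence{{{0x2000}, {0x2010}}});
  sequences.push_back(Sequence{{{0x1000}, {0x1010}}});
  LineTable table(std::move(sequences)); // entries end up 0x1000..0x2010
}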
- line_table->AppendLineEntryToSequence( - sequence.get(), prev_addr + prev_length, prev_line, 0, - prev_source_idx, false, false, false, false, true); + line_table->AppendLineEntryToSequence(sequence, prev_addr + prev_length, + prev_line, 0, prev_source_idx, + false, false, false, false, true); } - line_table->InsertSequence(sequence.get()); + line_table->InsertSequence(std::move(sequence)); } if (line_table->GetSize()) { diff --git a/lldb/source/Symbol/LineTable.cpp b/lldb/source/Symbol/LineTable.cpp index aae4ab59ff156..ab50fd4a17022 100644 --- a/lldb/source/Symbol/LineTable.cpp +++ b/lldb/source/Symbol/LineTable.cpp @@ -21,15 +21,13 @@ using namespace lldb_private; LineTable::LineTable(CompileUnit *comp_unit) : m_comp_unit(comp_unit), m_entries() {} -LineTable::LineTable(CompileUnit *comp_unit, - std::vector> &&sequences) +LineTable::LineTable(CompileUnit *comp_unit, std::vector &&sequences) : m_comp_unit(comp_unit), m_entries() { - LineTable::Entry::LessThanBinaryPredicate less_than_bp(this); + LessThanBinaryPredicate less_than_bp(this); llvm::stable_sort(sequences, less_than_bp); - for (const auto &sequence : sequences) { - LineSequenceImpl *seq = static_cast(sequence.get()); - m_entries.insert(m_entries.end(), seq->m_entries.begin(), - seq->m_entries.end()); + for (const Sequence &seq : sequences) { + m_entries.insert(m_entries.end(), seq.m_entries.begin(), + seq.m_entries.end()); } } @@ -46,7 +44,7 @@ void LineTable::InsertLineEntry(lldb::addr_t file_addr, uint32_t line, is_start_of_basic_block, is_prologue_end, is_epilogue_begin, is_terminal_entry); - LineTable::Entry::LessThanBinaryPredicate less_than_bp(this); + LessThanBinaryPredicate less_than_bp(this); entry_collection::iterator pos = llvm::upper_bound(m_entries, entry, less_than_bp); @@ -58,25 +56,14 @@ void LineTable::InsertLineEntry(lldb::addr_t file_addr, uint32_t line, // Dump (&s, Address::DumpStyleFileAddress); } -LineSequence::LineSequence() = default; - -void LineTable::LineSequenceImpl::Clear() { m_entries.clear(); } - -std::unique_ptr LineTable::CreateLineSequenceContainer() { - return std::make_unique(); -} - void LineTable::AppendLineEntryToSequence( - LineSequence *sequence, lldb::addr_t file_addr, uint32_t line, - uint16_t column, uint16_t file_idx, bool is_start_of_statement, - bool is_start_of_basic_block, bool is_prologue_end, bool is_epilogue_begin, - bool is_terminal_entry) { - assert(sequence != nullptr); - LineSequenceImpl *seq = reinterpret_cast(sequence); + Sequence &sequence, lldb::addr_t file_addr, uint32_t line, uint16_t column, + uint16_t file_idx, bool is_start_of_statement, bool is_start_of_basic_block, + bool is_prologue_end, bool is_epilogue_begin, bool is_terminal_entry) { Entry entry(file_addr, line, column, file_idx, is_start_of_statement, is_start_of_basic_block, is_prologue_end, is_epilogue_begin, is_terminal_entry); - entry_collection &entries = seq->m_entries; + entry_collection &entries = sequence.m_entries; // Replace the last entry if the address is the same, otherwise append it. If // we have multiple line entries at the same address, this indicates illegal // DWARF so this "fixes" the line table to be correct. 
If not fixed this can @@ -102,26 +89,24 @@ void LineTable::AppendLineEntryToSequence( entries.push_back(entry); } -void LineTable::InsertSequence(LineSequence *sequence) { - assert(sequence != nullptr); - LineSequenceImpl *seq = reinterpret_cast(sequence); - if (seq->m_entries.empty()) +void LineTable::InsertSequence(Sequence sequence) { + if (sequence.m_entries.empty()) return; - Entry &entry = seq->m_entries.front(); + const Entry &entry = sequence.m_entries.front(); // If the first entry address in this sequence is greater than or equal to // the address of the last item in our entry collection, just append. if (m_entries.empty() || !Entry::EntryAddressLessThan(entry, m_entries.back())) { - m_entries.insert(m_entries.end(), seq->m_entries.begin(), - seq->m_entries.end()); + m_entries.insert(m_entries.end(), sequence.m_entries.begin(), + sequence.m_entries.end()); return; } // Otherwise, find where this belongs in the collection entry_collection::iterator begin_pos = m_entries.begin(); entry_collection::iterator end_pos = m_entries.end(); - LineTable::Entry::LessThanBinaryPredicate less_than_bp(this); + LessThanBinaryPredicate less_than_bp(this); entry_collection::iterator pos = std::upper_bound(begin_pos, end_pos, entry, less_than_bp); @@ -139,15 +124,11 @@ void LineTable::InsertSequence(LineSequence *sequence) { assert(prev_pos->is_terminal_entry); } #endif - m_entries.insert(pos, seq->m_entries.begin(), seq->m_entries.end()); + m_entries.insert(pos, sequence.m_entries.begin(), sequence.m_entries.end()); } -LineTable::Entry::LessThanBinaryPredicate::LessThanBinaryPredicate( - LineTable *line_table) - : m_line_table(line_table) {} - -bool LineTable::Entry::LessThanBinaryPredicate:: -operator()(const LineTable::Entry &a, const LineTable::Entry &b) const { +bool LineTable::LessThanBinaryPredicate::operator()(const Entry &a, + const Entry &b) const { #define LT_COMPARE(a, b) \ if (a != b) \ return a < b @@ -166,12 +147,9 @@ operator()(const LineTable::Entry &a, const LineTable::Entry &b) const { #undef LT_COMPARE } -bool LineTable::Entry::LessThanBinaryPredicate:: -operator()(const std::unique_ptr &sequence_a, - const std::unique_ptr &sequence_b) const { - auto *seq_a = static_cast(sequence_a.get()); - auto *seq_b = static_cast(sequence_b.get()); - return (*this)(seq_a->m_entries.front(), seq_b->m_entries.front()); +bool LineTable::LessThanBinaryPredicate::operator()( + const Sequence &seq_a, const Sequence &seq_b) const { + return (*this)(seq_a.m_entries.front(), seq_b.m_entries.front()); } uint32_t LineTable::GetSize() const { return m_entries.size(); } @@ -206,25 +184,27 @@ uint32_t LineTable::lower_bound(const Address &so_addr) const { return std::distance(m_entries.begin(), pos); } -uint32_t LineTable::upper_bound(const Address &so_addr) const { - if (so_addr.GetModule() != m_comp_unit->GetModule()) - return GetSize(); +std::pair +LineTable::GetLineEntryIndexRange(const AddressRange &range) const { + uint32_t first = lower_bound(range.GetBaseAddress()); + if (first >= GetSize() || range.GetByteSize() == 0) + return {first, first}; Entry search_entry; - search_entry.file_addr = so_addr.GetFileAddress(); - if (search_entry.file_addr == LLDB_INVALID_ADDRESS) - return GetSize(); + search_entry.file_addr = + range.GetBaseAddress().GetFileAddress() + range.GetByteSize(); - // This is not a typo. 
lower_bound returns the first entry which starts on or - // after the given address, which is exactly what we want -- *except* if the - // entry is a termination entry (in that case, we want the one after it). + // lower_bound returns the first entry which starts on or after the given + // address, which is exactly what we want -- *except* if the entry is a + // termination entry (in that case, we want the one after it). auto pos = - llvm::lower_bound(m_entries, search_entry, Entry::EntryAddressLessThan); + std::lower_bound(std::next(m_entries.begin(), first), m_entries.end(), + search_entry, Entry::EntryAddressLessThan); if (pos != m_entries.end() && pos->file_addr == search_entry.file_addr && pos->is_terminal_entry) ++pos; - return std::distance(m_entries.begin(), pos); + return {first, std::distance(m_entries.begin(), pos)}; } bool LineTable::FindLineEntryByAddress(const Address &so_addr, @@ -447,7 +427,7 @@ size_t LineTable::GetContiguousFileAddressRanges(FileAddressRanges &file_ranges, LineTable *LineTable::LinkLineTable(const FileRangeMap &file_range_map) { std::unique_ptr line_table_up(new LineTable(m_comp_unit)); - LineSequenceImpl sequence; + Sequence sequence; const size_t count = m_entries.size(); LineEntry line_entry; const FileRangeMap::Entry *file_range_entry = nullptr; @@ -509,8 +489,7 @@ LineTable *LineTable::LinkLineTable(const FileRangeMap &file_range_map) { sequence.m_entries.back().is_terminal_entry = true; // Append the sequence since we just terminated the previous one - line_table_up->InsertSequence(&sequence); - sequence.Clear(); + line_table_up->InsertSequence(std::move(sequence)); } // Now link the current entry @@ -525,8 +504,7 @@ LineTable *LineTable::LinkLineTable(const FileRangeMap &file_range_map) { // insert this sequence into our new line table. 
if (!sequence.m_entries.empty() && sequence.m_entries.back().is_terminal_entry) { - line_table_up->InsertSequence(&sequence); - sequence.Clear(); + line_table_up->InsertSequence(std::move(sequence)); prev_entry_was_linked = false; } else { prev_entry_was_linked = file_range_entry != nullptr; diff --git a/lldb/source/ValueObject/ValueObjectList.cpp b/lldb/source/ValueObject/ValueObjectList.cpp index a79a040bdf7fd..fdee9579b093b 100644 --- a/lldb/source/ValueObject/ValueObjectList.cpp +++ b/lldb/source/ValueObject/ValueObjectList.cpp @@ -16,12 +16,6 @@ using namespace lldb; using namespace lldb_private; -const ValueObjectList &ValueObjectList::operator=(const ValueObjectList &rhs) { - if (this != &rhs) - m_value_objects = rhs.m_value_objects; - return *this; -} - void ValueObjectList::Append(const ValueObjectSP &val_obj_sp) { m_value_objects.push_back(val_obj_sp); } diff --git a/lldb/test/API/api/command-return-object/TestSBCommandReturnObject.py b/lldb/test/API/api/command-return-object/TestSBCommandReturnObject.py index 2193b7270d0b4..f8632fd720325 100644 --- a/lldb/test/API/api/command-return-object/TestSBCommandReturnObject.py +++ b/lldb/test/API/api/command-return-object/TestSBCommandReturnObject.py @@ -33,3 +33,16 @@ def test_get_command(self): ci.HandleCommand("help help", res) self.assertTrue(res.Succeeded()) self.assertEqual(res.GetCommand(), "help help") + + value_list = res.GetValues(lldb.eNoDynamicValues) + self.assertEqual(value_list.GetSize(), 0) + + def test_get_value(self): + res = lldb.SBCommandReturnObject() + ci = self.dbg.GetCommandInterpreter() + ci.HandleCommand("p 1 + 1", res) + self.assertTrue(res.Succeeded()) + + value_list = res.GetValues(lldb.eNoDynamicValues) + self.assertEqual(value_list.GetSize(), 1) + self.assertEqual(value_list.GetValueAtIndex(0).GetValue(), "2") diff --git a/lldb/test/API/commands/frame/var/TestFrameVar.py b/lldb/test/API/commands/frame/var/TestFrameVar.py index 7211cade5c7c8..d8260a5657618 100644 --- a/lldb/test/API/commands/frame/var/TestFrameVar.py +++ b/lldb/test/API/commands/frame/var/TestFrameVar.py @@ -2,7 +2,6 @@ Make sure the frame variable -g, -a, and -l flags work. 
""" - import lldb import lldbsuite.test.lldbutil as lldbutil from lldbsuite.test.decorators import * @@ -79,6 +78,14 @@ def do_test(self): self.assertNotIn("test_var", output, "Args found a local") self.assertNotIn("g_var", output, "Args found a global") + value_list = command_result.GetValues(lldb.eNoDynamicValues) + self.assertGreaterEqual(value_list.GetSize(), 2) + value_names = [] + for value in value_list: + value_names.append(value.GetName()) + self.assertIn("argc", value_names) + self.assertIn("argv", value_names) + # Just get locals: result = interp.HandleCommand("frame var -a", command_result) self.assertEqual( diff --git a/lldb/test/API/functionalities/dead-strip/TestDeadStrip.py b/lldb/test/API/functionalities/dead-strip/TestDeadStrip.py index edaf609e98618..ef3b00d75b599 100644 --- a/lldb/test/API/functionalities/dead-strip/TestDeadStrip.py +++ b/lldb/test/API/functionalities/dead-strip/TestDeadStrip.py @@ -10,7 +10,6 @@ class DeadStripTestCase(TestBase): - @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr44429") def test(self): """Test breakpoint works correctly with dead-code stripping.""" self.build() diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerTargetXML.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerTargetXML.py index 22f5553e40802..9a70f67b264ce 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerTargetXML.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerTargetXML.py @@ -652,6 +652,154 @@ def haltReason(self): ) self.match("register read s31", ["s31 = 128"]) + @skipIfXmlSupportMissing + @skipIfRemote + @skipIfLLVMTargetMissing("RISCV") + def test_riscv64_regs(self): + """Test grabbing various riscv64 registers from gdbserver.""" + + class MyResponder(MockGDBServerResponder): + reg_data = ( + "0102030405060708" # zero + "0102030405060708" # ra + "0102030405060708" # sp + "0102030405060708" # gp + "0102030405060708" # tp + "0102030405060708" # t0 + "0102030405060708" # t1 + "0102030405060708" # t2 + "0102030405060708" # fp + "0102030405060708" # s1 + "0102030405060708" # a0 + "0102030405060708" # a1 + "0102030405060708" # a2 + "0102030405060708" # a3 + "0102030405060708" # a4 + "0102030405060708" # a5 + "0102030405060708" # a6 + "0102030405060708" # a7 + "0102030405060708" # s2 + "0102030405060708" # s3 + "0102030405060708" # s4 + "0102030405060708" # s5 + "0102030405060708" # s6 + "0102030405060708" # s7 + "0102030405060708" # s8 + "0102030405060708" # s9 + "0102030405060708" # s10 + "0102030405060708" # s11 + "0102030405060708" # t3 + "0102030405060708" # t4 + "0102030405060708" # t5 + "0102030405060708" # t6 + ) + + def qXferRead(self, obj, annex, offset, length): + if annex == "target.xml": + # Note that this XML does not include any aliases, LLDB must generate them itself. 
+ return ( + """ + + + riscv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + """, + False, + ) + else: + return None, False + + def readRegister(self, regnum): + return "" + + def readRegisters(self): + return self.reg_data + + def writeRegisters(self, reg_hex): + self.reg_data = reg_hex + return "OK" + + def haltReason(self): + return "T02thread:1ff0d;threads:1ff0d;thread-pcs:000000010001bc00;07:0102030405060708;10:1112131415161718;" + + self.server.responder = MyResponder() + + target = self.createTarget("basic_eh_frame-riscv64.yaml") + process = self.connect(target) + lldbutil.expect_state_changes( + self, self.dbg.GetListener(), process, [lldb.eStateStopped] + ) + + # test generic aliases + self.match("register read x0", ["zero = 0x0807060504030201"]) + self.match("register read x1", ["ra = 0x0807060504030201"]) + self.match("register read x2", ["sp = 0x0807060504030201"]) + self.match("register read x3", ["gp = 0x0807060504030201"]) + self.match("register read x4", ["tp = 0x0807060504030201"]) + self.match("register read x5", ["t0 = 0x0807060504030201"]) + self.match("register read x6", ["t1 = 0x0807060504030201"]) + self.match("register read x7", ["t2 = 0x0807060504030201"]) + # Register x8 is probably not working because it has two aliases fp, s0 + # See issue #127900 + # self.match("register read x8", ["fp = 0x0807060504030201"]) + self.match("register read x9", ["s1 = 0x0807060504030201"]) + self.match("register read x10", ["a0 = 0x0807060504030201"]) + self.match("register read x11", ["a1 = 0x0807060504030201"]) + self.match("register read x12", ["a2 = 0x0807060504030201"]) + self.match("register read x13", ["a3 = 0x0807060504030201"]) + self.match("register read x14", ["a4 = 0x0807060504030201"]) + self.match("register read x15", ["a5 = 0x0807060504030201"]) + self.match("register read x16", ["a6 = 0x0807060504030201"]) + self.match("register read x17", ["a7 = 0x0807060504030201"]) + self.match("register read x18", ["s2 = 0x0807060504030201"]) + self.match("register read x19", ["s3 = 0x0807060504030201"]) + self.match("register read x20", ["s4 = 0x0807060504030201"]) + self.match("register read x21", ["s5 = 0x0807060504030201"]) + self.match("register read x22", ["s6 = 0x0807060504030201"]) + self.match("register read x23", ["s7 = 0x0807060504030201"]) + self.match("register read x24", ["s8 = 0x0807060504030201"]) + self.match("register read x25", ["s9 = 0x0807060504030201"]) + self.match("register read x26", ["s10 = 0x0807060504030201"]) + self.match("register read x27", ["s11 = 0x0807060504030201"]) + self.match("register read x28", ["t3 = 0x0807060504030201"]) + self.match("register read x29", ["t4 = 0x0807060504030201"]) + self.match("register read x30", ["t5 = 0x0807060504030201"]) + self.match("register read x31", ["t6 = 0x0807060504030201"]) + @skipIfXmlSupportMissing @skipIfRemote @skipIfLLVMTargetMissing("X86") diff --git a/lldb/test/API/functionalities/gdb_remote_client/basic_eh_frame-riscv64.yaml b/lldb/test/API/functionalities/gdb_remote_client/basic_eh_frame-riscv64.yaml new file mode 100644 index 0000000000000..50cdd23f42667 --- /dev/null +++ b/lldb/test/API/functionalities/gdb_remote_client/basic_eh_frame-riscv64.yaml @@ -0,0 +1,20 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_RISCV +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x0000000000000570 + AddressAlign: 0x0000000000000004 + Content: DEADBEEF + - Name: .eh_frame + Type: SHT_PROGBITS 
+ Flags: [ SHF_ALLOC ] + Address: 0x0000000000000688 + AddressAlign: 0x0000000000000008 + Content: 000006881000000000000000037a5200017c0101000006981b0d02001000000018000000ccfeffff000006a82200000000070100200000002c000000000006b89cffffff1000000000420e1042880242000006c80c080046c80c0210420e000000000000 +... diff --git a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py index 376d6492d83b6..60caedf4737da 100644 --- a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py +++ b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py @@ -703,42 +703,43 @@ def test_riscv64_regs_gpr_fpr(self): self.assertTrue(target, VALID_TARGET) process = target.LoadCore("linux-riscv64.gpr_fpr.core") - values = {} - values["pc"] = "0x000000000001016e" - values["ra"] = "0x00000000000101a4" - values["sp"] = "0x0000003fffc1d2d0" - values["gp"] = "0x0000002ae6eccf50" - values["tp"] = "0x0000003ff3cb5400" - values["t0"] = "0x7f7f7f7fffffffff" - values["t1"] = "0x0000002ae6eb9b1c" - values["t2"] = "0xffffffffffffffff" - values["fp"] = "0x0000003fffc1d300" - values["s1"] = "0x0000002ae6eced98" - values["a0"] = "0x0" - values["a1"] = "0x0000000000010144" - values["a2"] = "0x0000002ae6ecedb0" - values["a3"] = "0xafdbdbff81cf7f81" - values["a4"] = "0x00000000000101e4" - values["a5"] = "0x0" - values["a6"] = "0x2f5b5a40014e0001" - values["a7"] = "0x00000000000000dd" - values["s2"] = "0x0000002ae6ec8860" - values["s3"] = "0x0000002ae6ecedb0" - values["s4"] = "0x0000003fff886c18" - values["s5"] = "0x0000002ae6eceb78" - values["s6"] = "0x0000002ae6ec8860" - values["s7"] = "0x0000002ae6ec8860" - values["s8"] = "0x0" - values["s9"] = "0x000000000000000f" - values["s10"] = "0x0000002ae6ecc8d0" - values["s11"] = "0x0000000000000008" - values["t3"] = "0x0000003ff3be3728" - values["t4"] = "0x0" - values["t5"] = "0x0000000000000002" - values["t6"] = "0x0000002ae6ed08b9" - values["zero"] = "0x0" - values["fa5"] = "0xffffffff423c0000" - values["fcsr"] = "0x00000000" + values = { + "pc": ("0x000000000001016e", None), + "zero": ("0x0", "x0"), + "ra": ("0x00000000000101a4", "x1"), + "sp": ("0x0000003fffc1d2d0", "x2"), + "gp": ("0x0000002ae6eccf50", "x3"), + "tp": ("0x0000003ff3cb5400", "x4"), + "t0": ("0x7f7f7f7fffffffff", "x5"), + "t1": ("0x0000002ae6eb9b1c", "x6"), + "t2": ("0xffffffffffffffff", "x7"), + "fp": ("0x0000003fffc1d300", "x8"), + "s1": ("0x0000002ae6eced98", "x9"), + "a0": ("0x0000000000000000", "x10"), + "a1": ("0x0000000000010144", "x11"), + "a2": ("0x0000002ae6ecedb0", "x12"), + "a3": ("0xafdbdbff81cf7f81", "x13"), + "a4": ("0x00000000000101e4", "x14"), + "a5": ("0x0000000000000000", "x15"), + "a6": ("0x2f5b5a40014e0001", "x16"), + "a7": ("0x00000000000000dd", "x17"), + "s2": ("0x0000002ae6ec8860", "x18"), + "s3": ("0x0000002ae6ecedb0", "x19"), + "s4": ("0x0000003fff886c18", "x20"), + "s5": ("0x0000002ae6eceb78", "x21"), + "s6": ("0x0000002ae6ec8860", "x22"), + "s7": ("0x0000002ae6ec8860", "x23"), + "s8": ("0x0000000000000000", "x24"), + "s9": ("0x000000000000000f", "x25"), + "s10": ("0x0000002ae6ecc8d0", "x26"), + "s11": ("0x0000000000000008", "x27"), + "t3": ("0x0000003ff3be3728", "x28"), + "t4": ("0x0000000000000000", "x29"), + "t5": ("0x0000000000000002", "x30"), + "t6": ("0x0000002ae6ed08b9", "x31"), + "fa5": ("0xffffffff423c0000", None), + "fcsr": ("0x00000000", None), + } fpr_names = { "ft0", @@ -776,11 +777,17 @@ def test_riscv64_regs_gpr_fpr(self): } fpr_value = "0x0000000000000000" - for regname, value in 
values.items(): + for regname in values: + value, alias = values[regname] self.expect( "register read {}".format(regname), substrs=["{} = {}".format(regname, value)], ) + if alias: + self.expect( + "register read {}".format(alias), + substrs=["{} = {}".format(regname, value)], + ) for regname in fpr_names: self.expect( @@ -797,46 +804,53 @@ def test_riscv64_regs_gpr_only(self): self.assertTrue(target, VALID_TARGET) process = target.LoadCore("linux-riscv64.gpr_only.core") - values = {} - values["pc"] = "0x0000000000010164" - values["ra"] = "0x0000000000010194" - values["sp"] = "0x00fffffff4d5fcc0" - values["gp"] = "0x0000000000157678" - values["tp"] = "0x00ffffff99c43400" - values["t0"] = "0x00ffffff99c6b260" - values["t1"] = "0x00ffffff99b7bd54" - values["t2"] = "0x0000000003f0b27f" - values["fp"] = "0x00fffffff4d5fcf0" - values["s1"] = "0x0000000000000003" - values["a0"] = "0x0" - values["a1"] = "0x0000000000010144" - values["a2"] = "0x0000000000176460" - values["a3"] = "0x000000000015ee38" - values["a4"] = "0x00000000423c0000" - values["a5"] = "0x0" - values["a6"] = "0x0" - values["a7"] = "0x00000000000000dd" - values["s2"] = "0x0" - values["s3"] = "0x000000000014ddf8" - values["s4"] = "0x000000000003651c" - values["s5"] = "0x00fffffffccd8d28" - values["s6"] = "0x000000000014ddf8" - values["s7"] = "0x00ffffff99c69d48" - values["s8"] = "0x00ffffff99c6a008" - values["s9"] = "0x0" - values["s10"] = "0x0" - values["s11"] = "0x0" - values["t3"] = "0x00ffffff99c42000" - values["t4"] = "0x00ffffff99af8e20" - values["t5"] = "0x0000000000000005" - values["t6"] = "0x44760bdd8d5f6381" - values["zero"] = "0x0" + values = { + "pc": ("0x0000000000010164", None), + "zero": ("0x0", "x0"), + "ra": ("0x0000000000010194", "x1"), + "sp": ("0x00fffffff4d5fcc0", "x2"), + "gp": ("0x0000000000157678", "x3"), + "tp": ("0x00ffffff99c43400", "x4"), + "t0": ("0x00ffffff99c6b260", "x5"), + "t1": ("0x00ffffff99b7bd54", "x6"), + "t2": ("0x0000000003f0b27f", "x7"), + "fp": ("0x00fffffff4d5fcf0", "x8"), + "s1": ("0x0000000000000003", "x9"), + "a0": ("0x0", "x10"), + "a1": ("0x0000000000010144", "x11"), + "a2": ("0x0000000000176460", "x12"), + "a3": ("0x000000000015ee38", "x13"), + "a4": ("0x00000000423c0000", "x14"), + "a5": ("0x0", "x15"), + "a6": ("0x0", "x16"), + "a7": ("0x00000000000000dd", "x17"), + "s2": ("0x0", "x18"), + "s3": ("0x000000000014ddf8", "x19"), + "s4": ("0x000000000003651c", "x20"), + "s5": ("0x00fffffffccd8d28", "x21"), + "s6": ("0x000000000014ddf8", "x22"), + "s7": ("0x00ffffff99c69d48", "x23"), + "s8": ("0x00ffffff99c6a008", "x24"), + "s9": ("0x0", "x25"), + "s10": ("0x0", "x26"), + "s11": ("0x0", "x27"), + "t3": ("0x00ffffff99c42000", "x28"), + "t4": ("0x00ffffff99af8e20", "x29"), + "t5": ("0x0000000000000005", "x30"), + "t6": ("0x44760bdd8d5f6381", "x31"), + } - for regname, value in values.items(): + for regname in values: + value, alias = values[regname] self.expect( "register read {}".format(regname), substrs=["{} = {}".format(regname, value)], ) + if alias: + self.expect( + "register read {}".format(alias), + substrs=["{} = {}".format(regname, value)], + ) # Check that LLDB does not try to read other registers from core file self.expect( diff --git a/lldb/test/API/functionalities/target_var/TestTargetVar.py b/lldb/test/API/functionalities/target_var/TestTargetVar.py index 54b7b77b6773c..0ef3d008e8f19 100644 --- a/lldb/test/API/functionalities/target_var/TestTargetVar.py +++ b/lldb/test/API/functionalities/target_var/TestTargetVar.py @@ -34,3 +34,12 @@ def testTargetVarExpr(self): error=True, 
substrs=["can't find global variable 'var[0]'"], ) + + command_result = lldb.SBCommandReturnObject() + result = self.ci.HandleCommand("target var", command_result) + value_list = command_result.GetValues(lldb.eNoDynamicValues) + self.assertGreaterEqual(value_list.GetSize(), 2) + value_names = [] + for value in value_list: + value_names.append(value.GetName()) + self.assertIn("i", value_names) diff --git a/lldb/test/API/macosx/thread_start_bps/Makefile b/lldb/test/API/macosx/thread_start_bps/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/macosx/thread_start_bps/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/macosx/thread_start_bps/TestBreakpointsThreadInit.py b/lldb/test/API/macosx/thread_start_bps/TestBreakpointsThreadInit.py new file mode 100644 index 0000000000000..1c6fd4f91c73e --- /dev/null +++ b/lldb/test/API/macosx/thread_start_bps/TestBreakpointsThreadInit.py @@ -0,0 +1,37 @@ +"""Test that we get thread names when interrupting a process.""" + +import time +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestInterruptThreadNames(TestBase): + @skipUnlessDarwin + def test_internal_bps_resolved(self): + self.build() + + source_file = lldb.SBFileSpec("main.c") + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, "initial hello", source_file + ) + + thread_start_func_names = [ + "start_wqthread", + "_pthread_wqthread", + "_pthread_start", + ] + known_module_names = [ + "libsystem_c.dylib", + "libSystem.B.dylib", + "libsystem_pthread.dylib", + ] + bps = [] + for func in thread_start_func_names: + for module in known_module_names: + bps.append(target.BreakpointCreateByName(func, module)) + num_resolved = 0 + for bp in bps: + num_resolved += bp.GetNumResolvedLocations() + self.assertGreater(num_resolved, 0) diff --git a/lldb/test/API/macosx/thread_start_bps/main.c b/lldb/test/API/macosx/thread_start_bps/main.c new file mode 100644 index 0000000000000..1a0af000b5b04 --- /dev/null +++ b/lldb/test/API/macosx/thread_start_bps/main.c @@ -0,0 +1,5 @@ +#include +int main() { + puts("initial hello"); + puts("hello from the other side"); +} diff --git a/lldb/test/Shell/Commands/command-disassemble.s b/lldb/test/Shell/Commands/command-disassemble.s index eb84a9ce39d4a..14f416d221231 100644 --- a/lldb/test/Shell/Commands/command-disassemble.s +++ b/lldb/test/Shell/Commands/command-disassemble.s @@ -94,8 +94,7 @@ # CHECK-EMPTY: # CHECK-NEXT: command-disassemble.s.tmp`n2::case3: # CHECK-NEXT: command-disassemble.s.tmp[0x9046] <+0>: jmp 0x6046 ; <-12288> -## FIXME: This should resolve to `middle_of_case3` -# CHECK-NEXT: command-disassemble.s.tmp[0x904b] <+5>: jmp 0x7046 ; n2::case3 - 8192 +# CHECK-NEXT: command-disassemble.s.tmp[0x904b] <+5>: jmp 0x7046 ; middle_of_case3 # CHECK-NEXT: command-disassemble.s.tmp[0x9050] <+10>: int $0x2a # CHECK-EMPTY: # CHECK-NEXT: command-disassemble.s.tmp`n1::case3: diff --git a/lldb/test/Shell/Register/Inputs/riscv64-gp-read.cpp b/lldb/test/Shell/Register/Inputs/riscv64-gp-read.cpp new file mode 100644 index 0000000000000..d24fcac6d8b24 --- /dev/null +++ b/lldb/test/Shell/Register/Inputs/riscv64-gp-read.cpp @@ -0,0 +1,36 @@ +int main() { + asm volatile("li x0, 0\n\t" + "li x1, 1\n\t" + "li x2, 2\n\t" + "li x3, 3\n\t" + "li x4, 4\n\t" + "li x5, 5\n\t" + "li x6, 6\n\t" + "li x7, 7\n\t" + "li x9, 9\n\t" + "li x10, 10\n\t" + "li x11, 11\n\t" + "li x12, 
12\n\t" + "li x13, 13\n\t" + "li x14, 14\n\t" + "li x15, 15\n\t" + "li x16, 16\n\t" + "li x17, 17\n\t" + "li x18, 18\n\t" + "li x19, 19\n\t" + "li x20, 20\n\t" + "li x21, 21\n\t" + "li x22, 22\n\t" + "li x23, 23\n\t" + "li x24, 24\n\t" + "li x25, 25\n\t" + "li x26, 26\n\t" + "li x27, 27\n\t" + "li x28, 28\n\t" + "li x29, 29\n\t" + "li x30, 30\n\t" + "li x31, 31\n\t" + "ebreak \n\t"); + + return 0; +} diff --git a/lldb/test/Shell/Register/riscv64-gp-read.test b/lldb/test/Shell/Register/riscv64-gp-read.test new file mode 100644 index 0000000000000..ccd073becd472 --- /dev/null +++ b/lldb/test/Shell/Register/riscv64-gp-read.test @@ -0,0 +1,108 @@ +# REQUIRES: native && target-riscv64 +# RUN: %clangxx_host %p/Inputs/riscv64-gp-read.cpp -o %t +# RUN: %lldb -b -s %s %t | FileCheck %s +process launch + +register read --all +# CHECK-DAG: ra = 0x1 +# CHECK-DAG: sp = 0x2 +# CHECK-DAG: gp = 0x3 +# CHECK-DAG: tp = 0x4 +# CHECK-DAG: t0 = 0x5 +# CHECK-DAG: t1 = 0x6 +# CHECK-DAG: t2 = 0x7 +# CHECK-DAG: fp = 0x00007ffffffffc60 +# CHECK-DAG: s1 = 0x9 +# CHECK-DAG: a0 = 0xa +# CHECK-DAG: a1 = 0xb +# CHECK-DAG: a2 = 0xc +# CHECK-DAG: a3 = 0xd +# CHECK-DAG: a4 = 0xe +# CHECK-DAG: a5 = 0xf +# CHECK-DAG: a6 = 0x10 +# CHECK-DAG: a7 = 0x11 +# CHECK-DAG: s2 = 0x12 +# CHECK-DAG: s3 = 0x13 +# CHECK-DAG: s4 = 0x14 +# CHECK-DAG: s5 = 0x15 +# CHECK-DAG: s6 = 0x16 +# CHECK-DAG: s7 = 0x17 +# CHECK-DAG: s8 = 0x18 +# CHECK-DAG: s9 = 0x19 +# CHECK-DAG: s10 = 0x1a +# CHECK-DAG: s11 = 0x1b +# CHECK-DAG: t3 = 0x1c +# CHECK-DAG: t4 = 0x1d +# CHECK-DAG: t5 = 0x1e +# CHECK-DAG: t6 = 0x1f +# CHECK-DAG: zero = 0x0 + +register read zero ra sp gp tp t0 t1 t2 s0 fp s1 a0 a1 a2 a3 a4 a5 a6 a7 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 t3 t4 t5 t6 +# CHECK-DAG: zero = 0x0 +# CHECK-DAG: ra = 0x1 +# CHECK-DAG: sp = 0x2 +# CHECK-DAG: gp = 0x3 +# CHECK-DAG: tp = 0x4 +# CHECK-DAG: t0 = 0x5 +# CHECK-DAG: t1 = 0x6 +# CHECK-DAG: t2 = 0x7 +# CHECK-DAG: fp = 0x00007ffffffffc60 +# CHECK-DAG: fp = 0x00007ffffffffc60 +# CHECK-DAG: s1 = 0x9 +# CHECK-DAG: a0 = 0xa +# CHECK-DAG: a1 = 0xb +# CHECK-DAG: a2 = 0xc +# CHECK-DAG: a3 = 0xd +# CHECK-DAG: a4 = 0xe +# CHECK-DAG: a5 = 0xf +# CHECK-DAG: a6 = 0x10 +# CHECK-DAG: a7 = 0x11 +# CHECK-DAG: s2 = 0x12 +# CHECK-DAG: s3 = 0x13 +# CHECK-DAG: s4 = 0x14 +# CHECK-DAG: s5 = 0x15 +# CHECK-DAG: s6 = 0x16 +# CHECK-DAG: s7 = 0x17 +# CHECK-DAG: s8 = 0x18 +# CHECK-DAG: s9 = 0x19 +# CHECK-DAG: s10 = 0x1a +# CHECK-DAG: s11 = 0x1b +# CHECK-DAG: t3 = 0x1c +# CHECK-DAG: t4 = 0x1d +# CHECK-DAG: t5 = 0x1e +# CHECK-DAG: t6 = 0x1f + +register read x0 x1 x2 x3 x4 x5 x6 x7 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 x30 x31 +# CHECK-DAG: zero = 0x0 +# CHECK-DAG: ra = 0x1 +# CHECK-DAG: sp = 0x2 +# CHECK-DAG: gp = 0x3 +# CHECK-DAG: tp = 0x4 +# CHECK-DAG: t0 = 0x5 +# CHECK-DAG: t1 = 0x6 +# CHECK-DAG: t2 = 0x7 +# x8 has been skipped here because it has two aliases +# which causes problems. 
See issue #127900 +# CHECK-DAG: s1 = 0x9 +# CHECK-DAG: a0 = 0xa +# CHECK-DAG: a1 = 0xb +# CHECK-DAG: a2 = 0xc +# CHECK-DAG: a3 = 0xd +# CHECK-DAG: a4 = 0xe +# CHECK-DAG: a5 = 0xf +# CHECK-DAG: a6 = 0x10 +# CHECK-DAG: a7 = 0x11 +# CHECK-DAG: s2 = 0x12 +# CHECK-DAG: s3 = 0x13 +# CHECK-DAG: s4 = 0x14 +# CHECK-DAG: s5 = 0x15 +# CHECK-DAG: s6 = 0x16 +# CHECK-DAG: s7 = 0x17 +# CHECK-DAG: s8 = 0x18 +# CHECK-DAG: s9 = 0x19 +# CHECK-DAG: s10 = 0x1a +# CHECK-DAG: s11 = 0x1b +# CHECK-DAG: t3 = 0x1c +# CHECK-DAG: t4 = 0x1d +# CHECK-DAG: t5 = 0x1e +# CHECK-DAG: t6 = 0x1f \ No newline at end of file diff --git a/lldb/unittests/Editline/EditlineTest.cpp b/lldb/unittests/Editline/EditlineTest.cpp index 1327b587e7c3d..6c5a0c907a33e 100644 --- a/lldb/unittests/Editline/EditlineTest.cpp +++ b/lldb/unittests/Editline/EditlineTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/Host/Config.h" +#include "lldb/Host/File.h" #if LLDB_ENABLE_LIBEDIT @@ -25,6 +26,7 @@ #include "lldb/Host/FileSystem.h" #include "lldb/Host/Pipe.h" #include "lldb/Host/PseudoTerminal.h" +#include "lldb/Host/StreamFile.h" #include "lldb/Utility/Status.h" #include "lldb/Utility/StringList.h" @@ -115,10 +117,17 @@ EditlineAdapter::EditlineAdapter() if (*_el_secondary_file == nullptr) return; + lldb::LockableStreamFileSP output_stream_sp = + std::make_shared(*_el_secondary_file, + NativeFile::Unowned, output_mutex); + lldb::LockableStreamFileSP error_stream_sp = + std::make_shared(*_el_secondary_file, + NativeFile::Unowned, output_mutex); + // Create an Editline instance. _editline_sp.reset(new lldb_private::Editline( - "gtest editor", *_el_secondary_file, *_el_secondary_file, - *_el_secondary_file, /*color=*/false, output_mutex)); + "gtest editor", *_el_secondary_file, output_stream_sp, error_stream_sp, + /*color=*/false)); _editline_sp->SetPrompt("> "); // Hookup our input complete callback. 
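For orientation before the LineTable unit-test updates below, here is a minimal sketch of the new value-semantics API that the plugin changes above migrate to; this is not code from the patch, and `comp_unit` plus all constants are placeholders:

  std::vector<LineTable::Sequence> sequences(1);
  // AppendLineEntryToSequence is static and fills the sequence in place; the
  // final flag marks the entry that terminates the sequence.
  LineTable::AppendLineEntryToSequence(
      sequences.back(), /*file_addr=*/0x1000, /*line=*/10, /*column=*/0,
      /*file_idx=*/0, /*is_start_of_statement=*/true,
      /*is_start_of_basic_block=*/false, /*is_prologue_end=*/false,
      /*is_epilogue_begin=*/false, /*is_terminal_entry=*/false);
  LineTable::AppendLineEntryToSequence(
      sequences.back(), /*file_addr=*/0x1010, /*line=*/11, /*column=*/0,
      /*file_idx=*/0, false, false, false, false, /*is_terminal_entry=*/true);
  // The new constructor sorts the sequences and splices their entries, so no
  // separate InsertSequence call and no LineSequenceImpl cast is needed.
  auto line_table =
      std::make_unique<LineTable>(&comp_unit, std::move(sequences));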
diff --git a/lldb/unittests/Symbol/LineTableTest.cpp b/lldb/unittests/Symbol/LineTableTest.cpp index 2fa2913f67f9e..f67ac46c738e7 100644 --- a/lldb/unittests/Symbol/LineTableTest.cpp +++ b/lldb/unittests/Symbol/LineTableTest.cpp @@ -129,26 +129,21 @@ class LineTableTest : public testing::Test { class LineSequenceBuilder { public: - std::vector> Build() { - return std::move(m_sequences); - } + std::vector Build() { return std::move(m_sequences); } enum Terminal : bool { Terminal = true }; void Entry(addr_t addr, bool terminal = false) { LineTable::AppendLineEntryToSequence( - m_seq_up.get(), addr, /*line=*/1, /*column=*/0, + m_sequence, addr, /*line=*/1, /*column=*/0, /*file_idx=*/0, /*is_start_of_statement=*/false, /*is_start_of_basic_block=*/false, /*is_prologue_end=*/false, /*is_epilogue_begin=*/false, terminal); - if (terminal) { - m_sequences.push_back(std::move(m_seq_up)); - m_seq_up = LineTable::CreateLineSequenceContainer(); - } + if (terminal) + m_sequences.push_back(std::move(m_sequence)); } private: - std::vector> m_sequences; - std::unique_ptr m_seq_up = - LineTable::CreateLineSequenceContainer(); + std::vector m_sequences; + LineTable::Sequence m_sequence; }; } // namespace @@ -156,7 +151,7 @@ class LineSequenceBuilder { char FakeSymbolFile::ID; static llvm::Expected -CreateFakeModule(std::vector> line_sequences) { +CreateFakeModule(std::vector line_sequences) { Expected file = TestFile::fromYaml(R"( --- !ELF FileHeader: @@ -194,7 +189,7 @@ CreateFakeModule(std::vector> line_sequences) { std::move(text_sp), line_table}; } -TEST_F(LineTableTest, LowerAndUpperBound) { +TEST_F(LineTableTest, lower_bound) { LineSequenceBuilder builder; builder.Entry(0); builder.Entry(10); @@ -211,41 +206,63 @@ TEST_F(LineTableTest, LowerAndUpperBound) { auto make_addr = [&](addr_t addr) { return Address(fixture->text_sp, addr); }; - // Both functions return the same value for boundary values. This way the - // index range for e.g. [0,10) is [0,1). EXPECT_EQ(table->lower_bound(make_addr(0)), 0u); - EXPECT_EQ(table->upper_bound(make_addr(0)), 0u); + EXPECT_EQ(table->lower_bound(make_addr(9)), 0u); EXPECT_EQ(table->lower_bound(make_addr(10)), 1u); - EXPECT_EQ(table->upper_bound(make_addr(10)), 1u); + EXPECT_EQ(table->lower_bound(make_addr(19)), 1u); + + // Skips over the terminal entry. EXPECT_EQ(table->lower_bound(make_addr(20)), 3u); - EXPECT_EQ(table->upper_bound(make_addr(20)), 3u); + EXPECT_EQ(table->lower_bound(make_addr(29)), 3u); - // In case there's no "real" entry at this address, they return the first real - // entry. + // In case there's no "real" entry at this address, the function returns the + // first real entry. EXPECT_EQ(table->lower_bound(make_addr(30)), 5u); - EXPECT_EQ(table->upper_bound(make_addr(30)), 5u); - EXPECT_EQ(table->lower_bound(make_addr(40)), 5u); - EXPECT_EQ(table->upper_bound(make_addr(40)), 5u); - // For in-between values, their result differs by one. [9,19) maps to [0,2) - // because the first two entries contain a part of that range. - EXPECT_EQ(table->lower_bound(make_addr(9)), 0u); - EXPECT_EQ(table->upper_bound(make_addr(9)), 1u); - EXPECT_EQ(table->lower_bound(make_addr(19)), 1u); - EXPECT_EQ(table->upper_bound(make_addr(19)), 2u); - EXPECT_EQ(table->lower_bound(make_addr(29)), 3u); - EXPECT_EQ(table->upper_bound(make_addr(29)), 4u); + // In a gap, return the first entry after the gap. + EXPECT_EQ(table->lower_bound(make_addr(39)), 5u); - // In a gap, they both return the first entry after the gap. 
- EXPECT_EQ(table->upper_bound(make_addr(39)), 5u); - EXPECT_EQ(table->upper_bound(make_addr(39)), 5u); - - // And if there's no such entry, they return the size of the list. + // And if there's no such entry, return the size of the list. EXPECT_EQ(table->lower_bound(make_addr(50)), table->GetSize()); - EXPECT_EQ(table->upper_bound(make_addr(50)), table->GetSize()); EXPECT_EQ(table->lower_bound(make_addr(59)), table->GetSize()); - EXPECT_EQ(table->upper_bound(make_addr(59)), table->GetSize()); +} + +TEST_F(LineTableTest, GetLineEntryIndexRange) { + LineSequenceBuilder builder; + builder.Entry(0); + builder.Entry(10); + builder.Entry(20, LineSequenceBuilder::Terminal); + + llvm::Expected fixture = CreateFakeModule(builder.Build()); + ASSERT_THAT_EXPECTED(fixture, llvm::Succeeded()); + + LineTable *table = fixture->line_table; + + auto make_range = [&](addr_t addr, addr_t size) { + return AddressRange(fixture->text_sp, addr, size); + }; + + EXPECT_THAT(table->GetLineEntryIndexRange(make_range(0, 10)), + testing::Pair(0, 1)); + EXPECT_THAT(table->GetLineEntryIndexRange(make_range(0, 20)), + testing::Pair(0, 3)); // Includes the terminal entry. + // Partial overlap on one side. + EXPECT_THAT(table->GetLineEntryIndexRange(make_range(3, 7)), + testing::Pair(0, 1)); + // On the other side + EXPECT_THAT(table->GetLineEntryIndexRange(make_range(0, 15)), + testing::Pair(0, 2)); + // On both sides + EXPECT_THAT(table->GetLineEntryIndexRange(make_range(2, 3)), + testing::Pair(0, 1)); + // Empty ranges + EXPECT_THAT(table->GetLineEntryIndexRange(make_range(0, 0)), + testing::Pair(0, 0)); + EXPECT_THAT(table->GetLineEntryIndexRange(make_range(5, 0)), + testing::Pair(0, 0)); + EXPECT_THAT(table->GetLineEntryIndexRange(make_range(10, 0)), + testing::Pair(1, 1)); } TEST_F(LineTableTest, FindLineEntryByAddress) { diff --git a/lldb/unittests/Utility/RangeMapTest.cpp b/lldb/unittests/Utility/RangeMapTest.cpp index 981fa2a7d1c34..2022a2374fb8d 100644 --- a/lldb/unittests/Utility/RangeMapTest.cpp +++ b/lldb/unittests/Utility/RangeMapTest.cpp @@ -238,3 +238,24 @@ TEST(RangeDataVector, FindEntryIndexesThatContain_Overlap) { EXPECT_THAT(FindEntryIndexes(39, Map), testing::ElementsAre(10)); EXPECT_THAT(FindEntryIndexes(40, Map), testing::ElementsAre()); } + +TEST(RangeDataVector, CombineConsecutiveEntriesWithEqualData) { + RangeDataVectorT Map; + Map.Append(EntryT(0, 10, 47)); + Map.Append(EntryT(10, 10, 47)); + Map.Sort(); + Map.CombineConsecutiveEntriesWithEqualData(); + EXPECT_THAT(FindEntryIndexes(5, Map), testing::ElementsAre(47)); + EXPECT_THAT(FindEntryIndexes(15, Map), testing::ElementsAre(47)); + EXPECT_THAT(FindEntryIndexes(25, Map), testing::ElementsAre()); + + Map.Clear(); + Map.Append(EntryT(0, 10, 47)); + Map.Append(EntryT(20, 10, 47)); + Map.Sort(); + Map.CombineConsecutiveEntriesWithEqualData(); + EXPECT_THAT(FindEntryIndexes(5, Map), testing::ElementsAre(47)); + EXPECT_THAT(FindEntryIndexes(15, Map), testing::ElementsAre()); + EXPECT_THAT(FindEntryIndexes(25, Map), testing::ElementsAre(47)); + EXPECT_THAT(FindEntryIndexes(35, Map), testing::ElementsAre()); +} diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index deb87365ae8d7..5356aee87b35f 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2932,9 +2932,8 @@ the behavior is undefined, unless one of the following exceptions applies: must be a null pointer, otherwise the behavior is undefined. * ``dereferenceable()`` operand bundles only guarantee the pointer is - dereferenceable at the point of the assumption. 
The pointer may not be
- dereferenceable at later pointers, e.g. because it could have been
- freed.
+ dereferenceable at the point of the assumption. The pointer may not be
+ dereferenceable at later points, e.g. because it could have been freed.
In addition to allowing operand bundles encoding function and parameter attributes, an assume operand bundle may also encode a ``separate_storage``
diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst
index b7601b26beb89..93c53a04bc447 100644
--- a/llvm/docs/SPIRVUsage.rst
+++ b/llvm/docs/SPIRVUsage.rst
@@ -155,6 +155,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na
- Adds atomic min and max instructions on floating-point numbers.
* - ``SPV_INTEL_arbitrary_precision_integers``
- Allows generating arbitrary width integer types.
+ * - ``SPV_INTEL_bindless_images``
+ - Adds instructions to convert unsigned integer handles to images, samplers and sampled images.
* - ``SPV_INTEL_bfloat16_conversion``
- Adds instructions to convert between single-precision 32-bit floating-point values and 16-bit bfloat16 values.
* - ``SPV_INTEL_cache_controls``
diff --git a/llvm/include/llvm/ADT/SetOperations.h b/llvm/include/llvm/ADT/SetOperations.h
index 86a27b683ebc1..4d4ff4045f813 100644
--- a/llvm/include/llvm/ADT/SetOperations.h
+++ b/llvm/include/llvm/ADT/SetOperations.h
@@ -157,6 +157,26 @@ bool set_is_subset(const S1Ty &S1, const S2Ty &S2) {
return true;
}
+namespace detail {
+
+template <typename S1Ty, typename S2Ty>
+bool set_intersects_impl(const S1Ty &S1, const S2Ty &S2) {
+ for (const auto &E : S1)
+ if (S2.count(E))
+ return true;
+ return false;
+}
+
+} // namespace detail
+
+/// set_intersects(A, B) - Return true iff A ^ B is non-empty
+template <typename S1Ty, typename S2Ty>
+bool set_intersects(const S1Ty &S1, const S2Ty &S2) {
+ if (S1.size() < S2.size())
+ return detail::set_intersects_impl(S1, S2);
+ return detail::set_intersects_impl(S2, S1);
+}
+
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Analysis/GlobalsModRef.h b/llvm/include/llvm/Analysis/GlobalsModRef.h
index ab8ab8295b556..36a95e095aaa5 100644
--- a/llvm/include/llvm/Analysis/GlobalsModRef.h
+++ b/llvm/include/llvm/Analysis/GlobalsModRef.h
@@ -118,7 +118,8 @@ class GlobalsAAResult : public AAResultBase {
bool AnalyzeIndirectGlobalMemory(GlobalVariable *GV);
void CollectSCCMembership(CallGraph &CG);
- bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
+ bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V,
+ const Instruction *CtxI);
ModRefInfo getModRefInfoForArgument(const CallBase *Call, const GlobalValue *GV, AAQueryInfo &AAQI);
};
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index b675c4f875448..cb6f47e3a76be 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -367,15 +367,17 @@ class MemoryDepChecker {
struct DepDistanceStrideAndSizeInfo {
const SCEV *Dist;
- /// Strides could either be scaled (in bytes, taking the size of the
- /// underlying type into account), or unscaled (in indexing units; unscaled
- /// stride = scaled stride / size of underlying type). Here, strides are
- /// unscaled.
+ /// Strides here are scaled; i.e. in bytes, taking the size of the
+ /// underlying type into account.
uint64_t MaxStride;
std::optional<uint64_t> CommonStride;
bool ShouldRetryWithRuntimeCheck;
+
+ /// TypeByteSize is either the common store size of both accesses, or 0 when
+ /// store sizes mismatch.
uint64_t TypeByteSize;
+
bool AIsWrite;
bool BIsWrite;
@@ -394,8 +396,9 @@ class MemoryDepChecker {
/// there's no dependence or the analysis fails. Outlined to lambda to limit
/// the scope of various temporary variables, like A/BPtr, StrideA/BPtr and
/// others. Returns either the dependence result, if it could already be
- /// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite,
- /// BIsWrite).
+ /// determined, or a DepDistanceStrideAndSizeInfo struct, noting that
+ /// TypeByteSize could be 0 when store sizes mismatch, and this should be
+ /// checked in the caller.
std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index da0e412f2a0e0..0e7dd2f5b5c1a 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -265,7 +265,7 @@ std::unique_ptr loadModuleForTwoRounds(BitcodeModule &OrigModule,
Expected mergeCodeGenData(ArrayRef ObjectFiles);
void warn(Error E, StringRef Whence = "");
-void warn(Twine Message, std::string Whence = "", std::string Hint = "");
+void warn(Twine Message, StringRef Whence = "", StringRef Hint = "");
} // end namespace cgdata
diff --git a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h
index 7eed0b4e1e7b8..174ee115a1501 100644
--- a/llvm/include/llvm/CodeGen/RDFRegisters.h
+++ b/llvm/include/llvm/CodeGen/RDFRegisters.h
@@ -114,11 +114,9 @@ struct RegisterRef {
return Register::isPhysicalRegister(Id);
}
static constexpr bool isUnitId(unsigned Id) {
- return Register::isVirtualRegister(Id);
- }
- static constexpr bool isMaskId(unsigned Id) {
- return Register::isStackSlot(Id);
+ return Register(Id).isVirtual();
}
+ static constexpr bool isMaskId(unsigned Id) { return Register(Id).isStack(); }
static constexpr RegisterId toUnitId(unsigned Idx) {
return Idx | MCRegister::VirtualRegFlag;
@@ -147,7 +145,7 @@ struct PhysicalRegisterInfo {
}
const uint32_t *getRegMaskBits(RegisterId R) const {
- return RegMasks.get(Register::stackSlot2Index(R));
+ return RegMasks.get(Register(R).stackSlotIndex());
}
bool alias(RegisterRef RA, RegisterRef RB) const;
@@ -160,7 +158,7 @@ struct PhysicalRegisterInfo {
}
const BitVector &getMaskUnits(RegisterId MaskId) const {
- return MaskInfos[Register::stackSlot2Index(MaskId)].Units;
+ return MaskInfos[Register(MaskId).stackSlotIndex()].Units;
}
std::set getUnits(RegisterRef RR) const;
diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h
similarity index 54%
rename from llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
rename to llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h
index 0758743c2b140..b0355795370d8 100644
--- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h
+++ b/llvm/include/llvm/CodeGen/RegAllocPriorityAdvisor.h
@@ -9,8 +9,10 @@
#ifndef LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
#define LLVM_CODEGEN_REGALLOCPRIORITYADVISOR_H
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/RegAllocEvictionAdvisor.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
namespace llvm {
@@ -68,20 +70,72 @@ class DummyPriorityAdvisor : public RegAllocPriorityAdvisor {
unsigned getPriority(const LiveInterval &LI) const override;
};
-class RegAllocPriorityAdvisorAnalysis : public ImmutablePass {
+/// Common provider for getting the priority advisor and logging rewards.
+/// Legacy analysis forwards all calls to this provider. +/// New analysis serves the provider as the analysis result. +/// Expensive setup is done in the constructor, so that the advisor can be +/// created quickly for every machine function. +/// TODO: Remove once legacy PM support is dropped. +class RegAllocPriorityAdvisorProvider { public: enum class AdvisorMode : int { Default, Release, Development, Dummy }; - RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode) - : ImmutablePass(ID), Mode(Mode){}; + RegAllocPriorityAdvisorProvider(AdvisorMode Mode) : Mode(Mode) {} + + virtual ~RegAllocPriorityAdvisorProvider() = default; + + virtual void logRewardIfNeeded(const MachineFunction &MF, + function_ref GetReward) {}; + + virtual std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes &SI) = 0; + + AdvisorMode getAdvisorMode() const { return Mode; } + +private: + const AdvisorMode Mode; +}; + +class RegAllocPriorityAdvisorAnalysis + : public AnalysisInfoMixin { + static AnalysisKey Key; + friend AnalysisInfoMixin; + +public: + struct Result { + // Owned by this analysis. + RegAllocPriorityAdvisorProvider *Provider; + + bool invalidate(MachineFunction &MF, const PreservedAnalyses &PA, + MachineFunctionAnalysisManager::Invalidator &Inv) { + auto PAC = PA.getChecker(); + return !PAC.preservedWhenStateless() || + Inv.invalidate(MF, PA); + } + }; + + Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); + +private: + void initializeProvider(LLVMContext &Ctx); + void initializeMLProvider(RegAllocPriorityAdvisorProvider::AdvisorMode Mode, + LLVMContext &Ctx); + std::unique_ptr Provider; +}; + +class RegAllocPriorityAdvisorAnalysisLegacy : public ImmutablePass { +public: + using AdvisorMode = RegAllocPriorityAdvisorProvider::AdvisorMode; + RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode Mode) + : ImmutablePass(ID), Mode(Mode) {}; static char ID; /// Get an advisor for the given context (i.e. machine function, etc) - virtual std::unique_ptr - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; + RegAllocPriorityAdvisorProvider &getProvider() { return *Provider; } AdvisorMode getAdvisorMode() const { return Mode; } virtual void logRewardIfNeeded(const MachineFunction &MF, - llvm::function_ref GetReward){}; + llvm::function_ref GetReward) {}; protected: // This analysis preserves everything, and subclasses may have additional @@ -90,6 +144,8 @@ class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { AU.setPreservesAll(); } + std::unique_ptr Provider; + private: StringRef getPassName() const override; const AdvisorMode Mode; @@ -97,11 +153,19 @@ class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { /// Specialization for the API used by the analysis infrastructure to create /// an instance of the priority advisor. 
-template <> Pass *callDefaultCtor(); +template <> Pass *callDefaultCtor(); + +RegAllocPriorityAdvisorAnalysisLegacy * +createReleaseModePriorityAdvisorAnalysis(); + +RegAllocPriorityAdvisorAnalysisLegacy * +createDevelopmentModePriorityAdvisorAnalysis(); -RegAllocPriorityAdvisorAnalysis *createReleaseModePriorityAdvisor(); +LLVM_ATTRIBUTE_RETURNS_NONNULL RegAllocPriorityAdvisorProvider * +createReleaseModePriorityAdvisorProvider(); -RegAllocPriorityAdvisorAnalysis *createDevelopmentModePriorityAdvisor(); +LLVM_ATTRIBUTE_RETURNS_NONNULL RegAllocPriorityAdvisorProvider * +createDevelopmentModePriorityAdvisorProvider(LLVMContext &Ctx); } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h index ad05368bea6a4..03e462872d3c2 100644 --- a/llvm/include/llvm/CodeGen/Register.h +++ b/llvm/include/llvm/CodeGen/Register.h @@ -36,25 +36,12 @@ class Register { static_assert(std::numeric_limits::max() >= 0xFFFFFFFF, "Reg isn't large enough to hold full range."); - /// isStackSlot - Sometimes it is useful to be able to store a non-negative - /// frame index in a variable that normally holds a register. isStackSlot() - /// returns true if Reg is in the range used for stack slots. - /// - /// FIXME: remove in favor of member. - static constexpr bool isStackSlot(unsigned Reg) { + /// Return true if this is a stack slot. + constexpr bool isStack() const { return MCRegister::FirstStackSlot <= Reg && Reg < MCRegister::VirtualRegFlag; } - /// Return true if this is a stack slot. - constexpr bool isStack() const { return isStackSlot(Reg); } - - /// Compute the frame index from a register value representing a stack slot. - static int stackSlot2Index(Register Reg) { - assert(Reg.isStack() && "Not a stack slot"); - return int(Reg.id() - MCRegister::FirstStackSlot); - } - /// Convert a non-negative frame index to a stack slot register value. static Register index2StackSlot(int FI) { assert(FI >= 0 && "Cannot hold a negative frame index."); @@ -67,19 +54,6 @@ class Register { return MCRegister::isPhysicalRegister(Reg); } - /// Return true if the specified register number is in - /// the virtual register namespace. - static constexpr bool isVirtualRegister(unsigned Reg) { - return Reg & MCRegister::VirtualRegFlag; - } - - /// Convert a virtual register number to a 0-based index. - /// The first virtual register in a function will get the index 0. - static unsigned virtReg2Index(Register Reg) { - assert(Reg.isVirtual() && "Not a virtual register"); - return Reg.id() & ~MCRegister::VirtualRegFlag; - } - /// Convert a 0-based index to a virtual register number. /// This is the inverse operation of VirtReg2IndexFunctor below. static Register index2VirtReg(unsigned Index) { @@ -89,7 +63,7 @@ class Register { /// Return true if the specified register number is in the virtual register /// namespace. - constexpr bool isVirtual() const { return isVirtualRegister(Reg); } + constexpr bool isVirtual() const { return Reg & MCRegister::VirtualRegFlag; } /// Return true if the specified register number is in the physical register /// namespace. @@ -97,7 +71,10 @@ class Register { /// Convert a virtual register number to a 0-based index. The first virtual /// register in a function will get the index 0. - unsigned virtRegIndex() const { return virtReg2Index(Reg); } + unsigned virtRegIndex() const { + assert(isVirtual() && "Not a virtual register"); + return Reg & ~MCRegister::VirtualRegFlag; + } /// Compute the frame index from a register value representing a stack slot. 
int stackSlotIndex() const { @@ -173,14 +150,14 @@ class VirtRegOrUnit { public: constexpr explicit VirtRegOrUnit(MCRegUnit Unit) : VRegOrUnit(Unit) { - assert(!Register::isVirtualRegister(VRegOrUnit)); + assert(!Register(VRegOrUnit).isVirtual()); } constexpr explicit VirtRegOrUnit(Register Reg) : VRegOrUnit(Reg.id()) { assert(Reg.isVirtual()); } constexpr bool isVirtualReg() const { - return Register::isVirtualRegister(VRegOrUnit); + return Register(VRegOrUnit).isVirtual(); } constexpr MCRegUnit asMCRegUnit() const { diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h index aaa10e684687c..7c534805b8333 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -59,7 +59,7 @@ namespace llvm { : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} unsigned getSparseSetIndex() const { - return Register::virtReg2Index(VirtReg); + return Register(VirtReg).virtRegIndex(); } }; diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 6eff6bfe8d5b1..75c4fabe03dd4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -989,6 +989,8 @@ END_TWO_BYTE_PACK() /// Helper method returns the APInt value of a ConstantSDNode. inline const APInt &getAsAPIntVal() const; + inline std::optional bitcastToAPInt() const; + const SDValue &getOperand(unsigned Num) const { assert(Num < NumOperands && "Invalid child # of SDNode!"); return OperandList[Num]; @@ -1785,6 +1787,14 @@ class ConstantFPSDNode : public SDNode { } }; +std::optional SDNode::bitcastToAPInt() const { + if (auto *CN = dyn_cast(this)) + return CN->getAPIntValue(); + if (auto *CFPN = dyn_cast(this)) + return CFPN->getValueAPF().bitcastToAPInt(); + return std::nullopt; +} + /// Returns true if \p V is a constant integer zero. bool isNullConstant(SDValue V); diff --git a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h index 0180670c4c699..c51476e9ad3fe 100644 --- a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h +++ b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h @@ -33,7 +33,7 @@ enum class VectorLibrary { AMDLIBM // AMD vector math library. }; -TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, +TargetLibraryInfoImpl *createTLII(const llvm::Triple &TargetTriple, VectorLibrary Veclib); } // end namespace llvm::driver diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 1e4f25c642493..876a6f816ad3f 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -3463,4 +3463,11 @@ def int_amdgcn_addrspacecast_nonnull : DefaultAttrsIntrinsic< [llvm_anyptr_ty], [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable] >; + +/// Make it clear to the backend that this value is really dead. For instance, +/// when used as an input to a phi node, it will make it possible for the +/// backend to allocate the dead lanes for operations within the corresponding +/// incoming block. 
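/// A possible IR-level use, shown as a hedged sketch rather than code from
/// this patch (the .i32 suffix assumes an i32 instantiation of this
/// overloaded intrinsic):
///
///   %dead = call i32 @llvm.amdgcn.dead.i32()
///   ...
///   %v = phi i32 [ %dead, %bb.uninteresting ], [ %live, %bb.compute ]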
+def int_amdgcn_dead: DefaultAttrsIntrinsic<[llvm_any_ty], [], + [IntrNoMem, IntrWillReturn, IntrNoCallback]>; } diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 81a602c8889d8..5b30eb53208a8 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -253,7 +253,7 @@ void initializeReachingDefAnalysisPass(PassRegistry &); void initializeReassociateLegacyPassPass(PassRegistry &); void initializeRegAllocEvictionAdvisorAnalysisLegacyPass(PassRegistry &); void initializeRegAllocFastPass(PassRegistry &); -void initializeRegAllocPriorityAdvisorAnalysisPass(PassRegistry &); +void initializeRegAllocPriorityAdvisorAnalysisLegacyPass(PassRegistry &); void initializeRegAllocScoringPass(PassRegistry &); void initializeRegBankSelectPass(PassRegistry &); void initializeRegToMemWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 2b5e258682585..373bd047e2395 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -115,6 +115,7 @@ MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree", MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", MachineTraceMetricsAnalysis()) MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MACHINE_FUNCTION_ANALYSIS("regalloc-evict", RegAllocEvictionAdvisorAnalysis()) +MACHINE_FUNCTION_ANALYSIS("regalloc-priority", RegAllocPriorityAdvisorAnalysis()) MACHINE_FUNCTION_ANALYSIS("slot-indexes", SlotIndexesAnalysis()) MACHINE_FUNCTION_ANALYSIS("spill-code-placement", SpillPlacementAnalysis()) MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis()) diff --git a/llvm/include/llvm/SandboxIR/Value.h b/llvm/include/llvm/SandboxIR/Value.h index 28e33ca0f2312..2e91b96bb22e6 100644 --- a/llvm/include/llvm/SandboxIR/Value.h +++ b/llvm/include/llvm/SandboxIR/Value.h @@ -9,6 +9,7 @@ #ifndef LLVM_SANDBOXIR_VALUE_H #define LLVM_SANDBOXIR_VALUE_H +#include "llvm/IR/Metadata.h" #include "llvm/IR/Value.h" #include "llvm/SandboxIR/Use.h" @@ -282,6 +283,28 @@ class Value { #endif }; +class OpaqueValue : public Value { +protected: + OpaqueValue(llvm::Value *V, Context &Ctx) + : Value(ClassID::OpaqueValue, V, Ctx) {} + friend class Context; // For constructor. 
+ +public: + static bool classof(const Value *From) { + return From->getSubclassID() == ClassID::OpaqueValue; + } +#ifndef NDEBUG + void verify() const override { + assert((isa(Val) || isa(Val)) && + "Expected Metadata or InlineAssembly!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif // NDEBUG +}; + } // namespace llvm::sandboxir #endif // LLVM_SANDBOXIR_VALUE_H diff --git a/llvm/include/llvm/SandboxIR/Values.def b/llvm/include/llvm/SandboxIR/Values.def index 3d8ad6ce197f4..f5ead54a08e10 100644 --- a/llvm/include/llvm/SandboxIR/Values.def +++ b/llvm/include/llvm/SandboxIR/Values.def @@ -21,6 +21,7 @@ DEF_CONST(Function, Function) DEF_VALUE(Argument, Argument) +DEF_VALUE(OpaqueValue, OpaqueValue) DEF_USER(User, User) DEF_VALUE(Block, BasicBlock) diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp index 49baf2eb84bb3..d82647c7c70cf 100644 --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -376,11 +376,12 @@ UseCaptureKind llvm::DetermineUseCaptureKind( case Instruction::ICmp: { unsigned Idx = U.getOperandNo(); unsigned OtherIdx = 1 - Idx; - if (auto *CPN = dyn_cast(I->getOperand(OtherIdx))) { + if (isa(I->getOperand(OtherIdx)) && + cast(I)->isEquality()) { // Don't count comparisons of a no-alias return value against null as // captures. This allows us to ignore comparisons of malloc results // with null, for example. - if (CPN->getType()->getAddressSpace() == 0) + if (U->getType()->getPointerAddressSpace() == 0) if (isNoAliasCall(U.get()->stripPointerCasts())) return UseCaptureKind::NO_CAPTURE; if (!I->getFunction()->nullPointerIsDefined()) { diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp index 1ceb1b2629418..644969dd25212 100644 --- a/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/llvm/lib/Analysis/GlobalsModRef.cpp @@ -713,13 +713,20 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, // active, or to be forced to operate as a module pass that cannot co-exist // with an alias analysis such as GMR. bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, - const Value *V) { + const Value *V, + const Instruction *CtxI) { // In order to know that the underlying object cannot alias the // non-addr-taken global, we must know that it would have to be an escape. // Thus if the underlying object is a function argument, a load from // a global, or the return of a function, it cannot alias. We can also // recurse through PHI nodes and select nodes provided all of their inputs // resolve to one of these known-escaping roots. + + // A non-addr-taken global cannot alias with any non-pointer value. + // Check this early and exit. + if (!V->getType()->isPointerTy()) + return true; + SmallPtrSet Visited; SmallVector Inputs; Visited.insert(V); @@ -762,6 +769,14 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, continue; } + if (CtxI) + if (auto *CPN = dyn_cast(Input)) { + // Null pointer cannot alias with a non-addr-taken global. + const Function *F = CtxI->getFunction(); + if (!NullPointerIsDefined(F, CPN->getType()->getAddressSpace())) + continue; + } + // Recurse through a limited number of selects, loads and PHIs. 
This is an // arbitrary depth of 4, lower numbers could be used to fix compile time // issues if needed, but this is generally expected to only be important @@ -820,7 +835,7 @@ bool GlobalsAAResult::invalidate(Module &, const PreservedAnalyses &PA, /// address of the global isn't taken. AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB, - AAQueryInfo &AAQI, const Instruction *) { + AAQueryInfo &AAQI, const Instruction *CtxI) { // Get the base object these pointers point to. const Value *UV1 = getUnderlyingObject(LocA.Ptr->stripPointerCastsForAliasAnalysis()); @@ -856,7 +871,7 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA, if ((GV1 || GV2) && GV1 != GV2) { const GlobalValue *GV = GV1 ? GV1 : GV2; const Value *UV = GV1 ? UV2 : UV1; - if (isNonEscapingGlobalNoAlias(GV, UV)) + if (isNonEscapingGlobalNoAlias(GV, UV, CtxI)) return AliasResult::NoAlias; } @@ -920,7 +935,7 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call, !all_of(Objects, [&](const Value *V) { return this->alias(MemoryLocation::getBeforeOrAfter(V), MemoryLocation::getBeforeOrAfter(GV), AAQI, - nullptr) == AliasResult::NoAlias; + Call) == AliasResult::NoAlias; })) return ConservativeResult; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 5dc5b025599b1..cab70c5c01a45 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1786,22 +1786,21 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) { Status = S; } -/// Given a dependence-distance \p Dist between two -/// memory accesses, that have strides in the same direction whose absolute -/// value of the maximum stride is given in \p MaxStride, and that have the same -/// type size \p TypeByteSize, in a loop whose maximum backedge taken count is -/// \p MaxBTC, check if it is possible to prove statically that the dependence +/// Given a dependence-distance \p Dist between two memory accesses, that have +/// strides in the same direction whose absolute value of the maximum stride is +/// given in \p MaxStride, in a loop whose maximum backedge taken count is \p +/// MaxBTC, check if it is possible to prove statically that the dependence /// distance is larger than the range that the accesses will travel through the /// execution of the loop. If so, return true; false otherwise. This is useful /// for example in loops such as the following (PR31098): + /// /// for (i = 0; i < D; ++i) { /// = out[i]; /// out[i+D] = /// } static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &MaxBTC, const SCEV &Dist, - uint64_t MaxStride, - uint64_t TypeByteSize) { + uint64_t MaxStride) { // If we can prove that // (**) |Dist| > MaxBTC * Step @@ -1820,8 +1819,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, // will be executed only if LoopCount >= VF, proving distance >= LoopCount // also guarantees that distance >= VF. // - const uint64_t ByteStride = MaxStride * TypeByteSize; - const SCEV *Step = SE.getConstant(MaxBTC.getType(), ByteStride); + const SCEV *Step = SE.getConstant(MaxBTC.getType(), MaxStride); const SCEV *Product = SE.getMulExpr(&MaxBTC, Step); const SCEV *CastedDist = &Dist; @@ -1851,8 +1849,8 @@ } /// Check the dependence for two accesses with the same stride \p Stride.
-/// \p Distance is the positive distance and \p TypeByteSize is type size in -/// bytes. +/// \p Distance is the positive distance in bytes, and \p TypeByteSize is type +/// size in bytes. /// /// \returns true if they are independent. static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, @@ -1865,14 +1863,12 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, if (Distance % TypeByteSize) return false; - uint64_t ScaledDist = Distance / TypeByteSize; - - // No dependence if the scaled distance is not multiple of the stride. + // No dependence if the distance is not a multiple of the stride. // E.g. // for (i = 0; i < 1024 ; i += 4) // A[i+2] = A[i] + 1; // - // Two accesses in memory (scaled distance is 2, stride is 4): + // Two accesses in memory (distance is 2, stride is 4): // | A[0] | | | | A[4] | | | | // | | | A[2] | | | | A[6] | | // @@ -1880,10 +1876,10 @@ // for (i = 0; i < 1024 ; i += 3) // A[i+4] = A[i] + 1; // - // Two accesses in memory (scaled distance is 4, stride is 3): + // Two accesses in memory (distance is 4, stride is 3): // | A[0] | | | A[3] | | | A[6] | | | // | | | | | A[4] | | | A[7] | | - return ScaledDist % Stride; + return Distance % Stride; } std::variant CommonStride; - if (StrideAPtrInt == StrideBPtrInt) - CommonStride = StrideAPtrInt; + if (StrideAScaled == StrideBScaled) + CommonStride = StrideAScaled; // TODO: Historically, we don't retry with runtime checks unless the // (unscaled) strides are the same. Fix this once the condition for runtime // checks in isDependent is fixed. - bool ShouldRetryWithRuntimeCheck = CommonStride.has_value(); + bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt; return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck, TypeByteSize, @@ -2050,9 +2049,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // upper bound of the number of iterations), the accesses are independent, i.e. // they are far enough apart that accesses won't access the same location // across all loop iterations. - if (HasSameSize && isSafeDependenceDistance( - DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), - *Dist, MaxStride, TypeByteSize)) + if (HasSameSize && + isSafeDependenceDistance( + DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride)) return Dependence::NoDep; const SCEVConstant *ConstDist = dyn_cast(Dist); @@ -2156,8 +2155,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // It's not vectorizable if the distance is smaller than the minimum distance // needed for a vectorized/unrolled version. Vectorizing one iteration in - // front needs TypeByteSize * Stride. Vectorizing the last iteration needs - // TypeByteSize (No need to plus the last gap distance). + // front needs CommonStride. Vectorizing the last iteration needs TypeByteSize + // (no need to add the last gap distance). // // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. // foo(int *A) { @@ -2166,7 +2165,7 @@ // B[i] = A[i] + 1; // } // - // Two accesses in memory (stride is 2): + // Two accesses in memory (stride is 4 * 2): // | A[0] | | A[2] | | A[4] | | A[6] | | // | B[0] | | B[2] | | B[4] | // @@ -2184,8 +2183,7 @@ // We know that Dist is positive, but it may not be constant.
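// A worked instance of the byte-based checks above (numbers ours): with
// 4-byte ints, "for (i = 0; i < 1024; i += 4) A[i+2] = A[i] + 1;" gives
// Distance = 2 * 4 = 8 bytes and Stride = 4 * 4 = 16 bytes, so the gate
// 8 % 4 == 0 passes and 8 % 16 != 0 proves independence -- the same
// verdict as the old element-count form (2 % 4 != 0), which is why
// dropping ScaledDist is behavior-preserving. Likewise
// isSafeDependenceDistance now receives MaxStride already in bytes, so
// (**) becomes |Dist| > MaxBTC * MaxStride with no TypeByteSize factor.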
Use the signed // minimum for computations below, as this ensures we compute the closest // possible dependence distance. - uint64_t MinDistanceNeeded = - TypeByteSize * *CommonStride * (MinNumIter - 1) + TypeByteSize; + uint64_t MinDistanceNeeded = *CommonStride * (MinNumIter - 1) + TypeByteSize; if (MinDistanceNeeded > static_cast(MinDistance)) { if (!ConstDist) { // For non-constant distances, we checked the lower bound of the @@ -2241,7 +2239,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits // since there is a backwards dependency. - uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * *CommonStride); + uint64_t MaxVF = MinDepDistBytes / *CommonStride; LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance << " with max VF = " << MaxVF << '\n'); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 88dcdfd1f931a..bb6b9c7721d55 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -204,7 +204,7 @@ Expected
Header::readFromBuffer(const unsigned char *Curr) { namespace cgdata { -void warn(Twine Message, std::string Whence, std::string Hint) { +void warn(Twine Message, StringRef Whence, StringRef Hint) { WithColor::warning(); if (!Whence.empty()) errs() << Whence << ": "; @@ -216,7 +216,7 @@ void warn(Twine Message, std::string Whence, std::string Hint) { void warn(Error E, StringRef Whence) { if (E.isA()) { handleAllErrors(std::move(E), [&](const CGDataError &IPE) { - warn(IPE.message(), Whence.str(), ""); + warn(IPE.message(), Whence, ""); }); } } diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp index a6d2640ed044f..301cb6e1a2d18 100644 --- a/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -276,7 +276,7 @@ LaneBitmask DeadLaneDetector::determineInitialDefinedLanes(unsigned Reg) { if (lowersToCopies(DefMI)) { // Start optimistically with no used or defined lanes for copy // instructions. The following dataflow analysis will add more bits. - unsigned RegIdx = Register::virtReg2Index(Reg); + unsigned RegIdx = Register(Reg).virtRegIndex(); DefinedByCopy.set(RegIdx); PutInWorklist(RegIdx); diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index caec0524e7ab6..48d8319892637 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -522,8 +522,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { if (PI.PHI->getOperand(i+1).getMBB() == FPred) PI.FReg = PI.PHI->getOperand(i).getReg(); } - assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI"); - assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI"); + assert(Register(PI.TReg).isVirtual() && "Bad PHI"); + assert(Register(PI.FReg).isVirtual() && "Bad PHI"); // Get target information. if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(), diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index d4cb224c35d74..3fb1347b58e4b 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7816,13 +7816,13 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) { if (AreExactFloatBounds) { // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat. auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat); - auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, + auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src, MaxC); auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC); // Clamp by MaxFloat from above. NaN cannot occur.
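// Why the FCMP_ULT -> FCMP_OGT flip above is right (a scalar sketch of
// the emitted compare+select, assuming IEEE semantics; names ours):
//   bool Keep = Src > MinFloat;  // FCMP_OGT is ordered: false on NaN
//   Max = Keep ? Src : MinFloat; // so NaN clamps to MinFloat, as the
//                                // comment requires
// After this select Max cannot be NaN, which is what permits the second,
// ordered FCMP_OLT clamp below to carry the FmNoNans flag.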
auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat); auto MinP = - MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Max, + MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max, MinC, MachineInstr::FmNoNans); auto Min = MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans); diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 302dd37ff3d67..3834a6d7a355e 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -1285,8 +1285,7 @@ void InlineSpiller::spillAll() { void InlineSpiller::spill(LiveRangeEdit &edit) { ++NumSpilledRanges; Edit = &edit; - assert(!Register::isStackSlot(edit.getReg()) && - "Trying to spill a stack slot."); + assert(!edit.getReg().isStack() && "Trying to spill a stack slot."); // Share a stack slot among all descendants of Original. Original = VRM.getOriginal(edit.getReg()); StackSlot = VRM.getStackSlot(Original); diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index 0683353d9cdba..404ffad01c229 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -876,7 +876,7 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, unsigned ComposeSubRegIdx) { // Phys reg should not be tracked at subreg level. // Same for noreg (Reg == 0). - if (!Register::isVirtualRegister(Reg) || !Reg) + if (!Register(Reg).isVirtual() || !Reg) return; // Remove the values that don't define those lanes. SmallVector ToBeRemoved; diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index 49c8a0e466337..7600a2f08dc4f 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -137,7 +137,7 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { } unsigned VRegRenamer::createVirtualRegister(unsigned VReg) { - assert(Register::isVirtualRegister(VReg) && "Expected Virtual Registers"); + assert(Register(VReg).isVirtual() && "Expected Virtual Registers"); std::string Name = getInstructionOpcodeHash(*MRI.getVRegDef(VReg)); return createVirtualRegisterWithLowerName(VReg, Name); } diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp index 1a8e11de909e8..8215d07b74dba 100644 --- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp @@ -116,7 +116,7 @@ class RegAllocScoring : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -1242,8 +1242,8 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) { getAnalysis().logRewardIfNeeded( MF, GetReward); - getAnalysis().logRewardIfNeeded(MF, - GetReward); + getAnalysis().logRewardIfNeeded( + MF, GetReward); return false; } #endif // #ifdef LLVM_HAVE_TFLITE diff --git a/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp index 9638df81770c1..50ecd3ce2c88c 100644 --- a/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegAllocPriorityAdvisor.cpp @@ -12,7 +12,6 @@ #include "AllocationOrder.h" #include "RegAllocGreedy.h" -#include "RegAllocPriorityAdvisor.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/InteractiveModelRunner.h" #include "llvm/Analysis/MLModelRunner.h" @@ -25,6 +24,7 @@ 
#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocPriorityAdvisor.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -121,25 +121,14 @@ static const std::vector InputFeatures{ // =================================== // Release (AOT) - specifics // =================================== -class ReleaseModePriorityAdvisorAnalysis final - : public RegAllocPriorityAdvisorAnalysis { +class ReleaseModePriorityAdvisorProvider final + : public RegAllocPriorityAdvisorProvider { public: - ReleaseModePriorityAdvisorAnalysis() - : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Release) {} - // support for isa<> and dyn_cast. - static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { - return R->getAdvisorMode() == AdvisorMode::Release; - } - -private: - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired(); - RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); - } - + ReleaseModePriorityAdvisorProvider() + : RegAllocPriorityAdvisorProvider(AdvisorMode::Release) {} std::unique_ptr - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes &SI) override { if (!Runner) { if (InteractiveChannelBaseName.empty()) Runner = std::make_unique>( @@ -150,12 +139,36 @@ class ReleaseModePriorityAdvisorAnalysis final InteractiveChannelBaseName + ".out", InteractiveChannelBaseName + ".in"); } - return std::make_unique( - MF, RA, &getAnalysis().getSI(), Runner.get()); + return std::make_unique(MF, RA, &SI, Runner.get()); } + +private: std::unique_ptr Runner; }; +class ReleaseModePriorityAdvisorAnalysisLegacy final + : public RegAllocPriorityAdvisorAnalysisLegacy { +public: + ReleaseModePriorityAdvisorAnalysisLegacy() + : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Release) {} + // support for isa<> and dyn_cast. + static bool classof(const RegAllocPriorityAdvisorAnalysisLegacy *R) { + return R->getAdvisorMode() == AdvisorMode::Release; + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + RegAllocPriorityAdvisorAnalysisLegacy::getAnalysisUsage(AU); + } + + bool doInitialization(Module &M) override { + Provider = std::make_unique(); + return false; + } +}; + // =================================== // Development mode-specifics // =================================== @@ -186,46 +199,17 @@ class DevelopmentModePriorityAdvisor : public MLPriorityAdvisor { Logger *const Log; }; -class DevelopmentModePriorityAdvisorAnalysis final - : public RegAllocPriorityAdvisorAnalysis { -public: - DevelopmentModePriorityAdvisorAnalysis() - : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Development) {} - // support for isa<> and dyn_cast. - static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { - return R->getAdvisorMode() == AdvisorMode::Development; - } - - void logRewardIfNeeded(const MachineFunction &MF, - llvm::function_ref GetReward) override { - if (!Log || !Log->hasAnyObservationForContext(MF.getName())) - return; - // The function pass manager would run all the function passes for a - // function, so we assume the last context belongs to this function. If - // this invariant ever changes, we can implement at that time switching - // contexts. 
At this point, it'd be an error - if (Log->currentContext() != MF.getName()) { - MF.getFunction().getContext().emitError( - "The training log context shouldn't have had changed."); - } - if (Log->hasObservationInProgress()) - Log->logReward(GetReward()); - } - -private: - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired(); - RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); - } +class DevelopmentModePriorityAdvisorProvider final + : public RegAllocPriorityAdvisorProvider { +public: // Save all the logs (when requested). - bool doInitialization(Module &M) override { - LLVMContext &Ctx = M.getContext(); + DevelopmentModePriorityAdvisorProvider(LLVMContext &Ctx) + : RegAllocPriorityAdvisorProvider(AdvisorMode::Development) { if (ModelUnderTraining.empty() && TrainingLog.empty()) { Ctx.emitError("Regalloc development mode should be requested with at " "least logging enabled and/or a training model"); - return false; + return; } if (ModelUnderTraining.empty()) Runner = std::make_unique(Ctx, InputFeatures); @@ -234,15 +218,15 @@ class DevelopmentModePriorityAdvisorAnalysis final Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures); if (!Runner) { Ctx.emitError("Regalloc: could not set up the model runner"); - return false; + return; } if (TrainingLog.empty()) - return false; + return; std::error_code EC; auto OS = std::make_unique(TrainingLog, EC); if (EC) { - M.getContext().emitError(EC.message() + ":" + TrainingLog); - return false; + Ctx.emitError(EC.message() + ":" + TrainingLog); + return; } std::vector LFS = InputFeatures; if (auto *MUTR = dyn_cast(Runner.get())) @@ -254,33 +238,80 @@ class DevelopmentModePriorityAdvisorAnalysis final Log = std::make_unique(std::move(OS), LFS, Reward, /*IncludeReward*/ true); - return false; + } + + void logRewardIfNeeded(const MachineFunction &MF, + llvm::function_ref GetReward) override { + if (!Log || !Log->hasAnyObservationForContext(MF.getName())) + return; + // The function pass manager would run all the function passes for a + // function, so we assume the last context belongs to this function. If + // this invariant ever changes, we can implement at that time switching + // contexts. At this point, it'd be an error + if (Log->currentContext() != MF.getName()) { + MF.getFunction().getContext().emitError( + "The training log context shouldn't have changed."); + } + if (Log->hasObservationInProgress()) + Log->logReward(GetReward()); } std::unique_ptr - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes &SI) override { if (!Runner) return nullptr; if (Log) { Log->switchContext(MF.getName()); } - return std::make_unique( - MF, RA, &getAnalysis().getSI(), Runner.get(), - Log.get()); + return std::make_unique( + MF, RA, &SI, Runner.get(), Log.get()); } std::unique_ptr Runner; std::unique_ptr Log; }; + +class DevelopmentModePriorityAdvisorAnalysisLegacy final + : public RegAllocPriorityAdvisorAnalysisLegacy { +public: + DevelopmentModePriorityAdvisorAnalysisLegacy() + : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Development) {} + + // support for isa<> and dyn_cast.
+ static bool classof(const RegAllocPriorityAdvisorAnalysisLegacy *R) { + return R->getAdvisorMode() == AdvisorMode::Development; + } + + void logRewardIfNeeded(const MachineFunction &MF, + llvm::function_ref GetReward) override { + Provider->logRewardIfNeeded(MF, GetReward); + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + RegAllocPriorityAdvisorAnalysisLegacy::getAnalysisUsage(AU); + } + + // Save all the logs (when requested). + bool doInitialization(Module &M) override { + Provider = std::make_unique( + M.getContext()); + return false; + } +}; #endif //#ifdef LLVM_HAVE_TFLITE } // namespace llvm -RegAllocPriorityAdvisorAnalysis *llvm::createReleaseModePriorityAdvisor() { +RegAllocPriorityAdvisorAnalysisLegacy * +llvm::createReleaseModePriorityAdvisorAnalysis() { return llvm::isEmbeddedModelEvaluatorValid() || !InteractiveChannelBaseName.empty() - ? new ReleaseModePriorityAdvisorAnalysis() + ? new ReleaseModePriorityAdvisorAnalysisLegacy() : nullptr; } @@ -310,8 +341,9 @@ unsigned MLPriorityAdvisor::getPriority(const LiveInterval &LI) const { } #ifdef LLVM_HAVE_TFLITE -RegAllocPriorityAdvisorAnalysis *llvm::createDevelopmentModePriorityAdvisor() { - return new DevelopmentModePriorityAdvisorAnalysis(); +RegAllocPriorityAdvisorAnalysisLegacy * +llvm::createDevelopmentModePriorityAdvisorAnalysis() { + return new DevelopmentModePriorityAdvisorAnalysisLegacy(); } unsigned @@ -356,4 +388,14 @@ DevelopmentModePriorityAdvisor::getPriority(const LiveInterval &LI) const { return static_cast(Prio); } +RegAllocPriorityAdvisorProvider * +llvm::createDevelopmentModePriorityAdvisorProvider(LLVMContext &Ctx) { + return new DevelopmentModePriorityAdvisorProvider(Ctx); +} + #endif // #ifdef LLVM_HAVE_TFLITE + +RegAllocPriorityAdvisorProvider * +llvm::createReleaseModePriorityAdvisorProvider() { + return new ReleaseModePriorityAdvisorProvider(); +} diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 021c1a058c020..a86ad0f6c46dc 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -682,7 +682,7 @@ struct DataDep { /// Create a DataDep from an SSA form virtual register. DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) : UseOp(UseOp) { - assert(Register::isVirtualRegister(VirtReg)); + assert(Register(VirtReg).isVirtual()); MachineOperand *DefMO = MRI->getOneDef(VirtReg); assert(DefMO && "Register does not have unique def"); DefMI = DefMO->getParent(); diff --git a/llvm/lib/CodeGen/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp index 7ce00a66b3ae6..b8d54cadc07f6 100644 --- a/llvm/lib/CodeGen/RDFRegisters.cpp +++ b/llvm/lib/CodeGen/RDFRegisters.cpp @@ -263,7 +263,7 @@ void PhysicalRegisterInfo::print(raw_ostream &OS, RegisterRef A) const { } else { assert(A.isMask()); // RegMask SS flag is preserved by idx(). - unsigned Idx = Register::stackSlot2Index(A.idx()); + unsigned Idx = Register(A.idx()).stackSlotIndex(); const char *Fmt = Idx < 0x10000 ?
"%04x" : "%08x"; OS << "M#" << format(Fmt, Idx); } diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp index 2369615ef0fb6..30523611977f4 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -12,11 +12,11 @@ #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" #include "AllocationOrder.h" #include "RegAllocGreedy.h" -#include "RegAllocPriorityAdvisor.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/RegAllocPriorityAdvisor.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Module.h" diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 9318c1df0b5e2..bd81d630f9d1f 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -15,7 +15,6 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "RegAllocBase.h" -#include "RegAllocPriorityAdvisor.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" @@ -46,6 +45,7 @@ #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" +#include "llvm/CodeGen/RegAllocPriorityAdvisor.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -165,7 +165,7 @@ INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(SpillPlacementWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysisLegacy) -INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis) +INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysisLegacy) INITIALIZE_PASS_END(RAGreedy, "greedy", "Greedy Register Allocator", false, false) @@ -220,7 +220,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -2770,8 +2770,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { getAnalysis().getProvider(); EvictAdvisor = EvictAdvisorProvider.getAdvisor(*MF, *this, MBFI, Loops); - PriorityAdvisor = - getAnalysis().getAdvisor(*MF, *this); + PriorityAdvisor = getAnalysis() + .getProvider() + .getAdvisor(*MF, *this, *Indexes); VRAI = std::make_unique(*MF, *LIS, *VRM, *Loops, *MBFI); SpillerInstance.reset( diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h index 1d55a8241d760..1698607984bcd 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -14,7 +14,6 @@ #include "InterferenceCache.h" #include "RegAllocBase.h" -#include "RegAllocPriorityAdvisor.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" @@ -28,6 +27,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegAllocPriorityAdvisor.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SpillPlacement.h" #include "llvm/CodeGen/Spiller.h" diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp index 4525b8fc5a383..544ff72a04efd 
100644 --- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp +++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "RegAllocPriorityAdvisor.h" +#include "llvm/CodeGen/RegAllocPriorityAdvisor.h" #include "RegAllocGreedy.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -20,107 +20,180 @@ using namespace llvm; -static cl::opt Mode( +static cl::opt Mode( "regalloc-enable-priority-advisor", cl::Hidden, - cl::init(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default), + cl::init(RegAllocPriorityAdvisorProvider::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values( - clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default, + clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Default, "default", "Default"), - clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release, + clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Release, "release", "precompiled"), - clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development, + clEnumValN(RegAllocPriorityAdvisorProvider::AdvisorMode::Development, "development", "for training"), clEnumValN( - RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy, "dummy", + RegAllocPriorityAdvisorProvider::AdvisorMode::Dummy, "dummy", "prioritize low virtual register numbers for test and debug"))); -char RegAllocPriorityAdvisorAnalysis::ID = 0; -INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority", +char RegAllocPriorityAdvisorAnalysisLegacy::ID = 0; +INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysisLegacy, "regalloc-priority", "Regalloc priority policy", false, true) namespace { -class DefaultPriorityAdvisorAnalysis final - : public RegAllocPriorityAdvisorAnalysis { + +class DefaultPriorityAdvisorProvider final + : public RegAllocPriorityAdvisorProvider { +public: + DefaultPriorityAdvisorProvider(bool NotAsRequested, LLVMContext &Ctx) + : RegAllocPriorityAdvisorProvider(AdvisorMode::Default) { + if (NotAsRequested) + Ctx.emitError("Requested regalloc priority advisor analysis " + "could not be created. Using default"); + } + + // support for isa<> and dyn_cast. + static bool classof(const RegAllocPriorityAdvisorProvider *R) { + return R->getAdvisorMode() == AdvisorMode::Default; + } + + std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes &SI) override { + return std::make_unique(MF, RA, &SI); + } +}; + +class DummyPriorityAdvisorProvider final + : public RegAllocPriorityAdvisorProvider { +public: + DummyPriorityAdvisorProvider() + : RegAllocPriorityAdvisorProvider(AdvisorMode::Dummy) {} + + static bool classof(const RegAllocPriorityAdvisorProvider *R) { + return R->getAdvisorMode() == AdvisorMode::Dummy; + } + + std::unique_ptr + getAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes &SI) override { + return std::make_unique(MF, RA, &SI); + } +}; + +class DefaultPriorityAdvisorAnalysisLegacy final + : public RegAllocPriorityAdvisorAnalysisLegacy { public: - DefaultPriorityAdvisorAnalysis(bool NotAsRequested) - : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Default), + DefaultPriorityAdvisorAnalysisLegacy(bool NotAsRequested) + : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Default), + NotAsRequested(NotAsRequested) {} // support for isa<> and dyn_cast.
- static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + static bool classof(const RegAllocPriorityAdvisorAnalysisLegacy *R) { return R->getAdvisorMode() == AdvisorMode::Default; } private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); - } - std::unique_ptr - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { - return std::make_unique( - MF, RA, &getAnalysis().getSI()); + RegAllocPriorityAdvisorAnalysisLegacy::getAnalysisUsage(AU); } + bool doInitialization(Module &M) override { - if (NotAsRequested) - M.getContext().emitError("Requested regalloc priority advisor analysis " - "could be created. Using default"); - return RegAllocPriorityAdvisorAnalysis::doInitialization(M); + Provider.reset( + new DefaultPriorityAdvisorProvider(NotAsRequested, M.getContext())); + return false; } + const bool NotAsRequested; }; class DummyPriorityAdvisorAnalysis final - : public RegAllocPriorityAdvisorAnalysis { + : public RegAllocPriorityAdvisorAnalysisLegacy { public: + using RegAllocPriorityAdvisorAnalysisLegacy::AdvisorMode; DummyPriorityAdvisorAnalysis() - : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Dummy) {} + : RegAllocPriorityAdvisorAnalysisLegacy(AdvisorMode::Dummy) {} // support for isa<> and dyn_cast. - static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + static bool classof(const RegAllocPriorityAdvisorAnalysisLegacy *R) { return R->getAdvisorMode() == AdvisorMode::Dummy; } private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); + RegAllocPriorityAdvisorAnalysisLegacy::getAnalysisUsage(AU); } - std::unique_ptr - getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { - return std::make_unique( - MF, RA, &getAnalysis().getSI()); + bool doInitialization(Module &M) override { + Provider.reset(new DummyPriorityAdvisorProvider()); + return false; } }; } // namespace -template <> Pass *llvm::callDefaultCtor() { +void RegAllocPriorityAdvisorAnalysis::initializeProvider(LLVMContext &Ctx) { + if (Provider) + return; + switch (Mode) { + case RegAllocPriorityAdvisorProvider::AdvisorMode::Dummy: + Provider.reset(new DummyPriorityAdvisorProvider()); + return; + case RegAllocPriorityAdvisorProvider::AdvisorMode::Default: + Provider.reset( + new DefaultPriorityAdvisorProvider(/*NotAsRequested=*/false, Ctx)); + return; + case RegAllocPriorityAdvisorProvider::AdvisorMode::Development: +#if defined(LLVM_HAVE_TFLITE) + Provider.reset(createDevelopmentModePriorityAdvisorProvider(Ctx)); +#else + Provider.reset( + new DefaultPriorityAdvisorProvider(/*NotAsRequested=*/true, Ctx)); +#endif + return; + case RegAllocPriorityAdvisorProvider::AdvisorMode::Release: + Provider.reset(createReleaseModePriorityAdvisorProvider()); + return; + } +} + +AnalysisKey RegAllocPriorityAdvisorAnalysis::Key; + +RegAllocPriorityAdvisorAnalysis::Result +RegAllocPriorityAdvisorAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + // Lazily initialize the provider. + initializeProvider(MF.getFunction().getContext()); + // The requiring analysis will construct the advisor. 
+ return Result{Provider.get()}; +} + +template <> +Pass *llvm::callDefaultCtor() { Pass *Ret = nullptr; switch (Mode) { - case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default: - Ret = new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ false); - break; - case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy: - Ret = new DummyPriorityAdvisorAnalysis(); + case RegAllocPriorityAdvisorProvider::AdvisorMode::Default: + Ret = new DefaultPriorityAdvisorAnalysisLegacy(/*NotAsRequested*/ false); break; - case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development: + case RegAllocPriorityAdvisorProvider::AdvisorMode::Development: #if defined(LLVM_HAVE_TFLITE) - Ret = createDevelopmentModePriorityAdvisor(); + Ret = createDevelopmentModePriorityAdvisorAnalysis(); #endif break; - case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release: - Ret = createReleaseModePriorityAdvisor(); + case RegAllocPriorityAdvisorProvider::AdvisorMode::Release: + Ret = createReleaseModePriorityAdvisorAnalysis(); + break; + case RegAllocPriorityAdvisorProvider::AdvisorMode::Dummy: + Ret = new DummyPriorityAdvisorAnalysis(); break; } if (Ret) return Ret; - return new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ true); + return new DefaultPriorityAdvisorAnalysisLegacy(/*NotAsRequested*/ true); } -StringRef RegAllocPriorityAdvisorAnalysis::getPassName() const { +StringRef RegAllocPriorityAdvisorAnalysisLegacy::getPassName() const { switch (getAdvisorMode()) { case AdvisorMode::Default: return "Default Regalloc Priority Advisor"; diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp index ca51b670b46cc..5a4c3a0efef2a 100644 --- a/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/llvm/lib/CodeGen/RegisterPressure.cpp @@ -231,7 +231,7 @@ void LiveRegSet::clear() { } static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) { - if (Register::isVirtualRegister(Reg)) + if (Register(Reg).isVirtual()) return &LIS.getInterval(Reg); return LIS.getCachedRegUnit(Reg); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f52447b86a7e4..b07f3814d9d2d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -27419,23 +27419,20 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { continue; } - APInt Bits; - if (auto *Cst = dyn_cast(Elt)) - Bits = Cst->getAPIntValue(); - else if (auto *CstFP = dyn_cast(Elt)) - Bits = CstFP->getValueAPF().bitcastToAPInt(); - else + std::optional Bits = Elt->bitcastToAPInt(); + if (!Bits) return SDValue(); // Extract the sub element from the constant bit mask. 
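// The Elt->bitcastToAPInt() used above appears to fold the two deleted
// dyn_casts into a single optional-returning query; a minimal sketch of
// the assumed semantics (free function and name ours, not the real API):
static std::optional<APInt> constantBitsSketch(SDValue V) {
  if (auto *C = dyn_cast<ConstantSDNode>(V))
    return C->getAPIntValue();                 // integer constant element
  if (auto *CF = dyn_cast<ConstantFPSDNode>(V))
    return CF->getValueAPF().bitcastToAPInt(); // FP constant, bit pattern
  return std::nullopt;                         // not a constant element
}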
if (DAG.getDataLayout().isBigEndian()) - Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits); + *Bits = + Bits->extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits); else - Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits); + *Bits = Bits->extractBits(NumSubBits, SubIdx * NumSubBits); - if (Bits.isAllOnes()) + if (Bits->isAllOnes()) Indices.push_back(i); - else if (Bits == 0) + else if (*Bits == 0) Indices.push_back(i + NumSubElts); else return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 5a314570c776a..91571ed204317 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -2229,8 +2229,7 @@ Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode, Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, uint32_t Idx) { Register ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - assert(Register::isVirtualRegister(Op0) && - "Cannot yet extract from physregs"); + assert(Register(Op0).isVirtual() && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 4b79bd28e2750..0244c170a2123 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1569,6 +1569,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; case ISD::STRICT_FREM: case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; + case ISD::FMODF: ExpandFloatRes_FMODF(N); break; // clang-format on } @@ -1619,6 +1620,23 @@ void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, GetPairElements(Tmp.first, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FMODF(SDNode *N) { + ExpandFloatRes_UnaryWithTwoFPResults(N, RTLIB::getMODF(N->getValueType(0)), + /*CallRetResNo=*/0); +} + +void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( + SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo) { + assert(!N->isStrictFPOpcode() && "strictfp not implemented"); + SmallVector Results; + DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo); + for (auto [ResNo, Res] : enumerate(Results)) { + SDValue Lo, Hi; + GetPairElements(Res, Lo, Hi); + SetExpandedFloat(SDValue(N, ResNo), Lo, Hi); + } +} + void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 69c687a797485..cac969f7e2185 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -668,6 +668,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue &Lo, SDValue &Hi); void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_UnaryWithTwoFPResults( + SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo = {}); + // clang-format off void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -714,6 +717,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void 
ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMODF(SDNode *N); // clang-format on // Float Operand Expansion. diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d04bd6e98097e..4125d223dc325 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -116,11 +116,11 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, if (Op != 2 || User->getOpcode() != ISD::CopyToReg) return; - unsigned Reg = cast(User->getOperand(1))->getReg(); + Register Reg = cast(User->getOperand(1))->getReg(); if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost)) return; - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) return; unsigned ResNo = User->getOperand(2).getResNo(); @@ -664,8 +664,8 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); if (Latency > 1U && Use->getOpcode() == ISD::CopyToReg && !BB->succ_empty()) { - unsigned Reg = cast(Use->getOperand(1))->getReg(); - if (Register::isVirtualRegister(Reg)) + Register Reg = cast(Use->getOperand(1))->getReg(); + if (Reg.isVirtual()) // This copy is a liveout value. It is likely coalesced, so reduce the // latency so not to penalize the def. // FIXME: need target specific adjustment here? diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index de092cba333c2..0a3210a10d394 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -152,14 +152,10 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { if (N->getOpcode() == ISD::SPLAT_VECTOR) { - unsigned EltSize = - N->getValueType(0).getVectorElementType().getSizeInBits(); - if (auto *Op0 = dyn_cast(N->getOperand(0))) { - SplatVal = Op0->getAPIntValue().trunc(EltSize); - return true; - } - if (auto *Op0 = dyn_cast(N->getOperand(0))) { - SplatVal = Op0->getValueAPF().bitcastToAPInt().trunc(EltSize); + if (auto OptAPInt = N->getOperand(0)->bitcastToAPInt()) { + unsigned EltSize = + N->getValueType(0).getVectorElementType().getSizeInBits(); + SplatVal = OptAPInt->trunc(EltSize); return true; } } @@ -215,12 +211,9 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) { // we care if the resultant vector is all ones, not whether the individual // constants are. SDValue NotZero = N->getOperand(i); - unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); - if (ConstantSDNode *CN = dyn_cast(NotZero)) { - if (CN->getAPIntValue().countr_one() < EltSize) - return false; - } else if (ConstantFPSDNode *CFPN = dyn_cast(NotZero)) { - if (CFPN->getValueAPF().bitcastToAPInt().countr_one() < EltSize) + if (auto OptAPInt = NotZero->bitcastToAPInt()) { + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); + if (OptAPInt->countr_one() < EltSize) return false; } else return false; @@ -259,12 +252,9 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) { // We only want to check enough bits to cover the vector elements, because // we care if the resultant vector is all zeros, not whether the individual // constants are. 
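// Concretely (values ours): with 16-bit vector elements, an element
// constant wider than the element still denotes an all-zeros splat as
// long as its low 16 bits are zero, e.g. APInt C(32, 0xFFFF0000) has
// countr_zero() == 16 >= EltSize; symmetrically, countr_one() >= EltSize
// covers the all-ones case in the previous hunk.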
- unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); - if (ConstantSDNode *CN = dyn_cast(Op)) { - if (CN->getAPIntValue().countr_zero() < EltSize) - return false; - } else if (ConstantFPSDNode *CFPN = dyn_cast(Op)) { - if (CFPN->getValueAPF().bitcastToAPInt().countr_zero() < EltSize) + if (auto OptAPInt = Op->bitcastToAPInt()) { + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); + if (OptAPInt->countr_zero() < EltSize) return false; } else return false; @@ -3405,13 +3395,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, KnownBits Known(BitWidth); // Don't know anything. - if (auto *C = dyn_cast(Op)) { + if (auto OptAPInt = Op->bitcastToAPInt()) { // We know all of the bits for a constant! - return KnownBits::makeConstant(C->getAPIntValue()); - } - if (auto *C = dyn_cast(Op)) { - // We know all of the bits for a constant fp! - return KnownBits::makeConstant(C->getValueAPF().bitcastToAPInt()); + return KnownBits::makeConstant(*std::move(OptAPInt)); } if (Depth >= MaxRecursionDepth) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1c58a7f05446c..133ac6b1327dd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -908,8 +908,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, // add an assert node. - if (!Register::isVirtualRegister(Regs[Part + i]) || - !RegisterVT.isInteger()) + if (!Regs[Part + i].isVirtual() || !RegisterVT.isInteger()) continue; const FunctionLoweringInfo::LiveOutInfo *LOI = @@ -1023,7 +1022,7 @@ void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching, InlineAsm::Flag Flag(Code, Regs.size()); if (HasMatching) Flag.setMatchingOp(MatchingIdx); - else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) { + else if (!Regs.empty() && Regs.front().isVirtual()) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline // assembly as well as normal instructions. 
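// A recurring mechanical change in this patch: the static Register
// helpers on raw unsigneds give way to constructing a Register and
// calling its members. A minimal sketch (VReg is ours):
Register VReg = Register::index2VirtReg(0); // first virtual register
bool IsVirt = VReg.isVirtual();             // was Register::isVirtualRegister(Reg)
unsigned Idx = VReg.virtRegIndex();         // was Register::virtReg2Index(Reg)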
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index e735c904e1b60..cb55a00b9e03b 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -160,8 +160,8 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { - if (Register::isVirtualRegister(Unit)) { - OS << '%' << Register::virtReg2Index(Unit); + if (Register(Unit).isVirtual()) { + OS << '%' << Register(Unit).virtRegIndex(); } else { OS << printRegUnit(Unit, TRI); } diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp index 2d74a91f62dc0..ed7c57a930aca 100644 --- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp +++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp @@ -12,7 +12,7 @@ namespace llvm::driver { -TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, +TargetLibraryInfoImpl *createTLII(const llvm::Triple &TargetTriple, driver::VectorLibrary Veclib) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 96939f89279c6..5bb2e7d0abdd9 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -129,6 +129,7 @@ #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" #include "llvm/CodeGen/RegAllocFast.h" +#include "llvm/CodeGen/RegAllocPriorityAdvisor.h" #include "llvm/CodeGen/RegUsageInfoCollector.h" #include "llvm/CodeGen/RegUsageInfoPropagate.h" #include "llvm/CodeGen/RegisterCoalescerPass.h" diff --git a/llvm/lib/SandboxIR/BasicBlock.cpp b/llvm/lib/SandboxIR/BasicBlock.cpp index 983a5e8b8825e..b45c046402487 100644 --- a/llvm/lib/SandboxIR/BasicBlock.cpp +++ b/llvm/lib/SandboxIR/BasicBlock.cpp @@ -67,12 +67,6 @@ void BasicBlock::buildBasicBlockFromLLVMIR(llvm::BasicBlock *LLVMBB) { // Skip instruction's label operands if (isa(Op)) continue; - // Skip metadata - if (isa(Op)) - continue; - // Skip asm - if (isa(Op)) - continue; Ctx.getOrCreateValue(Op); } } diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp index 6a397b02d6bde..21039ce7ed834 100644 --- a/llvm/lib/SandboxIR/Context.cpp +++ b/llvm/lib/SandboxIR/Context.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/SandboxIR/Context.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/SandboxIR/Function.h" #include "llvm/SandboxIR/Instruction.h" #include "llvm/SandboxIR/Module.h" @@ -58,26 +59,264 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { if (!Pair.second) return It->second.get(); - if (auto *C = dyn_cast(LLVMV)) { - switch (C->getValueID()) { + // Instruction + if (auto *LLVMI = dyn_cast(LLVMV)) { + switch (LLVMI->getOpcode()) { + case llvm::Instruction::VAArg: { + auto *LLVMVAArg = cast(LLVMV); + It->second = std::unique_ptr(new VAArgInst(LLVMVAArg, *this)); + return It->second.get(); + } + case llvm::Instruction::Freeze: { + auto *LLVMFreeze = cast(LLVMV); + It->second = + std::unique_ptr(new FreezeInst(LLVMFreeze, *this)); + return It->second.get(); + } + case llvm::Instruction::Fence: { + auto *LLVMFence = cast(LLVMV); + It->second = std::unique_ptr(new FenceInst(LLVMFence, *this)); + return It->second.get(); + } + case llvm::Instruction::Select: { + auto *LLVMSel = cast(LLVMV); + It->second = 
std::unique_ptr(new SelectInst(LLVMSel, *this)); + return It->second.get(); + } + case llvm::Instruction::ExtractElement: { + auto *LLVMIns = cast(LLVMV); + It->second = std::unique_ptr( + new ExtractElementInst(LLVMIns, *this)); + return It->second.get(); + } + case llvm::Instruction::InsertElement: { + auto *LLVMIns = cast(LLVMV); + It->second = std::unique_ptr( + new InsertElementInst(LLVMIns, *this)); + return It->second.get(); + } + case llvm::Instruction::ShuffleVector: { + auto *LLVMIns = cast(LLVMV); + It->second = std::unique_ptr( + new ShuffleVectorInst(LLVMIns, *this)); + return It->second.get(); + } + case llvm::Instruction::ExtractValue: { + auto *LLVMIns = cast(LLVMV); + It->second = std::unique_ptr( + new ExtractValueInst(LLVMIns, *this)); + return It->second.get(); + } + case llvm::Instruction::InsertValue: { + auto *LLVMIns = cast(LLVMV); + It->second = + std::unique_ptr(new InsertValueInst(LLVMIns, *this)); + return It->second.get(); + } + case llvm::Instruction::Br: { + auto *LLVMBr = cast(LLVMV); + It->second = std::unique_ptr(new BranchInst(LLVMBr, *this)); + return It->second.get(); + } + case llvm::Instruction::Load: { + auto *LLVMLd = cast(LLVMV); + It->second = std::unique_ptr(new LoadInst(LLVMLd, *this)); + return It->second.get(); + } + case llvm::Instruction::Store: { + auto *LLVMSt = cast(LLVMV); + It->second = std::unique_ptr(new StoreInst(LLVMSt, *this)); + return It->second.get(); + } + case llvm::Instruction::Ret: { + auto *LLVMRet = cast(LLVMV); + It->second = std::unique_ptr(new ReturnInst(LLVMRet, *this)); + return It->second.get(); + } + case llvm::Instruction::Call: { + auto *LLVMCall = cast(LLVMV); + It->second = std::unique_ptr(new CallInst(LLVMCall, *this)); + return It->second.get(); + } + case llvm::Instruction::Invoke: { + auto *LLVMInvoke = cast(LLVMV); + It->second = + std::unique_ptr(new InvokeInst(LLVMInvoke, *this)); + return It->second.get(); + } + case llvm::Instruction::CallBr: { + auto *LLVMCallBr = cast(LLVMV); + It->second = + std::unique_ptr(new CallBrInst(LLVMCallBr, *this)); + return It->second.get(); + } + case llvm::Instruction::LandingPad: { + auto *LLVMLPad = cast(LLVMV); + It->second = + std::unique_ptr(new LandingPadInst(LLVMLPad, *this)); + return It->second.get(); + } + case llvm::Instruction::CatchPad: { + auto *LLVMCPI = cast(LLVMV); + It->second = + std::unique_ptr(new CatchPadInst(LLVMCPI, *this)); + return It->second.get(); + } + case llvm::Instruction::CleanupPad: { + auto *LLVMCPI = cast(LLVMV); + It->second = + std::unique_ptr(new CleanupPadInst(LLVMCPI, *this)); + return It->second.get(); + } + case llvm::Instruction::CatchRet: { + auto *LLVMCRI = cast(LLVMV); + It->second = + std::unique_ptr(new CatchReturnInst(LLVMCRI, *this)); + return It->second.get(); + } + case llvm::Instruction::CleanupRet: { + auto *LLVMCRI = cast(LLVMV); + It->second = std::unique_ptr( + new CleanupReturnInst(LLVMCRI, *this)); + return It->second.get(); + } + case llvm::Instruction::GetElementPtr: { + auto *LLVMGEP = cast(LLVMV); + It->second = std::unique_ptr( + new GetElementPtrInst(LLVMGEP, *this)); + return It->second.get(); + } + case llvm::Instruction::CatchSwitch: { + auto *LLVMCatchSwitchInst = cast(LLVMV); + It->second = std::unique_ptr( + new CatchSwitchInst(LLVMCatchSwitchInst, *this)); + return It->second.get(); + } + case llvm::Instruction::Resume: { + auto *LLVMResumeInst = cast(LLVMV); + It->second = + std::unique_ptr(new ResumeInst(LLVMResumeInst, *this)); + return It->second.get(); + } + case llvm::Instruction::Switch: { + 
auto *LLVMSwitchInst = cast(LLVMV); + It->second = + std::unique_ptr(new SwitchInst(LLVMSwitchInst, *this)); + return It->second.get(); + } + case llvm::Instruction::FNeg: { + auto *LLVMUnaryOperator = cast(LLVMV); + It->second = std::unique_ptr( + new UnaryOperator(LLVMUnaryOperator, *this)); + return It->second.get(); + } + case llvm::Instruction::Add: + case llvm::Instruction::FAdd: + case llvm::Instruction::Sub: + case llvm::Instruction::FSub: + case llvm::Instruction::Mul: + case llvm::Instruction::FMul: + case llvm::Instruction::UDiv: + case llvm::Instruction::SDiv: + case llvm::Instruction::FDiv: + case llvm::Instruction::URem: + case llvm::Instruction::SRem: + case llvm::Instruction::FRem: + case llvm::Instruction::Shl: + case llvm::Instruction::LShr: + case llvm::Instruction::AShr: + case llvm::Instruction::And: + case llvm::Instruction::Or: + case llvm::Instruction::Xor: { + auto *LLVMBinaryOperator = cast(LLVMV); + It->second = std::unique_ptr( + new BinaryOperator(LLVMBinaryOperator, *this)); + return It->second.get(); + } + case llvm::Instruction::AtomicRMW: { + auto *LLVMAtomicRMW = cast(LLVMV); + It->second = std::unique_ptr( + new AtomicRMWInst(LLVMAtomicRMW, *this)); + return It->second.get(); + } + case llvm::Instruction::AtomicCmpXchg: { + auto *LLVMAtomicCmpXchg = cast(LLVMV); + It->second = std::unique_ptr( + new AtomicCmpXchgInst(LLVMAtomicCmpXchg, *this)); + return It->second.get(); + } + case llvm::Instruction::Alloca: { + auto *LLVMAlloca = cast(LLVMV); + It->second = + std::unique_ptr(new AllocaInst(LLVMAlloca, *this)); + return It->second.get(); + } + case llvm::Instruction::ZExt: + case llvm::Instruction::SExt: + case llvm::Instruction::FPToUI: + case llvm::Instruction::FPToSI: + case llvm::Instruction::FPExt: + case llvm::Instruction::PtrToInt: + case llvm::Instruction::IntToPtr: + case llvm::Instruction::SIToFP: + case llvm::Instruction::UIToFP: + case llvm::Instruction::Trunc: + case llvm::Instruction::FPTrunc: + case llvm::Instruction::BitCast: + case llvm::Instruction::AddrSpaceCast: { + auto *LLVMCast = cast(LLVMV); + It->second = std::unique_ptr(new CastInst(LLVMCast, *this)); + return It->second.get(); + } + case llvm::Instruction::PHI: { + auto *LLVMPhi = cast(LLVMV); + It->second = std::unique_ptr(new PHINode(LLVMPhi, *this)); + return It->second.get(); + } + case llvm::Instruction::ICmp: { + auto *LLVMICmp = cast(LLVMV); + It->second = std::unique_ptr(new ICmpInst(LLVMICmp, *this)); + return It->second.get(); + } + case llvm::Instruction::FCmp: { + auto *LLVMFCmp = cast(LLVMV); + It->second = std::unique_ptr(new FCmpInst(LLVMFCmp, *this)); + return It->second.get(); + } + case llvm::Instruction::Unreachable: { + auto *LLVMUnreachable = cast(LLVMV); + It->second = std::unique_ptr( + new UnreachableInst(LLVMUnreachable, *this)); + return It->second.get(); + } + default: + break; + } + It->second = std::unique_ptr( + new OpaqueInst(cast(LLVMV), *this)); + return It->second.get(); + } + // Constant + if (auto *LLVMC = dyn_cast(LLVMV)) { + switch (LLVMC->getValueID()) { case llvm::Value::ConstantIntVal: It->second = std::unique_ptr( - new ConstantInt(cast(C), *this)); + new ConstantInt(cast(LLVMC), *this)); return It->second.get(); case llvm::Value::ConstantFPVal: It->second = std::unique_ptr( - new ConstantFP(cast(C), *this)); + new ConstantFP(cast(LLVMC), *this)); return It->second.get(); case llvm::Value::BlockAddressVal: It->second = std::unique_ptr( - new BlockAddress(cast(C), *this)); + new BlockAddress(cast(LLVMC), *this)); return It->second.get(); 
case llvm::Value::ConstantTokenNoneVal: It->second = std::unique_ptr( - new ConstantTokenNone(cast(C), *this)); + new ConstantTokenNone(cast(LLVMC), *this)); return It->second.get(); case llvm::Value::ConstantAggregateZeroVal: { - auto *CAZ = cast(C); + auto *CAZ = cast(LLVMC); It->second = std::unique_ptr( new ConstantAggregateZero(CAZ, *this)); auto *Ret = It->second.get(); @@ -90,19 +329,19 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { return Ret; } case llvm::Value::ConstantPointerNullVal: - It->second = std::unique_ptr( - new ConstantPointerNull(cast(C), *this)); + It->second = std::unique_ptr(new ConstantPointerNull( + cast(LLVMC), *this)); return It->second.get(); case llvm::Value::PoisonValueVal: It->second = std::unique_ptr( - new PoisonValue(cast(C), *this)); + new PoisonValue(cast(LLVMC), *this)); return It->second.get(); case llvm::Value::UndefValueVal: It->second = std::unique_ptr( - new UndefValue(cast(C), *this)); + new UndefValue(cast(LLVMC), *this)); return It->second.get(); case llvm::Value::DSOLocalEquivalentVal: { - auto *DSOLE = cast(C); + auto *DSOLE = cast(LLVMC); It->second = std::unique_ptr( new DSOLocalEquivalent(DSOLE, *this)); auto *Ret = It->second.get(); @@ -111,297 +350,77 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { } case llvm::Value::ConstantArrayVal: It->second = std::unique_ptr( - new ConstantArray(cast(C), *this)); + new ConstantArray(cast(LLVMC), *this)); break; case llvm::Value::ConstantStructVal: It->second = std::unique_ptr( - new ConstantStruct(cast(C), *this)); + new ConstantStruct(cast(LLVMC), *this)); break; case llvm::Value::ConstantVectorVal: It->second = std::unique_ptr( - new ConstantVector(cast(C), *this)); + new ConstantVector(cast(LLVMC), *this)); break; case llvm::Value::FunctionVal: It->second = std::unique_ptr( - new Function(cast(C), *this)); + new Function(cast(LLVMC), *this)); break; case llvm::Value::GlobalIFuncVal: It->second = std::unique_ptr( - new GlobalIFunc(cast(C), *this)); + new GlobalIFunc(cast(LLVMC), *this)); break; case llvm::Value::GlobalVariableVal: It->second = std::unique_ptr( - new GlobalVariable(cast(C), *this)); + new GlobalVariable(cast(LLVMC), *this)); break; case llvm::Value::GlobalAliasVal: It->second = std::unique_ptr( - new GlobalAlias(cast(C), *this)); + new GlobalAlias(cast(LLVMC), *this)); break; case llvm::Value::NoCFIValueVal: It->second = std::unique_ptr( - new NoCFIValue(cast(C), *this)); + new NoCFIValue(cast(LLVMC), *this)); break; case llvm::Value::ConstantPtrAuthVal: It->second = std::unique_ptr( - new ConstantPtrAuth(cast(C), *this)); + new ConstantPtrAuth(cast(LLVMC), *this)); break; case llvm::Value::ConstantExprVal: It->second = std::unique_ptr( - new ConstantExpr(cast(C), *this)); + new ConstantExpr(cast(LLVMC), *this)); break; default: - It->second = std::unique_ptr(new Constant(C, *this)); + It->second = std::unique_ptr(new Constant(LLVMC, *this)); break; } auto *NewC = It->second.get(); - for (llvm::Value *COp : C->operands()) - getOrCreateValueInternal(COp, C); + for (llvm::Value *COp : LLVMC->operands()) + getOrCreateValueInternal(COp, LLVMC); return NewC; } - if (auto *Arg = dyn_cast(LLVMV)) { - It->second = std::unique_ptr(new Argument(Arg, *this)); + // Argument + if (auto *LLVMArg = dyn_cast(LLVMV)) { + It->second = std::unique_ptr(new Argument(LLVMArg, *this)); return It->second.get(); } - if (auto *BB = dyn_cast(LLVMV)) { + // BasicBlock + if (auto *LLVMBB = dyn_cast(LLVMV)) { assert(isa(U) && "This won't 
create a SBBB, don't call this function directly!"); - if (auto *SBBB = getValue(BB)) + if (auto *SBBB = getValue(LLVMBB)) return SBBB; return nullptr; } - assert(isa(LLVMV) && "Expected Instruction"); - - switch (cast(LLVMV)->getOpcode()) { - case llvm::Instruction::VAArg: { - auto *LLVMVAArg = cast(LLVMV); - It->second = std::unique_ptr(new VAArgInst(LLVMVAArg, *this)); - return It->second.get(); - } - case llvm::Instruction::Freeze: { - auto *LLVMFreeze = cast(LLVMV); - It->second = std::unique_ptr(new FreezeInst(LLVMFreeze, *this)); - return It->second.get(); - } - case llvm::Instruction::Fence: { - auto *LLVMFence = cast(LLVMV); - It->second = std::unique_ptr(new FenceInst(LLVMFence, *this)); - return It->second.get(); - } - case llvm::Instruction::Select: { - auto *LLVMSel = cast(LLVMV); - It->second = std::unique_ptr(new SelectInst(LLVMSel, *this)); + // Metadata + if (auto *LLVMMD = dyn_cast(LLVMV)) { + It->second = std::unique_ptr(new OpaqueValue(LLVMMD, *this)); return It->second.get(); } - case llvm::Instruction::ExtractElement: { - auto *LLVMIns = cast(LLVMV); - It->second = std::unique_ptr( - new ExtractElementInst(LLVMIns, *this)); + // InlineAsm + if (auto *LLVMAsm = dyn_cast(LLVMV)) { + It->second = std::unique_ptr(new OpaqueValue(LLVMAsm, *this)); return It->second.get(); } - case llvm::Instruction::InsertElement: { - auto *LLVMIns = cast(LLVMV); - It->second = std::unique_ptr( - new InsertElementInst(LLVMIns, *this)); - return It->second.get(); - } - case llvm::Instruction::ShuffleVector: { - auto *LLVMIns = cast(LLVMV); - It->second = std::unique_ptr( - new ShuffleVectorInst(LLVMIns, *this)); - return It->second.get(); - } - case llvm::Instruction::ExtractValue: { - auto *LLVMIns = cast(LLVMV); - It->second = - std::unique_ptr(new ExtractValueInst(LLVMIns, *this)); - return It->second.get(); - } - case llvm::Instruction::InsertValue: { - auto *LLVMIns = cast(LLVMV); - It->second = - std::unique_ptr(new InsertValueInst(LLVMIns, *this)); - return It->second.get(); - } - case llvm::Instruction::Br: { - auto *LLVMBr = cast(LLVMV); - It->second = std::unique_ptr(new BranchInst(LLVMBr, *this)); - return It->second.get(); - } - case llvm::Instruction::Load: { - auto *LLVMLd = cast(LLVMV); - It->second = std::unique_ptr(new LoadInst(LLVMLd, *this)); - return It->second.get(); - } - case llvm::Instruction::Store: { - auto *LLVMSt = cast(LLVMV); - It->second = std::unique_ptr(new StoreInst(LLVMSt, *this)); - return It->second.get(); - } - case llvm::Instruction::Ret: { - auto *LLVMRet = cast(LLVMV); - It->second = std::unique_ptr(new ReturnInst(LLVMRet, *this)); - return It->second.get(); - } - case llvm::Instruction::Call: { - auto *LLVMCall = cast(LLVMV); - It->second = std::unique_ptr(new CallInst(LLVMCall, *this)); - return It->second.get(); - } - case llvm::Instruction::Invoke: { - auto *LLVMInvoke = cast(LLVMV); - It->second = std::unique_ptr(new InvokeInst(LLVMInvoke, *this)); - return It->second.get(); - } - case llvm::Instruction::CallBr: { - auto *LLVMCallBr = cast(LLVMV); - It->second = std::unique_ptr(new CallBrInst(LLVMCallBr, *this)); - return It->second.get(); - } - case llvm::Instruction::LandingPad: { - auto *LLVMLPad = cast(LLVMV); - It->second = - std::unique_ptr(new LandingPadInst(LLVMLPad, *this)); - return It->second.get(); - } - case llvm::Instruction::CatchPad: { - auto *LLVMCPI = cast(LLVMV); - It->second = - std::unique_ptr(new CatchPadInst(LLVMCPI, *this)); - return It->second.get(); - } - case llvm::Instruction::CleanupPad: { - auto *LLVMCPI = 
cast(LLVMV); - It->second = - std::unique_ptr(new CleanupPadInst(LLVMCPI, *this)); - return It->second.get(); - } - case llvm::Instruction::CatchRet: { - auto *LLVMCRI = cast(LLVMV); - It->second = - std::unique_ptr(new CatchReturnInst(LLVMCRI, *this)); - return It->second.get(); - } - case llvm::Instruction::CleanupRet: { - auto *LLVMCRI = cast(LLVMV); - It->second = std::unique_ptr( - new CleanupReturnInst(LLVMCRI, *this)); - return It->second.get(); - } - case llvm::Instruction::GetElementPtr: { - auto *LLVMGEP = cast(LLVMV); - It->second = std::unique_ptr( - new GetElementPtrInst(LLVMGEP, *this)); - return It->second.get(); - } - case llvm::Instruction::CatchSwitch: { - auto *LLVMCatchSwitchInst = cast(LLVMV); - It->second = std::unique_ptr( - new CatchSwitchInst(LLVMCatchSwitchInst, *this)); - return It->second.get(); - } - case llvm::Instruction::Resume: { - auto *LLVMResumeInst = cast(LLVMV); - It->second = - std::unique_ptr(new ResumeInst(LLVMResumeInst, *this)); - return It->second.get(); - } - case llvm::Instruction::Switch: { - auto *LLVMSwitchInst = cast(LLVMV); - It->second = - std::unique_ptr(new SwitchInst(LLVMSwitchInst, *this)); - return It->second.get(); - } - case llvm::Instruction::FNeg: { - auto *LLVMUnaryOperator = cast(LLVMV); - It->second = std::unique_ptr( - new UnaryOperator(LLVMUnaryOperator, *this)); - return It->second.get(); - } - case llvm::Instruction::Add: - case llvm::Instruction::FAdd: - case llvm::Instruction::Sub: - case llvm::Instruction::FSub: - case llvm::Instruction::Mul: - case llvm::Instruction::FMul: - case llvm::Instruction::UDiv: - case llvm::Instruction::SDiv: - case llvm::Instruction::FDiv: - case llvm::Instruction::URem: - case llvm::Instruction::SRem: - case llvm::Instruction::FRem: - case llvm::Instruction::Shl: - case llvm::Instruction::LShr: - case llvm::Instruction::AShr: - case llvm::Instruction::And: - case llvm::Instruction::Or: - case llvm::Instruction::Xor: { - auto *LLVMBinaryOperator = cast(LLVMV); - It->second = std::unique_ptr( - new BinaryOperator(LLVMBinaryOperator, *this)); - return It->second.get(); - } - case llvm::Instruction::AtomicRMW: { - auto *LLVMAtomicRMW = cast(LLVMV); - It->second = - std::unique_ptr(new AtomicRMWInst(LLVMAtomicRMW, *this)); - return It->second.get(); - } - case llvm::Instruction::AtomicCmpXchg: { - auto *LLVMAtomicCmpXchg = cast(LLVMV); - It->second = std::unique_ptr( - new AtomicCmpXchgInst(LLVMAtomicCmpXchg, *this)); - return It->second.get(); - } - case llvm::Instruction::Alloca: { - auto *LLVMAlloca = cast(LLVMV); - It->second = std::unique_ptr(new AllocaInst(LLVMAlloca, *this)); - return It->second.get(); - } - case llvm::Instruction::ZExt: - case llvm::Instruction::SExt: - case llvm::Instruction::FPToUI: - case llvm::Instruction::FPToSI: - case llvm::Instruction::FPExt: - case llvm::Instruction::PtrToInt: - case llvm::Instruction::IntToPtr: - case llvm::Instruction::SIToFP: - case llvm::Instruction::UIToFP: - case llvm::Instruction::Trunc: - case llvm::Instruction::FPTrunc: - case llvm::Instruction::BitCast: - case llvm::Instruction::AddrSpaceCast: { - auto *LLVMCast = cast(LLVMV); - It->second = std::unique_ptr(new CastInst(LLVMCast, *this)); - return It->second.get(); - } - case llvm::Instruction::PHI: { - auto *LLVMPhi = cast(LLVMV); - It->second = std::unique_ptr(new PHINode(LLVMPhi, *this)); - return It->second.get(); - } - case llvm::Instruction::ICmp: { - auto *LLVMICmp = cast(LLVMV); - It->second = std::unique_ptr(new ICmpInst(LLVMICmp, *this)); - return It->second.get(); - } - case 
llvm::Instruction::FCmp: { - auto *LLVMFCmp = cast(LLVMV); - It->second = std::unique_ptr(new FCmpInst(LLVMFCmp, *this)); - return It->second.get(); - } - case llvm::Instruction::Unreachable: { - auto *LLVMUnreachable = cast(LLVMV); - It->second = std::unique_ptr( - new UnreachableInst(LLVMUnreachable, *this)); - return It->second.get(); - } - default: - break; - } - - It->second = std::unique_ptr( - new OpaqueInst(cast(LLVMV), *this)); - return It->second.get(); + llvm_unreachable("Unhandled LLVMV type!"); } Argument *Context::getOrCreateArgument(llvm::Argument *LLVMArg) { diff --git a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index 9e31243cd696c..8de4489de8f28 100644 --- a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -105,14 +105,14 @@ static bool isGPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { if (SubReg) return false; - if (Register::isVirtualRegister(Reg)) + if (Register(Reg).isVirtual()) return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass); return AArch64::GPR64RegClass.contains(Reg); } static bool isFPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { - if (Register::isVirtualRegister(Reg)) + if (Register(Reg).isVirtual()) return (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) && SubReg == 0) || (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) && diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index f1f25b65fc53f..eaca75b80dd12 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -208,6 +208,9 @@ class AArch64AsmPrinter : public AsmPrinter { void emitAttributes(unsigned Flags, uint64_t PAuthABIPlatform, uint64_t PAuthABIVersion, AArch64TargetStreamer *TS); + // Emit expansion of Compare-and-branch pseudo instructions + void emitCBPseudoExpansion(const MachineInstr *MI); + void EmitToStreamer(MCStreamer &S, const MCInst &Inst); void EmitToStreamer(const MCInst &Inst) { EmitToStreamer(*OutStreamer, Inst); @@ -2589,6 +2592,124 @@ AArch64AsmPrinter::lowerBlockAddressConstant(const BlockAddress &BA) { return BAE; } +void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) { + bool IsImm = false; + bool Is32Bit = false; + + switch (MI->getOpcode()) { + default: + llvm_unreachable("This is not a CB pseudo instruction"); + case AArch64::CBWPrr: + Is32Bit = true; + break; + case AArch64::CBXPrr: + Is32Bit = false; + break; + case AArch64::CBWPri: + IsImm = true; + Is32Bit = true; + break; + case AArch64::CBXPri: + IsImm = true; + break; + } + + AArch64CC::CondCode CC = + static_cast(MI->getOperand(0).getImm()); + bool NeedsRegSwap = false; + bool NeedsImmDec = false; + bool NeedsImmInc = false; + + // Decide if we need to either swap register operands or increment/decrement + // immediate operands + unsigned MCOpC; + switch (CC) { + default: + llvm_unreachable("Invalid CB condition code"); + case AArch64CC::EQ: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBEQWri : AArch64::CBEQXri) + : (Is32Bit ? AArch64::CBEQWrr : AArch64::CBEQXrr); + break; + case AArch64CC::NE: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBNEWri : AArch64::CBNEXri) + : (Is32Bit ? AArch64::CBNEWrr : AArch64::CBNEXrr); + break; + case AArch64CC::HS: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri) + : (Is32Bit ? 
AArch64::CBHSWrr : AArch64::CBHSXrr); + NeedsImmDec = IsImm; + break; + case AArch64CC::LO: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri) + : (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr); + NeedsRegSwap = !IsImm; + break; + case AArch64CC::HI: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri) + : (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr); + break; + case AArch64CC::LS: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri) + : (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr); + NeedsRegSwap = !IsImm; + NeedsImmInc = IsImm; + break; + case AArch64CC::GE: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri) + : (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr); + NeedsImmDec = IsImm; + break; + case AArch64CC::LT: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri) + : (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr); + NeedsRegSwap = !IsImm; + break; + case AArch64CC::GT: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri) + : (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr); + break; + case AArch64CC::LE: + MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri) + : (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr); + NeedsRegSwap = !IsImm; + NeedsImmInc = IsImm; + break; + } + + MCInst Inst; + Inst.setOpcode(MCOpC); + + MCOperand Lhs, Rhs, Trgt; + lowerOperand(MI->getOperand(1), Lhs); + lowerOperand(MI->getOperand(2), Rhs); + lowerOperand(MI->getOperand(3), Trgt); + + // Now swap, increment or decrement + if (NeedsRegSwap) { + assert(Lhs.isReg() && "Expected register operand for CB"); + assert(Rhs.isReg() && "Expected register operand for CB"); + Inst.addOperand(Rhs); + Inst.addOperand(Lhs); + } else if (NeedsImmDec) { + Rhs.setImm(Rhs.getImm() - 1); + Inst.addOperand(Lhs); + Inst.addOperand(Rhs); + } else if (NeedsImmInc) { + Rhs.setImm(Rhs.getImm() + 1); + Inst.addOperand(Lhs); + Inst.addOperand(Rhs); + } else { + Inst.addOperand(Lhs); + Inst.addOperand(Rhs); + } + + assert((!IsImm || (Rhs.getImm() >= 0 && Rhs.getImm() < 64)) && + "CB immediate operand out-of-bounds"); + + Inst.addOperand(Trgt); + EmitToStreamer(*OutStreamer, Inst); +} + // Simple pseudo-instructions have their lowering (with expansion to real // instructions) auto-generated. #include "AArch64GenMCPseudoLowering.inc" @@ -3155,13 +3276,20 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { return; case AArch64::BLR: - case AArch64::BR: + case AArch64::BR: { recordIfImportCall(MI); MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); return; } + case AArch64::CBWPri: + case AArch64::CBXPri: + case AArch64::CBWPrr: + case AArch64::CBXPrr: + emitCBPseudoExpansion(MI); + return; + } // Finally, do the automated lowerings for everything else. MCInst TmpInst; diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 0301032e84977..9d7c4448e4cf8 100644 --- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -258,7 +258,7 @@ bool SSACCmpConv::isDeadDef(unsigned DstReg) { // Writes to the zero register are dead. if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) return true; - if (!Register::isVirtualRegister(DstReg)) + if (!Register(DstReg).isVirtual()) return false; // A virtual register def without any uses will be marked dead later, and // eventually replaced by the zero register. 
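The swap/increment/decrement choices in the expansion above rest on two identities: a register compare can be reversed by swapping its operands (the CB encoding only provides GT/GE/HI/HS/EQ/NE register forms), and a non-strict immediate compare can be turned into a strict one against `imm - 1` or `imm + 1`. A quick standalone check of those identities, not part of the patch, plain C++ with hypothetical names:

```cpp
// Sanity-check of the rewrites emitCBPseudoExpansion relies on: LO/LT/LS/LE
// register forms are emitted as HI/GT/HS/GE with swapped operands; immediate
// GE/HS become GT/HI on imm-1, and LE/LS become LT/LO on imm+1.
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t A = -4; A <= 4; ++A) {
    for (int64_t B = -4; B <= 4; ++B) {
      assert((A < B) == (B > A));    // LT reg form -> swap operands, use GT
      assert((A <= B) == (B >= A));  // LE reg form -> swap operands, use GE
      assert((uint64_t(A) < uint64_t(B)) ==
             (uint64_t(B) > uint64_t(A)));  // LO reg form -> swap, use HI
    }
    for (int64_t Imm = 1; Imm <= 62; ++Imm) {
      assert((A >= Imm) == (A > Imm - 1));  // GE imm -> GT (imm - 1)
      assert((A <= Imm) == (A < Imm + 1));  // LE imm -> LT (imm + 1)
      assert((uint64_t(A) >= uint64_t(Imm)) ==
             (uint64_t(A) > uint64_t(Imm - 1)));  // HS imm -> HI (imm - 1)
    }
  }
  return 0;
}
```

The restricted immediate range selected below keeps the adjusted immediate inside the encodable [0, 64) window even after such a rewrite.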
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 1387a224fa660..3ca9107cb2ce5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -507,6 +507,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
   bool SelectAllActivePredicate(SDValue N);
   bool SelectAnyPredicate(SDValue N);
+
+  bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
 };
 
 class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
@@ -7489,3 +7491,52 @@ bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
   Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
   return true;
 }
+
+bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
+                                                      SDValue &Imm) {
+  AArch64CC::CondCode CC =
+      static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
+  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+    // Check conservatively if the immediate fits the valid range [0, 64).
+    // Immediate variants for GE and HS definitely need to be decremented
+    // when lowering the pseudos later, so an immediate of 1 would become 0.
+    // For the inverse conditions LT and LO we don't know for sure if they
+    // will need a decrement, but should the decision be made to reverse the
+    // branch condition, we again end up with the need to decrement.
+    // The same argument holds for LE, LS, GT and HI and possibly
+    // incremented immediates. This can lead to slightly less optimal
+    // codegen, e.g. we never codegen the legal case
+    //   cblt w0, #63, A
+    // because we could end up with the illegal case
+    //   cbge w0, #64, B
+    // should the decision to reverse the branch direction be made. For the
+    // lower bound cases this is no problem, since we can express comparisons
+    // against 0 with either tbz/tbnz or using wzr/xzr.
+    uint64_t LowerBound = 0, UpperBound = 64;
+    switch (CC) {
+    case AArch64CC::GE:
+    case AArch64CC::HS:
+    case AArch64CC::LT:
+    case AArch64CC::LO:
+      LowerBound = 1;
+      break;
+    case AArch64CC::LE:
+    case AArch64CC::LS:
+    case AArch64CC::GT:
+    case AArch64CC::HI:
+      UpperBound = 63;
+      break;
+    default:
+      break;
+    }
+
+    if (CN->getAPIntValue().uge(LowerBound) &&
+        CN->getAPIntValue().ult(UpperBound)) {
+      SDLoc DL(N);
+      Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
+      return true;
+    }
+  }
+
+  return false;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 50be082777835..d519bfc06af05 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2993,6 +2993,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::CTTZ_ELTS)
     MAKE_CASE(AArch64ISD::CALL_ARM64EC_TO_X64)
     MAKE_CASE(AArch64ISD::URSHR_I_PRED)
+    MAKE_CASE(AArch64ISD::CB)
   }
 #undef MAKE_CASE
   return nullptr;
@@ -10603,6 +10604,17 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
                        DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
   }
 
+  // Try to emit Armv9.6 CB instructions. We still prefer tb{n}z/cb{n}z due to
+  // their larger branch displacement, but prefer CB over a cmp + br sequence.
+  if (Subtarget->hasCMPBR() &&
+      AArch64CC::isValidCBCond(changeIntCCToAArch64CC(CC)) &&
+      ProduceNonFlagSettingCondBr) {
+    SDValue Cond =
+        DAG.getTargetConstant(changeIntCCToAArch64CC(CC), dl, MVT::i32);
+    return DAG.getNode(AArch64ISD::CB, dl, MVT::Other, Chain, Cond, LHS, RHS,
+                       Dest);
+  }
+
   SDValue CCVal;
   SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
   return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index b26f28dc79f88..1987c892ac080 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -529,6 +529,9 @@ enum NodeType : unsigned {
   // SME ZA loads and stores
   SME_ZA_LDR,
   SME_ZA_STR,
+
+  // Compare-and-branch
+  CB,
 };
 
 } // end namespace AArch64ISD
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 03ae42493a035..7a8b743faf4a3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -400,6 +400,16 @@ def uimm6_32b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 64; }]> {
   let ParserMatchClass = UImm6Operand;
 }
 
+def CmpBranchUImm6Operand_32b
+    : ComplexPattern<i32, 1, "SelectCmpBranchUImm6Operand"> {
+  let WantsParent = true;
+}
+
+def CmpBranchUImm6Operand_64b
+    : ComplexPattern<i64, 1, "SelectCmpBranchUImm6Operand"> {
+  let WantsParent = true;
+}
+
 def UImm6Plus1Operand : AsmOperandClass {
   let Name = "UImm6P1";
   let DiagnosticType = "InvalidImm1_64";
@@ -13225,6 +13235,21 @@ multiclass CmpBranchRegisterAlias {
   def : InstAlias<mnemonic # "\t$Rm, $Rt, $target",
                   (!cast<Instruction>(insn # "Xrr") GPR64:$Rm, GPR64:$Rt, am_brcmpcond:$target), 0>;
 }
+
+class CmpBranchRegisterPseudo<RegisterClass regtype>
+  : Pseudo<(outs), (ins ccode:$Cond, regtype:$Rt, regtype:$Rm, am_brcmpcond:$Target), []>,
+    Sched<[WriteBr]> {
+  let isBranch = 1;
+  let isTerminator = 1;
+}
+
+class CmpBranchImmediatePseudo<RegisterClass regtype, Operand imtype>
+  : Pseudo<(outs), (ins ccode:$Cond, regtype:$Rt, imtype:$Imm, am_brcmpcond:$Target), []>,
+    Sched<[WriteBr]> {
+  let isBranch = 1;
+  let isTerminator = 1;
+}
+
 //----------------------------------------------------------------------------
 // Allow the size specifier tokens to be upper case, not just lower.
def : TokenAlias<".4B", ".4b">; // Add dot product diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 0f2b969fba35c..41e15ab1e5942 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -62,6 +62,10 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AArch64GenInstrInfo.inc" +static cl::opt + CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), + cl::desc("Restrict range of CB instructions (DEBUG)")); + static cl::opt TBZDisplacementBits( "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); @@ -216,6 +220,18 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); Cond.push_back(LastInst->getOperand(0)); Cond.push_back(LastInst->getOperand(1)); + break; + case AArch64::CBWPri: + case AArch64::CBXPri: + case AArch64::CBWPrr: + case AArch64::CBXPrr: + Target = LastInst->getOperand(3).getMBB(); + Cond.push_back(MachineOperand::CreateImm(-1)); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + break; } } @@ -237,6 +253,11 @@ static unsigned getBranchDisplacementBits(unsigned Opc) { return CBZDisplacementBits; case AArch64::Bcc: return BCCDisplacementBits; + case AArch64::CBWPri: + case AArch64::CBXPri: + case AArch64::CBWPrr: + case AArch64::CBXPrr: + return CBDisplacementBits; } } @@ -266,6 +287,11 @@ AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { case AArch64::CBNZX: case AArch64::Bcc: return MI.getOperand(1).getMBB(); + case AArch64::CBWPri: + case AArch64::CBXPri: + case AArch64::CBWPrr: + case AArch64::CBXPrr: + return MI.getOperand(3).getMBB(); } } @@ -543,6 +569,17 @@ bool AArch64InstrInfo::reverseBranchCondition( case AArch64::TBNZX: Cond[1].setImm(AArch64::TBZX); break; + + // Cond is { -1, Opcode, CC, Op0, Op1 } + case AArch64::CBWPri: + case AArch64::CBXPri: + case AArch64::CBWPrr: + case AArch64::CBXPrr: { + // Pseudos using standard 4bit Arm condition codes + AArch64CC::CondCode CC = + static_cast(Cond[2].getImm()); + Cond[2].setImm(AArch64CC::getInvertedCondCode(CC)); + } } } @@ -593,10 +630,19 @@ void AArch64InstrInfo::instantiateCondBranch( } else { // Folded compare-and-branch // Note that we use addOperand instead of addReg to keep the flags. + + // cbz, cbnz const MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]); + + // tbz/tbnz if (Cond.size() > 3) - MIB.addImm(Cond[3].getImm()); + MIB.add(Cond[3]); + + // cb + if (Cond.size() > 4) + MIB.add(Cond[4]); + MIB.addMBB(TBB); } } @@ -631,7 +677,7 @@ unsigned AArch64InstrInfo::insertBranch( // Find the original register that VReg is copied from. 
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { - while (Register::isVirtualRegister(VReg)) { + while (Register(VReg).isVirtual()) { const MachineInstr *DefMI = MRI.getVRegDef(VReg); if (!DefMI->isFullCopy()) return VReg; @@ -646,7 +692,7 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg = nullptr) { VReg = removeCopies(MRI, VReg); - if (!Register::isVirtualRegister(VReg)) + if (!Register(VReg).isVirtual()) return 0; bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); @@ -842,6 +888,48 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64)); break; } + case 5: { // cb + // We must insert a cmp, that is a subs + // 0 1 2 3 4 + // Cond is { -1, Opcode, CC, Op0, Op1 } + unsigned SUBSOpC, SUBSDestReg; + bool IsImm = false; + CC = static_cast(Cond[2].getImm()); + switch (Cond[1].getImm()) { + default: + llvm_unreachable("Unknown branch opcode in Cond"); + case AArch64::CBWPri: + SUBSOpC = AArch64::SUBSWri; + SUBSDestReg = AArch64::WZR; + IsImm = true; + break; + case AArch64::CBXPri: + SUBSOpC = AArch64::SUBSXri; + SUBSDestReg = AArch64::XZR; + IsImm = true; + break; + case AArch64::CBWPrr: + SUBSOpC = AArch64::SUBSWrr; + SUBSDestReg = AArch64::WZR; + IsImm = false; + break; + case AArch64::CBXPrr: + SUBSOpC = AArch64::SUBSXrr; + SUBSDestReg = AArch64::XZR; + IsImm = false; + break; + } + + if (IsImm) + BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg) + .addReg(Cond[3].getReg()) + .addImm(Cond[4].getImm()) + .addImm(0); + else + BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg) + .addReg(Cond[3].getReg()) + .addReg(Cond[4].getReg()); + } } unsigned Opc = 0; @@ -6033,9 +6121,9 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( Register SrcReg = SrcMO.getReg(); // This is slightly expensive to compute for physical regs since // getMinimalPhysRegClass is slow. - auto getRegClass = [&](unsigned Reg) { - return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) - : TRI.getMinimalPhysRegClass(Reg); + auto getRegClass = [&](Register Reg) { + return Reg.isVirtual() ? 
MRI.getRegClass(Reg) + : TRI.getMinimalPhysRegClass(Reg); }; if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { @@ -7368,7 +7456,7 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, MRI.constrainRegClass(SrcReg0, RC); if (SrcReg1.isVirtual()) MRI.constrainRegClass(SrcReg1, RC); - if (Register::isVirtualRegister(VR)) + if (Register(VR).isVirtual()) MRI.constrainRegClass(VR, RC); MachineInstrBuilder MIB = @@ -8426,6 +8514,10 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { default: llvm_unreachable("Unknown branch instruction?"); case AArch64::Bcc: + case AArch64::CBWPri: + case AArch64::CBXPri: + case AArch64::CBWPrr: + case AArch64::CBXPrr: return false; case AArch64::CBZW: case AArch64::CBZX: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 9a0034223ab9b..f7a1485cdd26a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -692,6 +692,10 @@ static inline bool isCondBranchOpcode(int Opc) { case AArch64::TBZX: case AArch64::TBNZW: case AArch64::TBNZX: + case AArch64::CBWPri: + case AArch64::CBXPri: + case AArch64::CBWPrr: + case AArch64::CBXPrr: return true; default: return false; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 93a6100ce54e9..fc86dd4742bc4 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -516,6 +516,10 @@ def SDT_AArch64TBL : SDTypeProfile<1, 2, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2> ]>; +def SDT_AArch64cb : SDTypeProfile<0, 4, + [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisInt<2>, + SDTCisVT<3, OtherVT>]>; + // non-extending masked load fragment. def nonext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -692,6 +696,7 @@ def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{ }]>; // Node definitions. 
+def AArch64CB : SDNode<"AArch64ISD::CB", SDT_AArch64cb, [SDNPHasChain]>;
 def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
 def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
 def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
@@ -10459,6 +10464,10 @@ defm : PromoteBinaryv8f16Tov4f32;
 defm : PromoteBinaryv8f16Tov4f32;
 defm : PromoteBinaryv8f16Tov4f32;
 
+//===----------------------------------------------------------------------===//
+// Compare and Branch (FEAT_CMPBR)
+//===----------------------------------------------------------------------===//
+
 let Predicates = [HasCMPBR] in {
  defm CBGT : CmpBranchRegister<0b000, "cbgt">;
  defm CBGE : CmpBranchRegister<0b001, "cbge">;
@@ -10507,6 +10516,25 @@ let Predicates = [HasCMPBR] in {
  defm : CmpBranchWRegisterAlias<"cbhlo", "CBHHI">;
  defm : CmpBranchWRegisterAlias<"cbhls", "CBHHS">;
  defm : CmpBranchWRegisterAlias<"cbhlt", "CBHGT">;
+
+  // Pseudos for codegen
+  def CBWPrr : CmpBranchRegisterPseudo<GPR32>;
+  def CBXPrr : CmpBranchRegisterPseudo<GPR64>;
+  def CBWPri : CmpBranchImmediatePseudo<GPR32, uimm6_32b>;
+  def CBXPri : CmpBranchImmediatePseudo<GPR64, uimm6_64b>;
+
+  def : Pat<(AArch64CB i32:$Cond, GPR32:$Rn, CmpBranchUImm6Operand_32b:$Imm,
+             bb:$Target),
+            (CBWPri i32:$Cond, GPR32:$Rn, uimm6_32b:$Imm,
+             am_brcmpcond:$Target)>;
+  def : Pat<(AArch64CB i32:$Cond, GPR64:$Rn, CmpBranchUImm6Operand_64b:$Imm,
+             bb:$Target),
+            (CBXPri i32:$Cond, GPR64:$Rn, uimm6_64b:$Imm,
+             am_brcmpcond:$Target)>;
+  def : Pat<(AArch64CB i32:$Cond, GPR32:$Rn, GPR32:$Rt, bb:$Target),
+            (CBWPrr ccode:$Cond, GPR32:$Rn, GPR32:$Rt, am_brcmpcond:$Target)>;
+  def : Pat<(AArch64CB i32:$Cond, GPR64:$Rn, GPR64:$Rt, bb:$Target),
+            (CBXPrr ccode:$Cond, GPR64:$Rn, GPR64:$Rt, am_brcmpcond:$Target)>;
 } // HasCMPBR
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 9671fa3b3d92f..a4ee963e2cce0 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -362,6 +362,26 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
   }
 }
 
+/// True if a given condition code can be used in a fused compare-and-branch
+/// instruction, false otherwise.
+inline static bool isValidCBCond(AArch64CC::CondCode Code) { + switch (Code) { + default: + return false; + case AArch64CC::EQ: + case AArch64CC::NE: + case AArch64CC::HS: + case AArch64CC::LO: + case AArch64CC::HI: + case AArch64CC::LS: + case AArch64CC::GE: + case AArch64CC::LT: + case AArch64CC::GT: + case AArch64CC::LE: + return true; + } +} + } // end namespace AArch64CC struct SysAlias { diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 42392e22643b2..80786c6fefd3a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -368,7 +368,7 @@ struct AMDGPUUnifyMetadataPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); +void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &); extern char &SIOptimizeExecMaskingPreRAID; void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index cf3843869808b..28c5a53508556 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1190,6 +1190,12 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { case Intrinsic::amdgcn_permlane16_swap: case Intrinsic::amdgcn_permlane32_swap: return selectPermlaneSwapIntrin(I, IntrinsicID); + case Intrinsic::amdgcn_dead: { + I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); + I.removeOperand(1); // drop intrinsic ID + return RBI.constrainGenericRegister(I.getOperand(0).getReg(), + AMDGPU::VGPR_32RegClass, *MRI); + } default: return selectImpl(I, *CoverageInfo); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index fd1341e8c91b2..a4504d78c7250 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass()) MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass()) MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass()) MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass()) +MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass()) MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass()) MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass()) @@ -130,7 +131,6 @@ DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass()) DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass()) DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass()) DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass()) -DUMMY_MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass()) DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it // already exists. 
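With si-optimize-exec-masking-pre-ra promoted from a DUMMY_MACHINE_FUNCTION_PASS to a real MACHINE_FUNCTION_PASS entry, it can be scheduled through the new pass manager like any other ported machine pass. A minimal sketch of driving it directly from C++, not part of the patch, assuming the caller already has a MachineFunction and a populated MachineFunctionAnalysisManager:

```cpp
// Minimal sketch: run the ported pass via the new pass manager. Uses the
// header this patch adds; LiveIntervalsAnalysis must be registered with MFAM.
#include "SIOptimizeExecMaskingPreRA.h"
#include "llvm/CodeGen/MachinePassManager.h"

using namespace llvm;

void runExecMaskOptPreRA(MachineFunction &MF,
                         MachineFunctionAnalysisManager &MFAM) {
  MachineFunctionPassManager MFPM;
  MFPM.addPass(SIOptimizeExecMaskingPreRAPass());
  // The pass fetches LiveIntervals from MFAM itself, mirroring the legacy
  // wrapper's getAnalysis<LiveIntervalsWrapperPass>() call.
  MFPM.run(MF, MFAM);
}
```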
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 2e5f42c3bdc40..2693ad3894cca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4676,6 +4676,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_set_inactive_chain_arg:
     case Intrinsic::amdgcn_permlane64:
     case Intrinsic::amdgcn_ds_bpermute_fi_b32:
+    case Intrinsic::amdgcn_dead:
       return getDefaultMappingAllVGPR(MI);
     case Intrinsic::amdgcn_cvt_pkrtz:
       if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 10175557fadc7..3b62dcf3c92cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -362,6 +362,8 @@ def : SourceOfDivergence;
 foreach intr = AMDGPUImageDimAtomicIntrinsics in
 def : SourceOfDivergence<intr>;
 
+def : SourceOfDivergence<int_amdgcn_dead>;
+
 class AlwaysUniform<Intrinsic intr> {
   Intrinsic Intr = intr;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7c9377e61230b..dbd126d18785a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -51,6 +51,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "SIOptimizeExecMasking.h"
+#include "SIOptimizeExecMaskingPreRA.h"
 #include "SIOptimizeVGPRLiveRange.h"
 #include "SIPeepholeSDWA.h"
 #include "SIPreAllocateWWMRegs.h"
@@ -501,7 +502,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIFoldOperandsLegacyPass(*PR);
   initializeSIPeepholeSDWALegacyPass(*PR);
   initializeSIShrinkInstructionsLegacyPass(*PR);
-  initializeSIOptimizeExecMaskingPreRAPass(*PR);
+  initializeSIOptimizeExecMaskingPreRALegacyPass(*PR);
   initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR);
   initializeSILoadStoreOptimizerLegacyPass(*PR);
   initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3faf0795157dc..598475763d02d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -4276,3 +4276,9 @@ def V_ILLEGAL : Enc32, InstSI<(outs), (ins), "v_illegal"> {
   let hasSideEffects = 1;
   let SubtargetPredicate = isGFX10Plus;
 }
+
+// FIXME: Would be nice if we could set the register class for the destination
+// register too.
+def IMP_DEF_FROM_INTRINSIC: Pat< + (i32 (int_amdgcn_dead)), (IMPLICIT_DEF)>; + diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 31f65d82a4d2b..b2228574378f1 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -12,6 +12,7 @@ /// //===----------------------------------------------------------------------===// +#include "SIOptimizeExecMaskingPreRA.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -25,7 +26,7 @@ using namespace llvm; namespace { -class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { +class SIOptimizeExecMaskingPreRA { private: const SIRegisterInfo *TRI; const SIInstrInfo *TII; @@ -42,11 +43,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { bool optimizeVcndVcmpPair(MachineBasicBlock &MBB); bool optimizeElseBranch(MachineBasicBlock &MBB); +public: + SIOptimizeExecMaskingPreRA(LiveIntervals *LIS) : LIS(LIS) {} + bool run(MachineFunction &MF); +}; + +class SIOptimizeExecMaskingPreRALegacy : public MachineFunctionPass { public: static char ID; - SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) { - initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry()); + SIOptimizeExecMaskingPreRALegacy() : MachineFunctionPass(ID) { + initializeSIOptimizeExecMaskingPreRALegacyPass( + *PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -64,18 +72,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { } // End anonymous namespace. -INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE, "SI optimize exec mask operations pre-RA", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) -INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, +INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE, "SI optimize exec mask operations pre-RA", false, false) -char SIOptimizeExecMaskingPreRA::ID = 0; +char SIOptimizeExecMaskingPreRALegacy::ID = 0; -char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID; +char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRALegacy::ID; FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() { - return new SIOptimizeExecMaskingPreRA(); + return new SIOptimizeExecMaskingPreRALegacy(); } // See if there is a def between \p AndIdx and \p SelIdx that needs to live @@ -340,15 +348,28 @@ bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) { return true; } -bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { +PreservedAnalyses +SIOptimizeExecMaskingPreRAPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto &LIS = MFAM.getResult(MF); + SIOptimizeExecMaskingPreRA(&LIS).run(MF); + return PreservedAnalyses::all(); +} + +bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction( + MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; + auto *LIS = &getAnalysis().getLIS(); + return SIOptimizeExecMaskingPreRA(LIS).run(MF); +} + +bool SIOptimizeExecMaskingPreRA::run(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); TRI = ST.getRegisterInfo(); TII = ST.getInstrInfo(); MRI = &MF.getRegInfo(); - LIS = &getAnalysis().getLIS(); const bool Wave32 = ST.isWave32(); AndOpc = Wave32 ? 
AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
new file mode 100644
index 0000000000000..cf9c6ce5f0083
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
@@ -0,0 +1,23 @@
+//===- SIOptimizeExecMaskingPreRA.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
+#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIOptimizeExecMaskingPreRAPass
+    : public PassInfoMixin<SIOptimizeExecMaskingPreRAPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
diff --git a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
index 36f811c0aa003..f2b5ce6de4a60 100644
--- a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
+++ b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
@@ -151,7 +151,7 @@ static bool dominatesAllUsesOf(const MachineInstr *MI, unsigned VReg,
                                MachineDominatorTree *MDT,
                                MachineRegisterInfo *MRI) {
-  assert(Register::isVirtualRegister(VReg) && "Expected virtual register!");
+  assert(Register(VReg).isVirtual() && "Expected virtual register!");
 
   for (const MachineOperand &Use : MRI->use_nodbg_operands(VReg)) {
     const MachineInstr *User = Use.getParent();
@@ -216,7 +216,7 @@ MachineInstr *ARCOptAddrMode::tryToCombine(MachineInstr &Ldst) {
   }
 
   Register B = Base.getReg();
-  if (Register::isStackSlot(B) || !Register::isVirtualRegister(B)) {
+  if (!B.isVirtual()) {
     LLVM_DEBUG(dbgs() << "[ABAW] Base is not VReg\n");
     return nullptr;
   }
diff --git a/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index bb9a0a2bdf98b..452159406085d 100644
--- a/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -152,7 +152,7 @@ unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
 // Get the subreg type that is most likely to be coalesced
 // for an SPR register that will be used in VDUP32d pseudo.
 unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
-  if (!Register::isVirtualRegister(SReg))
+  if (!Register(SReg).isVirtual())
     return getDPRLaneFromSPR(SReg);
 
   MachineInstr *MI = MRI->getVRegDef(SReg);
@@ -166,7 +166,7 @@ unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
     SReg = MI->getOperand(1).getReg();
   }
 
-  if (Register::isVirtualRegister(SReg)) {
+  if (Register(SReg).isVirtual()) {
     if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
     return ARM::ssub_0;
   }
@@ -598,7 +598,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
   // we can end up with multiple defs of this DPR.
SmallVector DefSrcs; - if (!Register::isVirtualRegister(I)) + if (!Register(I).isVirtual()) continue; MachineInstr *Def = MRI->getVRegDef(I); if (!Def) diff --git a/llvm/lib/Target/ARM/ARMLatencyMutations.cpp b/llvm/lib/Target/ARM/ARMLatencyMutations.cpp index 85bad4f1925a4..30e7ede68d787 100644 --- a/llvm/lib/Target/ARM/ARMLatencyMutations.cpp +++ b/llvm/lib/Target/ARM/ARMLatencyMutations.cpp @@ -756,7 +756,7 @@ signed M85Overrides::modifyMixedWidthFP(const MachineInstr *SrcMI, !II->producesQP(SrcMI->getOpcode())) return 0; - if (Register::isVirtualRegister(RegID)) { + if (Register(RegID).isVirtual()) { if (II->producesSP(SrcMI->getOpcode()) && II->consumesDP(DstMI->getOpcode())) { for (auto &OP : SrcMI->operands()) diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp index a8927d834630e..1f6b6163dd3b6 100644 --- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp +++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp @@ -253,15 +253,15 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand( SDValue ImmOp = Op->getOperand(1); ConstantSDNode *ImmNode = dyn_cast(ImmOp); - unsigned Reg; + Register Reg; bool CanHandleRegImmOpt = ImmNode && ImmNode->getAPIntValue().ult(64); if (CopyFromRegOp->getOpcode() == ISD::CopyFromReg) { RegisterSDNode *RegNode = cast(CopyFromRegOp->getOperand(1)); Reg = RegNode->getReg(); - CanHandleRegImmOpt &= (Register::isVirtualRegister(Reg) || - AVR::PTRDISPREGSRegClass.contains(Reg)); + CanHandleRegImmOpt &= + (Reg.isVirtual() || AVR::PTRDISPREGSRegClass.contains(Reg)); } else { CanHandleRegImmOpt = false; } diff --git a/llvm/lib/Target/Hexagon/BitTracker.cpp b/llvm/lib/Target/Hexagon/BitTracker.cpp index 4d5789a3c5fe1..8e88f45aeafe5 100644 --- a/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -84,7 +84,7 @@ namespace { raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { if (PV.R) - OS << 'v' << Register::virtReg2Index(PV.R); + OS << 'v' << Register(PV.R).virtRegIndex(); else OS << 's'; return OS; diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 67d822a67e53c..2ee537d2193b3 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -176,7 +176,7 @@ namespace { } static inline unsigned v2x(unsigned v) { - return Register::virtReg2Index(v); + return Register(v).virtRegIndex(); } static inline unsigned x2v(unsigned x) { diff --git a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp index 59c882bf37afa..1a0cdd811762f 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp @@ -139,8 +139,7 @@ void HexagonCopyHoisting::addMItoCopyList(MachineInstr *MI) { Register DstReg = MI->getOperand(0).getReg(); Register SrcReg = MI->getOperand(1).getReg(); - if (!Register::isVirtualRegister(DstReg) || - !Register::isVirtualRegister(SrcReg) || + if (!DstReg.isVirtual() || !SrcReg.isVirtual() || MRI->getRegClass(DstReg) != &Hexagon::IntRegsRegClass || MRI->getRegClass(SrcReg) != &Hexagon::IntRegsRegClass) return; diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp index 5e52cf03cfbc7..cc9485789d211 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -167,7 +167,7 @@ namespace { } static inline unsigned v2x(unsigned v) { - return Register::virtReg2Index(v); + return 
Register(v).virtRegIndex(); } static inline unsigned x2v(unsigned x) { @@ -271,7 +271,7 @@ namespace { CellMapShadow(const BitTracker &T) : BT(T) {} const BitTracker::RegisterCell &lookup(unsigned VR) { - unsigned RInd = Register::virtReg2Index(VR); + unsigned RInd = Register(VR).virtRegIndex(); // Grow the vector to at least 32 elements. if (RInd >= CVect.size()) CVect.resize(std::max(RInd+16, 32U), nullptr); @@ -1578,7 +1578,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { IterListType Out; for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { - unsigned Idx = Register::virtReg2Index(I->first); + unsigned Idx = Register(I->first).virtRegIndex(); if (Idx >= Cutoff) Out.push_back(I); } diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index 39b307b28889c..5a678bfcf410c 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -322,7 +322,7 @@ static bool MatchingStackOffset(SDValue Arg, unsigned Offset, int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { Register VR = cast(Arg.getOperand(1))->getReg(); - if (!Register::isVirtualRegister(VR)) + if (!VR.isVirtual()) return false; MachineInstr *Def = MRI->getVRegDef(VR); if (!Def) diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index 4bfc35420b402..ae4b2377ad218 100644 --- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -207,6 +207,7 @@ getReservedRegs(const MachineFunction &MF) const { // Reserve hardware registers. Reserved.set(Mips::HWR29); + Reserved.set(Mips::HWR2); // Reserve DSP control register. Reserved.set(Mips::DSPPos); diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index c8e29c1da6ec4..51cd2b999ff9e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -149,67 +149,6 @@ void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, Inst); } -// Handle symbol backtracking for targets that do not support image handles -bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, - unsigned OpNo, MCOperand &MCOp) { - const MachineOperand &MO = MI->getOperand(OpNo); - const MCInstrDesc &MCID = MI->getDesc(); - - if (MCID.TSFlags & NVPTXII::IsTexFlag) { - // This is a texture fetch, so operand 4 is a texref and operand 5 is - // a samplerref - if (OpNo == 4 && MO.isImm()) { - lowerImageHandleSymbol(MO.getImm(), MCOp); - return true; - } - if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { - lowerImageHandleSymbol(MO.getImm(), MCOp); - return true; - } - - return false; - } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { - unsigned VecSize = - 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); - - // For a surface load of vector size N, the Nth operand will be the surfref - if (OpNo == VecSize && MO.isImm()) { - lowerImageHandleSymbol(MO.getImm(), MCOp); - return true; - } - - return false; - } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { - // This is a surface store, so operand 0 is a surfref - if (OpNo == 0 && MO.isImm()) { - lowerImageHandleSymbol(MO.getImm(), MCOp); - return true; - } - - return false; - } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { - // This is a query, so operand 1 is a surfref/texref - if (OpNo == 1 && MO.isImm()) { - lowerImageHandleSymbol(MO.getImm(), MCOp); - 
return true; - } - - return false; - } - - return false; -} - -void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { - // Ewwww - TargetMachine &TM = const_cast(MF->getTarget()); - NVPTXTargetMachine &nvTM = static_cast(TM); - const NVPTXMachineFunctionInfo *MFI = MF->getInfo(); - StringRef Sym = MFI->getImageHandleSymbol(Index); - StringRef SymName = nvTM.getStrPool().save(Sym); - MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName)); -} - void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { OutMI.setOpcode(MI->getOpcode()); // Special: Do not mangle symbol operand of CALL_PROTOTYPE @@ -220,71 +159,53 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { return; } - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - - MCOperand MCOp; - if (lowerImageHandleOperand(MI, i, MCOp)) { - OutMI.addOperand(MCOp); - continue; - } - - if (lowerOperand(MO, MCOp)) - OutMI.addOperand(MCOp); - } + for (const auto MO : MI->operands()) + OutMI.addOperand(lowerOperand(MO)); } -bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, - MCOperand &MCOp) { +MCOperand NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO) { switch (MO.getType()) { - default: llvm_unreachable("unknown operand type"); + default: + llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: - MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg())); - break; + return MCOperand::createReg(encodeVirtualRegister(MO.getReg())); case MachineOperand::MO_Immediate: - MCOp = MCOperand::createImm(MO.getImm()); - break; + return MCOperand::createImm(MO.getImm()); case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( - MO.getMBB()->getSymbol(), OutContext)); - break; + return MCOperand::createExpr( + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), OutContext)); case MachineOperand::MO_ExternalSymbol: - MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); - break; + return GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); case MachineOperand::MO_GlobalAddress: - MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); - break; + return GetSymbolRef(getSymbol(MO.getGlobal())); case MachineOperand::MO_FPImmediate: { const ConstantFP *Cnt = MO.getFPImm(); const APFloat &Val = Cnt->getValueAPF(); switch (Cnt->getType()->getTypeID()) { - default: report_fatal_error("Unsupported FP type"); break; - case Type::HalfTyID: - MCOp = MCOperand::createExpr( - NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext)); + default: + report_fatal_error("Unsupported FP type"); break; + case Type::HalfTyID: + return MCOperand::createExpr( + NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext)); case Type::BFloatTyID: - MCOp = MCOperand::createExpr( + return MCOperand::createExpr( NVPTXFloatMCExpr::createConstantBFPHalf(Val, OutContext)); - break; case Type::FloatTyID: - MCOp = MCOperand::createExpr( - NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); - break; + return MCOperand::createExpr( + NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); case Type::DoubleTyID: - MCOp = MCOperand::createExpr( - NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); - break; + return MCOperand::createExpr( + NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); } break; } } - return true; } unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { - if (Register::isVirtualRegister(Reg)) { + if 
(Register(Reg).isVirtual()) { const TargetRegisterClass *RC = MRI->getRegClass(Reg); DenseMap &RegMap = VRegMapping[RC]; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index f7c3fda332eff..74daaa2fb7134 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -163,7 +163,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { void emitInstruction(const MachineInstr *) override; void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI); - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + MCOperand lowerOperand(const MachineOperand &MO); MCOperand GetSymbolRef(const MCSymbol *Symbol); unsigned encodeVirtualRegister(unsigned Reg); @@ -226,10 +226,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { void emitDeclarationWithName(const Function *, MCSymbol *, raw_ostream &O); void emitDemotedVars(const Function *, raw_ostream &); - bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, - MCOperand &MCOp); - void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp); - bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const; // Used to control the need to emit .generic() in the initializer of diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h index 6670cb296f216..d9beab7ec42e1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -47,12 +47,6 @@ class NVPTXMachineFunctionInfo : public MachineFunctionInfo { return ImageHandleList.size()-1; } - /// Returns the symbol name at the given index. - StringRef getImageHandleSymbol(unsigned Idx) const { - assert(ImageHandleList.size() > Idx && "Bad index"); - return ImageHandleList[Idx]; - } - /// Check if the symbol has a mapping. 
Having a mapping means the handle is /// replaced with a reference bool checkImageHandleSymbol(StringRef Symbol) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index a3e3978cbbfe2..4d0694faa0c9a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -41,10 +40,8 @@ class NVPTXReplaceImageHandles : public MachineFunctionPass { private: bool processInstr(MachineInstr &MI); bool replaceImageHandle(MachineOperand &Op, MachineFunction &MF); - bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF, - unsigned &Idx); }; -} +} // namespace char NVPTXReplaceImageHandles::ID = 0; @@ -1756,9 +1753,11 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { } return true; - } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { + } + if (MCID.TSFlags & NVPTXII::IsSuldMask) { unsigned VecSize = - 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); + 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - + 1); // For a surface load of vector size N, the Nth operand will be the surfref MachineOperand &SurfHandle = MI.getOperand(VecSize); @@ -1767,7 +1766,8 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { MI.setDesc(TII->get(suldRegisterToIndexOpcode(MI.getOpcode()))); return true; - } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { + } + if (MCID.TSFlags & NVPTXII::IsSustFlag) { // This is a surface store, so operand 0 is a surfref MachineOperand &SurfHandle = MI.getOperand(0); @@ -1775,7 +1775,8 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { MI.setDesc(TII->get(sustRegisterToIndexOpcode(MI.getOpcode()))); return true; - } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { + } + if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { // This is a query, so operand 1 is a surfref/texref MachineOperand &Handle = MI.getOperand(1); @@ -1790,16 +1791,6 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op, MachineFunction &MF) { - unsigned Idx; - if (findIndexForHandle(Op, MF, Idx)) { - Op.ChangeToImmediate(Idx); - return true; - } - return false; -} - -bool NVPTXReplaceImageHandles:: -findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { const MachineRegisterInfo &MRI = MF.getRegInfo(); NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>(); @@ -1812,25 +1803,16 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { case NVPTX::LD_i64_avar: { // The handle is a parameter value being loaded, replace with the // parameter symbol - const NVPTXTargetMachine &TM = - static_cast<const NVPTXTargetMachine &>(MF.getTarget()); - if (TM.getDrvInterface() == NVPTX::CUDA) { + const auto &TM = static_cast<const NVPTXTargetMachine &>(MF.getTarget()); + if (TM.getDrvInterface() == NVPTX::CUDA) // For CUDA, we preserve the param loads coming from function arguments return false; - } assert(TexHandleDef.getOperand(7).isSymbol() && "Load is not a symbol!"); StringRef Sym = TexHandleDef.getOperand(7).getSymbolName(); - std::string ParamBaseName = std::string(MF.getName()); - ParamBaseName += "_param_"; - assert(Sym.starts_with(ParamBaseName) && "Invalid symbol reference"); - unsigned Param =
atoi(Sym.data()+ParamBaseName.size()); - std::string NewSym; - raw_string_ostream NewSymStr(NewSym); - NewSymStr << MF.getName() << "_param_" << Param; - InstrsToRemove.insert(&TexHandleDef); - Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str()); + Op.ChangeToES(Sym.data()); + MFI->getImageHandleSymbolIndex(Sym); return true; } case NVPTX::texsurf_handles: { @@ -1839,15 +1821,14 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal(); assert(GV->hasName() && "Global sampler must be named!"); InstrsToRemove.insert(&TexHandleDef); - Idx = MFI->getImageHandleSymbolIndex(GV->getName()); + Op.ChangeToGA(GV, 0); return true; } case NVPTX::nvvm_move_i64: case TargetOpcode::COPY: { - bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx); - if (Res) { + bool Res = replaceImageHandle(TexHandleDef.getOperand(1), MF); + if (Res) InstrsToRemove.insert(&TexHandleDef); - } return Res; } default: diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 3aef6f2c893fa..6c16afec33484 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -5131,7 +5131,7 @@ static bool isOpZeroOfSubwordPreincLoad(int Opcode) { // This function checks for sign extension from 32 bits to 64 bits. static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI) { - if (!Register::isVirtualRegister(Reg)) + if (!Register(Reg).isVirtual()) return false; MachineInstr *MI = MRI->getVRegDef(Reg); @@ -5178,7 +5178,7 @@ static bool definedBySignExtendingOp(const unsigned Reg, // in the higher 32 bits then this function will return true. static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI) { - if (!Register::isVirtualRegister(Reg)) + if (!Register(Reg).isVirtual()) return false; MachineInstr *MI = MRI->getVRegDef(Reg); @@ -5463,7 +5463,7 @@ std::pair PPCInstrInfo::isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const { - if (!Register::isVirtualRegister(Reg)) + if (!Register(Reg).isVirtual()) return std::pair(false, false); MachineInstr *MI = MRI->getVRegDef(Reg); diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 1b6da5781ac6b..4893e17953ab5 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -1482,7 +1482,7 @@ static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1, } else if (Inst->isFullCopy()) NextReg = Inst->getOperand(1).getReg(); - if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg)) + if (NextReg == SrcReg || !Register(NextReg).isVirtual()) break; SrcReg = NextReg; } diff --git a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp index 0bfcba9a52486..58ab7ad27cd6e 100644 --- a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -537,7 +537,7 @@ MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg, unsigned &Subreg, MachineInstr *&CpDef) { Subreg = -1; - if (!Register::isVirtualRegister(Reg)) + if (!Register(Reg).isVirtual()) return nullptr; MachineInstr *Copy = MRI->getVRegDef(Reg); CpDef = Copy; diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp index 0349a5929c106..bb5ca6872b2e3 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp +++ 
b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -40,9 +40,9 @@ namespace { const TargetInstrInfo *TII; - bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC, + bool IsRegInClass(Register Reg, const TargetRegisterClass *RC, MachineRegisterInfo &MRI) { - if (Register::isVirtualRegister(Reg)) { + if (Reg.isVirtual()) { return RC->hasSubClassEq(MRI.getRegClass(Reg)); } else if (RC->contains(Reg)) { return true; diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 573b30ccbcf2e..fc80e61a14517 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -157,7 +157,7 @@ struct PPCVSXSwapRemoval : public MachineFunctionPass { // Return true iff the given register is in the given class. bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) { - if (Register::isVirtualRegister(Reg)) + if (Register(Reg).isVirtual()) return RC->hasSubClassEq(MRI->getRegClass(Reg)); return RC->contains(Reg); } @@ -560,7 +560,7 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg, if (!MI->isCopyLike()) return SrcReg; - unsigned CopySrcReg; + Register CopySrcReg; if (MI->isCopy()) CopySrcReg = MI->getOperand(1).getReg(); else { @@ -568,7 +568,7 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg, CopySrcReg = MI->getOperand(2).getReg(); } - if (!Register::isVirtualRegister(CopySrcReg)) { + if (!CopySrcReg.isVirtual()) { if (!isScalarVecReg(CopySrcReg)) SwapVector[VecIdx].MentionsPhysVR = 1; return CopySrcReg; diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 9b23a5ab521c8..5d1ea50eba494 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -37,6 +37,7 @@ add_llvm_target(RISCVCodeGen RISCVMakeCompressible.cpp RISCVExpandAtomicPseudoInsts.cpp RISCVExpandPseudoInsts.cpp + RISCVFoldMemOffset.cpp RISCVFrameLowering.cpp RISCVGatherScatterLowering.cpp RISCVIndirectBranchTracking.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 851eea1352852..641e2eb4094f9 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -52,6 +52,9 @@ void initializeRISCVVectorPeepholePass(PassRegistry &); FunctionPass *createRISCVOptWInstrsPass(); void initializeRISCVOptWInstrsPass(PassRegistry &); +FunctionPass *createRISCVFoldMemOffsetPass(); +void initializeRISCVFoldMemOffsetPass(PassRegistry &); + FunctionPass *createRISCVMergeBaseOffsetOptPass(); void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp b/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp new file mode 100644 index 0000000000000..989e9d859d64f --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp @@ -0,0 +1,282 @@ +//===- RISCVFoldMemOffset.cpp - Fold ADDI into memory offsets ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// Look for ADDIs that can be removed by folding their immediate into later +// load/store addresses. There may be other arithmetic instructions between the +// addi and load/store that we need to reassociate through. 
If the final result +// of the arithmetic is only used by load/store addresses, we can fold the +// offset into all the loads/stores as long as it doesn't create an offset +// that is too large. +// +//===---------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include <queue> + +using namespace llvm; + +#define DEBUG_TYPE "riscv-fold-mem-offset" +#define RISCV_FOLD_MEM_OFFSET_NAME "RISC-V Fold Memory Offset" + +namespace { + +class RISCVFoldMemOffset : public MachineFunctionPass { +public: + static char ID; + + RISCVFoldMemOffset() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + bool foldOffset(Register OrigReg, int64_t InitialOffset, + const MachineRegisterInfo &MRI, + DenseMap<MachineInstr *, int64_t> &FoldableInstrs); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return RISCV_FOLD_MEM_OFFSET_NAME; } +}; + +// Wrapper class around a std::optional<int64_t> to allow accumulation. +class FoldableOffset { + std::optional<int64_t> Offset; + +public: + bool hasValue() const { return Offset.has_value(); } + int64_t getValue() const { return *Offset; } + + FoldableOffset &operator=(int64_t RHS) { + Offset = RHS; + return *this; + } + + FoldableOffset &operator+=(int64_t RHS) { + if (!Offset) + Offset = 0; + Offset = (uint64_t)*Offset + (uint64_t)RHS; + return *this; + } + + int64_t operator*() { return *Offset; } +}; + +} // end anonymous namespace + +char RISCVFoldMemOffset::ID = 0; +INITIALIZE_PASS(RISCVFoldMemOffset, DEBUG_TYPE, RISCV_FOLD_MEM_OFFSET_NAME, + false, false) + +FunctionPass *llvm::createRISCVFoldMemOffsetPass() { + return new RISCVFoldMemOffset(); +} + +// Walk forward from the ADDI looking for arithmetic instructions we can +// analyze or memory instructions that use it as part of their address +// calculation. For each arithmetic instruction we look up how the offset +// contributes to the value in that register and use that information to +// calculate the contribution to the output of this instruction. +// Only addition and left shift are supported. +// FIXME: Add multiplication by constant. The constant will be in a register. +bool RISCVFoldMemOffset::foldOffset( + Register OrigReg, int64_t InitialOffset, const MachineRegisterInfo &MRI, + DenseMap<MachineInstr *, int64_t> &FoldableInstrs) { + // Map to hold how much the offset contributes to the value of this register. + DenseMap<Register, int64_t> RegToOffsetMap; + + // Insert root offset into the map.
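+  // A hypothetical walk of this transform (virtual registers invented for + // illustration): for + //   %1 = ADDI %0, 8 + //   %2 = SH2ADD %idx, %1 + //   %v = LW %2, 0 + // the map is seeded with RegToOffsetMap[%1] = 8, the SH2ADD forwards the 8 + // unscaled through its unshifted operand, and the LW can become LW %2, 8 + // once the ADDI is erased and %1 is replaced by %0.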
+ RegToOffsetMap[OrigReg] = InitialOffset; + + std::queue<Register> Worklist; + Worklist.push(OrigReg); + + while (!Worklist.empty()) { + Register Reg = Worklist.front(); + Worklist.pop(); + + if (!Reg.isVirtual()) + return false; + + for (auto &User : MRI.use_nodbg_instructions(Reg)) { + FoldableOffset Offset; + + switch (User.getOpcode()) { + default: + return false; + case RISCV::ADD: + if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg()); + I != RegToOffsetMap.end()) + Offset = I->second; + if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg()); + I != RegToOffsetMap.end()) + Offset += I->second; + break; + case RISCV::SH1ADD: + if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg()); + I != RegToOffsetMap.end()) + Offset = (uint64_t)I->second << 1; + if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg()); + I != RegToOffsetMap.end()) + Offset += I->second; + break; + case RISCV::SH2ADD: + if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg()); + I != RegToOffsetMap.end()) + Offset = (uint64_t)I->second << 2; + if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg()); + I != RegToOffsetMap.end()) + Offset += I->second; + break; + case RISCV::SH3ADD: + if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg()); + I != RegToOffsetMap.end()) + Offset = (uint64_t)I->second << 3; + if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg()); + I != RegToOffsetMap.end()) + Offset += I->second; + break; + case RISCV::ADD_UW: + case RISCV::SH1ADD_UW: + case RISCV::SH2ADD_UW: + case RISCV::SH3ADD_UW: + // Don't fold through the zero extended input. + if (User.getOperand(1).getReg() == Reg) + return false; + if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg()); + I != RegToOffsetMap.end()) + Offset = I->second; + break; + case RISCV::SLLI: { + unsigned ShAmt = User.getOperand(2).getImm(); + if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg()); + I != RegToOffsetMap.end()) + Offset = (uint64_t)I->second << ShAmt; + break; + } + case RISCV::LB: + case RISCV::LBU: + case RISCV::SB: + case RISCV::LH: + case RISCV::LH_INX: + case RISCV::LHU: + case RISCV::FLH: + case RISCV::SH: + case RISCV::SH_INX: + case RISCV::FSH: + case RISCV::LW: + case RISCV::LW_INX: + case RISCV::LWU: + case RISCV::FLW: + case RISCV::SW: + case RISCV::SW_INX: + case RISCV::FSW: + case RISCV::LD: + case RISCV::FLD: + case RISCV::SD: + case RISCV::FSD: { + // Can't fold into store value. + if (User.getOperand(0).getReg() == Reg) + return false; + + // Existing offset must be immediate. + if (!User.getOperand(2).isImm()) + return false; + + // Require at least one operation between the ADDI and the load/store. + // We have other optimizations that should handle the simple case. + if (User.getOperand(1).getReg() == OrigReg) + return false; + + auto I = RegToOffsetMap.find(User.getOperand(1).getReg()); + if (I == RegToOffsetMap.end()) + return false; + + int64_t LocalOffset = User.getOperand(2).getImm(); + assert(isInt<12>(LocalOffset)); + int64_t CombinedOffset = (uint64_t)LocalOffset + (uint64_t)I->second; + if (!isInt<12>(CombinedOffset)) + return false; + + FoldableInstrs[&User] = CombinedOffset; + continue; + } + } + + // If we reach here we should have an accumulated offset. + assert(Offset.hasValue() && "Expected an offset"); + + // If the offset is new or changed, add the destination register to the + // work list.
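+      // Revisiting a register whose accumulated offset changed is what makes + // reconvergent paths work, e.g. an ADD whose two operands both derive from + // the original ADDI: a later visit sees both contributions.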
+ int64_t OffsetVal = Offset.getValue(); + auto P = + RegToOffsetMap.try_emplace(User.getOperand(0).getReg(), OffsetVal); + if (P.second) { + Worklist.push(User.getOperand(0).getReg()); + } else if (P.first->second != OffsetVal) { + P.first->second = OffsetVal; + Worklist.push(User.getOperand(0).getReg()); + } + } + } + + return true; +} + +bool RISCVFoldMemOffset::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + // This optimization may increase size by preventing compression. + if (MF.getFunction().hasOptSize()) + return false; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + + bool MadeChange = false; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { + // FIXME: We can support ADDIW from an LUI+ADDIW pair if the result is + // equivalent to LUI+ADDI. + if (MI.getOpcode() != RISCV::ADDI) + continue; + + // We only want to optimize register ADDIs. + if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm()) + continue; + + // Ignore 'li'. + if (MI.getOperand(1).getReg() == RISCV::X0) + continue; + + int64_t Offset = MI.getOperand(2).getImm(); + assert(isInt<12>(Offset)); + + DenseMap<MachineInstr *, int64_t> FoldableInstrs; + + if (!foldOffset(MI.getOperand(0).getReg(), Offset, MRI, FoldableInstrs)) + continue; + + if (FoldableInstrs.empty()) + continue; + + // We can fold this ADDI. + // Rewrite all the instructions. + for (auto [MemMI, NewOffset] : FoldableInstrs) + MemMI->getOperand(2).setImm(NewOffset); + + MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + MadeChange = true; + } + } + + return MadeChange; +} diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 0f5e7bd254f68..18f92faa43c5e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4560,6 +4560,50 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc, return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0); } +/// Is this mask representing a masked combination of two slides? +static bool isMaskedSlidePair(ArrayRef<int> Mask, + std::pair<int, int> SrcInfo[2]) { + int NumElts = Mask.size(); + int SignalValue = NumElts * 2; + SrcInfo[0] = {-1, SignalValue}; + SrcInfo[1] = {-1, SignalValue}; + for (unsigned i = 0; i != Mask.size(); ++i) { + int M = Mask[i]; + if (M < 0) + continue; + int Src = M >= (int)NumElts; + int Diff = (int)i - (M % NumElts); + bool Match = false; + for (int j = 0; j < 2; j++) { + if (SrcInfo[j].first == -1) { + assert(SrcInfo[j].second == SignalValue); + SrcInfo[j].first = Src; + SrcInfo[j].second = Diff; + } + if (SrcInfo[j].first == Src && SrcInfo[j].second == Diff) { + Match = true; + break; + } + } + if (!Match) + return false; + } + + // Avoid matching unconditional slides for now. This is reasonably + // covered by existing matchers. + if (SrcInfo[0].first == -1 || SrcInfo[1].first == -1) + return false; + // Avoid matching vselect idioms + if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0) + return false; + // Prefer vslideup as the second instruction, and identity + // only as the initial instruction. + if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) || + SrcInfo[1].second == 0) + std::swap(SrcInfo[0], SrcInfo[1]); + return true; +} + /// Match shuffles that concatenate two vectors, rotate the concatenation, /// and then extract the original number of elements from the rotated result. /// This is equivalent to vector.splice or X86's PALIGNR instruction.
The @@ -5429,7 +5473,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); - auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + // Store the return value in a single variable instead of structured bindings + // so that we can pass it to GetSlide below, which cannot capture structured + // bindings until C++20. + auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + auto [TrueMask, VL] = TrueMaskVL; if (SVN->isSplat()) { const int Lane = SVN->getSplatIndex(); @@ -5648,6 +5696,76 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); } + // Recognize a pattern which can be handled via a pair of vslideup/vslidedown + // instructions (in any combination) with masking on the second instruction. + // Avoid matching bit rotates as slide pairs. This is a performance + // heuristic, not a functional check. + // TODO: Generalize this slightly to allow single instruction cases, and + // prune the logic above which is mostly covered by this already. + std::pair<int, int> SrcInfo[2]; + unsigned RotateAmt; + MVT RotateVT; + if (isMaskedSlidePair(Mask, SrcInfo) && + !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt)) { + SDValue Sources[2]; + auto GetSourceFor = [&](const std::pair<int, int> &Info) { + int SrcIdx = Info.first; + assert(SrcIdx == 0 || SrcIdx == 1); + SDValue &Src = Sources[SrcIdx]; + if (!Src) { + SDValue SrcV = SrcIdx == 0 ? V1 : V2; + Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget); + } + return Src; + }; + auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask, + SDValue Passthru) { + auto [TrueMask, VL] = TrueMaskVL; + SDValue SrcV = GetSourceFor(Src); + int SlideAmt = Src.second; + if (SlideAmt == 0) { + // Should never be the second operation + assert(Mask == TrueMask); + return SrcV; + } + if (SlideAmt < 0) + return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV, + DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL, + RISCVVType::TAIL_AGNOSTIC); + return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV, + DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL, + RISCVVType::TAIL_AGNOSTIC); + }; + + // Build the mask. Note that vslideup unconditionally preserves elements + // below the slide amount in the destination, and thus those elements are + // undefined in the mask. If the mask ends up all true (or undef), it + // will be folded away by general logic.
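+  // Worked example (values invented for illustration): for Mask = <1, 2, 3, 4> + // with NumElts = 4, isMaskedSlidePair sets SrcInfo[0] = {0, -1} (slide V1 + // down by 1) and SrcInfo[1] = {1, 3} (slide V2 up by 3), and the select mask + // built below comes out as <undef, undef, undef, 1>.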
+ SmallVector<SDValue> MaskVals; + for (unsigned i = 0; i != Mask.size(); ++i) { + int M = Mask[i]; + if (M < 0 || (SrcInfo[1].second > 0 && i < (unsigned)SrcInfo[1].second)) { + MaskVals.push_back(DAG.getUNDEF(XLenVT)); + continue; + } + int Src = M >= (int)NumElts; + int Diff = (int)i - (M % NumElts); + bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second; + assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) && + "Must match exactly one of the two slides"); + MaskVals.push_back(DAG.getConstant(C, DL, XLenVT)); + } + assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); + MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + SDValue SelectMask = convertToScalableVector( + ContainerVT.changeVectorElementType(MVT::i1), + DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget); + + SDValue Res = DAG.getUNDEF(ContainerVT); + Res = GetSlide(SrcInfo[0], TrueMask, Res); + Res = GetSlide(SrcInfo[1], SelectMask, Res); + return convertFromScalableVector(VT, Res, DAG, Subtarget); + } // Handle any remaining single source shuffles assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); @@ -8961,6 +9079,10 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, if (VecVT.isScalableVector()) { SDValue SplatZero = DAG.getConstant(0, DL, VecVT); SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT); + if (Src.getOpcode() == ISD::XOR && + ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode())) + return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero, + SplatTrueVal); return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); } @@ -8976,6 +9098,20 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT); + if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + SDValue Xor = Src.getOperand(0); + if (Xor.getOpcode() == RISCVISD::VMXOR_VL) { + SDValue ScalableOnes = Xor.getOperand(1); + if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR && + ScalableOnes.getOperand(0).isUndef() && + ISD::isConstantSplatVectorAllOnes( + ScalableOnes.getOperand(1).getNode())) { + CC = Xor.getOperand(0); + std::swap(SplatZero, SplatTrueVal); + } + } + } + SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), SplatZero, VL); SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 167dbb53c5950..52bb10f9ba19b 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -133,6 +133,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVPostRAExpandPseudoPass(*PR); initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVOptWInstrsPass(*PR); + initializeRISCVFoldMemOffsetPass(*PR); initializeRISCVPreRAExpandPseudoPass(*PR); initializeRISCVExpandPseudoPass(*PR); initializeRISCVVectorPeepholePass(*PR); @@ -588,8 +589,7 @@ void RISCVPassConfig::addPreEmitPass2() { void RISCVPassConfig::addMachineSSAOptimization() { addPass(createRISCVVectorPeepholePass()); - // TODO: Move this to pre regalloc - addPass(createRISCVVMV0EliminationPass()); + addPass(createRISCVFoldMemOffsetPass()); TargetPassConfig::addMachineSSAOptimization(); @@ -602,10 +602,6 @@ void
RISCVPassConfig::addPreRegAlloc() { - // TODO: Move this as late as possible before regalloc - if (TM->getOptLevel() == CodeGenOptLevel::None) - addPass(createRISCVVMV0EliminationPass()); - addPass(createRISCVPreRAExpandPseudoPass()); if (TM->getOptLevel() != CodeGenOptLevel::None) { addPass(createRISCVMergeBaseOffsetOptPass()); @@ -619,6 +615,8 @@ void RISCVPassConfig::addPreRegAlloc() { if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner) addPass(&MachinePipelinerID); + + addPass(createRISCVVMV0EliminationPass()); } void RISCVPassConfig::addFastRegAlloc() { diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 66b989a84b1ce..c36a1e9adccb0 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -50,11 +50,12 @@ class RISCVVLOptimizer : public MachineFunctionPass { StringRef getPassName() const override { return PASS_NAME; } private: - std::optional<MachineOperand> getMinimumVLForUser(MachineOperand &UserOp); + std::optional<MachineOperand> + getMinimumVLForUser(const MachineOperand &UserOp) const; /// Returns the largest common VL MachineOperand that may be used to optimize /// MI. Returns std::nullopt if it failed to find a suitable VL. - std::optional<MachineOperand> checkUsers(MachineInstr &MI); - bool tryReduceVL(MachineInstr &MI); + std::optional<MachineOperand> checkUsers(const MachineInstr &MI) const; + bool tryReduceVL(MachineInstr &MI) const; bool isCandidate(const MachineInstr &MI) const; /// For a given instruction, records what elements of it are demanded by @@ -1152,8 +1153,8 @@ static bool isSupportedInstr(const MachineInstr &MI) { } /// Return true if MO is a vector operand but is used as a scalar operand. -static bool isVectorOpUsedAsScalarOp(MachineOperand &MO) { - MachineInstr *MI = MO.getParent(); +static bool isVectorOpUsedAsScalarOp(const MachineOperand &MO) { + const MachineInstr *MI = MO.getParent(); const RISCVVPseudosTable::PseudoInfo *RVV = RISCVVPseudosTable::getPseudoInfo(MI->getOpcode()); @@ -1261,7 +1262,7 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { } std::optional<MachineOperand> -RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) { +RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { const MachineInstr &UserMI = *UserOp.getParent(); const MCInstrDesc &Desc = UserMI.getDesc(); @@ -1271,6 +1272,11 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) { return std::nullopt; } + if (mayReadPastVL(UserMI)) { + LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n"); + return std::nullopt; + } + unsigned VLOpNum = RISCVII::getVLOpNum(Desc); const MachineOperand &VLOp = UserMI.getOperand(VLOpNum); // Looking for an immediate or a register VL that isn't X0. @@ -1282,7 +1288,7 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) { if (UserOp.isTied()) { assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() && RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc())); - auto DemandedVL = DemandedVLs[&UserMI]; + auto DemandedVL = DemandedVLs.lookup(&UserMI); if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) { LLVM_DEBUG(dbgs() << " Abort because user is passthru in " "instruction with demanded tail\n"); @@ -1304,7 +1310,7 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) { // If we know the demanded VL of UserMI, then we can reduce the VL it // requires.
- if (auto DemandedVL = DemandedVLs[&UserMI]) { + if (auto DemandedVL = DemandedVLs.lookup(&UserMI)) { assert(isCandidate(UserMI)); if (RISCV::isVLKnownLE(*DemandedVL, VLOp)) return DemandedVL; @@ -1313,14 +1319,25 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) { return VLOp; } -std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) { +std::optional<MachineOperand> +RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { std::optional<MachineOperand> CommonVL; - for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) { + SmallSetVector<MachineOperand *, 8> Worklist; + for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) + Worklist.insert(&UserOp); + + while (!Worklist.empty()) { + MachineOperand &UserOp = *Worklist.pop_back_val(); const MachineInstr &UserMI = *UserOp.getParent(); LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n"); - if (mayReadPastVL(UserMI)) { - LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n"); - return std::nullopt; + + if (UserMI.isCopy() && UserMI.getOperand(0).getReg().isVirtual() && + UserMI.getOperand(0).getSubReg() == RISCV::NoSubRegister && + UserMI.getOperand(1).getSubReg() == RISCV::NoSubRegister) { + LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n"); + for (auto &CopyUse : MRI->use_operands(UserMI.getOperand(0).getReg())) + Worklist.insert(&CopyUse); + continue; } auto VLOp = getMinimumVLForUser(UserOp); @@ -1371,7 +1388,7 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) { return CommonVL; } -bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) { +bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n"); unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc()); @@ -1384,7 +1401,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) { return false; } - auto CommonVL = DemandedVLs[&MI]; + auto CommonVL = DemandedVLs.lookup(&MI); if (!CommonVL) return false; diff --git a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp index ccc86da340440..9737474a18f63 100644 --- a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp +++ b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp @@ -131,10 +131,9 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) { // Peek through a single copy to match what isel does.
if (MachineInstr *SrcMI = MRI.getVRegDef(Src); - SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual()) { - assert(SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister); + SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual() && + SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister) Src = SrcMI->getOperand(1).getReg(); - } BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::COPY), RISCV::V0) .addReg(Src); diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp index 2ee0c79b8f7c1..10f4ea99fab32 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp @@ -336,7 +336,7 @@ void SPIRVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (OpNo < MI->getNumOperands()) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) - O << '%' << (Register::virtReg2Index(Op.getReg()) + 1); + O << '%' << (Register(Op.getReg()).virtRegIndex() + 1); else if (Op.isImm()) O << formatImm((int64_t)Op.getImm()); else if (Op.isDFPImm()) diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp index 68cc6a3a7aac1..db8287c4b1e02 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp @@ -77,7 +77,7 @@ static void emitOperand(const MCOperand &Op, SmallVectorImpl<char> &CB) { if (Op.isReg()) { // Emit the id index starting at 1 (0 is an invalid index). support::endian::write( - CB, Register::virtReg2Index(Op.getReg()) + 1, llvm::endianness::little); + CB, Register(Op.getReg()).virtRegIndex() + 1, llvm::endianness::little); } else if (Op.isImm()) { support::endian::write(CB, static_cast<uint32_t>(Op.getImm()), llvm::endianness::little); diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 7b897f7e34c6f..3e9ce1c06850b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -1043,6 +1043,24 @@ static bool buildExtendedBitOpsInst(const SPIRV::IncomingCall *Call, return true; } +/// Helper function for building Intel's bindless image instructions. +static bool buildBindlessImageINTELInst(const SPIRV::IncomingCall *Call, + unsigned Opcode, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + // Generate SPIRV instruction accordingly. + if (Call->isSpirvOp()) + return buildOpFromWrapper(MIRBuilder, Opcode, Call, + GR->getSPIRVTypeID(Call->ReturnType)); + + MIRBuilder.buildInstr(Opcode) + .addDef(Call->ReturnRegister) + .addUse(GR->getSPIRVTypeID(Call->ReturnType)) + .addUse(Call->Arguments[0]); + + return true; +} + static unsigned getNumComponentsForDim(SPIRV::Dim::Dim dim) { switch (dim) { case SPIRV::Dim::DIM_1D: @@ -2232,6 +2250,17 @@ static bool generateExtendedBitOpsInst(const SPIRV::IncomingCall *Call, return buildExtendedBitOpsInst(Call, Opcode, MIRBuilder, GR); } +static bool generateBindlessImageINTELInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + // Lookup the instruction opcode in the TableGen records.
+ const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; + unsigned Opcode = + SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode; + + return buildBindlessImageINTELInst(Call, Opcode, MIRBuilder, GR); +} + static bool buildNDRange(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { @@ -2809,6 +2838,8 @@ std::optional lowerBuiltin(const StringRef DemangledCall, return generateCoopMatrInst(Call.get(), MIRBuilder, GR); case SPIRV::ExtendedBitOps: return generateExtendedBitOpsInst(Call.get(), MIRBuilder, GR); + case SPIRV::BindlessINTEL: + return generateBindlessImageINTELInst(Call.get(), MIRBuilder, GR); } return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 85f42fc08a4e0..c9a5c92ee3a66 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -66,6 +66,7 @@ def Construct : BuiltinGroup; def CoopMatr : BuiltinGroup; def ICarryBorrow : BuiltinGroup; def ExtendedBitOps : BuiltinGroup; +def BindlessINTEL : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. The information in the record @@ -708,6 +709,11 @@ defm : DemangledNativeBuiltin<"__spirv_CooperativeMatrixStoreCheckedINTEL", Open defm : DemangledNativeBuiltin<"__spirv_CooperativeMatrixConstructCheckedINTEL", OpenCL_std, CoopMatr, 5, 5, OpCooperativeMatrixConstructCheckedINTEL>; defm : DemangledNativeBuiltin<"__spirv_CooperativeMatrixGetElementCoordINTEL", OpenCL_std, CoopMatr, 2, 2, OpCooperativeMatrixGetElementCoordINTEL>; +// SPV_INTEL_bindless_images builtin records: +defm : DemangledNativeBuiltin<"__spirv_ConvertHandleToImageINTEL", OpenCL_std, BindlessINTEL, 1, 1, OpConvertHandleToImageINTEL>; +defm : DemangledNativeBuiltin<"__spirv_ConvertHandleToSamplerINTEL", OpenCL_std, BindlessINTEL, 1, 1, OpConvertHandleToSamplerINTEL>; +defm : DemangledNativeBuiltin<"__spirv_ConvertHandleToSampledImageINTEL", OpenCL_std, BindlessINTEL, 1, 1, OpConvertHandleToSampledImageINTEL>; + //===----------------------------------------------------------------------===// // Class defining a work/sub group builtin that should be translated into a // SPIR-V instruction using the defined properties. 
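(Illustrative note, with hypothetical operand names: given the records above, a demangled call to __spirv_ConvertHandleToImageINTEL lowers through buildBindlessImageINTELInst to a single instruction of the form %res = OpConvertHandleToImageINTEL %ImgTy %handle; the sampler and sampled-image conversions follow the same shape.)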
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 13683fd9a266d..333e0131ac228 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -71,6 +71,8 @@ static const std::map<std::string, SPIRV::Extension::Extension> SPIRV::Extension::Extension::SPV_KHR_linkonce_odr}, {"SPV_INTEL_inline_assembly", SPIRV::Extension::Extension::SPV_INTEL_inline_assembly}, + {"SPV_INTEL_bindless_images", + SPIRV::Extension::Extension::SPV_INTEL_bindless_images}, {"SPV_INTEL_bfloat16_conversion", SPIRV::Extension::Extension::SPV_INTEL_bfloat16_conversion}, {"SPV_KHR_subgroup_rotate", @@ -84,7 +86,9 @@ static const std::map<std::string, SPIRV::Extension::Extension> {"SPV_KHR_cooperative_matrix", SPIRV::Extension::Extension::SPV_KHR_cooperative_matrix}, {"SPV_KHR_non_semantic_info", - SPIRV::Extension::Extension::SPV_KHR_non_semantic_info}}; + SPIRV::Extension::Extension::SPV_KHR_non_semantic_info}, + {"SPV_INTEL_long_composites", + SPIRV::Extension::Extension::SPV_INTEL_long_composites}}; bool SPIRVExtensionsParser::parse(cl::Option &O, llvm::StringRef ArgName, llvm::StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index e2f1b211caa5c..abc49b05c247a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -882,6 +882,17 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(const StructType *Ty, MachineIRBuilder &MIRBuilder, bool EmitIR) { SmallVector<Register, 4> FieldTypes; + constexpr unsigned MaxWordCount = UINT16_MAX; + const size_t NumElements = Ty->getNumElements(); + + size_t MaxNumElements = MaxWordCount - 2; + size_t SPIRVStructNumElements = NumElements; + if (NumElements > MaxNumElements) { + // Do adjustments for continued instructions.
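+    // For example, a struct with NumElements = 70000 and MaxWordCount = 65535 + // keeps the first 65533 members on the OpTypeStruct itself; the remaining + // 4467 spill into OpTypeStructContinuedINTEL instructions carrying up to + // 65534 members each.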
+ SPIRVStructNumElements = MaxNumElements; + MaxNumElements = MaxWordCount - 1; + } + for (const auto &Elem : Ty->elements()) { SPIRVType *ElemTy = findSPIRVType(toTypedPointer(Elem), MIRBuilder); assert(ElemTy && ElemTy->getOpcode() != SPIRV::OpTypeVoid && @@ -889,16 +900,28 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(const StructType *Ty, FieldTypes.push_back(getSPIRVTypeID(ElemTy)); } Register ResVReg = createTypeVReg(MIRBuilder); - return createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) { + if (Ty->hasName()) + buildOpName(ResVReg, Ty->getName(), MIRBuilder); + if (Ty->isPacked()) + buildOpDecorate(ResVReg, MIRBuilder, SPIRV::Decoration::CPacked, {}); + + auto SPVType = createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) { auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeStruct).addDef(ResVReg); - for (const auto &Ty : FieldTypes) - MIB.addUse(Ty); - if (Ty->hasName()) - buildOpName(ResVReg, Ty->getName(), MIRBuilder); - if (Ty->isPacked()) - buildOpDecorate(ResVReg, MIRBuilder, SPIRV::Decoration::CPacked, {}); + for (size_t I = 0; I < SPIRVStructNumElements; ++I) + MIB.addUse(FieldTypes[I]); return MIB; }); + + for (size_t I = SPIRVStructNumElements; I < NumElements; + I += MaxNumElements) { + createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) { + auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeStructContinuedINTEL); + for (size_t J = I; J < std::min(I + MaxNumElements, NumElements); ++J) + MIB.addUse(FieldTypes[J]); + return MIB; + }); + } + return SPVType; } SPIRVType *SPIRVGlobalRegistry::getOrCreateSpecialType( @@ -968,7 +991,8 @@ SPIRVType *SPIRVGlobalRegistry::findSPIRVType( Register SPIRVGlobalRegistry::getSPIRVTypeID(const SPIRVType *SpirvType) const { assert(SpirvType && "Attempting to get type id for nullptr type."); - if (SpirvType->getOpcode() == SPIRV::OpTypeForwardPointer) + if (SpirvType->getOpcode() == SPIRV::OpTypeForwardPointer || + SpirvType->getOpcode() == SPIRV::OpTypeStructContinuedINTEL) return SpirvType->uses().begin()->getReg(); return SpirvType->defs().begin()->getReg(); } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp index 9a140e75f8ea7..49b6b3bbb6cef 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp @@ -32,12 +32,14 @@ bool SPIRVInstrInfo::isConstantInstr(const MachineInstr &MI) const { case SPIRV::OpConstantI: case SPIRV::OpConstantF: case SPIRV::OpConstantComposite: + case SPIRV::OpConstantCompositeContinuedINTEL: case SPIRV::OpConstantSampler: case SPIRV::OpConstantNull: case SPIRV::OpSpecConstantTrue: case SPIRV::OpSpecConstantFalse: case SPIRV::OpSpecConstant: case SPIRV::OpSpecConstantComposite: + case SPIRV::OpSpecConstantCompositeContinuedINTEL: case SPIRV::OpSpecConstantOp: case SPIRV::OpUndef: case SPIRV::OpConstantFunctionPointerINTEL: @@ -76,7 +78,8 @@ bool SPIRVInstrInfo::isTypeDeclInstr(const MachineInstr &MI) const { auto DefRegClass = MRI.getRegClassOrNull(MI.getOperand(0).getReg()); return DefRegClass && DefRegClass->getID() == SPIRV::TYPERegClass.getID(); } else { - return MI.getOpcode() == SPIRV::OpTypeForwardPointer; + return MI.getOpcode() == SPIRV::OpTypeForwardPointer || + MI.getOpcode() == SPIRV::OpTypeStructContinuedINTEL; } } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 981e224a66399..5e9a9bd145bca 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -188,6 +188,8 @@ def OpTypeArray: Op<28, (outs
TYPE:$type), (ins TYPE:$elementType, ID:$length), def OpTypeRuntimeArray: Op<29, (outs TYPE:$type), (ins TYPE:$elementType), "$type = OpTypeRuntimeArray $elementType">; def OpTypeStruct: Op<30, (outs TYPE:$res), (ins variable_ops), "$res = OpTypeStruct">; +def OpTypeStructContinuedINTEL: Op<6090, (outs), (ins variable_ops), + "OpTypeStructContinuedINTEL">; def OpTypeOpaque: Op<31, (outs TYPE:$res), (ins StringImm:$name, variable_ops), "$res = OpTypeOpaque $name">; def OpTypePointer: Op<32, (outs TYPE:$res), (ins StorageClass:$storage, TYPE:$type), @@ -252,6 +254,9 @@ defm OpConstant: IntFPImm<43, "OpConstant">; def OpConstantComposite: Op<44, (outs ID:$res), (ins TYPE:$type, variable_ops), "$res = OpConstantComposite $type">; +def OpConstantCompositeContinuedINTEL: Op<6091, (outs), (ins variable_ops), + "OpConstantCompositeContinuedINTEL">; + def OpConstantSampler: Op<45, (outs ID:$res), (ins TYPE:$t, SamplerAddressingMode:$s, i32imm:$p, SamplerFilterMode:$f), "$res = OpConstantSampler $t $s $p $f">; @@ -263,6 +268,8 @@ def OpSpecConstant: Op<50, (outs ID:$res), (ins TYPE:$type, i32imm:$imm, variabl "$res = OpSpecConstant $type $imm">; def OpSpecConstantComposite: Op<51, (outs ID:$res), (ins TYPE:$type, variable_ops), "$res = OpSpecConstantComposite $type">; +def OpSpecConstantCompositeContinuedINTEL: Op<6092, (outs), (ins variable_ops), + "OpSpecConstantCompositeContinuedINTEL">; def OpSpecConstantOp: Op<52, (outs ID:$res), (ins TYPE:$t, i32imm:$c, ID:$o, variable_ops), "$res = OpSpecConstantOp $t $c $o">; @@ -476,6 +483,8 @@ def OpVectorShuffle: Op<79, (outs ID:$res), (ins TYPE:$ty, ID:$v1, ID:$v2, varia "$res = OpVectorShuffle $ty $v1 $v2">; def OpCompositeConstruct: Op<80, (outs ID:$res), (ins TYPE:$type, variable_ops), "$res = OpCompositeConstruct $type">; +def OpCompositeConstructContinuedINTEL: Op<6096, (outs), (ins variable_ops), + "OpCompositeConstructContinuedINTEL">; def OpCompositeExtract: Op<81, (outs ID:$res), (ins TYPE:$type, ID:$base, variable_ops), "$res = OpCompositeExtract $type $base">; def OpCompositeInsert: Op<82, (outs ID:$r), (ins TYPE:$ty, ID:$obj, ID:$base, variable_ops), @@ -931,3 +940,11 @@ def OpCooperativeMatrixPrefetchINTEL: Op<6449, (outs), // SPV_EXT_arithmetic_fence def OpArithmeticFenceEXT: Op<6145, (outs ID:$res), (ins TYPE:$type, ID:$target), "$res = OpArithmeticFenceEXT $type $target">; + +// SPV_INTEL_bindless_images +def OpConvertHandleToImageINTEL: Op<6529, (outs ID:$res), (ins TYPE:$type, ID:$operand), + "$res = OpConvertHandleToImageINTEL $type $operand">; +def OpConvertHandleToSamplerINTEL: Op<6530, (outs ID:$res), (ins TYPE:$type, ID:$operand), + "$res = OpConvertHandleToSamplerINTEL $type $operand">; +def OpConvertHandleToSampledImageINTEL: Op<6531, (outs ID:$res), (ins TYPE:$type, ID:$operand), + "$res = OpConvertHandleToSampledImageINTEL $type $operand">; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index a7a5ecead6f5f..fea3965dd8f2a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -362,6 +362,16 @@ void SPIRVModuleAnalysis::visitDecl( } else if (Opcode == SPIRV::OpFunction || Opcode == SPIRV::OpFunctionParameter) { GReg = handleFunctionOrParameter(MF, MI, GlobalToGReg, IsFunDef); + } else if (Opcode == SPIRV::OpTypeStruct) { + GReg = handleTypeDeclOrConstant(MI, SignatureToGReg); + const MachineInstr *NextInstr = MI.getNextNode(); + while (NextInstr && + NextInstr->getOpcode() == SPIRV::OpTypeStructContinuedINTEL) { 
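+        // Each continued instruction gets its own global register, which is + // aliased to the register identifying it (its first operand, since the + // continued opcodes carry no def), and its standalone emission is skipped.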
+ Register Tmp = handleTypeDeclOrConstant(*NextInstr, SignatureToGReg); + MAI.setRegisterAlias(MF, NextInstr->getOperand(0).getReg(), Tmp); + MAI.setSkipEmission(NextInstr); + NextInstr = NextInstr->getNextNode(); + } } else if (TII->isTypeDeclInstr(MI) || TII->isConstantInstr(MI) || TII->isInlineAsmDefInstr(MI)) { GReg = handleTypeDeclOrConstant(MI, SignatureToGReg); @@ -1677,6 +1687,17 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability( SPIRV::Capability::CooperativeMatrixInvocationInstructionsINTEL); break; + case SPIRV::OpConvertHandleToImageINTEL: + case SPIRV::OpConvertHandleToSamplerINTEL: + case SPIRV::OpConvertHandleToSampledImageINTEL: + if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_bindless_images)) + report_fatal_error("OpConvertHandleTo[Image/Sampler/SampledImage]INTEL " + "instructions require the following SPIR-V extension: " + "SPV_INTEL_bindless_images", + false); + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_bindless_images); + Reqs.addCapability(SPIRV::Capability::BindlessImagesINTEL); + break; case SPIRV::OpKill: { Reqs.addCapability(SPIRV::Capability::Shader); } break; @@ -1714,6 +1735,19 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability(SPIRV::Capability::StorageImageWriteWithoutFormat); break; } + case SPIRV::OpTypeStructContinuedINTEL: + case SPIRV::OpConstantCompositeContinuedINTEL: + case SPIRV::OpSpecConstantCompositeContinuedINTEL: + case SPIRV::OpCompositeConstructContinuedINTEL: { + if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_long_composites)) + report_fatal_error( + "Continued instructions require the " + "following SPIR-V extension: SPV_INTEL_long_composites", + false); + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_long_composites); + Reqs.addCapability(SPIRV::Capability::LongCompositesINTEL); + break; + } default: break; diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index fec3cb0091bf5..c7cb0a50752fe 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -309,6 +309,8 @@ defm SPV_EXT_arithmetic_fence : ExtensionOperand<112>; defm SPV_EXT_optnone : ExtensionOperand<113>; defm SPV_INTEL_joint_matrix : ExtensionOperand<114>; defm SPV_INTEL_float_controls2 : ExtensionOperand<115>; +defm SPV_INTEL_bindless_images : ExtensionOperand<116>; +defm SPV_INTEL_long_composites : ExtensionOperand<117>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -505,6 +507,8 @@ defm CooperativeMatrixBFloat16ComponentTypeINTEL : CapabilityOperand<6437, 0, 0, defm RoundToInfinityINTEL : CapabilityOperand<5582, 0, 0, [SPV_INTEL_float_controls2], []>; defm FloatingPointModeINTEL : CapabilityOperand<5583, 0, 0, [SPV_INTEL_float_controls2], []>; defm FunctionFloatControlINTEL : CapabilityOperand<5821, 0, 0, [SPV_INTEL_float_controls2], []>; +defm LongCompositesINTEL : CapabilityOperand<6089, 0, 0, [SPV_INTEL_long_composites], []>; +defm BindlessImagesINTEL : CapabilityOperand<6528, 0, 0, [SPV_INTEL_bindless_images], []>; //===----------------------------------------------------------------------===// // Multiclass used to define SourceLanguage enum values and at the same time diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index 5c1f036abef5a..2662241ef8499 100644 --- 
a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -65,7 +65,7 @@ static void checkFrameBase(WebAssemblyFunctionInfo &MFI, unsigned Local, if (MFI.isFrameBaseVirtual() && Reg == MFI.getFrameBaseVreg()) { LLVM_DEBUG({ dbgs() << "Allocating local " << Local << "for VReg " - << Register::virtReg2Index(Reg) << '\n'; + << Register(Reg).virtRegIndex() << '\n'; }); MFI.setFrameBaseLocal(Local); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 75011ab3c8721..10b4f595546cc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -64,7 +64,7 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // exist. However we need to handle both here. auto &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = - Register::isVirtualRegister(DestReg) + Register(DestReg).isVirtual() ? MRI.getRegClass(DestReg) : MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 6c9824bbd5d91..8c9fcdee3375a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -121,18 +121,18 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { void stackifyVReg(MachineRegisterInfo &MRI, unsigned VReg) { assert(MRI.getUniqueVRegDef(VReg)); - auto I = Register::virtReg2Index(VReg); + auto I = Register(VReg).virtRegIndex(); if (I >= VRegStackified.size()) VRegStackified.resize(I + 1); VRegStackified.set(I); } void unstackifyVReg(unsigned VReg) { - auto I = Register::virtReg2Index(VReg); + auto I = Register(VReg).virtRegIndex(); if (I < VRegStackified.size()) VRegStackified.reset(I); } bool isVRegStackified(unsigned VReg) const { - auto I = Register::virtReg2Index(VReg); + auto I = Register(VReg).virtRegIndex(); if (I >= VRegStackified.size()) return false; return VRegStackified.test(I); @@ -141,12 +141,12 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { void initWARegs(MachineRegisterInfo &MRI); void setWAReg(unsigned VReg, unsigned WAReg) { assert(WAReg != WebAssembly::UnusedReg); - auto I = Register::virtReg2Index(VReg); + auto I = Register(VReg).virtRegIndex(); assert(I < WARegs.size()); WARegs[I] = WAReg; } unsigned getWAReg(unsigned VReg) const { - auto I = Register::virtReg2Index(VReg); + auto I = Register(VReg).virtRegIndex(); assert(I < WARegs.size()); return WARegs[I]; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp index 1e2bee7a5c73b..cb152f500436a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -93,7 +93,7 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { FI->setFrameBaseVreg(VReg); LLVM_DEBUG({ dbgs() << "replacing preg " << PReg << " with " << VReg << " (" - << Register::virtReg2Index(VReg) << ")\n"; + << Register(VReg).virtRegIndex() << ")\n"; }); } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d805a76754c71..1c9d43ce4c062 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6128,10 +6128,26 @@ 
static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, SDValue Sub = N.getOperand(1); EVT SubVT = Sub.getValueType(); unsigned NumSubElts = SubVT.getVectorNumElements(); + uint64_t InsertIdx = N.getConstantOperandVal(2); + // Handle CONCAT(SUB0, SUB1). + // Limit this to vXi64 vector cases to make the most of cross lane shuffles. + if (Depth > 0 && InsertIdx == NumSubElts && NumElts == (2 * NumSubElts) && + NumBitsPerElt == 64 && Src.getOpcode() == ISD::INSERT_SUBVECTOR && + Src.getOperand(0).isUndef() && + Src.getOperand(1).getValueType() == SubVT && + Src.getConstantOperandVal(2) == 0 && + SDNode::areOnlyUsersOf({N.getNode(), Src.getNode()}, Sub.getNode())) { + for (int i = 0; i != (int)NumSubElts; ++i) + Mask.push_back(i); + for (int i = 0; i != (int)NumSubElts; ++i) + Mask.push_back(i + NumElts); + Ops.push_back(Src.getOperand(1)); + Ops.push_back(Sub); + return true; + } if (!N->isOnlyUserOf(Sub.getNode())) return false; SDValue SubBC = peekThroughBitcasts(Sub); - uint64_t InsertIdx = N.getConstantOperandVal(2); // Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)). if (SubBC.getOpcode() == ISD::EXTRACT_SUBVECTOR && SubBC.getOperand(0).getValueSizeInBits() == NumSizeInBits) { @@ -6154,21 +6170,6 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, Ops.push_back(SubBCSrc); return true; } - // Handle CONCAT(SUB0, SUB1). - // Limit this to vXi64 vector cases to make the most of cross lane shuffles. - if (Depth > 0 && InsertIdx == NumSubElts && NumElts == (2 * NumSubElts) && - NumBitsPerElt == 64 && Src.getOpcode() == ISD::INSERT_SUBVECTOR && - Src.getOperand(0).isUndef() && - Src.getOperand(1).getValueType() == SubVT && - Src.getConstantOperandVal(2) == 0) { - for (int i = 0; i != (int)NumSubElts; ++i) - Mask.push_back(i); - for (int i = 0; i != (int)NumSubElts; ++i) - Mask.push_back(i + NumElts); - Ops.push_back(Src.getOperand(1)); - Ops.push_back(Sub); - return true; - } // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)). SmallVector<int, 64> SubMask; SmallVector<SDValue, 2> SubInputs; @@ -40064,9 +40065,13 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, if (Depth < 1) return SDValue(); - bool HasVariableMask = llvm::any_of(SrcNodes, [](const SDNode *N) { + int NumVariableMasks = llvm::count_if(SrcNodes, [](const SDNode *N) { return isTargetShuffleVariableMask(N->getOpcode()); }); + bool HasSlowVariableMask = llvm::any_of(SrcNodes, [](const SDNode *N) { + return (N->getOpcode() == X86ISD::VPERMV3 || + N->getOpcode() == X86ISD::VPERMV); + }); // Depth threshold above which we can efficiently use variable mask shuffles. int VariableCrossLaneShuffleDepth = Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2; int VariablePerLaneShuffleDepth = Subtarget.hasFastVariablePerLaneShuffle() ? 1 : 2; AllowVariableCrossLaneMask &= - (Depth >= VariableCrossLaneShuffleDepth) || HasVariableMask; + (Depth >= VariableCrossLaneShuffleDepth) || NumVariableMasks; AllowVariablePerLaneMask &= - (Depth >= VariablePerLaneShuffleDepth) || HasVariableMask; - // VPERMI2W/VPERMI2B are 3 uops on Skylake and Icelake so we require a + (Depth >= VariablePerLaneShuffleDepth) || NumVariableMasks; + // VPERM2W/VPERM2B are 3 uops on Skylake and Icelake so we require a // higher depth before combining them.
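+  // Every variable mask already present in the chain lowers the depth we + // require before allowing another one, and a slow VPERMV/VPERMV3 source + // node permits the combine outright.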
+ int BWIVPERMV3ShuffleDepth = + VariableCrossLaneShuffleDepth + 2 - NumVariableMasks; bool AllowBWIVPERMV3 = - (Depth >= (VariableCrossLaneShuffleDepth + 2) || HasVariableMask); + (Depth >= BWIVPERMV3ShuffleDepth || HasSlowVariableMask); // If root was a VPERMV3 node, always allow a variable shuffle. if (Root.getOpcode() == X86ISD::VPERMV3) @@ -41077,12 +41084,17 @@ static SDValue combineX86ShufflesRecursively( } } - // Peek through any free extract_subvector nodes back to root size. - for (SDValue &Op : Ops) - while (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && - (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 && - isNullConstant(Op.getOperand(1))) - Op = Op.getOperand(0); + // Peek through any free bitcasts/extract_subvector nodes back to root size. + for (SDValue &Op : Ops) { + SDValue BC = Op; + if (BC.getOpcode() == ISD::BITCAST && BC.hasOneUse()) + BC = peekThroughOneUseBitcasts(BC); + while (BC.getOpcode() == ISD::EXTRACT_SUBVECTOR && + (RootSizeInBits % BC.getOperand(0).getValueSizeInBits()) == 0 && + isNullConstant(BC.getOperand(1))) { + Op = BC = BC.getOperand(0); + } + } // Remove unused/repeated shuffle source ops. resolveTargetShuffleInputsAndMask(Ops, Mask); diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp index 5021425152f6c..6aaabca95c4c3 100644 --- a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp @@ -293,7 +293,8 @@ void coro::doRematerializations( for (Instruction *U : E.second) { // Don't process a user twice (this can happen if the instruction uses // more than one rematerializable def) - if (AllRemats.count(U)) + auto [It, Inserted] = AllRemats.try_emplace(U); + if (!Inserted) continue; // Constructor creates the whole RematGraph for the given Use @@ -306,7 +307,7 @@ void coro::doRematerializations( ++I) { (*I)->Node->dump(); } dbgs() << "\n";); - AllRemats[U] = std::move(RematUPtr); + It->second = std::move(RematUPtr); } } diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 1a2a27d22ae68..2d046f09f1b2b 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2064,7 +2064,7 @@ static bool destArrayCanBeWidened(CallInst *CI) { return true; } -static GlobalVariable *widenGlobalVariable(GlobalVariable *OldVar, Function *F, +static GlobalVariable *widenGlobalVariable(GlobalVariable *OldVar, unsigned NumBytesToPad, unsigned NumBytesToCopy) { if (!OldVar->hasInitializer()) @@ -2083,10 +2083,10 @@ static GlobalVariable *widenGlobalVariable(GlobalVariable *OldVar, Function *F, StrData.push_back('\0'); auto Arr = ArrayRef(StrData.data(), NumBytesToCopy + NumBytesToPad); // Create new padded version of global variable. - Constant *SourceReplace = ConstantDataArray::get(F->getContext(), Arr); + Constant *SourceReplace = ConstantDataArray::get(OldVar->getContext(), Arr); GlobalVariable *NewGV = new GlobalVariable( - *(F->getParent()), SourceReplace->getType(), true, OldVar->getLinkage(), - SourceReplace, SourceReplace->getName()); + *(OldVar->getParent()), SourceReplace->getType(), true, + OldVar->getLinkage(), SourceReplace, SourceReplace->getName()); // Copy any other attributes from original global variable // e.g.
unnamed_addr NewGV->copyAttributesFrom(OldVar); @@ -2114,13 +2114,13 @@ static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad, } } -static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar, +static bool tryWidenGlobalArrayAndDests(GlobalVariable *SourceVar, const unsigned NumBytesToPad, const unsigned NumBytesToCopy, ConstantInt *BytesToCopyOp, ConstantDataArray *SourceDataArray) { auto *NewSourceGV = - widenGlobalVariable(SourceVar, F, NumBytesToPad, NumBytesToCopy); + widenGlobalVariable(SourceVar, NumBytesToPad, NumBytesToCopy); if (!NewSourceGV) return false; @@ -2158,8 +2158,6 @@ static bool tryWidenGlobalArraysUsedByMemcpy( if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI)) continue; - Function *F = CI->getCalledFunction(); - auto *BytesToCopyOp = dyn_cast(CI->getArgOperand(2)); if (!BytesToCopyOp) continue; @@ -2191,7 +2189,7 @@ static bool tryWidenGlobalArraysUsedByMemcpy( .getNumBytesToPadGlobalArray(NumBytesToCopy, SourceDataArray->getType()); if (NumBytesToPad) { - return tryWidenGlobalArrayAndDests(F, GV, NumBytesToPad, NumBytesToCopy, + return tryWidenGlobalArrayAndDests(GV, NumBytesToPad, NumBytesToCopy, BytesToCopyOp, SourceDataArray); } } diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index d748b162d7809..2e18d3af36a92 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -89,6 +89,7 @@ STATISTIC(FoundProfiledCalleeMaxDepth, "Maximum depth of profiled callees found via tail calls"); STATISTIC(FoundProfiledCalleeNonUniquelyCount, "Number of profiled callees found via multiple tail call chains"); +STATISTIC(DeferredBackedges, "Number of backedges with deferred cloning"); static cl::opt DotFilePathPrefix( "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, @@ -127,6 +128,10 @@ static cl::opt AllowRecursiveCallsites( "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles")); +static cl::opt CloneRecursiveContexts( + "memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, + cl::desc("Allow cloning of contexts through recursive cycles")); + // When disabled, try to detect and prevent cloning of recursive contexts. // This is only necessary until we support cloning through recursive cycles. // Leave on by default for now, as disabling requires a little bit of compile @@ -134,7 +139,7 @@ static cl::opt AllowRecursiveCallsites( // hinted bytes reporting a bit when -memprof-report-hinted-sizes is enabled. static cl::opt AllowRecursiveContexts( "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, - cl::desc("Allow cloning of contexts through recursive cycles")); + cl::desc("Allow cloning of contexts having recursive cycles")); namespace llvm { cl::opt EnableMemProfContextDisambiguation( @@ -293,37 +298,40 @@ class CallsiteContextGraph { // TODO: Should this be a map (from Caller node) for more efficient lookup? std::vector> CallerEdges; - // Get the list of edges from which we can compute allocation information - // such as the context ids and allocation type of this node. - const std::vector> * - getEdgesWithAllocInfo() const { - // If node has any callees, compute from those, otherwise compute from - // callers (i.e. if this is the leaf allocation node).
- if (!CalleeEdges.empty()) - return &CalleeEdges; + // Returns true if we need to look at the caller edges for determining the + // node context ids and allocation type. + bool useCallerEdgesForContextInfo() const { // Typically if the callee edges are empty either the caller edges are // also empty, or this is an allocation (leaf node). However, if we are // allowing recursive callsites and contexts this will be violated for // incompletely cloned recursive cycles. - assert(CallerEdges.empty() || IsAllocation || + assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation || (AllowRecursiveCallsites && AllowRecursiveContexts)); - if (!CallerEdges.empty() && IsAllocation) - return &CallerEdges; - return nullptr; + // When cloning for a recursive context, during cloning we might be in the + // midst of cloning for a recurrence and have moved context ids off of a + // caller edge onto the clone but not yet off of the incoming caller + // (back) edge. If we don't look at those we miss the fact that this node + // still has context ids of interest. + return IsAllocation || CloneRecursiveContexts; } // Compute the context ids for this node from the union of its edge context // ids. DenseSet getContextIds() const { - DenseSet ContextIds; - auto *Edges = getEdgesWithAllocInfo(); - if (!Edges) - return {}; unsigned Count = 0; + // Compute the number of ids for reserve below. In general we only need to + // look at one set of edges, typically the callee edges, since other than + // allocations and in some cases during recursion cloning, all the context + // ids on the callers should also flow out via callee edges. for (auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges) Count += Edge->getContextIds().size(); + DenseSet ContextIds; ContextIds.reserve(Count); - for (auto &Edge : *Edges) + auto Edges = llvm::concat>( + CalleeEdges, useCallerEdgesForContextInfo() + ? CallerEdges + : std::vector>()); + for (const auto &Edge : Edges) ContextIds.insert(Edge->getContextIds().begin(), Edge->getContextIds().end()); return ContextIds; @@ -332,13 +340,14 @@ class CallsiteContextGraph { // Compute the allocation type for this node from the OR of its edge // allocation types. uint8_t computeAllocType() const { - auto *Edges = getEdgesWithAllocInfo(); - if (!Edges) - return (uint8_t)AllocationType::None; uint8_t BothTypes = (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; uint8_t AllocType = (uint8_t)AllocationType::None; - for (auto &Edge : *Edges) { + auto Edges = llvm::concat>( + CalleeEdges, useCallerEdgesForContextInfo() + ? CallerEdges + : std::vector>()); + for (const auto &Edge : Edges) { AllocType |= Edge->AllocTypes; // Bail early if alloc type reached both, no further refinement. if (AllocType == BothTypes) @@ -350,10 +359,11 @@ class CallsiteContextGraph { // The context ids set for this node is empty if its edge context ids are // also all empty. bool emptyContextIds() const { - auto *Edges = getEdgesWithAllocInfo(); - if (!Edges) - return true; - for (auto &Edge : *Edges) { + auto Edges = llvm::concat>( + CalleeEdges, useCallerEdgesForContextInfo() + ? CallerEdges + : std::vector>()); + for (const auto &Edge : Edges) { if (!Edge->getContextIds().empty()) return false; } @@ -434,6 +444,14 @@ class CallsiteContextGraph { // for contexts including this edge. uint8_t AllocTypes = 0; + // Set just before initiating cloning when cloning of recursive contexts is + // enabled.
Used to defer cloning of backedges until we have done cloning of + // the callee node for non-backedge caller edges. This exposes cloning + // opportunities through the backedge of the cycle. + // TODO: Note that this is not updated during cloning, and it is unclear + // whether that would be needed. + bool IsBackedge = false; + // The set of IDs for contexts including this edge. DenseSet ContextIds; @@ -722,6 +740,9 @@ class CallsiteContextGraph { void moveCalleeEdgeToNewCaller(const std::shared_ptr &Edge, ContextNode *NewCaller); + void markBackedges(ContextNode *Node, DenseSet &Visited, + DenseSet &CurrentStack); + /// Recursively perform cloning on the graph for the given Node and its /// callers, in order to uniquely identify the allocation behavior of an /// allocation given its context. The context ids of the allocation being @@ -2874,6 +2895,7 @@ template void CallsiteContextGraph::ContextEdge::print( raw_ostream &OS) const { OS << "Edge from Callee " << Callee << " to Caller: " << Caller + << (IsBackedge ? " (BE)" : "") << " AllocTypes: " << getAllocTypeString(AllocTypes); OS << " ContextIds:"; std::vector SortedIds(ContextIds.begin(), ContextIds.end()); @@ -3115,6 +3137,8 @@ void CallsiteContextGraph:: // node (Edge's current callee may be the original node too). assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode()); + bool EdgeIsRecursive = Edge->Callee == Edge->Caller; + ContextNode *OldCallee = Edge->Callee; // We might already have an edge to the new callee from earlier cloning for a @@ -3181,8 +3205,16 @@ void CallsiteContextGraph:: // If this is a direct recursion edge, use NewCallee (the clone) as the // callee as well, so that any edge updated/created here is also direct // recursive. - if (CalleeToUse == OldCallee) + if (CalleeToUse == OldCallee) { + // If this is a recursive edge, see if we already moved a recursive edge + // (which would have to have been this one) - if we were only moving a + // subset of context ids it would still be on OldCallee. + if (EdgeIsRecursive) { + assert(OldCalleeEdge == Edge); + continue; + } CalleeToUse = NewCallee; + } // The context ids moving to the new callee are the subset of this edge's // context ids and the context ids on the caller edge being moved. DenseSet EdgeContextIdsToMove = @@ -3369,9 +3401,48 @@ void CallsiteContextGraph:: } } +// This is the standard DFS based backedge discovery algorithm. +template +void CallsiteContextGraph::markBackedges( + ContextNode *Node, DenseSet &Visited, + DenseSet &CurrentStack) { + auto I = Visited.insert(Node); + // We should only call this for unvisited nodes. + assert(I.second); + (void)I; + for (auto &CalleeEdge : Node->CalleeEdges) { + auto *Callee = CalleeEdge->Callee; + if (Visited.count(Callee)) { + // Since this was already visited we need to check if it is currently on + // the recursive stack in which case it is a backedge. + if (CurrentStack.count(Callee)) + CalleeEdge->IsBackedge = true; + continue; + } + CurrentStack.insert(Callee); + markBackedges(Callee, Visited, CurrentStack); + CurrentStack.erase(Callee); + } +} + template void CallsiteContextGraph::identifyClones() { + // If we are cloning recursive contexts, find and mark backedges from all root + // callers, using the typical DFS based backedge analysis. DenseSet Visited; + if (CloneRecursiveContexts) { + DenseSet CurrentStack; + for (auto &Entry : NonAllocationCallToContextNodeMap) { + auto *Node = Entry.second; + if (Node->isRemoved()) + continue; + // It is a root if it doesn't have callers. 
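+ // Visited is shared across all of these root walks, so any node reachable + // from more than one root only has its callee edges examined once.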
+ if (!Node->CallerEdges.empty()) + continue; + markBackedges(Node, Visited, CurrentStack); + assert(CurrentStack.empty()); + } + } for (auto &Entry : AllocationCallToContextNodeMap) { Visited.clear(); identifyClones(Entry.second, Visited, Entry.second->getContextIds()); @@ -3430,6 +3501,14 @@ void CallsiteContextGraph::identifyClones( assert(!is_contained(Node->CallerEdges, Edge)); continue; } + // Defer backedges. See comments further below where these edges are + // handled during the cloning of this Node. + if (Edge->IsBackedge) { + // We should only mark these if cloning recursive contexts, where we + // need to do this deferral. + assert(CloneRecursiveContexts); + continue; + } // Ignore any caller we previously visited via another edge. if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) { identifyClones(Edge->Caller, Visited, AllocContextIds); @@ -3483,6 +3562,7 @@ void CallsiteContextGraph::identifyClones( assert(Node->AllocTypes != (uint8_t)AllocationType::None); DenseSet RecursiveContextIds; + assert(AllowRecursiveContexts || !CloneRecursiveContexts); // If we are allowing recursive callsites, but have also disabled recursive // contexts, look for context ids that show up in multiple caller edges. if (AllowRecursiveCallsites && !AllowRecursiveContexts) { @@ -3505,6 +3585,13 @@ void CallsiteContextGraph::identifyClones( // makes it less error-prone. auto CallerEdges = Node->CallerEdges; for (auto &CallerEdge : CallerEdges) { + // Skip any that have been removed by an earlier recursive call. + if (CallerEdge->isRemoved()) { + assert(!is_contained(Node->CallerEdges, CallerEdge)); + continue; + } + assert(CallerEdge->Callee == Node); + // See if cloning the prior caller edge left this node with a single alloc // type or a single caller. In that case no more cloning of Node is needed. if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) @@ -3546,13 +3633,100 @@ void CallsiteContextGraph::identifyClones( // // Then check if by cloning node at least one of the callee edges will be // disambiguated by splitting out different context ids. + // + // However, always do the cloning if this is a backedge, in which case we + // have not yet cloned along this caller edge. assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None); assert(Node->AllocTypes != (uint8_t)AllocationType::None); - if (allocTypeToUse(CallerAllocTypeForAlloc) == + if (!CallerEdge->IsBackedge && + allocTypeToUse(CallerAllocTypeForAlloc) == allocTypeToUse(Node->AllocTypes) && allocTypesMatch( - CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) + CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) { continue; + } + + if (CallerEdge->IsBackedge) { + // We should only mark these if cloning recursive contexts, where we + // need to do this deferral. + assert(CloneRecursiveContexts); + DeferredBackedges++; + } + + // If this is a backedge, we now do recursive cloning starting from its + // caller since we may have moved unambiguous caller contexts to a clone + // of this Node in a previous iteration of the current loop, giving more + // opportunity for cloning through the backedge. Because we sorted the + // caller edges earlier so that cold caller edges are first, we would have + // visited and cloned this node for any unambiguously cold non-recursive + // callers before any ambiguous backedge callers. Note that we don't do this + // if the caller is already cloned or visited during cloning (e.g. via a + // different context path from the allocation).
+ // TODO: Can we do better in the case where the caller was already visited? + if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf && + !Visited.count(CallerEdge->Caller)) { + const auto OrigIdCount = CallerEdge->getContextIds().size(); + // Now do the recursive cloning of this backedge's caller, which was + // deferred earlier. + identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc); + removeNoneTypeCalleeEdges(CallerEdge->Caller); + // See if the recursive call to identifyClones moved the context ids to a + // new edge from this node to a clone of caller, and switch to looking at + // that new edge so that we clone Node for the new caller clone. + bool UpdatedEdge = false; + if (OrigIdCount > CallerEdge->getContextIds().size()) { + for (auto E : Node->CallerEdges) { + // Only interested in clones of the current edge's caller. + if (E->Caller->CloneOf != CallerEdge->Caller) + continue; + // See if this edge contains any of the context ids originally on the + // current caller edge. + auto CallerEdgeContextsForAllocNew = + set_intersection(CallerEdgeContextsForAlloc, E->getContextIds()); + if (CallerEdgeContextsForAllocNew.empty()) + continue; + // Make sure we don't pick a previously existing caller edge of this + // Node, which would be processed on a different iteration of the + // outer loop over the saved CallerEdges. + if (std::find(CallerEdges.begin(), CallerEdges.end(), E) != + CallerEdges.end()) + continue; + // The CallerAllocTypeForAlloc and CalleeEdgeAllocTypesForCallerEdge + // are updated further below for all cases where we just invoked + // identifyClones recursively. + CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew); + CallerEdge = E; + UpdatedEdge = true; + break; + } + } + // If cloning removed this edge (and we didn't update it to a new edge + // above), we're done with this edge. It's possible we moved all of the + // context ids to an existing clone, in which case there's no need to do + // further processing for them. + if (CallerEdge->isRemoved()) + continue; + + // Now we need to update the information used for the cloning decisions + // further below, as we may have modified edges and their context ids. + + // Note if we changed the CallerEdge above we would have already updated + // the context ids. + if (!UpdatedEdge) { + CallerEdgeContextsForAlloc = set_intersection( + CallerEdgeContextsForAlloc, CallerEdge->getContextIds()); + if (CallerEdgeContextsForAlloc.empty()) + continue; + } + // Update the other information that depends on the edges and on the now + // updated CallerEdgeContextsForAlloc. + CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc); + CalleeEdgeAllocTypesForCallerEdge.clear(); + for (auto &CalleeEdge : Node->CalleeEdges) { + CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes( + CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc)); + } + } // First see if we can use an existing clone. Check each clone and its // callee edges for matching alloc types.
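For readers unfamiliar with the technique the "standard DFS based backedge discovery algorithm" comment above refers to: an edge is a backedge exactly when its target is still on the active DFS path at the moment the edge is examined. A minimal standalone sketch follows; the `Node`/`Edge` types here are hypothetical stand-ins, not the actual `ContextNode`/`ContextEdge` classes from the patch.

```cpp
#include <unordered_set>
#include <vector>

// Hypothetical stand-ins for ContextNode/ContextEdge: each Edge carries the
// IsBackedge flag that the cloning logic later uses to defer work.
struct Node;
struct Edge {
  Node *Callee;
  bool IsBackedge = false;
};
struct Node {
  std::vector<Edge> CalleeEdges;
};

// Standard DFS backedge discovery: an edge is a backedge iff its target is
// still on the current DFS stack when the edge is examined.
void markBackedges(Node *N, std::unordered_set<Node *> &Visited,
                   std::unordered_set<Node *> &CurrentStack) {
  Visited.insert(N);
  for (Edge &E : N->CalleeEdges) {
    if (Visited.count(E.Callee)) {
      // Already visited: only a backedge if the target is an ancestor on
      // the active DFS path, i.e. the edge closes a cycle.
      if (CurrentStack.count(E.Callee))
        E.IsBackedge = true;
      continue;
    }
    CurrentStack.insert(E.Callee);
    markBackedges(E.Callee, Visited, CurrentStack);
    CurrentStack.erase(E.Callee);
  }
}

int main() {
  Node R, A, B; // R is a root: nothing calls it.
  R.CalleeEdges.push_back({&A});
  A.CalleeEdges.push_back({&B});
  B.CalleeEdges.push_back({&A}); // closes the cycle A -> B -> A
  std::unordered_set<Node *> Visited, CurrentStack;
  markBackedges(&R, Visited, CurrentStack);
  // Only B -> A is marked; R -> A and A -> B are forward edges.
  return (B.CalleeEdges[0].IsBackedge && !A.CalleeEdges[0].IsBackedge) ? 0 : 1;
}
```

As in the patch, the shared `Visited` set keeps the walk linear in the number of edges, and a node reached again off the stack (via a second root or a cross edge) is not treated as a cycle.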
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4b42e86e25161..d5534c15cca76 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1437,6 +1437,20 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { if (isa(Val)) return eraseInstFromFunction(SI); + // TODO: Add a helper to simplify the pointer operand for all memory + // instructions. + // store val, (select (cond, null, P)) -> store val, P + // store val, (select (cond, P, null)) -> store val, P + if (!NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace())) { + if (SelectInst *Sel = dyn_cast(Ptr)) { + if (isa(Sel->getOperand(1))) + return replaceOperand(SI, 1, Sel->getOperand(2)); + + if (isa(Sel->getOperand(2))) + return replaceOperand(SI, 1, Sel->getOperand(1)); + } + } + return nullptr; } diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index a1649c276de83..f3b53e05c519e 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -174,7 +174,7 @@ static cl::opt EnableInitializesImprovement( // Helper functions //===----------------------------------------------------------------------===// using OverlapIntervalsTy = std::map; -using InstOverlapIntervalsTy = DenseMap; +using InstOverlapIntervalsTy = MapVector; /// Returns true if the end of this instruction can be safely shortened in /// length. diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index cf0ba6fa54700..82ccd48d89f4f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -439,10 +439,6 @@ void PlainCFGBuilder::buildPlainCFG( } assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() && "block must have conditional branch with 2 successors"); - // Look up the branch condition to get the corresponding VPValue - // representing the condition bit in VPlan (which may be in another VPBB). - assert(IRDef2VPValue.contains(BI->getCondition()) && - "Missing condition bit in IRDef2VPValue!"); BasicBlock *IRSucc0 = BI->getSuccessor(0); BasicBlock *IRSucc1 = BI->getSuccessor(1); diff --git a/llvm/test/Analysis/GlobalsModRef/nonescaping-noalias.ll b/llvm/test/Analysis/GlobalsModRef/nonescaping-noalias.ll index 4a2c10ca55cdc..eed93cf0df8ef 100644 --- a/llvm/test/Analysis/GlobalsModRef/nonescaping-noalias.ll +++ b/llvm/test/Analysis/GlobalsModRef/nonescaping-noalias.ll @@ -175,3 +175,24 @@ entry: %v = load i32, ptr @g1 ret i32 %v } + +define i32 @test6(ptr %param) { +; Ensure that we can fold a store to a load of a global across a set of +; calls that cannot use in any way a non-escaping global. 
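+; The callees below are declared nocallback nosync, so they cannot call back +; into this module or otherwise read or write the non-escaping global @g1.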
+; +; CHECK-LABEL: @test6( +; CHECK: store i32 42, ptr @g1 +; CHECK-NOT: load i32 +; CHECK: ret i32 42 +entry: + store i32 42, ptr @g1 + %1 = call ptr @_FortranAioBeginExternalFormattedOutput(ptr null, i64 3, ptr null, i32 6, ptr null, i32 2) + %2 = call i1 @_FortranAioOutputAscii(ptr %1, ptr null, i64 4) + %3 = call i32 @_FortranAioEndIoStatement(ptr %1) + %v = load i32, ptr @g1 + ret i32 %v +} +declare ptr @_FortranAioBeginExternalFormattedOutput(ptr, i64, ptr, i32, ptr, i32) #0 +declare zeroext i1 @_FortranAioOutputAscii(ptr, ptr, i64) #0 +declare i32 @_FortranAioEndIoStatement(ptr) #0 +attributes #0 = { nocallback nosync } diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll index aa5208560817f..bb840023daf5d 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll @@ -520,7 +520,12 @@ define amdgpu_kernel void @v_permlane32_swap(ptr addrspace(1) %out, i32 %src0, i ret void } - +; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.dead.i32() +define amdgpu_cs_chain void @dead(ptr addrspace(1) %out) { + %v = call i32 @llvm.amdgcn.dead.i32() + store i32 %v, ptr addrspace(1) %out + ret void +} declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1 declare i32 @llvm.amdgcn.permlane16.i32(i32, i32, i32, i32, i1, i1) #1 @@ -558,5 +563,7 @@ declare <4 x i16> @llvm.amdgcn.global.load.tr.b128.v4i16(ptr addrspace(1)) declare <4 x half> @llvm.amdgcn.global.load.tr.b128.v4f16(ptr addrspace(1)) declare <4 x bfloat> @llvm.amdgcn.global.load.tr.b128.v4bf16(ptr addrspace(1)) +declare i32 @llvm.amdgcn.dead.i32() + attributes #0 = { nounwind convergent } attributes #1 = { nounwind readnone convergent } diff --git a/llvm/test/CodeGen/AArch64/cmpbr-branch-relaxation.mir b/llvm/test/CodeGen/AArch64/cmpbr-branch-relaxation.mir new file mode 100644 index 0000000000000..5fccb452e9642 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cmpbr-branch-relaxation.mir @@ -0,0 +1,156 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple arm64-apple-ios -mattr +cmpbr -o - -aarch64-cb-offset-bits=3 \ +# RUN: -run-pass=branch-relaxation -verify-machineinstrs -simplify-mir %s | \ +# RUN: FileCheck -check-prefix=RELAX %s +# RUN: llc -mtriple arm64-apple-ios -mattr +cmpbr -o - -aarch64-cb-offset-bits=9 \ +# RUN: -run-pass=branch-relaxation -verify-machineinstrs -simplify-mir %s | \ +# RUN: FileCheck -check-prefix=NO-RELAX %s +--- +name: relax_cb +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +body: | + ; RELAX-LABEL: name: relax_cb + ; RELAX: bb.0: + ; RELAX-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; RELAX-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; RELAX-NEXT: CBWPrr 1, [[COPY]], [[COPY1]], %bb.1 + ; RELAX-NEXT: B %bb.2 + ; RELAX-NEXT: {{ $}} + ; RELAX-NEXT: bb.1: + ; RELAX-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; RELAX-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr]], [[COPY1]] + ; RELAX-NEXT: [[ADDWrr2:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr1]], [[ADDWrr]] + ; RELAX-NEXT: [[ADDWrr3:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr2]], [[ADDWrr1]] + ; RELAX-NEXT: [[ADDWrr4:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr3]], [[ADDWrr2]] + ; RELAX-NEXT: $w0 = ADDWrr [[ADDWrr4]], [[ADDWrr3]] + ; RELAX-NEXT: {{ $}} + ; RELAX-NEXT: bb.2: + ; RELAX-NEXT: RET_ReallyLR implicit $w0 + ; + ; NO-RELAX-LABEL: name: relax_cb + 
; NO-RELAX: bb.0: + ; NO-RELAX-NEXT: successors: %bb.1, %bb.2 + ; NO-RELAX-NEXT: {{ $}} + ; NO-RELAX-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; NO-RELAX-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; NO-RELAX-NEXT: CBWPrr 0, [[COPY]], [[COPY1]], %bb.2 + ; NO-RELAX-NEXT: {{ $}} + ; NO-RELAX-NEXT: bb.1: + ; NO-RELAX-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; NO-RELAX-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr]], [[COPY1]] + ; NO-RELAX-NEXT: [[ADDWrr2:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr1]], [[ADDWrr]] + ; NO-RELAX-NEXT: [[ADDWrr3:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr2]], [[ADDWrr1]] + ; NO-RELAX-NEXT: [[ADDWrr4:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr3]], [[ADDWrr2]] + ; NO-RELAX-NEXT: $w0 = ADDWrr [[ADDWrr4]], [[ADDWrr3]] + ; NO-RELAX-NEXT: {{ $}} + ; NO-RELAX-NEXT: bb.2: + ; NO-RELAX-NEXT: RET_ReallyLR implicit $w0 + bb.0: + successors: %bb.1, %bb.2 + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + CBWPrr 0, %0, %1, %bb.2 + + bb.1: + successors: %bb.2 + %2:gpr32 = ADDWrr %0, %1 + %3:gpr32 = ADDWrr %2, %1 + %4:gpr32 = ADDWrr %3, %2 + %5:gpr32 = ADDWrr %4, %3 + %6:gpr32 = ADDWrr %5, %4 + $w0 = ADDWrr %6, %5 + + bb.2: + RET_ReallyLR implicit $w0 +... +--- +name: relax_and_split_block +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } +liveins: + - { reg: '$w0', virtual-reg: '%0' } + - { reg: '$w1', virtual-reg: '%1' } +body: | + ; RELAX-LABEL: name: relax_and_split_block + ; RELAX: bb.0: + ; RELAX-NEXT: liveins: $w0, $w1 + ; RELAX-NEXT: {{ $}} + ; RELAX-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; RELAX-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; RELAX-NEXT: CBWPrr 1, [[COPY]], [[COPY1]], %bb.3 + ; RELAX-NEXT: B %bb.2 + ; RELAX-NEXT: {{ $}} + ; RELAX-NEXT: bb.3: + ; RELAX-NEXT: liveins: $w0, $w1 + ; RELAX-NEXT: {{ $}} + ; RELAX-NEXT: CBWPrr 0, [[COPY]], [[COPY1]], %bb.1 + ; RELAX-NEXT: B %bb.2 + ; RELAX-NEXT: {{ $}} + ; RELAX-NEXT: bb.1: + ; RELAX-NEXT: liveins: $w0, $w1 + ; RELAX-NEXT: {{ $}} + ; RELAX-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; RELAX-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr]], [[COPY1]] + ; RELAX-NEXT: [[ADDWrr2:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr1]], [[ADDWrr]] + ; RELAX-NEXT: [[ADDWrr3:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr2]], [[ADDWrr1]] + ; RELAX-NEXT: [[ADDWrr4:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr3]], [[ADDWrr2]] + ; RELAX-NEXT: $w0 = ADDWrr [[ADDWrr4]], [[ADDWrr3]] + ; RELAX-NEXT: {{ $}} + ; RELAX-NEXT: bb.2: + ; RELAX-NEXT: liveins: $w0, $w1 + ; RELAX-NEXT: {{ $}} + ; RELAX-NEXT: RET_ReallyLR implicit $w0 + ; + ; NO-RELAX-LABEL: name: relax_and_split_block + ; NO-RELAX: bb.0: + ; NO-RELAX-NEXT: successors: %bb.1, %bb.2 + ; NO-RELAX-NEXT: liveins: $w0, $w1 + ; NO-RELAX-NEXT: {{ $}} + ; NO-RELAX-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; NO-RELAX-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; NO-RELAX-NEXT: CBWPrr 0, [[COPY]], [[COPY1]], %bb.2 + ; NO-RELAX-NEXT: CBWPrr 1, [[COPY]], [[COPY1]], %bb.2 + ; NO-RELAX-NEXT: {{ $}} + ; NO-RELAX-NEXT: bb.1: + ; NO-RELAX-NEXT: liveins: $w0, $w1 + ; NO-RELAX-NEXT: {{ $}} + ; NO-RELAX-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; NO-RELAX-NEXT: [[ADDWrr1:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr]], [[COPY1]] + ; NO-RELAX-NEXT: [[ADDWrr2:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr1]], [[ADDWrr]] + ; NO-RELAX-NEXT: [[ADDWrr3:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr2]], [[ADDWrr1]] + ; NO-RELAX-NEXT: [[ADDWrr4:%[0-9]+]]:gpr32 = ADDWrr [[ADDWrr3]], [[ADDWrr2]] + ; NO-RELAX-NEXT: $w0 = ADDWrr [[ADDWrr4]], [[ADDWrr3]] + ; NO-RELAX-NEXT: {{ $}} + ; NO-RELAX-NEXT: bb.2: + ; 
NO-RELAX-NEXT: liveins: $w0, $w1 + ; NO-RELAX-NEXT: {{ $}} + ; NO-RELAX-NEXT: RET_ReallyLR implicit $w0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $w0, $w1 + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + CBWPrr 0, %0, %1, %bb.2 + CBWPrr 1, %0, %1, %bb.2 + + bb.1: + successors: %bb.2 + liveins: $w0, $w1 + %2:gpr32 = ADDWrr %0, %1 + %3:gpr32 = ADDWrr %2, %1 + %4:gpr32 = ADDWrr %3, %2 + %5:gpr32 = ADDWrr %4, %3 + %6:gpr32 = ADDWrr %5, %4 + $w0 = ADDWrr %6, %5 + + bb.2: + liveins: $w0, $w1 + RET_ReallyLR implicit $w0 +... diff --git a/llvm/test/CodeGen/AArch64/cmpbr-early-ifcvt.mir b/llvm/test/CodeGen/AArch64/cmpbr-early-ifcvt.mir new file mode 100644 index 0000000000000..c3377164f357e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cmpbr-early-ifcvt.mir @@ -0,0 +1,116 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=arm64-apple-ios -mattr +cmpbr -run-pass=early-ifcvt -simplify-mir -o - %s | FileCheck %s +# CHECK: cb_diamond +--- +name: cb_diamond +alignment: 4 +tracksRegLiveness: true +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: gpr64 } + - { id: 3, class: gpr64 } + - { id: 4, class: gpr64 } + - { id: 5, class: gpr64 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cb_diamond + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY]], [[COPY1]], $xzr + ; CHECK-NEXT: $xzr = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[ADDXrr]], [[MADDXrrr]], 11, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr killed [[CSELXr]], [[COPY]] + ; CHECK-NEXT: $x0 = COPY [[ADDXrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $x0, $x1 + + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + CBXPrr 11, %0, %1, %bb.1 + B %bb.2 + + bb.1: + successors: %bb.3 + %2:gpr64 = ADDXrr %0, %1 + B %bb.3 + + bb.2: + successors: %bb.3 + %3:gpr64 = MADDXrrr %0, %1, $xzr + B %bb.3 + + bb.3: + %4:gpr64 = PHI %2, %bb.1, %3, %bb.2 + %5:gpr64 = ADDXrr killed %4, %0 + $x0 = COPY %5 + RET_ReallyLR implicit $x0 +... 
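+# cb_triangle below exercises the same conversion for a triangle CFG: the +# branch can skip bb.1 entirely, and the PHI in bb.2 (merging the unmodified +# input with the ADD result) becomes a CSELXr.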
+--- +name: cb_triangle +alignment: 4 +tracksRegLiveness: true +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: gpr64 } + - { id: 3, class: gpr64 } + - { id: 4, class: gpr64 } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: cb_triangle + ; CHECK: bb.0: + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: $xzr = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[COPY1]], [[ADDXrr]], 10, implicit $nzcv + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr killed [[CSELXr]], [[COPY]] + ; CHECK-NEXT: $x0 = COPY [[ADDXrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + bb.0: + successors: %bb.1, %bb.2 + liveins: $x0, $x1 + + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + CBXPrr 10, %0, %1, %bb.2 + + bb.1: + successors: %bb.2 + %2:gpr64 = ADDXrr %0, %1 + + bb.2: + %3:gpr64 = PHI %1, %bb.0, %2, %bb.1 + %4:gpr64 = ADDXrr killed %3, %0 + $x0 = COPY %4 + RET_ReallyLR implicit $x0 +... diff --git a/llvm/test/CodeGen/AArch64/cmpbr-reg-imm-at-bounds.ll b/llvm/test/CodeGen/AArch64/cmpbr-reg-imm-at-bounds.ll new file mode 100644 index 0000000000000..d264cf2126099 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cmpbr-reg-imm-at-bounds.ll @@ -0,0 +1,1328 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple arm64-apple-ios -mattr +cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-CMPBR +; RUN: llc -mtriple arm64-apple-ios -mattr -cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-NO-CMPBR + +; slt, sle, sgt, sge, ult, ule, ugt, uge, eq, ne + +define void @slt_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: slt_0_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: tbnz w0, #31, LBB0_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: slt_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: tbnz w0, #31, LBB0_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @slt_m1_i32(i32 %a) { +; CHECK-CMPBR-LABEL: slt_m1_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #-2 ; =0xfffffffe +; CHECK-CMPBR-NEXT: cbge w8, w0, LBB1_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: slt_m1_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmn w0, #2 +; CHECK-NO-CMPBR-NEXT: b.le LBB1_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i32 %a, -1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void 
@slt_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: slt_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblt w0, #63, LBB2_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: slt_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #62 +; CHECK-NO-CMPBR-NEXT: b.le LBB2_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i32 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @slt_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: slt_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #63 ; =0x3f +; CHECK-CMPBR-NEXT: cbge w8, w0, LBB3_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: slt_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #63 +; CHECK-NO-CMPBR-NEXT: b.le LBB3_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sle_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sle_0_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblt w0, #1, LBB4_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sle_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #0 +; CHECK-NO-CMPBR-NEXT: b.le LBB4_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sle_m1_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sle_m1_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: tbnz w0, #31, LBB5_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sle_m1_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: tbnz w0, #31, LBB5_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i32 %a, -1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sle_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sle_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #63 ; =0x3f +; CHECK-CMPBR-NEXT: cbge w8, w0, LBB6_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sle_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #63 +; CHECK-NO-CMPBR-NEXT: b.le LBB6_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB6_2: ; %if.then +; 
CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i32 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sle_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sle_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbge w8, w0, LBB7_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sle_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #64 +; CHECK-NO-CMPBR-NEXT: b.le LBB7_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sgt_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sgt_0_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt w0, #0, LBB8_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sgt_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #1 +; CHECK-NO-CMPBR-NEXT: b.ge LBB8_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sgt_m1_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sgt_m1_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: tbz w0, #31, LBB9_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sgt_m1_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: tbz w0, #31, LBB9_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i32 %a, -1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sgt_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sgt_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbge w0, w8, LBB10_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sgt_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #64 +; CHECK-NO-CMPBR-NEXT: b.ge LBB10_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i32 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sgt_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sgt_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #65 ; =0x41 +; CHECK-CMPBR-NEXT: cbge w0, w8, LBB11_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: 
sgt_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #65 +; CHECK-NO-CMPBR-NEXT: b.ge LBB11_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sge_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sge_0_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: tbz w0, #31, LBB12_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sge_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: tbz w0, #31, LBB12_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sge_m1_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sge_m1_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #-1 ; =0xffffffff +; CHECK-CMPBR-NEXT: cbge w0, w8, LBB13_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sge_m1_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmn w0, #1 +; CHECK-NO-CMPBR-NEXT: b.ge LBB13_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i32 %a, -1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sge_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sge_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt w0, #62, LBB14_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sge_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #63 +; CHECK-NO-CMPBR-NEXT: b.ge LBB14_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i32 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sge_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: sge_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbge w0, w8, LBB15_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB15_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sge_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #64 +; CHECK-NO-CMPBR-NEXT: b.ge LBB15_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB15_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ult_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ult_0_i32: +; CHECK-CMPBR: ; %bb.0: 
; %entry +; CHECK-CMPBR-NEXT: cbhi wzr, w0, LBB16_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ult_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #0 +; CHECK-NO-CMPBR-NEXT: b.lo LBB16_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ult_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ult_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblo w0, #63, LBB17_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ult_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #62 +; CHECK-NO-CMPBR-NEXT: b.ls LBB17_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i32 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ult_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ult_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #63 ; =0x3f +; CHECK-CMPBR-NEXT: cbhs w8, w0, LBB18_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ult_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #63 +; CHECK-NO-CMPBR-NEXT: b.ls LBB18_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ule_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ule_0_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbz w0, LBB19_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ule_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cbz w0, LBB19_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ule_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ule_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #63 ; =0x3f +; CHECK-CMPBR-NEXT: cbhs w8, w0, LBB20_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB20_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ule_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #63 +; CHECK-NO-CMPBR-NEXT: b.ls LBB20_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB20_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i32 %a, 63 + br i1 %cmp, label 
%if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ule_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ule_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbhs w8, w0, LBB21_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB21_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ule_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #64 +; CHECK-NO-CMPBR-NEXT: b.ls LBB21_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB21_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ugt_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ugt_0_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbnz w0, LBB22_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB22_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ugt_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cbnz w0, LBB22_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB22_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ugt i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ugt_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ugt_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbhs w0, w8, LBB23_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB23_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ugt_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #64 +; CHECK-NO-CMPBR-NEXT: b.hs LBB23_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB23_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ugt i32 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ugt_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ugt_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #65 ; =0x41 +; CHECK-CMPBR-NEXT: cbhs w0, w8, LBB24_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB24_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ugt_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #65 +; CHECK-NO-CMPBR-NEXT: b.hs LBB24_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB24_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ugt i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @uge_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: uge_0_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhs w0, wzr, LBB25_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB25_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: uge_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #0 
+; CHECK-NO-CMPBR-NEXT: b.hs LBB25_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB25_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @uge_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: uge_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhi w0, #62, LBB26_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB26_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: uge_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #63 +; CHECK-NO-CMPBR-NEXT: b.hs LBB26_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB26_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i32 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @uge_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: uge_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbhs w0, w8, LBB27_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB27_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: uge_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #64 +; CHECK-NO-CMPBR-NEXT: b.hs LBB27_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB27_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @eq_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: eq_0_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbz w0, LBB28_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB28_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: eq_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cbz w0, LBB28_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB28_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @eq_m1_i32(i32 %a) { +; CHECK-CMPBR-LABEL: eq_m1_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #-1 ; =0xffffffff +; CHECK-CMPBR-NEXT: cbeq w0, w8, LBB29_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB29_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: eq_m1_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmn w0, #1 +; CHECK-NO-CMPBR-NEXT: b.eq LBB29_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB29_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i32 %a, -1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @eq_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: eq_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbeq w0, #63, LBB30_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; 
CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB30_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: eq_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #63 +; CHECK-NO-CMPBR-NEXT: b.eq LBB30_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB30_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i32 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @eq_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: eq_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbeq w0, w8, LBB31_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB31_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: eq_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #64 +; CHECK-NO-CMPBR-NEXT: b.eq LBB31_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB31_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ne_0_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ne_0_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbnz w0, LBB32_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB32_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ne_0_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cbnz w0, LBB32_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB32_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ne_m1_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ne_m1_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #-1 ; =0xffffffff +; CHECK-CMPBR-NEXT: cbne w0, w8, LBB33_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB33_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ne_m1_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmn w0, #1 +; CHECK-NO-CMPBR-NEXT: b.ne LBB33_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB33_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i32 %a, -1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ne_63_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ne_63_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbne w0, #63, LBB34_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB34_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ne_63_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #63 +; CHECK-NO-CMPBR-NEXT: b.ne LBB34_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB34_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i32 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} 
+ +define void @ne_64_i32(i32 %a) { +; CHECK-CMPBR-LABEL: ne_64_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbne w0, w8, LBB35_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB35_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ne_64_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #64 +; CHECK-NO-CMPBR-NEXT: b.ne LBB35_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB35_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i32 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbge_out_of_upper_bound_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cbge_out_of_upper_bound_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #71 ; =0x47 +; CHECK-CMPBR-NEXT: cbge w0, w8, LBB36_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB36_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbge_out_of_upper_bound_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #71 +; CHECK-NO-CMPBR-NEXT: b.ge LBB36_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB36_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i32 %a, 70 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbge_out_of_lower_bound_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cbge_out_of_lower_bound_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #-10 ; =0xfffffff6 +; CHECK-CMPBR-NEXT: cbge w0, w8, LBB37_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB37_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbge_out_of_lower_bound_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmn w0, #10 +; CHECK-NO-CMPBR-NEXT: b.ge LBB37_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB37_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i32 %a, -10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +; This should trigger a register swap. 
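+; The constant -10 is outside the immediate range, so it is materialized
+; into a register and "a ule b" is rewritten as "b uge a" (cbhs w8, w0);
+; as the *_swap tests in cmpbr-reg-reg.ll also show, lt/le/lo/ls are
+; handled by swapping the operands of gt/ge/hi/hs.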
+define void @cble_out_of_lower_bound_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cble_out_of_lower_bound_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #-10 ; =0xfffffff6 +; CHECK-CMPBR-NEXT: cbhs w8, w0, LBB38_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB38_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cble_out_of_lower_bound_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmn w0, #10 +; CHECK-NO-CMPBR-NEXT: b.ls LBB38_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB38_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i32 %a, -10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @slt_m1_i64(i64 %a) { +; CHECK-CMPBR-LABEL: slt_m1_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov x8, #-2 ; =0xfffffffffffffffe +; CHECK-CMPBR-NEXT: cbge x8, x0, LBB39_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB39_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: slt_m1_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmn x0, #2 +; CHECK-NO-CMPBR-NEXT: b.le LBB39_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB39_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i64 %a, -1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @sle_64_i64(i64 %a) { +; CHECK-CMPBR-LABEL: sle_64_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbge x8, x0, LBB40_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB40_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: sle_64_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #64 +; CHECK-NO-CMPBR-NEXT: b.le LBB40_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB40_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i64 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ult_63_i64(i64 %a) { +; CHECK-CMPBR-LABEL: ult_63_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblo x0, #63, LBB41_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB41_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ult_63_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #62 +; CHECK-NO-CMPBR-NEXT: b.ls LBB41_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB41_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i64 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @uge_64_i64(i64 %a) { +; CHECK-CMPBR-LABEL: uge_64_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov w8, #64 ; =0x40 +; CHECK-CMPBR-NEXT: cbhs x0, x8, LBB42_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB42_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: uge_64_i64: +; CHECK-NO-CMPBR: ; 
%bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #64 +; CHECK-NO-CMPBR-NEXT: b.hs LBB42_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB42_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i64 %a, 64 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @eq_m1_i64(i64 %a) { +; CHECK-CMPBR-LABEL: eq_m1_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: mov x8, #-1 ; =0xffffffffffffffff +; CHECK-CMPBR-NEXT: cbeq x0, x8, LBB43_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB43_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: eq_m1_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmn x0, #1 +; CHECK-NO-CMPBR-NEXT: b.eq LBB43_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB43_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i64 %a, -1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @ne_63_i64(i64 %a) { +; CHECK-CMPBR-LABEL: ne_63_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbne x0, #63, LBB44_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB44_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: ne_63_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #63 +; CHECK-NO-CMPBR-NEXT: b.ne LBB44_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB44_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i64 %a, 63 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/cmpbr-reg-imm.ll b/llvm/test/CodeGen/AArch64/cmpbr-reg-imm.ll new file mode 100644 index 0000000000000..ad778181bcf9b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cmpbr-reg-imm.ll @@ -0,0 +1,583 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple arm64-apple-ios -mattr +cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-CMPBR +; RUN: llc -mtriple arm64-apple-ios -mattr -cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-NO-CMPBR + +define void @cbgt_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cbgt_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt w0, #10, LBB0_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbgt_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #11 +; CHECK-NO-CMPBR-NEXT: b.ge LBB0_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblt_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cblt_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblt w0, #10, LBB1_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; 
CHECK-NO-CMPBR-LABEL: cblt_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #9 +; CHECK-NO-CMPBR-NEXT: b.le LBB1_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbhi_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cbhi_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhi w0, #10, LBB2_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhi_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #11 +; CHECK-NO-CMPBR-NEXT: b.hs LBB2_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ugt i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblo_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cblo_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblo w0, #10, LBB3_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblo_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #9 +; CHECK-NO-CMPBR-NEXT: b.ls LBB3_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbeq_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cbeq_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbeq w0, #10, LBB4_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbeq_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #10 +; CHECK-NO-CMPBR-NEXT: b.eq LBB4_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbne_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cbne_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbne w0, #10, LBB5_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbne_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #10 +; CHECK-NO-CMPBR-NEXT: b.ne LBB5_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbge_gt_dec_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cbge_gt_dec_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt w0, #9, LBB6_2 +; 
CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbge_gt_dec_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #10 +; CHECK-NO-CMPBR-NEXT: b.ge LBB6_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbhs_hi_dec_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cbhs_hi_dec_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhi w0, #9, LBB7_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhs_hi_dec_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #10 +; CHECK-NO-CMPBR-NEXT: b.hs LBB7_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cble_lt_inc_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cble_lt_inc_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblt w0, #11, LBB8_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cble_lt_inc_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #10 +; CHECK-NO-CMPBR-NEXT: b.le LBB8_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbls_lo_inc_i32(i32 %a) { +; CHECK-CMPBR-LABEL: cbls_lo_inc_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblo w0, #11, LBB9_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbls_lo_inc_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, #10 +; CHECK-NO-CMPBR-NEXT: b.ls LBB9_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i32 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbgt_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cbgt_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt x0, #10, LBB10_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbgt_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #11 +; CHECK-NO-CMPBR-NEXT: b.ge LBB10_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void 
@llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblt_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cblt_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblt x0, #10, LBB11_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblt_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #9 +; CHECK-NO-CMPBR-NEXT: b.le LBB11_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbhi_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cbhi_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhi x0, #10, LBB12_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhi_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #11 +; CHECK-NO-CMPBR-NEXT: b.hs LBB12_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ugt i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblo_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cblo_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblo x0, #10, LBB13_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblo_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #9 +; CHECK-NO-CMPBR-NEXT: b.ls LBB13_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbeq_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cbeq_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbeq x0, #10, LBB14_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbeq_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #10 +; CHECK-NO-CMPBR-NEXT: b.eq LBB14_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbne_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cbne_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbne x0, #10, LBB15_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB15_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbne_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #10 +; CHECK-NO-CMPBR-NEXT: b.ne LBB15_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB15_2: ; %if.then +; 
CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbge_gt_dec_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cbge_gt_dec_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt x0, #9, LBB16_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbge_gt_dec_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #10 +; CHECK-NO-CMPBR-NEXT: b.ge LBB16_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbhs_hi_dec_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cbhs_hi_dec_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhi x0, #9, LBB17_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhs_hi_dec_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #10 +; CHECK-NO-CMPBR-NEXT: b.hs LBB17_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cble_lt_inc_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cble_lt_inc_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblt x0, #11, LBB18_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cble_lt_inc_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #10 +; CHECK-NO-CMPBR-NEXT: b.le LBB18_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbls_lo_inc_i64(i64 %a) { +; CHECK-CMPBR-LABEL: cbls_lo_inc_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cblo x0, #11, LBB19_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbls_lo_inc_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, #10 +; CHECK-NO-CMPBR-NEXT: b.ls LBB19_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i64 %a, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/cmpbr-reg-reg.ll b/llvm/test/CodeGen/AArch64/cmpbr-reg-reg.ll new file mode 100644 index 0000000000000..9e95434564f02 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cmpbr-reg-reg.ll @@ -0,0 +1,586 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: 
--version 5 +; RUN: llc -mtriple arm64-apple-ios -mattr +cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-CMPBR +; RUN: llc -mtriple arm64-apple-ios -mattr -cmpbr -verify-machineinstrs -o - < %s | FileCheck %s --check-prefix=CHECK-NO-CMPBR + + +define void @cbgt_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cbgt_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt w0, w1, LBB0_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbgt_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.gt LBB0_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB0_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbge_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cbge_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbge w0, w1, LBB1_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbge_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.ge LBB1_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB1_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + + +define void @cbhi_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cbhi_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhi w0, w1, LBB2_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhi_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.hi LBB2_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB2_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ugt i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbhs_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cbhs_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhs w0, w1, LBB3_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhs_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.hs LBB3_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB3_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbeq_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cbeq_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbeq w0, w1, LBB4_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbeq_i32: +; 
CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.eq LBB4_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB4_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbne_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cbne_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbne w0, w1, LBB5_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbne_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.ne LBB5_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB5_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cble_ge_swap_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cble_ge_swap_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbge w1, w0, LBB6_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cble_ge_swap_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.le LBB6_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB6_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblo_hi_swap_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cblo_hi_swap_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhi w1, w0, LBB7_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblo_hi_swap_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.lo LBB7_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB7_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbls_hs_swap_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cbls_hs_swap_i32: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhs w1, w0, LBB8_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbls_hs_swap_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.ls LBB8_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB8_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblt_gt_swap_i32(i32 %a, i32 %b) { +; CHECK-CMPBR-LABEL: cblt_gt_swap_i32: +; CHECK-CMPBR: ; 
%bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt w1, w0, LBB9_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblt_gt_swap_i32: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp w0, w1 +; CHECK-NO-CMPBR-NEXT: b.lt LBB9_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB9_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbgt_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cbgt_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt x0, x1, LBB10_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbgt_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.gt LBB10_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB10_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sgt i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbge_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cbge_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbge x0, x1, LBB11_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbge_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.ge LBB11_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB11_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sge i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + + +define void @cbhi_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cbhi_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhi x0, x1, LBB12_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhi_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.hi LBB12_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB12_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ugt i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbhs_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cbhs_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhs x0, x1, LBB13_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbhs_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.hs LBB13_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB13_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp uge i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + 
+if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbeq_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cbeq_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbeq x0, x1, LBB14_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbeq_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.eq LBB14_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB14_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp eq i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbne_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cbne_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbne x0, x1, LBB15_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB15_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbne_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.ne LBB15_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB15_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ne i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cble_ge_swap_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cble_ge_swap_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbge x1, x0, LBB16_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cble_ge_swap_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.le LBB16_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB16_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp sle i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblo_hi_swap_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cblo_hi_swap_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhi x1, x0, LBB17_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblo_hi_swap_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.lo LBB17_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB17_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ult i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cbls_hs_swap_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cbls_hs_swap_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbhs x1, x0, LBB18_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cbls_hs_swap_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.ls 
LBB18_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB18_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp ule i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} + +define void @cblt_gt_swap_i64(i64 %a, i64 %b) { +; CHECK-CMPBR-LABEL: cblt_gt_swap_i64: +; CHECK-CMPBR: ; %bb.0: ; %entry +; CHECK-CMPBR-NEXT: cbgt x1, x0, LBB19_2 +; CHECK-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-CMPBR-NEXT: ret +; CHECK-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-CMPBR-NEXT: brk #0x1 +; +; CHECK-NO-CMPBR-LABEL: cblt_gt_swap_i64: +; CHECK-NO-CMPBR: ; %bb.0: ; %entry +; CHECK-NO-CMPBR-NEXT: cmp x0, x1 +; CHECK-NO-CMPBR-NEXT: b.lt LBB19_2 +; CHECK-NO-CMPBR-NEXT: ; %bb.1: ; %if.end +; CHECK-NO-CMPBR-NEXT: ret +; CHECK-NO-CMPBR-NEXT: LBB19_2: ; %if.then +; CHECK-NO-CMPBR-NEXT: brk #0x1 +entry: + %cmp = icmp slt i64 %a, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.trap() + unreachable + +if.end: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll index bfb5c67801e6c..39e2db3a52d2c 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -987,25 +987,25 @@ define i32 @test_signed_f128_i32(fp128 %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI30_1 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1] -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, x9, lt +; CHECK-GI-NEXT: csel x20, x8, x9, gt ; CHECK-GI-NEXT: adrp x8, .LCPI30_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0] -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 ; CHECK-GI-NEXT: mov x8, #281474976448512 // =0xfffffffc0000 ; CHECK-GI-NEXT: movk x8, #16413, lsl #48 -; CHECK-GI-NEXT: csel x8, x20, x8, gt +; CHECK-GI-NEXT: csel x8, x20, x8, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index b2b3430f4d85e..67d625dd16473 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -535,25 +535,25 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, x9, lt +; CHECK-GI-NEXT: csel 
x20, x8, x9, gt ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 ; CHECK-GI-NEXT: mov x8, #281474976448512 // =0xfffffffc0000 ; CHECK-GI-NEXT: movk x8, #16413, lsl #48 -; CHECK-GI-NEXT: csel x8, x20, x8, gt +; CHECK-GI-NEXT: csel x8, x20, x8, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -656,26 +656,26 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x21, x8, x20, lt +; CHECK-GI-NEXT: csel x21, x8, x20, gt ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: movk x22, #16413, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x21, x22, gt +; CHECK-GI-NEXT: csel x8, x21, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -686,21 +686,21 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w21, wzr, w19, ne -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, x20, lt +; CHECK-GI-NEXT: csel x20, x8, x20, gt ; CHECK-GI-NEXT: mov v0.d[1], x20 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x20, x22, gt +; CHECK-GI-NEXT: csel x8, x20, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -827,26 +827,26 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: str q2, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x20, 
#-4603241769126068224 // =0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x21, x8, x20, lt +; CHECK-GI-NEXT: csel x21, x8, x20, gt ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: movk x22, #16413, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x21, x22, gt +; CHECK-GI-NEXT: csel x8, x21, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -856,21 +856,21 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w21, wzr, w19, ne -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x23, x8, x20, lt +; CHECK-GI-NEXT: csel x23, x8, x20, gt ; CHECK-GI-NEXT: mov v0.d[1], x23 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x23, x22, gt +; CHECK-GI-NEXT: csel x8, x23, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -881,21 +881,21 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w23, wzr, w19, ne -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, x20, lt +; CHECK-GI-NEXT: csel x20, x8, x20, gt ; CHECK-GI-NEXT: mov v0.d[1], x20 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x20, x22, gt +; CHECK-GI-NEXT: csel x8, x20, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload @@ -1043,26 +1043,26 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] ; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp q1, q3, [sp, #64] // 32-byte Folded Spill -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte 
Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x21, x8, x20, lt +; CHECK-GI-NEXT: csel x21, x8, x20, gt ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 ; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: movk x22, #16413, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x21, x22, gt +; CHECK-GI-NEXT: csel x8, x21, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -1073,21 +1073,21 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w21, wzr, w19, ne -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x23, x8, x20, lt +; CHECK-GI-NEXT: csel x23, x8, x20, gt ; CHECK-GI-NEXT: mov v0.d[1], x23 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x23, x22, gt +; CHECK-GI-NEXT: csel x8, x23, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -1098,20 +1098,20 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w23, wzr, w19, ne -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x24, x8, x20, lt +; CHECK-GI-NEXT: csel x24, x8, x20, gt ; CHECK-GI-NEXT: mov v0.d[1], x24 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x24, x22, gt +; CHECK-GI-NEXT: csel x8, x24, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -1121,21 +1121,21 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: ldp q1, q0, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w24, wzr, w19, ne -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload ; 
CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, x20, lt +; CHECK-GI-NEXT: csel x20, x8, x20, gt ; CHECK-GI-NEXT: mov v0.d[1], x20 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x20, x22, gt +; CHECK-GI-NEXT: csel x8, x20, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload @@ -5633,26 +5633,26 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b ; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x20, #-4594234569871327232 // =0xc03e000000000000 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x21, x8, x20, lt +; CHECK-GI-NEXT: csel x21, x8, x20, gt ; CHECK-GI-NEXT: adrp x8, .LCPI86_0 ; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI86_0] ; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: mov x22, #-1125899906842624 // =0xfffc000000000000 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x23, #4629137466983448575 // =0x403dffffffffffff -; CHECK-GI-NEXT: csel x8, x19, x22, gt +; CHECK-GI-NEXT: csel x8, x19, x22, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x21, x23, gt +; CHECK-GI-NEXT: csel x8, x21, x23, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfdi ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -5663,21 +5663,21 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel x21, xzr, x19, ne -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, x20, lt +; CHECK-GI-NEXT: csel x20, x8, x20, gt ; CHECK-GI-NEXT: mov v0.d[1], x20 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, x22, gt +; CHECK-GI-NEXT: csel x8, x19, x22, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x20, x23, gt +; CHECK-GI-NEXT: csel x8, x20, x23, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfdi ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll index 0dea7be5052d0..46950e7a60349 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ 
b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -797,25 +797,25 @@ define i32 @test_unsigned_f128_i32(fp128 %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI30_1 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1] -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: adrp x8, .LCPI30_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0] -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 ; CHECK-GI-NEXT: mov x8, #281474976579584 // =0xfffffffe0000 ; CHECK-GI-NEXT: movk x8, #16414, lsl #48 -; CHECK-GI-NEXT: csel x8, x20, x8, gt +; CHECK-GI-NEXT: csel x8, x20, x8, lt ; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: add sp, sp, #48 diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index b76df6a101e5f..4d3486d4a2993 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -491,24 +491,24 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) { ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 ; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 ; CHECK-GI-NEXT: mov x8, #281474976579584 // =0xfffffffe0000 ; CHECK-GI-NEXT: movk x8, #16414, lsl #48 -; CHECK-GI-NEXT: csel x8, x20, x8, gt +; CHECK-GI-NEXT: csel x8, x20, x8, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload @@ -588,44 +588,44 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_1] ; CHECK-GI-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; 
CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x21, #281474976579584 // =0xfffffffe0000 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: movk x21, #16414, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x20, x21, gt +; CHECK-GI-NEXT: csel x8, x20, x21, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x20 -; CHECK-GI-NEXT: csel x22, x8, xzr, lt +; CHECK-GI-NEXT: csel x22, x8, xzr, gt ; CHECK-GI-NEXT: mov v0.d[1], x22 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x20, xzr, gt +; CHECK-GI-NEXT: csel x8, x20, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x22, x21, gt +; CHECK-GI-NEXT: csel x8, x22, x21, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: mov v0.s[0], w19 @@ -722,63 +722,63 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; CHECK-GI-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] ; CHECK-GI-NEXT: stp q1, q2, [sp, #32] // 32-byte Folded Spill -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x21, #281474976579584 // =0xfffffffe0000 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: movk x21, #16414, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x20, x21, gt +; CHECK-GI-NEXT: csel x8, x20, x21, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x20 -; CHECK-GI-NEXT: csel x22, x8, xzr, lt +; CHECK-GI-NEXT: csel x22, x8, xzr, gt ; CHECK-GI-NEXT: mov v0.d[1], x22 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x20, xzr, gt +; CHECK-GI-NEXT: csel x8, x20, xzr, lt ; CHECK-GI-NEXT: 
mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x22, x21, gt +; CHECK-GI-NEXT: csel x8, x22, x21, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov w20, w0 -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x22, x8, xzr, lt +; CHECK-GI-NEXT: csel x22, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x22 -; CHECK-GI-NEXT: csel x23, x8, xzr, lt +; CHECK-GI-NEXT: csel x23, x8, xzr, gt ; CHECK-GI-NEXT: mov v0.d[1], x23 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x22, xzr, gt +; CHECK-GI-NEXT: csel x8, x22, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x23, x21, gt +; CHECK-GI-NEXT: csel x8, x23, x21, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: mov v0.s[0], w19 @@ -895,84 +895,84 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: str q3, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x22, #281474976579584 // =0xfffffffe0000 -; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, lt ; CHECK-GI-NEXT: movk x22, #16414, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x20, x22, gt +; CHECK-GI-NEXT: csel x8, x20, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x20 -; CHECK-GI-NEXT: csel x21, x8, xzr, lt +; CHECK-GI-NEXT: csel x21, x8, xzr, gt ; CHECK-GI-NEXT: mov v0.d[1], x21 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x20, xzr, gt +; CHECK-GI-NEXT: csel x8, x20, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x21, x22, gt +; CHECK-GI-NEXT: csel x8, x21, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w20, w0 -; 
CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x21, x8, xzr, lt +; CHECK-GI-NEXT: csel x21, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x21 -; CHECK-GI-NEXT: csel x23, x8, xzr, lt +; CHECK-GI-NEXT: csel x23, x8, xzr, gt ; CHECK-GI-NEXT: mov v0.d[1], x23 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x21, xzr, gt +; CHECK-GI-NEXT: csel x8, x21, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x23, x22, gt +; CHECK-GI-NEXT: csel x8, x23, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w21, w0 -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x23, x8, xzr, lt +; CHECK-GI-NEXT: csel x23, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x23 -; CHECK-GI-NEXT: csel x24, x8, xzr, lt +; CHECK-GI-NEXT: csel x24, x8, xzr, gt ; CHECK-GI-NEXT: mov v0.d[1], x24 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x23, xzr, gt +; CHECK-GI-NEXT: csel x8, x23, xzr, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x24, x22, gt +; CHECK-GI-NEXT: csel x8, x24, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: mov v0.s[0], w19 @@ -4614,44 +4614,44 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) { ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI86_1] ; CHECK-GI-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: csel x19, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], x19 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: adrp x8, .LCPI86_0 ; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI86_0] ; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: mov x21, #-562949953421312 // =0xfffe000000000000 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: mov x22, #4629418941960159231 // =0x403effffffffffff -; CHECK-GI-NEXT: csel x8, x19, x21, gt +; CHECK-GI-NEXT: csel x8, x19, x21, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x20, x22, gt +; CHECK-GI-NEXT: csel x8, x20, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfdi ; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov x19, x0 -; CHECK-GI-NEXT: bl __getf2 +; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: csel x20, x8, xzr, gt ; CHECK-GI-NEXT: mov x8, v0.d[1] ; CHECK-GI-NEXT: mov v0.d[0], 
x20 -; CHECK-GI-NEXT: csel x23, x8, xzr, lt +; CHECK-GI-NEXT: csel x23, x8, xzr, gt ; CHECK-GI-NEXT: mov v0.d[1], x23 -; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: bl __lttf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel x8, x20, x21, gt +; CHECK-GI-NEXT: csel x8, x20, x21, lt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: csel x8, x23, x22, gt +; CHECK-GI-NEXT: csel x8, x23, x22, lt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfdi ; CHECK-GI-NEXT: mov v0.d[0], x19 diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir index 7aea97a3053c7..2eb1f5d559651 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -passes=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s # FIXME: This is a miscompile, and the s_or_b64s need to be preserved. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll new file mode 100644 index 0000000000000..a009854542f21 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=ASM-DAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=ASM-GISEL %s + +; Test that we can use v0 for temporaries in the if.then block. 
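+; In outline: %dead = call i32 @llvm.amdgcn.dead.i32() produces a placeholder
+; whose contents are never meaningfully read (that reading of the intrinsic is
+; assumed here, not asserted by the checks), and the phi in if.end takes %dead
+; on the if.then path. The register allocator is therefore free to hand v0 out
+; as a temporary in if.then without preserving anything across the block.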
+define i32 @dead(i1 %cond, i32 %x, ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2) #0 { +; ASM-DAG-LABEL: dead: +; ASM-DAG: ; %bb.0: ; %entry +; ASM-DAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; ASM-DAG-NEXT: s_wait_expcnt 0x0 +; ASM-DAG-NEXT: s_wait_samplecnt 0x0 +; ASM-DAG-NEXT: s_wait_bvhcnt 0x0 +; ASM-DAG-NEXT: s_wait_kmcnt 0x0 +; ASM-DAG-NEXT: v_mov_b32_e32 v4, v0 +; ASM-DAG-NEXT: v_mov_b32_e32 v0, v1 +; ASM-DAG-NEXT: s_mov_b32 s0, exec_lo +; ASM-DAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; ASM-DAG-NEXT: v_and_b32_e32 v1, 1, v4 +; ASM-DAG-NEXT: v_cmpx_eq_u32_e32 1, v1 +; ASM-DAG-NEXT: s_cbranch_execz .LBB0_2 +; ASM-DAG-NEXT: ; %bb.1: ; %if.then +; ASM-DAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; ASM-DAG-NEXT: global_store_b32 v[2:3], v0, off +; ASM-DAG-NEXT: ; implicit-def: $vgpr0 +; ASM-DAG-NEXT: .LBB0_2: ; %if.end +; ASM-DAG-NEXT: s_wait_alu 0xfffe +; ASM-DAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; ASM-DAG-NEXT: s_setpc_b64 s[30:31] +; +; ASM-GISEL-LABEL: dead: +; ASM-GISEL: ; %bb.0: ; %entry +; ASM-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; ASM-GISEL-NEXT: s_wait_expcnt 0x0 +; ASM-GISEL-NEXT: s_wait_samplecnt 0x0 +; ASM-GISEL-NEXT: s_wait_bvhcnt 0x0 +; ASM-GISEL-NEXT: s_wait_kmcnt 0x0 +; ASM-GISEL-NEXT: v_mov_b32_e32 v4, v0 +; ASM-GISEL-NEXT: v_mov_b32_e32 v0, v1 +; ASM-GISEL-NEXT: s_mov_b32 s0, exec_lo +; ASM-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; ASM-GISEL-NEXT: v_and_b32_e32 v1, 1, v4 +; ASM-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v1 +; ASM-GISEL-NEXT: s_cbranch_execz .LBB0_2 +; ASM-GISEL-NEXT: ; %bb.1: ; %if.then +; ASM-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; ASM-GISEL-NEXT: global_store_b32 v[2:3], v0, off +; ASM-GISEL-NEXT: ; implicit-def: $vgpr0 +; ASM-GISEL-NEXT: .LBB0_2: ; %if.end +; ASM-GISEL-NEXT: s_wait_alu 0xfffe +; ASM-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; ASM-GISEL-NEXT: s_setpc_b64 s[30:31] entry: + %dead = call i32 @llvm.amdgcn.dead.i32() + br i1 %cond, label %if.then, label %if.end + +if.then: ; preds = %entry + %temp = add i32 %x, 1 + store i32 %temp, ptr addrspace(1) %ptr1 + br label %if.end + +if.end: + %res = phi i32 [ %x, %entry ], [ %dead, %if.then ] + ret i32 %res +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll index 1bdaa4c98127d..110192ecefe55 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll @@ -1115,4 +1115,141 @@ tail: unreachable } +; Since functions that contain amdgcn.init.whole.wave do not preserve the inactive +; lanes of any VGPRs, the middle end will explicitly preserve them if needed by adding +; dummy VGPR arguments. Because only the inactive lanes are important, we need to make +; it clear to the backend that it's safe to allocate v9's active lanes inside +; the shader. This is achieved by using the llvm.amdgcn.dead intrinsic.
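+; Abridged from the IR of the test below, the pattern is:
+;   %imp.def = call i32 @llvm.amdgcn.dead()
+;   ...
+;   %inactive.vgpr.arg = phi i32 [ %inactive.vgpr, %entry ], [ %imp.def, %shader ]
+; On the %shader path the dummy argument's value is replaced by the dead
+; value, which is what lets the register allocator reuse v9's active lanes
+; there.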
+define amdgpu_cs_chain void @with_inactive_vgprs(ptr inreg %callee, i32 inreg %exec, i32 inreg %sgpr, i32 %active.vgpr, i32 %inactive.vgpr) { +; GISEL12-LABEL: with_inactive_vgprs: +; GISEL12: ; %bb.0: ; %entry +; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GISEL12-NEXT: s_wait_expcnt 0x0 +; GISEL12-NEXT: s_wait_samplecnt 0x0 +; GISEL12-NEXT: s_wait_bvhcnt 0x0 +; GISEL12-NEXT: s_wait_kmcnt 0x0 +; GISEL12-NEXT: s_or_saveexec_b32 s6, -1 +; GISEL12-NEXT: s_mov_b32 s4, s0 +; GISEL12-NEXT: s_mov_b32 s5, s1 +; GISEL12-NEXT: s_mov_b32 s0, s3 +; GISEL12-NEXT: s_wait_alu 0xfffe +; GISEL12-NEXT: s_and_saveexec_b32 s1, s6 +; GISEL12-NEXT: s_cbranch_execz .LBB6_2 +; GISEL12-NEXT: ; %bb.1: ; %shader +; GISEL12-NEXT: v_dual_mov_b32 v10, s5 :: v_dual_mov_b32 v9, s4 +; GISEL12-NEXT: flat_load_b32 v11, v[9:10] +; GISEL12-NEXT: ;;#ASMSTART +; GISEL12-NEXT: ; use v0-7 +; GISEL12-NEXT: ;;#ASMEND +; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GISEL12-NEXT: v_add_nc_u32_e32 v8, v8, v11 +; GISEL12-NEXT: flat_store_b32 v[9:10], v11 +; GISEL12-NEXT: ; implicit-def: $vgpr9 +; GISEL12-NEXT: .LBB6_2: ; %tail.block +; GISEL12-NEXT: s_wait_alu 0xfffe +; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GISEL12-NEXT: s_mov_b32 exec_lo, s2 +; GISEL12-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL12-LABEL: with_inactive_vgprs: +; DAGISEL12: ; %bb.0: ; %entry +; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 +; DAGISEL12-NEXT: s_wait_expcnt 0x0 +; DAGISEL12-NEXT: s_wait_samplecnt 0x0 +; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 +; DAGISEL12-NEXT: s_wait_kmcnt 0x0 +; DAGISEL12-NEXT: s_or_saveexec_b32 s6, -1 +; DAGISEL12-NEXT: s_mov_b32 s5, s1 +; DAGISEL12-NEXT: s_mov_b32 s4, s0 +; DAGISEL12-NEXT: s_wait_alu 0xfffe +; DAGISEL12-NEXT: s_and_saveexec_b32 s0, s6 +; DAGISEL12-NEXT: s_cbranch_execz .LBB6_2 +; DAGISEL12-NEXT: ; %bb.1: ; %shader +; DAGISEL12-NEXT: v_dual_mov_b32 v10, s5 :: v_dual_mov_b32 v9, s4 +; DAGISEL12-NEXT: flat_load_b32 v11, v[9:10] +; DAGISEL12-NEXT: ;;#ASMSTART +; DAGISEL12-NEXT: ; use v0-7 +; DAGISEL12-NEXT: ;;#ASMEND +; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 +; DAGISEL12-NEXT: v_add_nc_u32_e32 v8, v8, v11 +; DAGISEL12-NEXT: flat_store_b32 v[9:10], v11 +; DAGISEL12-NEXT: ; implicit-def: $vgpr9 +; DAGISEL12-NEXT: .LBB6_2: ; %tail.block +; DAGISEL12-NEXT: s_wait_alu 0xfffe +; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; DAGISEL12-NEXT: s_mov_b32 s0, s3 +; DAGISEL12-NEXT: s_mov_b32 exec_lo, s2 +; DAGISEL12-NEXT: s_wait_alu 0xfffe +; DAGISEL12-NEXT: s_setpc_b64 s[4:5] +; +; GISEL10-LABEL: with_inactive_vgprs: +; GISEL10: ; %bb.0: ; %entry +; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL10-NEXT: s_or_saveexec_b32 s6, -1 +; GISEL10-NEXT: s_mov_b32 s4, s0 +; GISEL10-NEXT: s_mov_b32 s5, s1 +; GISEL10-NEXT: s_mov_b32 s0, s3 +; GISEL10-NEXT: s_and_saveexec_b32 s1, s6 +; GISEL10-NEXT: s_cbranch_execz .LBB6_2 +; GISEL10-NEXT: ; %bb.1: ; %shader +; GISEL10-NEXT: v_mov_b32_e32 v10, s5 +; GISEL10-NEXT: v_mov_b32_e32 v9, s4 +; GISEL10-NEXT: flat_load_dword v11, v[9:10] +; GISEL10-NEXT: ;;#ASMSTART +; GISEL10-NEXT: ; use v0-7 +; GISEL10-NEXT: ;;#ASMEND +; GISEL10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GISEL10-NEXT: v_add_nc_u32_e32 v8, v8, v11 +; GISEL10-NEXT: flat_store_dword v[9:10], v11 +; GISEL10-NEXT: ; implicit-def: $vgpr9 +; GISEL10-NEXT: .LBB6_2: ; %tail.block +; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GISEL10-NEXT: s_mov_b32 exec_lo, s2 +; GISEL10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL10-LABEL: with_inactive_vgprs: +; DAGISEL10: ; %bb.0: ; %entry +; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
DAGISEL10-NEXT: s_or_saveexec_b32 s6, -1 +; DAGISEL10-NEXT: s_mov_b32 s5, s1 +; DAGISEL10-NEXT: s_mov_b32 s4, s0 +; DAGISEL10-NEXT: s_and_saveexec_b32 s0, s6 +; DAGISEL10-NEXT: s_cbranch_execz .LBB6_2 +; DAGISEL10-NEXT: ; %bb.1: ; %shader +; DAGISEL10-NEXT: v_mov_b32_e32 v10, s5 +; DAGISEL10-NEXT: v_mov_b32_e32 v9, s4 +; DAGISEL10-NEXT: flat_load_dword v11, v[9:10] +; DAGISEL10-NEXT: ;;#ASMSTART +; DAGISEL10-NEXT: ; use v0-7 +; DAGISEL10-NEXT: ;;#ASMEND +; DAGISEL10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; DAGISEL10-NEXT: v_add_nc_u32_e32 v8, v8, v11 +; DAGISEL10-NEXT: flat_store_dword v[9:10], v11 +; DAGISEL10-NEXT: ; implicit-def: $vgpr9 +; DAGISEL10-NEXT: .LBB6_2: ; %tail.block +; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; DAGISEL10-NEXT: s_mov_b32 s0, s3 +; DAGISEL10-NEXT: s_mov_b32 exec_lo, s2 +; DAGISEL10-NEXT: s_setpc_b64 s[4:5] entry: + %imp.def = call i32 @llvm.amdgcn.dead() + %initial.exec = call i1 @llvm.amdgcn.init.whole.wave() + br i1 %initial.exec, label %shader, label %tail.block + +shader: ; preds = %entry + %use.another.vgpr = load i32, ptr %callee ; something that won't be moved past the inline asm + call void asm sideeffect "; use v0-7", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() + store i32 %use.another.vgpr, ptr %callee + %active.vgpr.new = add i32 %active.vgpr, %use.another.vgpr + br label %tail.block + +tail.block: ; preds = %entry, %shader + %active.vgpr.arg = phi i32 [ %active.vgpr, %entry ], [ %active.vgpr.new, %shader ] + %inactive.vgpr.arg = phi i32 [ %inactive.vgpr, %entry ], [ %imp.def, %shader ] + %vgprs.0 = insertvalue { i32, i32 } poison, i32 %active.vgpr.arg, 0 + %vgprs = insertvalue { i32, i32 } %vgprs.0, i32 %inactive.vgpr.arg, 1 + call void (ptr, i32, i32, { i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.i32.sl_i32i32(ptr inreg %callee, i32 inreg %exec, i32 inreg %sgpr, { i32, i32 } %vgprs, i32 0) + unreachable +} + declare amdgpu_gfx <16 x i32> @write_v0_v15(<16 x i32>) diff --git a/llvm/test/CodeGen/Mips/readcyclecounter.ll b/llvm/test/CodeGen/Mips/readcyclecounter.ll index 23d3ea014f091..467dd92884b3d 100644 --- a/llvm/test/CodeGen/Mips/readcyclecounter.ll +++ b/llvm/test/CodeGen/Mips/readcyclecounter.ll @@ -7,8 +7,6 @@ ;RUN: llc -mtriple=mipsel -mcpu=mips2 < %s | FileCheck %s --check-prefix=MIPSEL_NOT_SUPPORTED ;RUN: llc -mtriple=mips64el -mcpu=mips3 < %s | FileCheck %s --check-prefix=MIPS64EL_NOT_SUPPORTED -; XFAIL: expensive_checks - declare i64 @llvm.readcyclecounter() nounwind readnone define i64 @test_readcyclecounter() nounwind { diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll new file mode 100644 index 0000000000000..69e3b22c7352c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll @@ -0,0 +1,330 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s + +define { half, half } @test_modf_f16(half %a) { +; CHECK-LABEL: test_modf_f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f2, 44(r1) +; CHECK-NEXT: addi
r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { half, half } @llvm.modf.f16(half %a) + ret { half, half } %result +} + +define half @test_modf_f16_only_use_fractional_part(half %a) { +; CHECK-LABEL: test_modf_f16_only_use_fractional_part: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { half, half } @llvm.modf.f16(half %a) + %result.0 = extractvalue { half, half } %result, 0 + ret half %result.0 +} + +define half @test_modf_f16_only_use_integral_part(half %a) { +; CHECK-LABEL: test_modf_f16_only_use_integral_part: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 44(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { half, half } @llvm.modf.f16(half %a) + %result.1 = extractvalue { half, half } %result, 1 + ret half %result.1 +} + +define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) { +; CHECK-LABEL: test_modf_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: xscvdphp f0, f2 +; CHECK-NEXT: addi r4, r1, 40 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f31, f0 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f3, 40(r1) +; CHECK-NEXT: fmr f2, f1 +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: lfs f4, 44(r1) +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a) + ret { <2 x half>, <2 x half> } %result +} + +define { float, float } @test_modf_f32(float %a) { +; CHECK-LABEL: test_modf_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f2, 44(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 
16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { float, float } @llvm.modf.f32(float %a) + ret { float, float } %result +} + +define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) { +; CHECK-LABEL: test_modf_v3f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r28, -32 +; CHECK-NEXT: .cfi_offset r29, -24 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: .cfi_offset v30, -64 +; CHECK-NEXT: .cfi_offset v31, -48 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; CHECK-NEXT: addi r30, r1, 36 +; CHECK-NEXT: std r28, 80(r1) # 8-byte Folded Spill +; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill +; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: mr r4, r30 +; CHECK-NEXT: vmr v31, v2 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: addi r29, r1, 40 +; CHECK-NEXT: xscvdpspn v30, f1 +; CHECK-NEXT: mr r4, r29 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: xscvdpspn vs0, f1 +; CHECK-NEXT: addi r28, r1, 44 +; CHECK-NEXT: mr r4, r28 +; CHECK-NEXT: xxmrghw v30, vs0, v30 +; CHECK-NEXT: xxsldwi vs0, v31, v31, 1 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NEXT: xscvdpspn v2, f1 +; CHECK-NEXT: lfiwzx f1, 0, r30 +; CHECK-NEXT: lfiwzx f2, 0, r29 +; CHECK-NEXT: lxsiwzx v3, 0, r28 +; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload +; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 80(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-NEXT: lxv vs0, 0(r3) +; CHECK-NEXT: xxmrghw v4, vs2, vs1 +; CHECK-NEXT: xxperm v2, v30, vs0 +; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload +; CHECK-NEXT: xxperm v3, v4, vs0 +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a) + ret { <3 x float>, <3 x float> } %result +} + +define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) { +; CHECK-LABEL: test_modf_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r29, -24 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: .cfi_offset v30, -64 +; CHECK-NEXT: .cfi_offset v31, -48 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; CHECK-NEXT: addi r30, r1, 40 +; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill +; CHECK-NEXT: mr r4, r30 +; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: vmr v31, v2 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: addi r29, r1, 44 +; CHECK-NEXT: xscvdpspn v30, f1 +; CHECK-NEXT: mr r4, r29 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: nop +; CHECK-NEXT: xscvdpspn vs0, f1 +; CHECK-NEXT: lfiwzx f1, 0, r29 +; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload +; CHECK-NEXT: ld 
r29, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghw v2, vs0, v30 +; CHECK-NEXT: lfiwzx f0, 0, r30 +; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload +; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghw v3, vs1, vs0 +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a) + ret { <2 x float>, <2 x float> } %result +} + +define { double, double } @test_modf_f64(double %a) { +; CHECK-LABEL: test_modf_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r4, r1, 40 +; CHECK-NEXT: bl modf +; CHECK-NEXT: nop +; CHECK-NEXT: lfd f2, 40(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { double, double } @llvm.modf.f64(double %a) + ret { double, double } %result +} + +define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_modf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v2 +; CHECK-NEXT: addi r4, r1, 32 +; CHECK-NEXT: xscpsgndp f1, v31, v31 +; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill +; CHECK-NEXT: bl modf +; CHECK-NEXT: nop +; CHECK-NEXT: xscpsgndp v30, f1, f1 +; CHECK-NEXT: xxswapd vs1, v31 +; CHECK-NEXT: addi r4, r1, 40 +; CHECK-NEXT: bl modf +; CHECK-NEXT: nop +; CHECK-NEXT: xxmrghd v2, v30, vs1 +; CHECK-NEXT: lfd f0, 32(r1) +; CHECK-NEXT: lfd f1, 40(r1) +; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v3, vs0, vs1 +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} + +define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128(ppc_fp128 %a) { +; CHECK-LABEL: test_modf_ppcf128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r5, r1, 32 +; CHECK-NEXT: bl modfl +; CHECK-NEXT: nop +; CHECK-NEXT: lfd f3, 32(r1) +; CHECK-NEXT: lfd f4, 40(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a) + ret { ppc_fp128, ppc_fp128 } %result +} diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index a50c303819f23..f93cb65897210 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -39,11 +39,11 @@ ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation -; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup +; CHECK-NEXT: 
RISC-V VMV0 Elimination ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 2646dfeca4eb6..b67fbe1b5d3cd 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -97,7 +97,7 @@ ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: RISC-V Vector Peephole Optimization -; CHECK-NEXT: RISC-V VMV0 Elimination +; CHECK-NEXT: RISC-V Fold Memory Offset ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Early Tail Duplication ; CHECK-NEXT: Optimize machine instruction PHIs @@ -128,6 +128,7 @@ ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup +; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions diff --git a/llvm/test/CodeGen/RISCV/fold-mem-offset.ll b/llvm/test/CodeGen/RISCV/fold-mem-offset.ll new file mode 100644 index 0000000000000..54eb3c9627691 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/fold-mem-offset.ll @@ -0,0 +1,733 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 | FileCheck %s --check-prefixes=CHECK,RV32I +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=CHECK,RV64I +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zba | FileCheck %s --check-prefixes=ZBA,RV32ZBA +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zba | FileCheck %s --check-prefixes=ZBA,RV64ZBA + +define i64 @test_sh3add(ptr %p, iXLen %x, iXLen %y) { +; RV32I-LABEL: test_sh3add: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 3 +; RV32I-NEXT: slli a2, a2, 3 +; RV32I-NEXT: add a1, a1, a0 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: lw a2, 480(a1) +; RV32I-NEXT: lw a1, 484(a1) +; RV32I-NEXT: lw a3, 404(a0) +; RV32I-NEXT: lw a4, 400(a0) +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: add a0, a4, a2 +; RV32I-NEXT: sltu a2, a0, a4 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh3add: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: add a1, a1, a0 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: ld a1, 480(a1) +; RV64I-NEXT: ld a0, 400(a0) +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh3add: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh3add a1, a1, a0 +; RV32ZBA-NEXT: sh3add a0, a2, a0 +; RV32ZBA-NEXT: lw a2, 480(a1) +; RV32ZBA-NEXT: lw a1, 484(a1) +; RV32ZBA-NEXT: lw a3, 404(a0) +; RV32ZBA-NEXT: lw a4, 400(a0) +; RV32ZBA-NEXT: add a1, a3, a1 +; RV32ZBA-NEXT: add a0, a4, a2 +; RV32ZBA-NEXT: sltu a2, a0, a4 +; RV32ZBA-NEXT: add a1, a1, a2 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh3add: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh3add a1, a1, a0 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: ld a1, 480(a1) +; RV64ZBA-NEXT: ld a0, 400(a0) +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %b = getelementptr inbounds nuw i8, ptr %p, i64 400 + %add = add iXLen %x, 10 + %arrayidx = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, iXLen %add + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx2 = getelementptr inbounds nuw [100 x i64], ptr %b, i64 
0, iXLen %y + %1 = load i64, ptr %arrayidx2, align 8 + %add3 = add nsw i64 %1, %0 + ret i64 %add3 +} + +define signext i32 @test_sh2add(ptr %p, iXLen %x, iXLen %y) { +; RV32I-LABEL: test_sh2add: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: slli a2, a2, 2 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lw a1, 1200(a1) +; RV32I-NEXT: lw a0, 1240(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh2add: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: lw a1, 1200(a1) +; RV64I-NEXT: lw a0, 1240(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh2add: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh2add a1, a1, a0 +; RV32ZBA-NEXT: sh2add a0, a2, a0 +; RV32ZBA-NEXT: lw a1, 1200(a1) +; RV32ZBA-NEXT: lw a0, 1240(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh2add: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh2add a1, a1, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a1, 1200(a1) +; RV64ZBA-NEXT: lw a0, 1240(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %c = getelementptr inbounds nuw i8, ptr %p, i64 1200 + %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %x + %0 = load i32, ptr %arrayidx, align 4 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %add + %1 = load i32, ptr %arrayidx2, align 4 + %add3 = add nsw i32 %1, %0 + ret i32 %add3 +} + +define signext i16 @test_sh1add(ptr %p, iXLen %x, iXLen %y) { +; RV32I-LABEL: test_sh1add: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: slli a2, a2, 1 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lh a1, 1600(a1) +; RV32I-NEXT: lh a0, 1620(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh1add: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: slli a2, a2, 1 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: lh a1, 1600(a1) +; RV64I-NEXT: lh a0, 1620(a0) +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh1add: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh1add a1, a1, a0 +; RV32ZBA-NEXT: sh1add a0, a2, a0 +; RV32ZBA-NEXT: lh a1, 1600(a1) +; RV32ZBA-NEXT: lh a0, 1620(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: slli a0, a0, 16 +; RV32ZBA-NEXT: srai a0, a0, 16 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh1add: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh1add a1, a1, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: lh a1, 1600(a1) +; RV64ZBA-NEXT: lh a0, 1620(a0) +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: slli a0, a0, 48 +; RV64ZBA-NEXT: srai a0, a0, 48 +; RV64ZBA-NEXT: ret +entry: + %d = getelementptr inbounds nuw i8, ptr %p, i64 1600 + %arrayidx = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, iXLen %x + %0 = load i16, ptr %arrayidx, align 2 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, iXLen %add + %1 = load i16, ptr %arrayidx2, align 2 + %add4 = add i16 %1, %0 + ret i16 %add4 +} + +define zeroext i8 @test_add(ptr %p, iXLen %x, iXLen %y) { +; CHECK-LABEL: test_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add a1, a0, a1 
+; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: lbu a1, 1800(a1) +; CHECK-NEXT: lbu a0, 1810(a0) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: ret +; +; ZBA-LABEL: test_add: +; ZBA: # %bb.0: # %entry +; ZBA-NEXT: add a1, a0, a1 +; ZBA-NEXT: add a0, a2, a0 +; ZBA-NEXT: lbu a1, 1800(a1) +; ZBA-NEXT: lbu a0, 1810(a0) +; ZBA-NEXT: add a0, a0, a1 +; ZBA-NEXT: andi a0, a0, 255 +; ZBA-NEXT: ret +entry: + %e = getelementptr inbounds nuw i8, ptr %p, i64 1800 + %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %x + %0 = load i8, ptr %arrayidx, align 1 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %add + %1 = load i8, ptr %arrayidx2, align 1 + %add4 = add i8 %1, %0 + ret i8 %add4 +} + +define i64 @test_sh3add_uw(ptr %p, i32 signext %x, i32 signext %y) { +; RV32I-LABEL: test_sh3add_uw: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 3 +; RV32I-NEXT: slli a2, a2, 3 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: lw a2, 404(a0) +; RV32I-NEXT: lw a3, 400(a1) +; RV32I-NEXT: lw a1, 404(a1) +; RV32I-NEXT: lw a4, 400(a0) +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: add a0, a4, a3 +; RV32I-NEXT: sltu a2, a0, a4 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh3add_uw: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: srli a1, a1, 29 +; RV64I-NEXT: srli a2, a2, 29 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: ld a1, 400(a1) +; RV64I-NEXT: ld a0, 400(a0) +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh3add_uw: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh3add a1, a1, a0 +; RV32ZBA-NEXT: sh3add a0, a2, a0 +; RV32ZBA-NEXT: lw a2, 404(a0) +; RV32ZBA-NEXT: lw a3, 400(a1) +; RV32ZBA-NEXT: lw a1, 404(a1) +; RV32ZBA-NEXT: lw a4, 400(a0) +; RV32ZBA-NEXT: add a1, a2, a1 +; RV32ZBA-NEXT: add a0, a4, a3 +; RV32ZBA-NEXT: sltu a2, a0, a4 +; RV32ZBA-NEXT: add a1, a1, a2 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh3add_uw: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh3add.uw a1, a1, a0 +; RV64ZBA-NEXT: sh3add.uw a0, a2, a0 +; RV64ZBA-NEXT: ld a1, 400(a1) +; RV64ZBA-NEXT: ld a0, 400(a0) +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %b = getelementptr inbounds nuw i8, ptr %p, i64 400 + %idxprom = zext i32 %x to i64 + %arrayidx = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, i64 %idxprom + %0 = load i64, ptr %arrayidx, align 8 + %idxprom2 = zext i32 %y to i64 + %arrayidx3 = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, i64 %idxprom2 + %1 = load i64, ptr %arrayidx3, align 8 + %add4 = add nsw i64 %1, %0 + ret i64 %add4 +} + +define signext i32 @test_sh2add_uw(ptr %p, i32 signext %x, i32 signext %y) { +; RV32I-LABEL: test_sh2add_uw: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: slli a2, a2, 2 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: lw a1, 1200(a1) +; RV32I-NEXT: lw a0, 1200(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh2add_uw: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: srli a1, a1, 30 +; RV64I-NEXT: srli a2, a2, 30 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lw a1, 1200(a1) +; RV64I-NEXT: lw a0, 1200(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh2add_uw: +; RV32ZBA: # %bb.0: # %entry +; 
RV32ZBA-NEXT: sh2add a1, a1, a0 +; RV32ZBA-NEXT: sh2add a0, a2, a0 +; RV32ZBA-NEXT: lw a1, 1200(a1) +; RV32ZBA-NEXT: lw a0, 1200(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh2add_uw: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh2add.uw a1, a1, a0 +; RV64ZBA-NEXT: sh2add.uw a0, a2, a0 +; RV64ZBA-NEXT: lw a1, 1200(a1) +; RV64ZBA-NEXT: lw a0, 1200(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %c = getelementptr inbounds nuw i8, ptr %p, i64 1200 + %idxprom = zext i32 %x to i64 + %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, i64 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + %idxprom2 = zext i32 %y to i64 + %arrayidx3 = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, i64 %idxprom2 + %1 = load i32, ptr %arrayidx3, align 4 + %add4 = add nsw i32 %1, %0 + ret i32 %add4 +} + +define signext i16 @test_sh1add_uw(ptr %p, i32 signext %x, i32 signext %y) { +; RV32I-LABEL: test_sh1add_uw: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: slli a2, a2, 1 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lh a1, 1600(a1) +; RV32I-NEXT: lh a0, 1620(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh1add_uw: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: addi a2, a2, 10 +; RV64I-NEXT: srli a1, a1, 31 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: srli a2, a2, 31 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lh a1, 1600(a1) +; RV64I-NEXT: lh a0, 1600(a0) +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh1add_uw: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh1add a1, a1, a0 +; RV32ZBA-NEXT: sh1add a0, a2, a0 +; RV32ZBA-NEXT: lh a1, 1600(a1) +; RV32ZBA-NEXT: lh a0, 1620(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: slli a0, a0, 16 +; RV32ZBA-NEXT: srai a0, a0, 16 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh1add_uw: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh1add.uw a1, a1, a0 +; RV64ZBA-NEXT: addi a2, a2, 10 +; RV64ZBA-NEXT: sh1add.uw a0, a2, a0 +; RV64ZBA-NEXT: lh a1, 1600(a1) +; RV64ZBA-NEXT: lh a0, 1600(a0) +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: slli a0, a0, 48 +; RV64ZBA-NEXT: srai a0, a0, 48 +; RV64ZBA-NEXT: ret +entry: + %d = getelementptr inbounds nuw i8, ptr %p, i64 1600 + %idxprom = zext i32 %x to i64 + %arrayidx = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, i64 %idxprom + %0 = load i16, ptr %arrayidx, align 2 + %add = add i32 %y, 10 + %idxprom2 = zext i32 %add to i64 + %arrayidx3 = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, i64 %idxprom2 + %1 = load i16, ptr %arrayidx3, align 2 + %add5 = add i16 %1, %0 + ret i16 %add5 +} + +define zeroext i8 @test_add_uw(ptr %p, i32 signext %x, i32 signext %y) { +; RV32I-LABEL: test_add_uw: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: lbu a1, 1800(a1) +; RV32I-NEXT: lbu a0, 1800(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_add_uw: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: srli a2, a2, 32 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lbu a1, 1800(a1) +; RV64I-NEXT: lbu a0, 1800(a0) +; RV64I-NEXT: add a0, 
a0, a1 +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_add_uw: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a1, a0, a1 +; RV32ZBA-NEXT: add a0, a0, a2 +; RV32ZBA-NEXT: lbu a1, 1800(a1) +; RV32ZBA-NEXT: lbu a0, 1800(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: andi a0, a0, 255 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_add_uw: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add.uw a1, a1, a0 +; RV64ZBA-NEXT: add.uw a0, a2, a0 +; RV64ZBA-NEXT: lbu a1, 1800(a1) +; RV64ZBA-NEXT: lbu a0, 1800(a0) +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: andi a0, a0, 255 +; RV64ZBA-NEXT: ret +entry: + %e = getelementptr inbounds nuw i8, ptr %p, i64 1800 + %idxprom = zext i32 %x to i64 + %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, i64 %idxprom + %0 = load i8, ptr %arrayidx, align 1 + %idxprom2 = zext i32 %y to i64 + %arrayidx3 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, i64 %idxprom2 + %1 = load i8, ptr %arrayidx3, align 1 + %add5 = add i8 %1, %0 + ret i8 %add5 +} + +; The addi is part of the index and used with 2 different scales. +define signext i32 @test_scaled_index_addi(ptr %p, iXLen %x) { +; RV32I-LABEL: test_scaled_index_addi: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a2, a1, 2 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: add a2, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw a1, 1196(a2) +; RV32I-NEXT: lh a0, 1598(a0) +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_scaled_index_addi: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a2, a1, 2 +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: add a2, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lw a1, 1196(a2) +; RV64I-NEXT: lh a0, 1598(a0) +; RV64I-NEXT: addw a0, a1, a0 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_scaled_index_addi: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh2add a2, a1, a0 +; RV32ZBA-NEXT: sh1add a0, a1, a0 +; RV32ZBA-NEXT: lw a1, 1196(a2) +; RV32ZBA-NEXT: lh a0, 1598(a0) +; RV32ZBA-NEXT: add a0, a1, a0 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_scaled_index_addi: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh2add a2, a1, a0 +; RV64ZBA-NEXT: sh1add a0, a1, a0 +; RV64ZBA-NEXT: lw a1, 1196(a2) +; RV64ZBA-NEXT: lh a0, 1598(a0) +; RV64ZBA-NEXT: addw a0, a1, a0 +; RV64ZBA-NEXT: ret +entry: + %c = getelementptr inbounds nuw i8, ptr %p, i64 1200 + %sub = add iXLen %x, -1 + %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %sub + %0 = load i32, ptr %arrayidx, align 4 + %d = getelementptr inbounds nuw i8, ptr %p, i64 1600 + %arrayidx2 = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, iXLen %sub + %1 = load i16, ptr %arrayidx2, align 2 + %conv = sext i16 %1 to i32 + %add = add nsw i32 %0, %conv + ret i32 %add +} + +; Offset is a pair of addis. We can fold one of them. 
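+; Concretely, with the numbers from the checks below: 2800 is outside addi's
+; 12-bit signed immediate range (at most 2047), so it is materialized as
+; 2047 + 753. The addi keeps 2047 and the remainder folds into the loads:
+; 753(a1) for index %x, and 753 + 4*10 = 793(a0) for index %y + 10.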
+define signext i32 @test_medium_offset(ptr %p, iXLen %x, iXLen %y) { +; RV32I-LABEL: test_medium_offset: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a0, a0, 2047 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: slli a2, a2, 2 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lw a1, 753(a1) +; RV32I-NEXT: lw a0, 793(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_medium_offset: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a0, a0, 2047 +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: lw a1, 753(a1) +; RV64I-NEXT: lw a0, 793(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_medium_offset: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi a0, a0, 2047 +; RV32ZBA-NEXT: sh2add a1, a1, a0 +; RV32ZBA-NEXT: sh2add a0, a2, a0 +; RV32ZBA-NEXT: lw a1, 753(a1) +; RV32ZBA-NEXT: lw a0, 793(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_medium_offset: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a0, a0, 2047 +; RV64ZBA-NEXT: sh2add a1, a1, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a1, 753(a1) +; RV64ZBA-NEXT: lw a0, 793(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %f = getelementptr inbounds nuw i8, ptr %p, i64 2800 + %arrayidx = getelementptr inbounds nuw [1000 x i32], ptr %f, i64 0, iXLen %x + %0 = load i32, ptr %arrayidx, align 4 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [1000 x i32], ptr %f, i64 0, iXLen %add + %1 = load i32, ptr %arrayidx2, align 4 + %add3 = add nsw i32 %1, %0 + ret i32 %add3 +} + +; Offset is a lui+addiw. We can't fold this on RV64. +define signext i32 @test_large_offset(ptr %p, iXLen %x, iXLen %y) { +; RV32I-LABEL: test_large_offset: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a3, 2 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: slli a2, a2, 2 +; RV32I-NEXT: add a0, a0, a3 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lw a1, -1392(a1) +; RV32I-NEXT: lw a0, -1352(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_large_offset: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a3, 2 +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: addiw a3, a3, -1392 +; RV64I-NEXT: add a0, a0, a3 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: lw a1, 0(a1) +; RV64I-NEXT: lw a0, 40(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_large_offset: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: li a3, 1700 +; RV32ZBA-NEXT: sh2add a0, a3, a0 +; RV32ZBA-NEXT: sh2add a1, a1, a0 +; RV32ZBA-NEXT: sh2add a0, a2, a0 +; RV32ZBA-NEXT: lw a1, 0(a1) +; RV32ZBA-NEXT: lw a0, 40(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_large_offset: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: li a3, 1700 +; RV64ZBA-NEXT: sh2add a0, a3, a0 +; RV64ZBA-NEXT: sh2add a1, a1, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a1, 0(a1) +; RV64ZBA-NEXT: lw a0, 40(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %g = getelementptr inbounds nuw i8, ptr %p, i64 6800 + %arrayidx = getelementptr inbounds nuw [200 x i32], ptr %g, i64 0, iXLen %x + %0 = load i32, ptr %arrayidx, align 4 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [200 x i32], ptr %g, i64 0, iXLen %add + %1 = load i32, ptr %arrayidx2, align 4 + %add3 = add nsw i32 %1, %0 + ret 
i32 %add3 +} + +; After folding we can CSE the sh2add +define signext i32 @test_cse(ptr %p, iXLen %x) { +; RV32I-LABEL: test_cse: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw a1, 1200(a0) +; RV32I-NEXT: addi a0, a0, 2047 +; RV32I-NEXT: lw a0, 753(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_cse: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lw a1, 1200(a0) +; RV64I-NEXT: addi a0, a0, 2047 +; RV64I-NEXT: lw a0, 753(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_cse: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh2add a0, a1, a0 +; RV32ZBA-NEXT: lw a1, 1200(a0) +; RV32ZBA-NEXT: addi a0, a0, 2047 +; RV32ZBA-NEXT: lw a0, 753(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_cse: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh2add a0, a1, a0 +; RV64ZBA-NEXT: lw a1, 1200(a0) +; RV64ZBA-NEXT: addi a0, a0, 2047 +; RV64ZBA-NEXT: lw a0, 753(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %c = getelementptr inbounds nuw i8, ptr %p, i64 1200 + %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %x + %0 = load i32, ptr %arrayidx, align 4 + %f = getelementptr inbounds nuw i8, ptr %p, i64 2800 + %arrayidx1 = getelementptr inbounds nuw [1000 x i32], ptr %f, i64 0, iXLen %x + %1 = load i32, ptr %arrayidx1, align 4 + %add = add nsw i32 %1, %0 + ret i32 %add +} + +define zeroext i8 @test_optsize(ptr %p, iXLen %x, iXLen %y) optsize { +; CHECK-LABEL: test_optsize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a0, a0, 1800 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: lbu a1, 0(a1) +; CHECK-NEXT: lbu a0, 10(a0) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: ret +; +; ZBA-LABEL: test_optsize: +; ZBA: # %bb.0: # %entry +; ZBA-NEXT: addi a0, a0, 1800 +; ZBA-NEXT: add a1, a0, a1 +; ZBA-NEXT: add a0, a2, a0 +; ZBA-NEXT: lbu a1, 0(a1) +; ZBA-NEXT: lbu a0, 10(a0) +; ZBA-NEXT: add a0, a0, a1 +; ZBA-NEXT: andi a0, a0, 255 +; ZBA-NEXT: ret +entry: + %e = getelementptr inbounds nuw i8, ptr %p, i64 1800 + %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %x + %0 = load i8, ptr %arrayidx, align 1 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %add + %1 = load i8, ptr %arrayidx2, align 1 + %add4 = add i8 %1, %0 + ret i8 %add4 +} + +define zeroext i8 @test_minsize(ptr %p, iXLen %x, iXLen %y) minsize { +; CHECK-LABEL: test_minsize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a0, a0, 1800 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: lbu a1, 0(a1) +; CHECK-NEXT: lbu a0, 10(a0) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: ret +; +; ZBA-LABEL: test_minsize: +; ZBA: # %bb.0: # %entry +; ZBA-NEXT: addi a0, a0, 1800 +; ZBA-NEXT: add a1, a0, a1 +; ZBA-NEXT: add a0, a2, a0 +; ZBA-NEXT: lbu a1, 0(a1) +; ZBA-NEXT: lbu a0, 10(a0) +; ZBA-NEXT: add a0, a0, a1 +; ZBA-NEXT: andi a0, a0, 255 +; ZBA-NEXT: ret +entry: + %e = getelementptr inbounds nuw i8, ptr %p, i64 1800 + %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %x + %0 = load i8, ptr %arrayidx, align 1 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %add + %1 = load i8, ptr %arrayidx2, align 1 + %add4 = add i8 %1, %0 + ret i8 %add4 +} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index 1b9c78a20ec3b..039266b169ab2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -1515,40 +1515,36 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/commutable.ll b/llvm/test/CodeGen/RISCV/rvv/commutable.ll index e26c467f025bd..5f35626120178 100644 --- a/llvm/test/CodeGen/RISCV/rvv/commutable.ll +++ b/llvm/test/CodeGen/RISCV/rvv/commutable.ll @@ -26,10 +26,9 @@ define @commutable_vadd_vv_masked( %0, @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -59,10 +58,9 @@ define @commutable_vand_vv_masked( %0, @llvm.riscv.vand.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vand.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -92,10 +90,9 @@ define @commutable_vor_vv_masked( %0, @llvm.riscv.vor.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vor.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -125,10 +122,9 @@ define @commutable_vxor_vv_masked( %0, @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -158,10 +154,9 @@ define @commutable_vmseq_vv_masked( %0, @llvm.riscv.vmseq.mask.nxv1i64( undef, %0, %1, %mask, iXLen %2) %b = call @llvm.riscv.vmseq.mask.nxv1i64( undef, %1, %0, %mask, iXLen %2) @@ -191,10 +186,9 @@ define 
@commutable_vmsne_vv_masked( %0, @llvm.riscv.vmsne.mask.nxv1i64( undef, %0, %1, %mask, iXLen %2) %b = call @llvm.riscv.vmsne.mask.nxv1i64( undef, %1, %0, %mask, iXLen %2) @@ -224,10 +218,9 @@ define @commutable_vmin_vv_masked( %0, @llvm.riscv.vmin.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmin.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -257,10 +250,9 @@ define @commutable_vminu_vv_masked( %0, @llvm.riscv.vminu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vminu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -290,10 +282,9 @@ define @commutable_vmax_vv_masked( %0, @llvm.riscv.vmax.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmax.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -323,10 +314,9 @@ define @commutable_vmaxu_vv_masked( %0, @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -356,10 +346,9 @@ define @commutable_vmul_vv_masked( %0, @llvm.riscv.vmul.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmul.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -389,10 +378,9 @@ define @commutable_vmulh_vv_masked( %0, @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -422,10 +410,9 @@ define @commutable_vmulhu_vv_masked( %0, @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -456,9 +443,8 @@ define @commutable_vwadd_vv_masked( %0, @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -489,9 +475,8 @@ define @commutable_vwaddu_vv_masked( %0, @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -522,9 +507,8 @@ define @commutable_vwmul_vv_masked( %0, @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -555,9 +539,8 @@ define @commutable_vwmulu_vv_masked( %0, @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -637,10 +620,9 @@ define @commutable_vadc_vv( %0, @llvm.riscv.vadc.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2) @@ -671,10 +653,9 @@ define @commutable_vsadd_vv_masked( %0, @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -704,10 +685,9 @@ define @commutable_vsaddu_vv_masked( %0, @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -739,10 +719,9 @@ define @commutable_vaadd_vv_masked( %0, @llvm.riscv.vaadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call 
@llvm.riscv.vaadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) @@ -774,10 +753,9 @@ define @commutable_vaaddu_vv_masked( %0, @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) @@ -809,10 +787,9 @@ define @commutable_vsmul_vv_masked( %0, @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir index 0b905b57f92b8..be73d4808937a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir +++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir @@ -6,9 +6,6 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma - ; CHECK-NEXT: vmsne.vi v0, v8, 0 - ; CHECK-NEXT: vsll.vi v8, v8, 5 - ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: sf.vc.v.x 3, 31, v9, a1 ; CHECK-NEXT: bgeu a0, zero, .LBB0_3 ; CHECK-NEXT: # %bb.1: # %entry @@ -22,7 +19,10 @@ ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: bgeu a0, a2, .LBB0_2 ; CHECK-NEXT: .LBB0_4: # %entry - ; CHECK-NEXT: vse64.v v8, (a1) + ; CHECK-NEXT: vmsne.vi v0, v8, 0 + ; CHECK-NEXT: vsll.vi v8, v8, 5 + ; CHECK-NEXT: vmerge.vim v9, v8, -1, v0 + ; CHECK-NEXT: vse64.v v9, (a1) ; CHECK-NEXT: ret entry: ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index 5f275da1740cb..cd4b19f11d160 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1735,8 +1735,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1771,7 +1770,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill @@ -1786,12 +1786,13 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -1805,21 +1806,24 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul 
a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v24, v16, v24, v0.t ; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload @@ -1837,7 +1841,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -1851,7 +1856,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload @@ -1869,8 +1875,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -1886,41 +1891,28 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsub.vv v24, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; 
RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 @@ -1932,7 +1924,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -3928,8 +3921,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -3964,7 +3956,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill @@ -3979,12 +3972,13 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -3998,21 +3992,24 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v24, v16, v24, v0.t ; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload @@ -4030,7 +4027,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; 
RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -4044,7 +4042,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload @@ -4062,8 +4061,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -4079,41 +4077,28 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsub.vv v24, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 @@ -4125,7 +4110,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index e82891f90d85e..9bd1da2e53dce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -39,18 +39,18 @@ define void @buildvec_no_vid_v4f32(ptr %x) { define <4 x float> 
@hang_when_merging_stores_after_legalization(<8 x float> %x, <8 x float> %y) optsize { ; CHECK-LABEL: hang_when_merging_stores_after_legalization: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v12, -14 -; CHECK-NEXT: vid.v v14 -; CHECK-NEXT: li a0, 7 -; CHECK-NEXT: vmadd.vx v14, a0, v12 -; CHECK-NEXT: li a0, 129 -; CHECK-NEXT: vmv.s.x v15, a0 -; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vcompress.vm v12, v8, v15 -; CHECK-NEXT: vrgatherei16.vv v12, v10, v14, v0.t -; CHECK-NEXT: vmv1r.v v8, v12 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v12, v10, 4 +; CHECK-NEXT: vslideup.vi v12, v10, 2, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vmv.v.i v10, 12 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 ; CHECK-NEXT: ret %z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> ret <4 x float> %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index ac78a252cf9cd..7817f010c4deb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -38,44 +38,27 @@ define <4 x float> @interleave_v2f32(<2 x float> %x, <2 x float> %y) { define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; V128-LABEL: interleave_v2f64: ; V128: # %bb.0: -; V128-NEXT: csrr a0, vlenb -; V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; V128-NEXT: vid.v v10 +; V128-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; V128-NEXT: vmv1r.v v10, v9 ; V128-NEXT: vmv.v.i v0, 10 -; V128-NEXT: srli a0, a0, 3 -; V128-NEXT: vsrl.vi v10, v10, 1 -; V128-NEXT: vslidedown.vx v11, v10, a0 -; V128-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; V128-NEXT: vrgatherei16.vv v13, v9, v11 -; V128-NEXT: vrgatherei16.vv v12, v9, v10 -; V128-NEXT: vrgatherei16.vv v15, v8, v11 -; V128-NEXT: vrgatherei16.vv v14, v8, v10 ; V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; V128-NEXT: vmerge.vvm v8, v14, v12, v0 +; V128-NEXT: vslideup.vi v12, v10, 1 +; V128-NEXT: vslideup.vi v12, v10, 2 +; V128-NEXT: vmv2r.v v10, v8 +; V128-NEXT: vslideup.vi v10, v8, 1 +; V128-NEXT: vmerge.vvm v8, v10, v12, v0 ; V128-NEXT: ret ; -; RV32-V512-LABEL: interleave_v2f64: -; RV32-V512: # %bb.0: -; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; RV32-V512-NEXT: vid.v v10 -; RV32-V512-NEXT: vsrl.vi v11, v10, 1 -; RV32-V512-NEXT: vmv.v.i v0, 10 -; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 -; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t -; RV32-V512-NEXT: vmv.v.v v8, v10 -; RV32-V512-NEXT: ret -; -; RV64-V512-LABEL: interleave_v2f64: -; RV64-V512: # %bb.0: -; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu -; RV64-V512-NEXT: vid.v v10 -; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vmv.v.i v0, 10 -; RV64-V512-NEXT: vrgather.vv v10, v8, v11 -; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t -; RV64-V512-NEXT: vmv.v.v v8, v10 -; RV64-V512-NEXT: ret +; V512-LABEL: interleave_v2f64: +; V512: # %bb.0: +; V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; V512-NEXT: vslideup.vi v10, v9, 1 +; V512-NEXT: vmv1r.v v11, v8 +; V512-NEXT: vslideup.vi v10, v9, 2 +; V512-NEXT: vmv.v.i v0, 
10 +; V512-NEXT: vslideup.vi v11, v8, 1 +; V512-NEXT: vmerge.vvm v8, v11, v10, v0 +; V512-NEXT: ret %a = shufflevector <2 x double> %x, <2 x double> %y, <4 x i32> ret <4 x double> %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll index 41d8abb9b73eb..a749736097331 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -69,14 +69,9 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) { define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 4096 -; CHECK-NEXT: addi a0, a0, 513 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vsext.vf2 v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 1 +; CHECK-NEXT: vslideup.vi v10, v8, 2 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> @@ -86,14 +81,9 @@ define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) { define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_permute_shuffle_uv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 4096 -; CHECK-NEXT: addi a0, a0, 513 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vsext.vf2 v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 1 +; CHECK-NEXT: vslideup.vi v10, v8, 2 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %s = shufflevector <4 x double> poison, <4 x double> %x, <4 x i32> @@ -103,13 +93,12 @@ define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) { define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) { ; CHECK-LABEL: vrgather_shuffle_vv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI7_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v14, (a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v12, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 8 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vslideup.vi v12, v8, 2 ; CHECK-NEXT: vrgather.vi v12, v10, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -120,16 +109,18 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_shuffle_xv_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI8_0)(a0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrsub.vi v12, v10, 4 +; CHECK-NEXT: vmv2r.v v10, v8 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vslideup.vi v10, v8, 2, v0.t +; CHECK-NEXT: vsetivli zero, 1, 
e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v10, fa5 -; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa5 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x double> , <4 x double> %x, <4 x i32> ret <4 x double> %s @@ -138,17 +129,17 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_shuffle_vx_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 9 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vcompress.vm v12, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa5 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vfmv.v.f v10, fa5 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32> ret <4 x double> %s @@ -311,13 +302,9 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) { define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) { ; CHECK-LABEL: vrgather_permute_shuffle_vu_v4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 4096 -; CHECK-NEXT: addi a0, a0, 513 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vsext.vf2 v10, v9 -; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vslideup.vi v9, v8, 2 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> @@ -327,12 +314,10 @@ define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) { define <4 x bfloat> @vrgather_shuffle_vv_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) { ; CHECK-LABEL: vrgather_shuffle_vv_v4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI25_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI25_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vle16.v v11, (a0) +; CHECK-NEXT: vslidedown.vi v10, v8, 1 ; CHECK-NEXT: vmv.v.i v0, 8 -; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vslideup.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -355,13 +340,9 @@ define <4 x bfloat> @vrgather_shuffle_vx_v4bf16_load(ptr %p) { define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) { ; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 4096 -; CHECK-NEXT: addi a0, a0, 513 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vsext.vf2 v10, v9 -; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vslideup.vi v9, v8, 2 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> @@ -371,12 
+352,10 @@ define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) { define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) { ; CHECK-LABEL: vrgather_shuffle_vv_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI28_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vle16.v v11, (a0) +; CHECK-NEXT: vslidedown.vi v10, v8, 1 ; CHECK-NEXT: vmv.v.i v0, 8 -; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vslideup.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index bef29dfecef4c..c885b3c03270c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -3902,12 +3902,10 @@ define void @trunc_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -3969,10 +3967,8 @@ define void @trunc_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI172_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI172_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -3986,12 +3982,10 @@ define void @trunc_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -4082,13 +4076,11 @@ define void @ceil_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4155,11 +4147,9 @@ define void @ceil_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI178_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 3 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4174,13 +4164,11 @@ define void @ceil_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 -; 
ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 3 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4276,13 +4264,11 @@ define void @floor_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4349,11 +4335,9 @@ define void @floor_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI184_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI184_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 2 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4368,13 +4352,11 @@ define void @floor_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 2 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4470,13 +4452,11 @@ define void @round_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4543,11 +4523,9 @@ define void @round_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI190_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 4 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4562,13 +4540,11 @@ define void @round_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 4 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index beaf75d5b0cfa..4911c340c9154 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -51,44 +51,27 @@ define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) { define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; V128-LABEL: interleave_v2i64: ; V128: # %bb.0: -; V128-NEXT: csrr a0, vlenb -; V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; V128-NEXT: vid.v v10 +; V128-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; V128-NEXT: vmv1r.v v10, v9 ; V128-NEXT: vmv.v.i v0, 10 -; V128-NEXT: srli a0, a0, 3 -; V128-NEXT: vsrl.vi v10, v10, 1 -; V128-NEXT: vslidedown.vx v11, v10, a0 -; V128-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; V128-NEXT: vrgatherei16.vv v13, v9, v11 -; V128-NEXT: vrgatherei16.vv v12, v9, v10 -; V128-NEXT: vrgatherei16.vv v15, v8, v11 -; V128-NEXT: vrgatherei16.vv v14, v8, v10 ; V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; V128-NEXT: vmerge.vvm v8, v14, v12, v0 +; V128-NEXT: vslideup.vi v12, v10, 1 +; V128-NEXT: vslideup.vi v12, v10, 2 +; V128-NEXT: vmv2r.v v10, v8 +; V128-NEXT: vslideup.vi v10, v8, 1 +; V128-NEXT: vmerge.vvm v8, v10, v12, v0 ; V128-NEXT: ret ; -; RV32-V512-LABEL: interleave_v2i64: -; RV32-V512: # %bb.0: -; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; RV32-V512-NEXT: vid.v v10 -; RV32-V512-NEXT: vsrl.vi v11, v10, 1 -; RV32-V512-NEXT: vmv.v.i v0, 10 -; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 -; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t -; RV32-V512-NEXT: vmv.v.v v8, v10 -; RV32-V512-NEXT: ret -; -; RV64-V512-LABEL: interleave_v2i64: -; RV64-V512: # %bb.0: -; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu -; RV64-V512-NEXT: vid.v v10 -; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vmv.v.i v0, 10 -; RV64-V512-NEXT: vrgather.vv v10, v8, v11 -; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t -; RV64-V512-NEXT: vmv.v.v v8, v10 -; RV64-V512-NEXT: ret +; V512-LABEL: interleave_v2i64: +; V512: # %bb.0: +; V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; V512-NEXT: vslideup.vi v10, v9, 1 +; V512-NEXT: vmv1r.v v11, v8 +; V512-NEXT: vslideup.vi v10, v9, 2 +; V512-NEXT: vmv.v.i v0, 10 +; V512-NEXT: vslideup.vi v11, v8, 1 +; V512-NEXT: vmerge.vvm v8, v11, v10, v0 +; V512-NEXT: ret %a = shufflevector <2 x i64> %x, <2 x i64> %y, <4 x i32> ret <4 x i64> %a } @@ -191,30 +174,28 @@ define <4 x i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V128-LABEL: interleave_v4i32_offset_1: ; V128: # %bb.0: -; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; V128-NEXT: vid.v v10 +; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; V128-NEXT: vmv.v.i v0, 8 +; V128-NEXT: vmv1r.v v10, v9 +; V128-NEXT: vslideup.vi v10, v9, 1, v0.t ; V128-NEXT: vmv.v.i v0, 10 -; V128-NEXT: vsrl.vi v10, v10, 1 -; V128-NEXT: vadd.vi v11, v10, 1 ; V128-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; V128-NEXT: vzext.vf2 v10, v8 -; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; V128-NEXT: vrgather.vv v10, v9, v11, v0.t -; V128-NEXT: vmv.v.v v8, v10 +; V128-NEXT: vzext.vf2 v9, v8 +; V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; V128-NEXT: vmerge.vvm v8, v9, v10, v0 ; V128-NEXT: ret ; ; V512-LABEL: interleave_v4i32_offset_1: ; V512: # %bb.0: -; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma -; V512-NEXT: vid.v v10 +; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu +; V512-NEXT: vmv.v.i 
v0, 8 +; V512-NEXT: vmv1r.v v10, v9 +; V512-NEXT: vslideup.vi v10, v9, 1, v0.t ; V512-NEXT: vmv.v.i v0, 10 -; V512-NEXT: vsrl.vi v10, v10, 1 -; V512-NEXT: vadd.vi v11, v10, 1 ; V512-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; V512-NEXT: vzext.vf2 v10, v8 -; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu -; V512-NEXT: vrgather.vv v10, v9, v11, v0.t -; V512-NEXT: vmv1r.v v8, v10 +; V512-NEXT: vzext.vf2 v9, v8 +; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; V512-NEXT: vmerge.vvm v8, v9, v10, v0 ; V512-NEXT: ret %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> ret <4 x i32> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 3e31c9de61657..f307ebb422c6c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -50,13 +50,9 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) { define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) { ; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 4096 -; CHECK-NEXT: addi a0, a0, 513 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vsext.vf2 v10, v9 -; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vslideup.vi v9, v8, 2 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> @@ -66,13 +62,9 @@ define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) { define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) { ; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 4096 -; CHECK-NEXT: addi a0, a0, 513 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vsext.vf2 v10, v9 -; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vslideup.vi v9, v8, 2 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> @@ -82,12 +74,10 @@ define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) { define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) { ; CHECK-LABEL: vrgather_shuffle_vv_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vle16.v v11, (a0) +; CHECK-NEXT: vslidedown.vi v10, v8, 1 ; CHECK-NEXT: vmv.v.i v0, 8 -; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vslideup.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -99,12 +89,12 @@ define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) { ; CHECK-LABEL: vrgather_shuffle_xv_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vmv.v.i v0, 8 +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vslideup.vi v9, v8, 2, v0.t ; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vrsub.vi v10, v9, 4 -; CHECK-NEXT: vmv.v.i v9, 5 -; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vmv.v.i v8, 5 +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> , <4 x i16> %x, <4 x 
i32> ret <4 x i16> %s @@ -113,12 +103,13 @@ define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) { define <4 x i16> @vrgather_shuffle_vx_v4i16(<4 x i16> %x) { ; CHECK-LABEL: vrgather_shuffle_vx_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 9 -; CHECK-NEXT: vmv.v.i v0, 3 -; CHECK-NEXT: vcompress.vm v10, v8, v9 -; CHECK-NEXT: vmv.v.i v8, 5 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t +; CHECK-NEXT: vmv.v.i v10, 5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> %x, <4 x i16> , <4 x i32> ret <4 x i16> %s @@ -603,8 +594,8 @@ define <8 x i8> @concat_4xi8_start_undef(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: concat_4xi8_start_undef_at_start: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res @@ -754,11 +745,10 @@ define <8 x i16> @shuffle_compress_singlesrc_e16(<8 x i16> %v) { define <8 x i32> @shuffle_compress_singlesrc_e32(<8 x i32> %v) { ; CHECK-LABEL: shuffle_compress_singlesrc_e32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v12, 13 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vcompress.vm v10, v8, v12 -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: li a0, 28 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> ret <8 x i32> %out @@ -856,16 +846,10 @@ define <8 x i32> @shuffle_spread3_singlesrc_e32(<8 x i32> %v) { define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e32: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: vsrl.vi v9, v9, 2 -; CHECK-NEXT: vslidedown.vx v10, v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vrgatherei16.vv v11, v8, v10 -; CHECK-NEXT: vrgatherei16.vv v10, v8, v9 -; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv2r.v v10, v8 +; CHECK-NEXT: vslideup.vi v10, v8, 3 +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> ret <8 x i32> %out @@ -959,11 +943,9 @@ define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) { define <8 x i8> @shuffle_decompress_singlesrc_e8(<8 x i8> %v) { ; CHECK-LABEL: shuffle_decompress_singlesrc_e8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI66_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI66_0) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vle8.v v10, (a0) -; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vslideup.vi v9, v8, 3 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> @@ -1367,13 +1349,12 @@ define void @shuffle_i256_splat(ptr %p) nounwind { define <16 x i32> @shuffle_m1_prefix(<16 x i32> %a) { ; CHECK-LABEL: shuffle_m1_prefix: ; 
CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8208 -; CHECK-NEXT: addi a0, a0, 770 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsext.vf4 v10, v9 -; CHECK-NEXT: vrgather.vv v12, v8, v10 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v12, v8, 2 +; CHECK-NEXT: vslideup.vi v12, v8, 1, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %out = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> ret <16 x i32> %out diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 1516c67bf7ecc..4200837227899 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -187,175 +187,220 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb -; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: addi a4, a1, 128 +; RV32-NEXT: addi a5, a1, 256 ; RV32-NEXT: li a2, 32 -; RV32-NEXT: lui a5, 12291 -; RV32-NEXT: lui a6, %hi(.LCPI8_0) -; RV32-NEXT: addi a6, a6, %lo(.LCPI8_0) -; RV32-NEXT: li a7, 768 -; RV32-NEXT: lui t0, 49164 +; RV32-NEXT: lui a3, 12 +; RV32-NEXT: lui a6, 12291 +; RV32-NEXT: lui a7, %hi(.LCPI8_0) +; RV32-NEXT: addi a7, a7, %lo(.LCPI8_0) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vle32.v v16, (a5) +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li t0, 48 +; RV32-NEXT: mul a5, a5, t0 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v3, a3 +; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li t1, 72 -; RV32-NEXT: mul a1, a1, t1 +; RV32-NEXT: li a5, 72 +; RV32-NEXT: mul a1, a1, a5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v8, (a4) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a4, 80 +; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a5, a5, 3 +; RV32-NEXT: addi a6, a6, 3 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vle16.v v6, (a6) -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: lui a1, %hi(.LCPI8_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI8_1) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v8, v16, v0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v24, v16, v6 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v16, (a3) -; RV32-NEXT: addi t0, t0, 12 -; RV32-NEXT: vmv.s.x v0, a7 -; RV32-NEXT: vmv.s.x v7, t0 -; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v4, (a1) +; RV32-NEXT: vle16.v v6, (a7) +; RV32-NEXT: vmv.s.x v2, a6 +; RV32-NEXT: vslideup.vi v8, v16, 4 ; 
RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
 ; RV32-NEXT: vslidedown.vi v24, v16, 16
+; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; RV32-NEXT: vslideup.vi v8, v24, 10, v0.t
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 6
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 80
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: li a4, 56
+; RV32-NEXT: mul a1, a1, a4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v20, v24, v16, v0
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vmv1r.v v0, v2
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 36
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: li a4, 72
+; RV32-NEXT: mul a1, a1, a4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v7
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: li a4, 80
+; RV32-NEXT: mul a1, a1, a4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmerge.vvm v24, v8, v16, v0
+; RV32-NEXT: vmerge.vvm v16, v16, v8, v0
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v8, v24, v4
+; RV32-NEXT: vrgatherei16.vv v8, v16, v6
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: li a4, 40
+; RV32-NEXT: mul a1, a1, a4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: li a1, 3
-; RV32-NEXT: lui a3, 196656
-; RV32-NEXT: lui a4, %hi(.LCPI8_2)
-; RV32-NEXT: addi a4, a4, %lo(.LCPI8_2)
-; RV32-NEXT: slli a1, a1, 10
-; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 48
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vslideup.vi v8, v16, 2
+; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: vslideup.vi v8, v24, 8, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 60
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a1, 49164
+; RV32-NEXT: lui a4, %hi(.LCPI8_1)
+; RV32-NEXT: addi a4, a4, %lo(.LCPI8_1)
+; RV32-NEXT: lui a5, 196656
+; RV32-NEXT: lui a6, %hi(.LCPI8_2)
+; RV32-NEXT: addi a6, a6, %lo(.LCPI8_2)
+; RV32-NEXT: addi a1, a1, 12
+; RV32-NEXT: addi a5, a5, 48
 ; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: vle16.v v14, (a4)
-; RV32-NEXT: vmv.s.x v12, a3
+; RV32-NEXT: vle16.v v8, (a4)
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 80
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a1, a1, a4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vmv4r.v v8, v24
+; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vmv.s.x v8, a5
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: li a4, 12
+; RV32-NEXT: mul a1, a1, a4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v8, v24, v8, v0
+; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vle16.v v8, (a6)
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: li a4, 20
+; RV32-NEXT: mul a1, a1, a4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v12
+; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a4, 80
+; RV32-NEXT: mul a1, a1, a4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 72
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmerge.vvm v24, v24, v16, v0
+; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v16, v24, v14
+; RV32-NEXT: vrgatherei16.vv v0, v8, v24
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 28
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 12
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 80
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 20
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vrgatherei16.vv v16, v8, v24
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a1, a1, a4
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a1, 3
-; RV32-NEXT: lui a3, 786624
-; RV32-NEXT: lui a4, 12
+; RV32-NEXT: lui a4, 786624
 ; RV32-NEXT: lui a5, 768
 ; RV32-NEXT: li a6, 48
 ; RV32-NEXT: lui a7, 3073
-; RV32-NEXT: li t0, 192
 ; RV32-NEXT: addi a1, a1, 3
-; RV32-NEXT: addi a3, a3, 192
-; RV32-NEXT: addi a4, a4, 12
+; RV32-NEXT: addi a4, a4, 192
+; RV32-NEXT: addi a3, a3, 12
 ; RV32-NEXT: addi a5, a5, 768
 ; RV32-NEXT: addi a7, a7, -1024
-; RV32-NEXT: vmv.s.x v13, a6
-; RV32-NEXT: vmv.s.x v2, t0
+; RV32-NEXT: vmv.s.x v8, a6
+; RV32-NEXT: csrr a6, vlenb
+; RV32-NEXT: li t0, 20
+; RV32-NEXT: mul a6, a6, t0
+; RV32-NEXT: add a6, sp, a6
+; RV32-NEXT: addi a6, a6, 16
+; RV32-NEXT: vs1r.v v8, (a6) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: vmv.s.x v12, a3
-; RV32-NEXT: vmv.s.x v3, a4
-; RV32-NEXT: vmv.s.x v14, a5
-; RV32-NEXT: vmv.s.x v1, a7
+; RV32-NEXT: vmv.s.x v16, a4
+; RV32-NEXT: vmv.s.x v3, a3
+; RV32-NEXT: vmv.s.x v1, a5
+; RV32-NEXT: vmv.s.x v2, a7
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 6
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vmv4r.v v8, v16
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 80
+; RV32-NEXT: li a3, 48
 ; RV32-NEXT: mul a1, a1, a3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v20, v8, v16, v0
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v12
+; RV32-NEXT: vmerge.vvm v4, v8, v24, v0
+; RV32-NEXT: vmv1r.v v0, v16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 80
+; RV32-NEXT: mul a1, a1, a3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -364,45 +409,56 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: mul a1, a1, a3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmerge.vvm v24, v16, v24, v0
+; RV32-NEXT: vmerge.vvm v16, v16, v8, v0
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: li a3, 12
 ; RV32-NEXT: mul a1, a1, a3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v0, v3
 ; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vmerge.vvm v12, v8, v24, v0
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: li a3, 80
 ; RV32-NEXT: mul a1, a1, a3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v4, v8, v24, v0
-; RV32-NEXT: vmv1r.v v0, v14
+; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: li a3, 72
 ; RV32-NEXT: mul a1, a1, a3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmerge.vvm v24, v16, v24, v0
+; RV32-NEXT: vmerge.vvm v16, v16, v8, v0
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: li a3, 20
+; RV32-NEXT: mul a1, a1, a3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v13
+; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 80
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 6
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT: vmerge.vvm v12, v8, v24, v0
 ; RV32-NEXT: csrr a1, vlenb
@@ -411,102 +467,88 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vmv1r.v v0, v2
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
+; RV32-NEXT: li a3, 80
 ; RV32-NEXT: mul a1, a1, a3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 72
+; RV32-NEXT: mul a1, a1, a3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmerge.vvm v16, v24, v16, v0
+; RV32-NEXT: vmerge.vvm v16, v16, v8, v0
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 72
+; RV32-NEXT: li a2, 80
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v2
+; RV32-NEXT: lui a1, %hi(.LCPI8_3)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI8_3)
+; RV32-NEXT: li a2, 192
+; RV32-NEXT: vmv.s.x v0, a2
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vle16.v v3, (a1)
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 80
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a1, a1, 6
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a2, 72
+; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: lui a1, 32
-; RV32-NEXT: addi a1, a1, 4
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 36
+; RV32-NEXT: li a2, 40
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v16, v8, v12
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 48
+; RV32-NEXT: li a2, 56
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v16, v8
+; RV32-NEXT: vmv.v.v v12, v8
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 80
+; RV32-NEXT: li a2, 56
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: lui a1, 48
-; RV32-NEXT: lui a2, %hi(.LCPI8_3)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI8_3)
-; RV32-NEXT: addi a1, a1, 5
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v24, (a2)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v25, a1
+; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 24
+; RV32-NEXT: li a2, 60
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v16, v8, v25
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a1, a1, 5
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v16, v8
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vmv.v.v v8, v24
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 56
+; RV32-NEXT: li a2, 60
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v16, v8, v24
+; RV32-NEXT: vrgatherei16.vv v16, v4, v3
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 28
+; RV32-NEXT: li a2, 24
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
@@ -514,8 +556,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT: vmv.v.v v16, v8
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a1, a1, 6
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
@@ -524,118 +565,96 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: lui a2, %hi(.LCPI8_5)
 ; RV32-NEXT: addi a2, a2, %lo(.LCPI8_5)
 ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v26, (a1)
+; RV32-NEXT: vle16.v v24, (a1)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v24, (a2)
+; RV32-NEXT: vle16.v v28, (a2)
 ; RV32-NEXT: lui a1, %hi(.LCPI8_6)
 ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6)
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vle16.v v2, (a1)
+; RV32-NEXT: vle16.v v30, (a1)
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: li a2, 12
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v26
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v20, v4, v24
-; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v20, v8
+; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v0, v24
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: slli a1, a1, 3
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vrgatherei16.vv v24, v12, v28
+; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
+; RV32-NEXT: vmv.v.v v24, v8
+; RV32-NEXT: addi a1, sp, 16
 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v24, v8, v2
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 48
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vrgatherei16.vv v0, v8, v30
 ; RV32-NEXT: lui a1, %hi(.LCPI8_7)
 ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7)
 ; RV32-NEXT: lui a2, %hi(.LCPI8_8)
 ; RV32-NEXT: addi a2, a2, %lo(.LCPI8_8)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v12, (a1)
+; RV32-NEXT: vle16.v v8, (a1)
 ; RV32-NEXT: lui a1, %hi(.LCPI8_9)
 ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9)
 ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v16, (a2)
+; RV32-NEXT: vle16.v v10, (a2)
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vle16.v v18, (a1)
+; RV32-NEXT: vle16.v v9, (a1)
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: li a2, 20
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v24, v0, v12
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 48
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v12, v28, v8
 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v24, v0
+; RV32-NEXT: vmv.v.v v12, v0
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 72
+; RV32-NEXT: li a2, 80
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v8, v0, v16
+; RV32-NEXT: vrgatherei16.vv v16, v0, v10
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 48
+; RV32-NEXT: li a2, 72
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vrgatherei16.vv v8, v4, v18
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 48
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v28, v4, v9
 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v8, v0
+; RV32-NEXT: vmv.v.v v28, v16
 ; RV32-NEXT: addi a1, a0, 320
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vse32.v v8, (a1)
+; RV32-NEXT: vse32.v v28, (a1)
 ; RV32-NEXT: addi a1, a0, 256
-; RV32-NEXT: vse32.v v24, (a1)
+; RV32-NEXT: vse32.v v12, (a1)
 ; RV32-NEXT: addi a1, a0, 192
-; RV32-NEXT: vse32.v v20, (a1)
+; RV32-NEXT: vse32.v v24, (a1)
 ; RV32-NEXT: addi a1, a0, 128
 ; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: slli a2, a2, 6
 ; RV32-NEXT: add a2, sp, a2
 ; RV32-NEXT: addi a2, a2, 16
 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT: vse32.v v8, (a1)
 ; RV32-NEXT: addi a1, a0, 64
 ; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 56
+; RV32-NEXT: li a3, 60
 ; RV32-NEXT: mul a2, a2, a3
 ; RV32-NEXT: add a2, sp, a2
 ; RV32-NEXT: addi a2, a2, 16
 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT: vse32.v v8, (a1)
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 80
+; RV32-NEXT: li a2, 56
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
@@ -655,366 +674,351 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 88
+; RV64-NEXT: li a3, 93
 ; RV64-NEXT: mul a2, a2, a3
 ; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb
-; RV64-NEXT: addi a3, a1, 128
-; RV64-NEXT: addi a6, a1, 256
-; RV64-NEXT: li a4, 128
-; RV64-NEXT: lui a2, 1
-; RV64-NEXT: lui a5, %hi(.LCPI8_0)
-; RV64-NEXT: addi a5, a5, %lo(.LCPI8_0)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.i v16, 6
+; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdd, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 93 * vlenb
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v8, (a6)
-; RV64-NEXT: lui a6, 16
-; RV64-NEXT: addi a6, a6, 7
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v17, a6
-; RV64-NEXT: addi a6, a2, 65
+; RV64-NEXT: vle64.v v8, (a1)
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 85
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, a1, 128
+; RV64-NEXT: addi a3, a1, 256
+; RV64-NEXT: li a4, 128
+; RV64-NEXT: lui a1, 1
+; RV64-NEXT: vle64.v v8, (a2)
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a5, 77
+; RV64-NEXT: mul a2, a2, a5
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: lui a2, %hi(.LCPI8_0)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI8_0)
+; RV64-NEXT: vle64.v v8, (a3)
+; RV64-NEXT: vmv.s.x v0, a4
+; RV64-NEXT: addi a3, a1, 65
+; RV64-NEXT: vle16.v v16, (a2)
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a4, 53
+; RV64-NEXT: mul a2, a2, a4
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vmv.s.x v6, a3
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vrgather.vi v4, v8, 4
-; RV64-NEXT: vrgather.vi v20, v8, 5
-; RV64-NEXT: csrr a7, vlenb
-; RV64-NEXT: li t0, 84
-; RV64-NEXT: mul a7, a7, t0
-; RV64-NEXT: add a7, sp, a7
-; RV64-NEXT: addi a7, a7, 16
-; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill
-; RV64-NEXT: vrgatherei16.vv v20, v8, v16
-; RV64-NEXT: csrr a7, vlenb
-; RV64-NEXT: slli a7, a7, 6
-; RV64-NEXT: add a7, sp, a7
-; RV64-NEXT: addi a7, a7, 16
-; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill
-; RV64-NEXT: vrgatherei16.vv v20, v8, v17
-; RV64-NEXT: csrr a7, vlenb
-; RV64-NEXT: li t0, 56
-; RV64-NEXT: mul a7, a7, t0
-; RV64-NEXT: add a7, sp, a7
-; RV64-NEXT: addi a7, a7, 16
-; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill
-; RV64-NEXT: vrgather.vi v16, v8, 2
-; RV64-NEXT: csrr a7, vlenb
-; RV64-NEXT: li t0, 72
-; RV64-NEXT: mul a7, a7, t0
-; RV64-NEXT: add a7, sp, a7
-; RV64-NEXT: addi a7, a7, 16
-; RV64-NEXT: vs4r.v v16, (a7) # Unknown-size Folded Spill
-; RV64-NEXT: vrgather.vi v16, v8, 3
-; RV64-NEXT: csrr a7, vlenb
-; RV64-NEXT: li t0, 68
-; RV64-NEXT: mul a7, a7, t0
-; RV64-NEXT: add a7, sp, a7
-; RV64-NEXT: addi a7, a7, 16
-; RV64-NEXT: vs4r.v v16, (a7) # Unknown-size Folded Spill
+; RV64-NEXT: vslideup.vi v20, v8, 2
 ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 8
-; RV64-NEXT: csrr a7, vlenb
-; RV64-NEXT: li t0, 40
-; RV64-NEXT: mul a7, a7, t0
-; RV64-NEXT: add a7, sp, a7
-; RV64-NEXT: addi a7, a7, 16
-; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill
-; RV64-NEXT: vmv.s.x v0, a4
-; RV64-NEXT: csrr a4, vlenb
-; RV64-NEXT: slli a4, a4, 5
-; RV64-NEXT: add a4, sp, a4
-; RV64-NEXT: addi a4, a4, 16
-; RV64-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v24, (a1)
-; RV64-NEXT: vle64.v v16, (a3)
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 76
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vle16.v v12, (a5)
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs2r.v v12, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv.s.x v2, a6
+; RV64-NEXT: vslidedown.vi v24, v8, 8
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 69
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v12, v0
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgather.vi v4, v8, 2, v0.t
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 60
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv1r.v v0, v2
-; RV64-NEXT: vmv8r.v v8, v24
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 48
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vslideup.vi v20, v24, 5, v0.t
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 6
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs4r.v v20, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v0, v6
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 85
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 77
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vmerge.vvm v24, v16, v24, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v0, v24, v16
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 24
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: lui a1, 2
-; RV64-NEXT: lui a3, %hi(.LCPI8_1)
-; RV64-NEXT: addi a3, a3, %lo(.LCPI8_1)
-; RV64-NEXT: addi a1, a1, 130
-; RV64-NEXT: vle16.v v16, (a3)
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 3
-; RV64-NEXT: add a3, sp, a3
-; RV64-NEXT: addi a3, a3, 16
-; RV64-NEXT: vs2r.v v16, (a3) # Unknown-size Folded Spill
-; RV64-NEXT: vmv.s.x v2, a1
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 5
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 84
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 40
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 53
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl2r.v v14, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v0, v24, v14
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 41
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vmv4r.v v24, v8
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 57
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgather.vi v24, v16, 3, v0.t
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 84
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv1r.v v0, v2
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 76
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vslideup.vi v8, v24, 1
+; RV64-NEXT: vmv1r.v v1, v12
+; RV64-NEXT: vmv1r.v v0, v12
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 69
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vslideup.vi v8, v16, 4, v0.t
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 49
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs4r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: lui a2, 2
+; RV64-NEXT: lui a3, 4
+; RV64-NEXT: li a4, 32
+; RV64-NEXT: addi a2, a2, 130
+; RV64-NEXT: addi a3, a3, 260
+; RV64-NEXT: vmv.s.x v2, a4
+; RV64-NEXT: vmv.s.x v0, a2
+; RV64-NEXT: vmv.s.x v3, a3
+; RV64-NEXT: vmv4r.v v4, v24
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 85
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 77
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vmerge.vvm v24, v16, v8, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v0, v24, v8
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: lui a1, 4
-; RV64-NEXT: lui a3, 8
-; RV64-NEXT: addi a1, a1, 260
-; RV64-NEXT: addi a3, a3, 520
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: vmv.s.x v2, a3
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 48
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmerge.vvm v8, v16, v24, v0
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 5
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl1r.v v7, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmv1r.v v0, v7
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 6
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 40
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmv4r.v v8, v16
+; RV64-NEXT: vmerge.vvm v16, v24, v8, v0
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 4
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v0, v3
+; RV64-NEXT: vmerge.vvm v8, v24, v8, v0
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v0, v2
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 57
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 6
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vslideup.vi v4, v8, 5, v0.t
+; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 69
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vrgather.vi v4, v24, 4, v0.t
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 37
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs4r.v v4, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vslidedown.vi v20, v8, 1
 ; RV64-NEXT: vmv1r.v v0, v2
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 76
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vslideup.vi v20, v8, 4, v0.t
+; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: vrgather.vi v20, v24, 5, v0.t
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 53
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs4r.v v20, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: lui a2, 8
+; RV64-NEXT: addi a2, a2, 520
+; RV64-NEXT: vmv.s.x v0, a2
+; RV64-NEXT: vslideup.vi v8, v24, 6
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 77
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 85
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vmerge.vvm v16, v16, v24, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv1r.v v0, v7
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 56
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgather.vi v24, v8, 5, v0.t
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 56
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: lui a1, 96
-; RV64-NEXT: li a3, 192
-; RV64-NEXT: vmv.s.x v3, a3
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v24, a1
-; RV64-NEXT: vmv1r.v v0, v3
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 72
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 29
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 69
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgatherei16.vv v28, v8, v24, v0.t
-; RV64-NEXT: vmv4r.v v16, v8
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 72
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: lui a1, %hi(.LCPI8_2)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI8_2)
-; RV64-NEXT: li a3, 1040
-; RV64-NEXT: lui a4, 112
-; RV64-NEXT: addi a4, a4, 1
-; RV64-NEXT: vmv.s.x v0, a3
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v5, a4
+; RV64-NEXT: vslideup.vi v8, v16, 1, v0.t
+; RV64-NEXT: lui a2, %hi(.LCPI8_1)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI8_1)
+; RV64-NEXT: lui a3, %hi(.LCPI8_2)
+; RV64-NEXT: addi a3, a3, %lo(.LCPI8_2)
+; RV64-NEXT: li a4, 192
+; RV64-NEXT: vmv.s.x v0, a4
+; RV64-NEXT: csrr a4, vlenb
+; RV64-NEXT: li a5, 28
+; RV64-NEXT: mul a4, a4, a5
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: addi a4, a4, 16
+; RV64-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: vle16.v v28, (a2)
+; RV64-NEXT: vle16.v v30, (a3)
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 57
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vrgather.vi v24, v16, 2
+; RV64-NEXT: vmerge.vvm v8, v24, v8, v0
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 24
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs4r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 4
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle16.v v6, (a1)
+; RV64-NEXT: vrgatherei16.vv v8, v16, v28
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v16, v8, v30
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 4
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: li a2, 1040
+; RV64-NEXT: li a3, 64
+; RV64-NEXT: addi a1, a1, -2016
+; RV64-NEXT: vmv.s.x v0, a2
+; RV64-NEXT: vmv.s.x v2, a3
+; RV64-NEXT: vmv.s.x v1, a1
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 76
-; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: li a2, 77
+; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 48
-; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: li a2, 85
+; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmerge.vvm v24, v8, v24, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 5
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv1r.v v0, v3
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 68
-; RV64-NEXT: mul a1, a1, a3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgatherei16.vv v28, v16, v5, v0.t
+; RV64-NEXT: vmerge.vvm v8, v16, v24, v0
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 68
-; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: slli a1, a1, 3
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: addi a1, a2, -2016
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v16, v24, v6
+; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: vmerge.vvm v8, v16, v24, v0
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 40
+; RV64-NEXT: li a2, 85
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 48
+; RV64-NEXT: li a2, 69
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv4r.v v20, v8
+; RV64-NEXT: vmv1r.v v0, v2
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
+; RV64-NEXT: vslideup.vi v20, v8, 5, v0.t
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 76
+; RV64-NEXT: li a2, 41
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmerge.vvm v8, v16, v8, v0
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 76
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: add a1, a2, a1
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: lui a1, %hi(.LCPI8_3)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3)
-; RV64-NEXT: vle16.v v8, (a1)
+; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
+; RV64-NEXT: vmv.v.v v8, v24
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 60
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: add a1, a2, a1
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 24
+; RV64-NEXT: li a2, 49
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v12, v0
+; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv.v.v v12, v24
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 60
+; RV64-NEXT: li a2, 49
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: lui a1, %hi(.LCPI8_3)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3)
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: vle16.v v18, (a1)
+; RV64-NEXT: lui a1, %hi(.LCPI8_4)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI8_4)
+; RV64-NEXT: vle16.v v16, (a1)
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 84
+; RV64-NEXT: li a2, 37
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
@@ -1024,114 +1028,117 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
 ; RV64-NEXT: vmv.v.v v12, v24
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 84
+; RV64-NEXT: li a2, 29
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vrgatherei16.vv v24, v0, v18
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 6
+; RV64-NEXT: li a2, 53
+; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
+; RV64-NEXT: vmv.v.v v28, v24
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 40
+; RV64-NEXT: li a2, 53
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v16, v24
+; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: slli a1, a1, 3
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v0, v24, v8
-; RV64-NEXT: lui a1, %hi(.LCPI8_4)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI8_4)
-; RV64-NEXT: vle16.v v8, (a1)
+; RV64-NEXT: vrgatherei16.vv v24, v0, v16
 ; RV64-NEXT: lui a1, %hi(.LCPI8_5)
 ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5)
-; RV64-NEXT: vle16.v v10, (a1)
+; RV64-NEXT: vle16.v v16, (a1)
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 6
+; RV64-NEXT: li a2, 77
+; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs2r.v v10, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs2r.v v16, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 56
+; RV64-NEXT: li a2, 24
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v12, v0
+; RV64-NEXT: vmv.v.v v16, v24
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 5
+; RV64-NEXT: li a2, 57
+; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v24, v0, v8
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: vrgather.vi v24, v0, 3
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 72
+; RV64-NEXT: li a2, 28
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v8, v24
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmerge.vvm v20, v24, v20, v0
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 76
+; RV64-NEXT: li a2, 85
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 6
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl2r.v v20, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v24, v0, v20
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 68
+; RV64-NEXT: li a2, 77
 ; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vrgatherei16.vv v24, v0, v8
 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v28, v24
+; RV64-NEXT: vmv.v.v v20, v24
 ; RV64-NEXT: addi a1, a0, 256
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vse64.v v8, (a1)
+; RV64-NEXT: vse64.v v16, (a1)
 ; RV64-NEXT: addi a1, a0, 320
-; RV64-NEXT: vse64.v v28, (a1)
+; RV64-NEXT: vse64.v v20, (a1)
 ; RV64-NEXT: addi a1, a0, 192
-; RV64-NEXT: vse64.v v12, (a1)
-; RV64-NEXT: addi a1, a0, 128
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 53
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl4r.v v16, (a2) # Unknown-size Folded Reload
 ; RV64-NEXT: vse64.v v16, (a1)
+; RV64-NEXT: addi a1, a0, 128
+; RV64-NEXT: vse64.v v12, (a1)
 ; RV64-NEXT: addi a1, a0, 64
 ; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 84
+; RV64-NEXT: li a3, 49
 ; RV64-NEXT: mul a2, a2, a3
 ; RV64-NEXT: add a2, sp, a2
 ; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
-; RV64-NEXT: vse64.v v8, (a1)
+; RV64-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vse64.v v12, (a1)
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 60
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: add a1, a2, a1
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: vse64.v v8, (a0)
 ; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: li a1, 88
+; RV64-NEXT: li a1, 93
 ; RV64-NEXT: mul a0, a0, a1
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: .cfi_def_cfa sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
index e4b6e5c47fd98..acb1802181540 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
@@ -78,12 +78,11 @@ define <8 x i1> @v8i1_v16i1(<16 x i1>) {
 define <4 x i32> @v4i32_v8i32(<8 x i32>) {
 ; CHECK-LABEL: v4i32_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v0, 8
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
+; CHECK-NEXT: vslideup.vi v10, v8, 1, v0.t
 ; CHECK-NEXT: vmv.v.i v0, 5
-; CHECK-NEXT: vsrl.vi v10, v10, 1
-; CHECK-NEXT: vrsub.vi v11, v10, 3
-; CHECK-NEXT: vrgather.vv v10, v8, v11
 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v8, 4
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
@@ -95,43 +94,21 @@ define <4 x i32> @v4i32_v8i32(<8 x i32>) {
 }
 define <4 x i32> @v4i32_v16i32(<16 x i32>) {
-; RV32-LABEL: v4i32_v16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT: vmv.v.i v12, 1
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vmv.v.i v14, 6
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: vmv.v.i v0, 10
-; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v14, v12, 1
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vnsrl.wx v12, v8, a0
-; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 8
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT: vrgatherei16.vv v12, v8, v14, v0.t
-; RV32-NEXT: vmv1r.v v8, v12
-; RV32-NEXT: ret
-;
-; RV64-LABEL: v4i32_v16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.i v0, 10
-; RV64-NEXT: vnsrl.wx v12, v8, a0
-; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 8
-; RV64-NEXT: li a0, 3
-; RV64-NEXT: slli a0, a0, 33
-; RV64-NEXT: addi a0, a0, 1
-; RV64-NEXT: slli a0, a0, 16
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v10, a0
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; RV64-NEXT: vrgatherei16.vv v12, v8, v10, v0.t
-; RV64-NEXT: vmv1r.v v8, v12
-; RV64-NEXT: ret
+; CHECK-LABEL: v4i32_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 8
+; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v12, v12, 3, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 10
+; CHECK-NEXT: vnsrl.wx v10, v8, a0
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
+; CHECK-NEXT: ret
 %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32>
 ret <4 x i32> %2
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
index ad18c801069f4..59ddc021f4999 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
@@ -98,20 +98,17 @@ define void @deinterleave5_0_i8(ptr %in, ptr %out) {
 ; CHECK-LABEL: deinterleave5_0_i8:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a0)
 ; CHECK-NEXT: li a0, 33
 ; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: lui a0, 28704
-; CHECK-NEXT: addi a0, a0, 1280
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 8
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vse8.v v10, (a1)
+; CHECK-NEXT: vslidedown.vi v10, v9, 8
+; CHECK-NEXT: vmv.v.i v8, 10
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vslidedown.vi v9, v9, 4, v0.t
+; CHECK-NEXT: vse8.v v9, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i8>, ptr %in, align 1
@@ -125,16 +122,16 @@ define void @deinterleave5_8_i8(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: li a0, 66
+; CHECK-NEXT: vmv.v.i v0, 2
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vslidedown.vi v9, v8, 5, v0.t
 ; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vcompress.vm v10, v8, v9
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vi v10, v8, 3, v0.t
-; CHECK-NEXT: vse8.v v10, (a1)
+; CHECK-NEXT: vrgather.vi v9, v8, 3, v0.t
+; CHECK-NEXT: vse8.v v9, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i8>, ptr %in, align 1
@@ -148,15 +145,15 @@ define void @deinterleave6_0_i8(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: li a0, 65
-; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vcompress.vm v10, v8, v9
+; CHECK-NEXT: vmv.v.i v0, 2
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vmv.v.i v8, 4
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vi v10, v8, 4, v0.t
+; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vrgather.vi v10, v9, 4, v0.t
 ; CHECK-NEXT: vse8.v v10, (a1)
 ; CHECK-NEXT: ret
 entry:
@@ -171,16 +168,16 @@ define void @deinterleave6_8_i8(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: li a0, 130
+; CHECK-NEXT: vmv.v.i v0, 2
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vslidedown.vi v9, v8, 6, v0.t
 ; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vcompress.vm v10, v8, v9
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vi v10, v8, 5, v0.t
-; CHECK-NEXT: vse8.v v10, (a1)
+; CHECK-NEXT: vrgather.vi v9, v8, 5, v0.t
+; CHECK-NEXT: vse8.v v9, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i8>, ptr %in, align 1
@@ -194,15 +191,15 @@ define void @deinterleave7_0_i8(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: li a0, 129
-; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vcompress.vm v10, v8, v9
+; CHECK-NEXT: vmv.v.i v0, 2
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vmv.v.i v8, 4
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vi v10, v8, 6, v0.t
+; CHECK-NEXT: vslidedown.vi v10, v10, 6, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vrgather.vi v10, v9, 6, v0.t
 ; CHECK-NEXT: vse8.v v10, (a1)
 ; CHECK-NEXT: ret
 entry:
@@ -217,18 +214,16 @@ define void @deinterleave7_8_i8(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -6
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: li a0, 6
-; CHECK-NEXT: vmv.v.i v0, 6
-; CHECK-NEXT: vmadd.vx v10, a0, v9
+; CHECK-NEXT: vmv.v.i v0, 2
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vi v9, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vi v11, v8, 1
-; CHECK-NEXT: vrgather.vv v11, v9, v10, v0.t
-; CHECK-NEXT: vse8.v v11, (a1)
+; CHECK-NEXT: vslidedown.vi v10, v9, 4
+; CHECK-NEXT: vslideup.vi v10, v9, 1, v0.t
+; CHECK-NEXT: vmv.v.i v0, 6
+; CHECK-NEXT: vrgather.vi v9, v8, 1
+; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
+; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i8>, ptr %in, align 1
@@ -285,17 +280,17 @@ define void @deinterleave7_0_i64(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: li a0, 129
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vcompress.vm v20, v8, v16
 ; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 8
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
+; CHECK-NEXT: vmv4r.v v12, v8
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 2
+; CHECK-NEXT: vmv.v.i v8, 4
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT: vrgather.vi v20, v8, 6, v0.t
-; CHECK-NEXT: vse64.v v20, (a1)
+; CHECK-NEXT: vslidedown.vi v12, v12, 6, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vrgather.vi v12, v16, 6, v0.t
+; CHECK-NEXT: vse64.v v12, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i64>, ptr %in
@@ -329,18 +324,18 @@ define void @deinterleave7_0_i32_subvec(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: li a0, 129
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vcompress.vm v14, v8, v12
 ; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 8
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
+; CHECK-NEXT: vmv2r.v v10, v8
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 2
+; CHECK-NEXT: vmv.v.i v8, 4
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT: vrgather.vi v14, v8, 6, v0.t
+; CHECK-NEXT: vslidedown.vi v10, v10, 6, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vrgather.vi v10, v12, 6, v0.t
 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v14, (a1)
+; CHECK-NEXT: vse32.v v10, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i32>, ptr %in
@@ -444,8 +439,8 @@ define void @deinterleave8_8_i8_two_source(ptr %in0, ptr %in1, ptr %out) {
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT: vle8.v v8, (a0)
 ; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vmv.v.i v0, -3
-; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t
+; CHECK-NEXT: vmv.v.i v0, 1
+; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t
 ; CHECK-NEXT: vse8.v v9, (a2)
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index c0c17d4e0623e..2da18fbb8e41c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -182,19 +182,19 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 ; CHECK-LABEL: shuffle1:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: addi a0, a0, 252
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v10
 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v11, (a0)
-; CHECK-NEXT: vmv.v.i v0, 5
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vmv.v.i v0, 1
+; CHECK-NEXT: vmv.v.i v8, 5
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vsrl.vi v10, v10, 1
-; CHECK-NEXT: vadd.vi v10, v10, 1
-; CHECK-NEXT: vrgather.vv v9, v11, v10, v0.t
+; CHECK-NEXT: vslidedown.vi v9, v9, 1, v0.t
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v11, v11, v9, v0
 ; CHECK-NEXT: addi a0, a1, 672
-; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: vs2r.v v10, (a0)
 ; CHECK-NEXT: ret
 %1 = getelementptr i32, ptr %explicit_0, i64 63
 %2 = load <3 x i32>, ptr %1, align 1
@@ -209,15 +209,14 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
 ; CHECK-LABEL: shuffle2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v12, v8, 1
+; CHECK-NEXT: vmv.v.i v0, 6
+; CHECK-NEXT: vslideup.vi v12, v8, 2
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vmv1r.v v12, v8
 ; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vid.v v13
-; CHECK-NEXT: vadd.vv v13, v13, v13
-; CHECK-NEXT: vmv.v.i v0, 6
-; CHECK-NEXT: vrsub.vi v13, v13, 4
-; CHECK-NEXT: vrgather.vv v9, v12, v13, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v9, v9, v12, v0
 ; CHECK-NEXT: ret
 %b = extractelement <4 x float> %a, i32 2
 %c = insertelement <16 x float> , float %b, i32 5
@@ -234,7 +233,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
 ; RV32-NEXT: vmv.v.i v0, 1
 ; RV32-NEXT: li a1, 32
-; RV32-NEXT: vrgather.vi v18, v15, 1, v0.t
+; RV32-NEXT: vslidedown.vi v18, v15, 1, v0.t
 ; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
 ; RV32-NEXT: vslidedown.vx v8, v16, a0
 ; RV32-NEXT: vmv.x.s a0, v8
@@ -260,7 +259,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vmv.v.i v16, 0
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT: vrgather.vi v18, v15, 1, v0.t
+; RV64-NEXT: vslidedown.vi v18, v15, 1, v0.t
 ; RV64-NEXT: mv s2, sp
 ; RV64-NEXT: vs8r.v v16, (s2)
 ; RV64-NEXT: andi a0, a0, 15
@@ -290,14 +289,13 @@ define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v13, v10
-; CHECK-NEXT: vslideup.vi v13, v11, 1
-; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv.v.i v0, 1
+; CHECK-NEXT: vslideup.vi v13, v11, 1
+; CHECK-NEXT: vslidedown.vi v11, v10, 1, v0.t
+; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vrgather.vi v12, v9, 0
-; CHECK-NEXT: vmv1r.v v9, v11
-; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v12, v8
+; CHECK-NEXT: vfadd.vv v8, v12, v10
 ; CHECK-NEXT: ret
 %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32>
 %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32>
@@ -325,10 +323,9 @@ entry:
 define <16 x i32> @m4_linear_num_of_shuffles_in_chunks(<16 x i32> %0) vscale_range(2,2) {
 ; CHECK-LABEL: m4_linear_num_of_shuffles_in_chunks:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v0, 8
-; CHECK-NEXT: vrgather.vi v12, v10, 0
-; CHECK-NEXT: vrgather.vi v12, v11, 0, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v12, v10, 2
+; CHECK-NEXT: vslideup.vi v12, v11, 3
 ; CHECK-NEXT: vrgather.vi v14, v8, 2
 ; CHECK-NEXT: vrgather.vi v15, v10, 3
 ; CHECK-NEXT: vmv4r.v v8, v12
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
index c9fe39685fbc6..5b8e312a06ad4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
@@ -510,15 +510,12 @@ define <8 x i16> @shuffle_v8i16_as_i64_16(<8 x i16> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i16_as_i64_16:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI19_0)
-; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI19_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
-; ZVKB-ZVE32X-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVKB-ZVE32X-NEXT: vrgather.vv v11, v9, v12
-; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT: vmv2r.v v8, v10
+; ZVKB-ZVE32X-NEXT: li a0, 136
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu
+; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 1
+; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 3, v0.t
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
 ; ZVKB-ZVE32X-NEXT: ret
 %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32>
 ret <8 x i16> %shuffle
@@ -558,16 +555,12 @@ define <8 x i16> @shuffle_v8i16_as_i64_32(<8 x i16> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i16_as_i64_32:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: lui a0, 8240
-; ZVKB-ZVE32X-NEXT: addi a0, a0, 1
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; ZVKB-ZVE32X-NEXT: vmv.s.x v10, a0
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e16, m1, ta, ma
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
-; ZVKB-ZVE32X-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v11, v9, v12
-; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT: vmv2r.v v8, v10
+; ZVKB-ZVE32X-NEXT: li a0, 204
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu
+; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 2
+; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 2, v0.t
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
 ; ZVKB-ZVE32X-NEXT: ret
 %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32>
 ret <8 x i16> %shuffle
@@ -607,15 +600,12 @@ define <8 x i16> @shuffle_v8i16_as_i64_48(<8 x i16> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i16_as_i64_48:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI21_0)
-; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI21_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
-; ZVKB-ZVE32X-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVKB-ZVE32X-NEXT: vrgather.vv v11, v9, v12
-; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT: vmv2r.v v8, v10
+; ZVKB-ZVE32X-NEXT: li a0, -18
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu
+; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 3
+; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 1, v0.t
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
 ; ZVKB-ZVE32X-NEXT: ret
 %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32>
 ret <8 x i16> %shuffle
@@ -655,17 +645,12 @@ define <8 x i32> @shuffle_v8i32_as_i64(<8 x i32> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i32_as_i64:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI22_0)
-; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI22_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
-; ZVKB-ZVE32X-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v13, v9, v16
-; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v12, v8, v16
-; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v14, v10, v16
-; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v15, v11, v16
-; ZVKB-ZVE32X-NEXT: vmv4r.v v8, v12
+; ZVKB-ZVE32X-NEXT: li a0, 170
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e32, m4, ta, mu
+; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v12, v8, 1
+; ZVKB-ZVE32X-NEXT: vslideup.vi v12, v8, 1, v0.t
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
 ; ZVKB-ZVE32X-NEXT: ret
 %shuffle = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32>
 ret <8 x i32> %shuffle
@@ -729,15 +714,12 @@ define <8 x half> @shuffle_v8f16_as_i64_16(<8 x half> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8f16_as_i64_16:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI24_0)
-; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI24_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
-; ZVKB-ZVE32X-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVKB-ZVE32X-NEXT: vrgather.vv v11, v9, v12
-; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT: vmv2r.v v8, v10
+; ZVKB-ZVE32X-NEXT: li a0,
136 +; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 +; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 1 +; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 3, v0.t +; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> ret <8 x half> %shuffle @@ -777,16 +759,12 @@ define <8 x half> @shuffle_v8f16_as_i64_32(<8 x half> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8f16_as_i64_32: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: lui a0, 8240 -; ZVKB-ZVE32X-NEXT: addi a0, a0, 1 -; ZVKB-ZVE32X-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; ZVKB-ZVE32X-NEXT: vmv.s.x v10, a0 -; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10 -; ZVKB-ZVE32X-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v11, v9, v12 -; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v10, v8, v12 -; ZVKB-ZVE32X-NEXT: vmv2r.v v8, v10 +; ZVKB-ZVE32X-NEXT: li a0, 204 +; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 +; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 2 +; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 2, v0.t +; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> ret <8 x half> %shuffle @@ -826,15 +804,12 @@ define <8 x half> @shuffle_v8f16_as_i64_48(<8 x half> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8f16_as_i64_48: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI26_0) -; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI26_0) -; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma -; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0) -; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10 -; ZVKB-ZVE32X-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVKB-ZVE32X-NEXT: vrgather.vv v11, v9, v12 -; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12 -; ZVKB-ZVE32X-NEXT: vmv2r.v v8, v10 +; ZVKB-ZVE32X-NEXT: li a0, -18 +; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 +; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 3 +; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 1, v0.t +; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> ret <8 x half> %shuffle @@ -874,17 +849,12 @@ define <8 x float> @shuffle_v8f32_as_i64(<8 x float> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8f32_as_i64: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI27_0) -; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI27_0) -; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma -; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0) -; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12 -; ZVKB-ZVE32X-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v13, v9, v16 -; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v12, v8, v16 -; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v14, v10, v16 -; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v15, v11, v16 -; ZVKB-ZVE32X-NEXT: vmv4r.v v8, v12 +; ZVKB-ZVE32X-NEXT: li a0, 170 +; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e32, m4, ta, mu +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 +; ZVKB-ZVE32X-NEXT: vslidedown.vi v12, v8, 1 +; ZVKB-ZVE32X-NEXT: vslideup.vi v12, v8, 1, v0.t +; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12 ; ZVKB-ZVE32X-NEXT: ret %shuffle = shufflevector <8 x float> %v, <8 x float> poison, <8 x i32> ret <8 x float> %shuffle @@ -924,13 +894,12 @@ define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2) ; ; ZVKB-ZVE32X-LABEL: shuffle_v8f32_as_i64_exact: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: lui a0, 8240 -; ZVKB-ZVE32X-NEXT: addi a0, a0, 1 -; 
ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; ZVKB-ZVE32X-NEXT: vmv.s.x v10, a0 -; ZVKB-ZVE32X-NEXT: vsext.vf4 v12, v10 -; ZVKB-ZVE32X-NEXT: vrgather.vv v11, v9, v12 -; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12 +; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; ZVKB-ZVE32X-NEXT: vmv.v.i v0, 10 +; ZVKB-ZVE32X-NEXT: vslidedown.vi v11, v9, 1 +; ZVKB-ZVE32X-NEXT: vslideup.vi v11, v9, 1, v0.t +; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 1 +; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 1, v0.t ; ZVKB-ZVE32X-NEXT: vmv2r.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret %shuffle = shufflevector <8 x float> %v, <8 x float> poison, <8 x i32> @@ -940,30 +909,22 @@ define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2) define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) { ; CHECK-LABEL: shuffle_v8i64_as_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI29_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI29_0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vrgatherei16.vv v13, v9, v16 -; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 -; CHECK-NEXT: vrgatherei16.vv v14, v10, v16 -; CHECK-NEXT: vrgatherei16.vv v15, v11, v16 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v12, v8, 1 +; CHECK-NEXT: vslideup.vi v12, v8, 1, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret ; ; ZVKB-V-LABEL: shuffle_v8i64_as_i128: ; ZVKB-V: # %bb.0: -; ZVKB-V-NEXT: lui a0, %hi(.LCPI29_0) -; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI29_0) -; ZVKB-V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVKB-V-NEXT: vle16.v v16, (a0) -; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; ZVKB-V-NEXT: vrgatherei16.vv v13, v9, v16 -; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, v16 -; ZVKB-V-NEXT: vrgatherei16.vv v14, v10, v16 -; ZVKB-V-NEXT: vrgatherei16.vv v15, v11, v16 -; ZVKB-V-NEXT: vmv4r.v v8, v12 +; ZVKB-V-NEXT: li a0, 170 +; ZVKB-V-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; ZVKB-V-NEXT: vmv.s.x v0, a0 +; ZVKB-V-NEXT: vslidedown.vi v12, v8, 1 +; ZVKB-V-NEXT: vslideup.vi v12, v8, 1, v0.t +; ZVKB-V-NEXT: vmv.v.v v8, v12 ; ZVKB-V-NEXT: ret %shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> ret <8 x i64> %shuffle @@ -973,30 +934,22 @@ define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) { define <8 x i64> @shuffle_v8i64_as_i128_2(<8 x i64> %v) { ; CHECK-LABEL: shuffle_v8i64_as_i128_2: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI30_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vrgatherei16.vv v13, v9, v16 -; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 -; CHECK-NEXT: vrgatherei16.vv v14, v10, v16 -; CHECK-NEXT: vrgatherei16.vv v15, v11, v16 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: li a0, 168 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v12, v8, 1 +; CHECK-NEXT: vslideup.vi v12, v8, 1, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret ; ; ZVKB-V-LABEL: shuffle_v8i64_as_i128_2: ; ZVKB-V: # %bb.0: -; ZVKB-V-NEXT: lui a0, %hi(.LCPI30_0) -; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI30_0) -; ZVKB-V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVKB-V-NEXT: vle16.v v16, (a0) -; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; ZVKB-V-NEXT: vrgatherei16.vv v13, v9, v16 -; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, 
v16 -; ZVKB-V-NEXT: vrgatherei16.vv v14, v10, v16 -; ZVKB-V-NEXT: vrgatherei16.vv v15, v11, v16 -; ZVKB-V-NEXT: vmv4r.v v8, v12 +; ZVKB-V-NEXT: li a0, 168 +; ZVKB-V-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; ZVKB-V-NEXT: vmv.s.x v0, a0 +; ZVKB-V-NEXT: vslidedown.vi v12, v8, 1 +; ZVKB-V-NEXT: vslideup.vi v12, v8, 1, v0.t +; ZVKB-V-NEXT: vmv.v.v v8, v12 ; ZVKB-V-NEXT: ret %shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> ret <8 x i64> %shuffle diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll index 814e35f201dca..ad7cf7eee5023 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll @@ -121,7 +121,7 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmv.v.i v0, 1 -; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t +; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> @@ -166,7 +166,7 @@ define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmv.v.i v0, 1 -; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t +; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> @@ -188,7 +188,7 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmv.v.i v0, 1 -; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t +; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> @@ -233,7 +233,7 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmv.v.i v0, 1 -; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t +; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll index 49f6acf9ba8c9..a171a7f8ac5f1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll @@ -374,11 +374,9 @@ define <4 x i8> @vslide1up_4xi8_neg_undef_insert(<4 x i8> %v, i8 %b) { define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert(<4 x i8> %v, i8 %b) { ; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8208 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vslideup.vi v9, v8, 1 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> @@ -400,12 +398,9 @@ define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert2(<4 x i8> %v, i8 %b) { define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) { ; CHECK-LABEL: vslide1up_4xi8_neg_incorrect_insert3: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8208 -; 
CHECK-NEXT: addi a0, a0, 1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vslideup.vi v9, v8, 1 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index 180579e47d075..0fbb139d5f461 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -182,7 +182,7 @@ define void @vnsrl_32_i32(ptr %in, ptr %out) { ; ZVE32F-NEXT: vmv.v.i v0, 1 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t +; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a1) ; ZVE32F-NEXT: ret entry: @@ -236,7 +236,7 @@ define void @vnsrl_32_float(ptr %in, ptr %out) { ; ZVE32F-NEXT: vmv.v.i v0, 1 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t +; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a1) ; ZVE32F-NEXT: ret entry: @@ -279,7 +279,7 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) { ; V-NEXT: vmv.v.i v0, 1 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; V-NEXT: vslidedown.vi v9, v8, 2 -; V-NEXT: vrgather.vi v9, v8, 1, v0.t +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t ; V-NEXT: vse64.v v9, (a1) ; V-NEXT: ret ; @@ -330,7 +330,7 @@ define void @vnsrl_64_double(ptr %in, ptr %out) { ; V-NEXT: vmv.v.i v0, 1 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; V-NEXT: vslidedown.vi v9, v8, 2 -; V-NEXT: vrgather.vi v9, v8, 1, v0.t +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t ; V-NEXT: vse64.v v9, (a1) ; V-NEXT: ret ; @@ -386,23 +386,20 @@ define void @vnsrl_0_i8_undef3(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -32 -; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: lui a0, 24640 -; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v0, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu ; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: addi a0, a0, 6 +; CHECK-NEXT: li a0, -32 ; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 2 +; CHECK-NEXT: vslidedown.vi v10, v8, 3, v0.t +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vadd.vi v9, v9, -8 -; CHECK-NEXT: vrgather.vv v11, v8, v10 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu -; CHECK-NEXT: vrgather.vv v11, v8, v9, v0.t -; CHECK-NEXT: vse8.v v11, (a1) +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse8.v v10, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i8>, ptr %in, align 1 @@ -417,21 +414,22 @@ define void @vnsrl_0_i8_undef_negative(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: lui a0, %hi(.LCPI17_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI17_0) ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, 
ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vle8.v v10, (a0) -; CHECK-NEXT: li a0, 48 -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v9, v9, -8 -; CHECK-NEXT: vrgather.vv v11, v8, v10 +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vslidedown.vi v10, v8, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu -; CHECK-NEXT: vrgather.vv v11, v8, v9, v0.t -; CHECK-NEXT: vse8.v v11, (a1) +; CHECK-NEXT: vslideup.vi v11, v10, 4 +; CHECK-NEXT: vslideup.vi v11, v10, 3, v0.t +; CHECK-NEXT: li a0, 48 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v10, v11, v0 +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i8>, ptr %in, align 1 @@ -822,7 +820,7 @@ define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) { ; V-NEXT: vle32.v v8, (a0) ; V-NEXT: vle32.v v9, (a1) ; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vrgather.vi v9, v8, 1, v0.t +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t ; V-NEXT: vse32.v v9, (a2) ; V-NEXT: ret ; @@ -832,7 +830,7 @@ define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vle32.v v8, (a0) ; ZVE32F-NEXT: vle32.v v9, (a1) ; ZVE32F-NEXT: vmv.v.i v0, 1 -; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t +; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a2) ; ZVE32F-NEXT: ret entry: @@ -876,7 +874,7 @@ define void @vnsrl_32_float_two_source(ptr %in0, ptr %in1, ptr %out) { ; V-NEXT: vle32.v v8, (a0) ; V-NEXT: vle32.v v9, (a1) ; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vrgather.vi v9, v8, 1, v0.t +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t ; V-NEXT: vse32.v v9, (a2) ; V-NEXT: ret ; @@ -886,7 +884,7 @@ define void @vnsrl_32_float_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vle32.v v8, (a0) ; ZVE32F-NEXT: vle32.v v9, (a1) ; ZVE32F-NEXT: vmv.v.i v0, 1 -; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t +; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a2) ; ZVE32F-NEXT: ret entry: @@ -930,7 +928,7 @@ define void @vnsrl_64_i64_two_source(ptr %in0, ptr %in1, ptr %out) { ; V-NEXT: vle64.v v8, (a0) ; V-NEXT: vle64.v v9, (a1) ; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vrgather.vi v9, v8, 1, v0.t +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t ; V-NEXT: vse64.v v9, (a2) ; V-NEXT: ret ; @@ -983,7 +981,7 @@ define void @vnsrl_64_double_two_source(ptr %in0, ptr %in1, ptr %out) { ; V-NEXT: vle64.v v8, (a0) ; V-NEXT: vle64.v v9, (a1) ; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vrgather.vi v9, v8, 1, v0.t +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t ; V-NEXT: vse64.v v9, (a2) ; V-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index a91dee1cb245f..037ed257f4a89 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i7> @llvm.vp.trunc.v2i7.v2i16(<2 x i16>, <2 x i1>, i32) @@ -222,316 +222,645 @@ define <2 x i32> 
@vtrunc_v2i32_v2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) { declare <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64>, <128 x i1>, i32) define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 zeroext %vl) { -; CHECK-LABEL: vtrunc_v128i32_v128i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 72 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 72 * vlenb -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vi v6, v0, 8 -; CHECK-NEXT: addi a2, a1, 512 -; CHECK-NEXT: addi a3, a1, 640 -; CHECK-NEXT: addi a4, a7, -64 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v27, v6, 4 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a3) -; CHECK-NEXT: sltu a3, a7, a4 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v27, 2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a4, a3, a4 -; CHECK-NEXT: addi a3, a4, -32 -; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: addi a5, a3, -16 -; CHECK-NEXT: sltu a6, a3, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: addi a5, a1, 128 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v7, 4 -; CHECK-NEXT: bltu a3, a2, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vmv1r.v v0, v27 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v27, v26, 2 -; CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 6 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: mv a6, a7 -; CHECK-NEXT: bltu a7, a5, .LBB16_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a6, 64 -; CHECK-NEXT: .LBB16_4: -; CHECK-NEXT: vmv1r.v v0, v27 -; CHECK-NEXT: addi a5, a1, 384 -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: li t1, 48 -; CHECK-NEXT: mul t0, t0, t1 -; CHECK-NEXT: add t0, 
sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; CHECK-NEXT: addi t0, a6, -32 -; CHECK-NEXT: sltu a6, a6, t0 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a6, a6, t0 -; CHECK-NEXT: addi t0, a6, -16 -; CHECK-NEXT: sltu t1, a6, t0 -; CHECK-NEXT: addi t1, t1, -1 -; CHECK-NEXT: and t0, t1, t0 -; CHECK-NEXT: csrr t1, vlenb -; CHECK-NEXT: li t2, 56 -; CHECK-NEXT: mul t1, t1, t2 -; CHECK-NEXT: add t1, sp, t1 -; CHECK-NEXT: addi t1, t1, 16 -; CHECK-NEXT: vl8r.v v16, (t1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 3 -; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a6, a2, .LBB16_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a5) -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v6, 2 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li t0, 48 -; CHECK-NEXT: mul a5, a5, t0 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: mv a5, a4 -; CHECK-NEXT: bltu a4, a3, .LBB16_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: .LBB16_8: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a5, -16 -; CHECK-NEXT: sltu a5, a5, a1 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a1, a5, a1 -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a5, 40 -; CHECK-NEXT: mul a1, a1, a5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a2, .LBB16_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB16_10: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v25, v7, 2 -; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: mv a1, a7 -; CHECK-NEXT: bltu a7, a3, .LBB16_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: .LBB16_12: -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 4 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v24, v16 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; 
CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 40 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 40 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 6 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 6 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: addi a4, a1, -16 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v8, 16 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 40 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: sltu a1, a1, a4 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a1, a1, a4 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: bltu a7, a2, .LBB16_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: li a7, 16 -; CHECK-NEXT: .LBB16_14: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v24, v8, 16 -; CHECK-NEXT: vse32.v v24, (a0) -; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 48 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a0, 
a0, 384 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 6 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 72 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; RV32-LABEL: vtrunc_v128i32_v128i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 72 * vlenb +; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v7, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v5, v0, 8 +; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vi v4, v0, 4 +; RV32-NEXT: addi a2, a7, -64 +; RV32-NEXT: vslidedown.vi v3, v5, 4 +; RV32-NEXT: sltu a3, a7, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a4, a3, a2 +; RV32-NEXT: addi a2, a4, -32 +; RV32-NEXT: sltu a3, a4, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a3, a3, a2 +; RV32-NEXT: li a2, 16 +; RV32-NEXT: addi t0, a3, -16 +; RV32-NEXT: mv a5, a3 +; RV32-NEXT: bltu a3, a2, .LBB16_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a5, 16 +; RV32-NEXT: .LBB16_2: +; RV32-NEXT: li t2, 64 +; RV32-NEXT: sltu t1, a3, t0 +; RV32-NEXT: mv a6, a7 +; RV32-NEXT: bltu a7, t2, .LBB16_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: li a6, 64 +; RV32-NEXT: .LBB16_4: +; RV32-NEXT: addi t3, a1, 128 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v6, v4, 2 +; RV32-NEXT: addi s0, a1, 512 +; RV32-NEXT: addi t6, a1, 640 +; RV32-NEXT: vslidedown.vi v0, v3, 2 +; RV32-NEXT: addi t1, t1, -1 +; RV32-NEXT: addi t2, a1, 384 +; RV32-NEXT: vslidedown.vi v2, v5, 2 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: addi t4, a6, -32 +; RV32-NEXT: sltu a6, a6, t4 +; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: and a6, a6, t4 +; RV32-NEXT: addi t4, a6, -16 +; RV32-NEXT: sltu t5, a6, t4 +; RV32-NEXT: addi t5, t5, -1 +; RV32-NEXT: bltu a6, a2, .LBB16_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: li a6, 16 +; RV32-NEXT: .LBB16_6: +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (s0) +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: li a0, 56 +; RV32-NEXT: mul s0, s0, a0 +; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: addi s0, s0, 16 +; RV32-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v16, (t6) +; RV32-NEXT: vle64.v v8, (t3) +; RV32-NEXT: csrr t3, vlenb +; RV32-NEXT: slli t3, t3, 3 +; RV32-NEXT: add t3, sp, t3 +; RV32-NEXT: addi t3, t3, 16 +; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: csrr t3, vlenb +; RV32-NEXT: li t6, 48 +; RV32-NEXT: mul t3, t3, t6 +; RV32-NEXT: add t3, 
sp, t3 +; RV32-NEXT: addi t3, t3, 16 +; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v8, (t2) +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 16 +; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV32-NEXT: and t2, t1, t0 +; RV32-NEXT: and t1, t5, t4 +; RV32-NEXT: addi a1, a1, 256 +; RV32-NEXT: mv t0, a4 +; RV32-NEXT: bltu a4, a3, .LBB16_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: li t0, 32 +; RV32-NEXT: .LBB16_8: +; RV32-NEXT: vsetvli zero, t2, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV32-NEXT: addi t2, sp, 16 +; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: li t3, 56 +; RV32-NEXT: mul t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 16 +; RV32-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, t1, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li t1, 24 +; RV32-NEXT: mul a5, a5, t1 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: addi a5, t0, -16 +; RV32-NEXT: sltu t0, t0, a5 +; RV32-NEXT: addi t0, t0, -1 +; RV32-NEXT: and a5, t0, a5 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v6, v7, 2 +; RV32-NEXT: vmv1r.v v0, v4 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li t0, 48 +; RV32-NEXT: mul a1, a1, t0 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a1, a1, a6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v24, v16, 0, v0.t +; RV32-NEXT: bltu a4, a2, .LBB16_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: li a4, 16 +; RV32-NEXT: .LBB16_10: +; RV32-NEXT: vmv1r.v v0, v5 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v8, 0, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size 
Folded Spill +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: bltu a7, a3, .LBB16_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: li a1, 32 +; RV32-NEXT: .LBB16_12: +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vslideup.vi v24, v8, 16 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, a1, -16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v8, v16, 16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 48 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 24 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v8, v16, 16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 48 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: sltu a1, a1, a4 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV32-NEXT: bltu a7, a2, .LBB16_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: li a7, 16 +; RV32-NEXT: .LBB16_14: +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a7, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vslideup.vi v16, v8, 16 +; RV32-NEXT: vse32.v v16, (a0) +; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; 
RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: addi a0, a0, 384 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 72 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: vtrunc_v128i32_v128i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: .cfi_def_cfa_offset 48 +; RV64-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset s0, -8 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 72 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 72 * vlenb +; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v7, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 40 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vslidedown.vi v5, v0, 8 +; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vi v4, v0, 4 +; RV64-NEXT: addi a2, a7, -64 +; RV64-NEXT: vslidedown.vi v3, v5, 4 +; RV64-NEXT: sltu a3, a7, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a4, a3, a2 +; RV64-NEXT: addi a2, a4, -32 +; RV64-NEXT: sltu a3, a4, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a3, a3, a2 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: addi t0, a3, -16 +; RV64-NEXT: mv a5, a3 +; RV64-NEXT: bltu a3, a2, .LBB16_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a5, 16 +; RV64-NEXT: .LBB16_2: +; RV64-NEXT: li t2, 64 +; RV64-NEXT: sltu t1, a3, t0 +; RV64-NEXT: mv a6, a7 +; RV64-NEXT: bltu a7, t2, .LBB16_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: li a6, 64 +; RV64-NEXT: .LBB16_4: +; RV64-NEXT: addi t3, a1, 128 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v6, v4, 2 +; RV64-NEXT: addi s0, a1, 512 +; RV64-NEXT: addi t6, a1, 640 +; RV64-NEXT: vslidedown.vi v0, v3, 2 +; RV64-NEXT: addi t1, t1, -1 +; RV64-NEXT: addi t2, a1, 384 +; RV64-NEXT: vslidedown.vi v2, v5, 2 +; RV64-NEXT: li a3, 32 +; RV64-NEXT: addi t4, a6, -32 +; RV64-NEXT: sltu a6, a6, t4 +; RV64-NEXT: addi a6, a6, -1 +; RV64-NEXT: and a6, a6, t4 +; RV64-NEXT: addi t4, a6, -16 +; RV64-NEXT: sltu t5, a6, t4 +; RV64-NEXT: addi t5, t5, -1 +; RV64-NEXT: bltu a6, a2, .LBB16_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: li a6, 16 +; RV64-NEXT: .LBB16_6: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (s0) +; RV64-NEXT: csrr s0, vlenb +; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a0, 56 +; RV64-NEXT: mul s0, s0, a0 +; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s0, sp, s0 +; RV64-NEXT: addi s0, s0, 32 +; RV64-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v16, (t6) +; RV64-NEXT: vle64.v v8, (t3) +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: slli t3, t3, 3 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 32 +; RV64-NEXT: 
vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: li t6, 48 +; RV64-NEXT: mul t3, t3, t6 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 32 +; RV64-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v8, (t2) +; RV64-NEXT: csrr t2, vlenb +; RV64-NEXT: slli t2, t2, 4 +; RV64-NEXT: add t2, sp, t2 +; RV64-NEXT: addi t2, t2, 32 +; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV64-NEXT: and t2, t1, t0 +; RV64-NEXT: and t1, t5, t4 +; RV64-NEXT: addi a1, a1, 256 +; RV64-NEXT: mv t0, a4 +; RV64-NEXT: bltu a4, a3, .LBB16_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: li t0, 32 +; RV64-NEXT: .LBB16_8: +; RV64-NEXT: vsetvli zero, t2, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV64-NEXT: addi t2, sp, 32 +; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: csrr t2, vlenb +; RV64-NEXT: li t3, 56 +; RV64-NEXT: mul t2, t2, t3 +; RV64-NEXT: add t2, sp, t2 +; RV64-NEXT: addi t2, t2, 32 +; RV64-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, t1, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li t1, 24 +; RV64-NEXT: mul a5, a5, t1 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV64-NEXT: addi a5, t0, -16 +; RV64-NEXT: sltu t0, t0, a5 +; RV64-NEXT: addi t0, t0, -1 +; RV64-NEXT: and a5, t0, a5 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v6, v7, 2 +; RV64-NEXT: vmv1r.v v0, v4 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li t0, 48 +; RV64-NEXT: mul a1, a1, t0 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a6, 56 +; RV64-NEXT: mul a1, a1, a6 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v2 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v24, v16, 0, v0.t +; RV64-NEXT: bltu a4, a2, .LBB16_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: li a4, 16 +; RV64-NEXT: .LBB16_10: +; RV64-NEXT: vmv1r.v v0, v5 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v8, 0, v0.t +; 
RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a4, 48 +; RV64-NEXT: mul a1, a1, a4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: mv a1, a7 +; RV64-NEXT: bltu a7, a3, .LBB16_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: li a1, 32 +; RV64-NEXT: .LBB16_12: +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: addi a4, sp, 32 +; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: li a5, 24 +; RV64-NEXT: mul a4, a4, a5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: li a5, 24 +; RV64-NEXT: mul a4, a4, a5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 6 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vslideup.vi v24, v8, 16 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 6 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a4, a1, -16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 56 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v8, v16, 16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 56 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 48 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 24 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v8, v16, 16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 48 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV64-NEXT: sltu a1, a1, a4 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a4 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV64-NEXT: bltu a7, a2, .LBB16_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: li a7, 16 +; RV64-NEXT: .LBB16_14: +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 40 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a7, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vslideup.vi v16, v8, 16 +; RV64-NEXT: vse32.v v16, (a0) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 48 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse32.v v8, 
(a1) +; RV64-NEXT: addi a1, a0, 128 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 56 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse32.v v8, (a1) +; RV64-NEXT: addi a0, a0, 384 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 72 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 48 +; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: addi sp, sp, 48 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret %v = call <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64> %a, <128 x i1> %m, i32 %vl) ret <128 x i32> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 6c9989775f790..8e2e8f3fb0dec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -402,29 +402,29 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: addi a4, a3, -16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: sltu a3, a3, a4 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a3, a3, a4 +; CHECK-NEXT: addi a5, a3, -16 ; CHECK-NEXT: addi a4, a1, 128 -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: addi a3, a2, -32 -; CHECK-NEXT: sltu a4, a2, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a4, a4, a3 +; CHECK-NEXT: addi a7, a2, -32 +; CHECK-NEXT: sltu a3, a3, a5 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a6, a3, a5 +; CHECK-NEXT: sltu a3, a2, a7 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a5, a3, a7 ; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: bltu a4, a3, .LBB32_4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-NEXT: bltu a5, a3, .LBB32_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a4, 16 +; CHECK-NEXT: li a5, 16 ; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a5, a1, 256 -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a5), v0.t +; CHECK-NEXT: addi a4, a1, 256 +; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a4), v0.t ; CHECK-NEXT: bltu a2, a3, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 557882ee31d4c..984bc5b2c7352 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -26,9 +26,10 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; RV32-NEXT: vsetvli zero, zero, e32, m2, 
tu, mu ; RV32-NEXT: vle32.v v8, (a0), v0.t ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -56,9 +57,10 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; RV64-NEXT: vsetvli zero, zero, e32, m2, tu, mu ; RV64-NEXT: vle32.v v8, (a0), v0.t ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret @@ -94,9 +96,10 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -124,9 +127,10 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret @@ -163,9 +167,10 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -193,9 +198,10 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret @@ -231,9 +237,10 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; RV32-NEXT: vsetvli zero, zero, e32, m2, tu, mu ; RV32-NEXT: vle32.v v8, (a0), v0.t ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -261,9 +268,10 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; RV64-NEXT: vsetvli zero, zero, e32, m2, tu, mu ; RV64-NEXT: vle32.v v8, (a0), v0.t ; 
RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret @@ -299,9 +307,10 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -329,9 +338,10 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret @@ -368,9 +378,10 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -398,9 +409,10 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index f9b5095c9af1d..9b5bde2814fda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -1515,40 +1515,36 @@ define @vp_floor_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @mask_sext_not_nxv8i8( %m) { +; CHECK-LABEL: mask_sext_not_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, -1 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret + %not = xor %m, splat (i1 true) + %ext = sext %not to + ret %ext +} + +define @mask_zext_not_nxv8i8( %m) { +; CHECK-LABEL: mask_zext_not_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 1 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret + %not = xor %m, splat (i1 true) + %ext = zext %not to + ret %ext +} + +define <8 x i8> @mask_sext_not_v8i8(<8 x i1> %m) { +; CHECK-LABEL: mask_sext_not_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v8, -1 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret + %not = xor <8 x i1> %m, splat (i1 true) + %ext = sext <8 x i1> %not to <8 x i8> + ret <8 x i8> %ext +} + +define <8 x i8> @mask_zext_not_v8i8(<8 x i1> %m) { +; CHECK-LABEL: mask_zext_not_v8i8: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 1 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret + %not = xor <8 x i1> %m, splat (i1 true) + %ext = zext <8 x i1> %not to <8 x i8> + ret <8 x i8> %ext +} + +define @mask_sext_xor_nxv8i8( %m, %x) { +; CHECK-LABEL: mask_sext_xor_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %xor = xor %m, %x + %ext = sext %xor to + ret %ext +} + +define @mask_zext_xor_nxv8i8( %m, %x) { +; CHECK-LABEL: mask_zext_xor_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: ret + %xor = xor %m, %x + %ext = zext %xor to + ret %ext +} + +define <8 x i8> @mask_sext_xor_v8i8(<8 x i1> %m) { +; CHECK-LABEL: mask_sext_xor_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %xor = xor <8 x i1> %m, + %ext = sext <8 x i1> %xor to <8 x i8> + ret <8 x i8> %ext +} + +define <8 x i8> @mask_zext_xor_v8i8(<8 x i1> %m) { +; CHECK-LABEL: mask_zext_xor_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: ret + %xor = xor <8 x i1> %m, + %ext = zext <8 x i1> %xor to <8 x i8> + ret <8 x i8> %ext +} diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll index cd678c9740721..4b56518a89d69 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll @@ -34,7 +34,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1i64( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64(ptr %0, %1, iXLen %2, iXLen* %3) nounwind { +define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64(ptr %0, %1, iXLen %2, ptr %3) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1i64_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma @@ -58,7 +58,7 @@ entry: iXLen %2, iXLen 3) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %3 + store iXLen %c, ptr %3 ret %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-tamu.ll b/llvm/test/CodeGen/RISCV/rvv/masked-tamu.ll index 0e771eb7c4315..2e8b6c5fcca22 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-tamu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-tamu.ll @@ -32,7 +32,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1i64( , iXLen, iXLen) -define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1i64_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -56,7 +56,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-tuma.ll b/llvm/test/CodeGen/RISCV/rvv/masked-tuma.ll index c4c2fc88913bb..ca9b6245a8570 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/masked-tuma.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-tuma.ll @@ -32,7 +32,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1i64( , iXLen, iXLen) -define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1i64_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, tu, ma @@ -56,7 +56,7 @@ entry: iXLen %3, iXLen 2) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-tumu.ll b/llvm/test/CodeGen/RISCV/rvv/masked-tumu.ll index ec0ebb10e8f7a..df9e84c66cefa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-tumu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-tumu.ll @@ -32,7 +32,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1i64( , iXLen, iXLen) -define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1i64_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, tu, mu @@ -56,7 +56,7 @@ entry: iXLen %3, iXLen 0) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 937b3e6636df8..a325829d472db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -1498,59 +1498,66 @@ define @vp_nearbyint_nxv16f64( %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t ; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: fsflags a2 -; CHECK-NEXT: 
addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 39744dcecd718..bc4b3ad7f79f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -1515,40 +1515,36 @@ define @vp_round_nxv16f64( %va, @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: 
vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 1300d8cd64ebb..75615fe0fe759 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -1515,40 +1515,36 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 403cc0eb9dce1..c249b3c5376fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -901,9 +901,9 @@ define void @test_dag_loop() { ; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vmseq.vv v0, v12, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vsetivli zero, 0, e16, m8, tu, mu +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, mu ; CHECK-NEXT: vle16.v v8, (zero), v0.t ; CHECK-NEXT: vse16.v v8, (zero) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index e6272701a6033..91442e2b75682 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ 
-1398,8 +1398,6 @@ define @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, , iXLen } @llvm.riscv.vleff( ptr, iXLen); -define @intrinsic_vleff_v_tu( %0, ptr %1, iXLen %2, iXLen* %3) nounwind { +define @intrinsic_vleff_v_tu( %0, ptr %1, iXLen %2, ptr %3) nounwind { ; RV32-LABEL: intrinsic_vleff_v_tu: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, mf8, tu, ma @@ -75,7 +75,7 @@ entry: iXLen %2) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %3 + store iXLen %c, ptr %3 ret %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index 8b41febced065..4337bedde8674 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -78,7 +78,7 @@ define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmv.v.i v0, 1 ; CHECK-NEXT: vmv1r.v v9, v10 -; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t +; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: ret %retval = call {<2 x i64>, <2 x i64>} @llvm.vector.deinterleave2.v4i64(<4 x i64> %vec) @@ -88,27 +88,33 @@ ret {<2 x i64>, <2 x i64>} %retval define {<4 x i64>, <4 x i64>} @vector_deinterleave_v4i64_v8i64(<8 x i64> %vec) { ; CHECK-LABEL: vector_deinterleave_v4i64_v8i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v14, 5 -; CHECK-NEXT: vid.v v15 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v8, 4 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vmv.v.i v18, 10 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vcompress.vm v12, v8, v14 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vadd.vv v14, v15, v15 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vcompress.vm v10, v8, v18 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vadd.vi v8, v14, -4 -; CHECK-NEXT: vadd.vi v9, v14, -3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; CHECK-NEXT: vrgatherei16.vv v10, v16, v9, v0.t -; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: vmv.v.i v10, 2 +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: vmv.v.i v11, 12 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vslideup.vi v14, v16, 2 +; CHECK-NEXT: vslideup.vi v14, v16, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vslidedown.vi v12, v8, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vvm v12, v12, v14, v0 +; CHECK-NEXT: vslidedown.vi v18, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v14, 4 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v18, v8, 2, v0.t +; CHECK-NEXT: vmv2r.v v8, v16 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vvm v10, v18, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret %retval = call {<4 x i64>, <4 x i64>} @llvm.vector.deinterleave2.v8i64(<8 x i64> %vec) ret {<4 x i64>, <4 x i64>} %retval @@ -481,7 +487,7 @@ define 
{<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmv.v.i v0, 1 ; CHECK-NEXT: vmv1r.v v9, v10 -; CHECK-NEXT: vrgather.vi v9, v8, 1, v0.t +; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: ret %retval = call {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double> %vec) @@ -491,27 +497,33 @@ ret {<2 x double>, <2 x double>} %retval define {<4 x double>, <4 x double>} @vector_deinterleave_v4f64_v8f64(<8 x double> %vec) { ; CHECK-LABEL: vector_deinterleave_v4f64_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v14, 5 -; CHECK-NEXT: vid.v v15 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v8, 4 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vmv.v.i v18, 10 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vcompress.vm v12, v8, v14 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vadd.vv v14, v15, v15 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vcompress.vm v10, v8, v18 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vadd.vi v8, v14, -4 -; CHECK-NEXT: vadd.vi v9, v14, -3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; CHECK-NEXT: vrgatherei16.vv v10, v16, v9, v0.t -; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: vmv.v.i v10, 2 +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: vmv.v.i v11, 12 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vslideup.vi v14, v16, 2 +; CHECK-NEXT: vslideup.vi v14, v16, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vslidedown.vi v12, v8, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vvm v12, v12, v14, v0 +; CHECK-NEXT: vslidedown.vi v18, v8, 1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v14, 4 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v18, v8, 2, v0.t +; CHECK-NEXT: vmv2r.v v8, v16 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vvm v10, v18, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret %retval = call {<4 x double>, <4 x double>} @llvm.vector.deinterleave2.v8f64(<8 x double> %vec) ret {<4 x double>, <4 x double>} %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll b/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll index 10929394af75f..16074250a8351 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll @@ -5,13 +5,14 @@ define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) { ; CHECK-LABEL: extract_last_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB0_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -29,12 +30,12 @@ define i16 
@extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -53,12 +54,12 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB2_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -77,14 +78,14 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vmsne.vi v0, v9, 0 -; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vcpop.m a2, v0 -; RV32-NEXT: vid.v v9, v0.t ; RV32-NEXT: beqz a2, .LBB3_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: vredmaxu.vs v9, v9, v9 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: li a1, 32 +; RV32-NEXT: vid.v v9, v0.t +; RV32-NEXT: vredmaxu.vs v9, v9, v9 ; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: andi a0, a0, 255 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -100,12 +101,12 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmsne.vi v0, v9, 0 -; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vcpop.m a1, v0 -; RV64-NEXT: vid.v v9, v0.t ; RV64-NEXT: beqz a1, .LBB3_2 ; RV64-NEXT: # %bb.1: +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: vid.v v9, v0.t ; RV64-NEXT: vredmaxu.vs v9, v9, v9 ; RV64-NEXT: vmv.x.s a0, v9 ; RV64-NEXT: andi a0, a0, 255 @@ -124,12 +125,12 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a0, .LBB4_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -148,12 +149,12 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a0, .LBB5_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t 
; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -170,12 +171,13 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % define i8 @extract_last_i8_scalable( %data, %mask, i8 %passthru) { ; CHECK-LABEL: extract_last_i8_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -191,12 +193,13 @@ define i8 @extract_last_i8_scalable( %data, define i16 @extract_last_i16_scalable( %data, %mask, i16 %passthru) { ; CHECK-LABEL: extract_last_i16_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -212,12 +215,13 @@ define i16 @extract_last_i16_scalable( %data, %data, %mask, i32 %passthru) { ; CHECK-LABEL: extract_last_i32_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -233,14 +237,15 @@ define i32 @extract_last_i32_scalable( %data, %data, %mask, i64 %passthru) { ; RV32-LABEL: extract_last_i64_scalable: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, mu -; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vcpop.m a2, v0 -; RV32-NEXT: vid.v v10, v0.t ; RV32-NEXT: beqz a2, .LBB9_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: vredmaxu.vs v10, v10, v10 +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; RV32-NEXT: vid.v v10, v0.t +; RV32-NEXT: vredmaxu.vs v10, v10, v10 ; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: andi a0, a0, 255 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma @@ -254,12 +259,13 @@ define i64 @extract_last_i64_scalable( %data, %data, %data, %mask, float %passthru) { ; CHECK-LABEL: extract_last_float_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a0, .LBB10_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -296,12 +303,13 @@ define float @extract_last_float_scalable( %data, %data, %mask, double %passthru) { ; CHECK-LABEL: extract_last_double_scalable: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a0, .LBB11_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll index 6435c1c14e061..fd1dbab2362a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll @@ -222,9 +222,9 @@ define @vadd_vv_mask_negative( %0, @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 9e78bbdc4f441..6831d1fb63cae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -429,16 +429,16 @@ define @vfadd_vv_nxv32bf16( %va, @vfadd_vv_nxv32f16( %va, @vfdiv_vv_nxv32bf16( %va, @vfdiv_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v8 -; ZVFHMIN-NEXT: vl8re16.v v8, (a0) +; ZVFHMIN-NEXT: vl8re16.v v24, (a0) ; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a0, a3, 1 @@ -8516,25 +8515,25 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a3 ; ZVFHMIN-NEXT: sltu a3, a1, a4 ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v8, a2 +; ZVFHMIN-NEXT: vxor.vx v16, v24, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: vmv4r.v v8, v16 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -8543,35 +8542,32 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 ; 
ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: .LBB281_2: -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -8579,15 +8575,12 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 +; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb @@ -10079,36 +10072,34 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: addi a4, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, 
(a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t @@ -10119,34 +10110,31 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB292_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -10307,6 +10295,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -10315,11 +10304,11 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: vxor.vx v8, v24, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v24, a2, v0.t ; ZVFHMIN-NEXT: sub a2, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 @@ -10330,14 +10319,15 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; 
ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -10345,7 +10335,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 @@ -10481,7 +10471,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32f16( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @llvm.vp.fptrunc.nxv16f64.nxv16f32( @vfptrunc_nxv16f32_nxv16f64( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: sub a3, a0, a1 @@ -113,24 +105,16 @@ define @vfptrunc_nxv16f32_nxv16f64( ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t +; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fptrunc.nxv16f64.nxv16f32( %a, %m, i32 %vl) ret %v @@ -144,58 +128,68 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; 
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: slli a6, a1, 3 -; CHECK-NEXT: slli a4, a1, 1 -; CHECK-NEXT: vslidedown.vx v16, v0, a5 -; CHECK-NEXT: add a6, a0, a6 -; CHECK-NEXT: sub a5, a2, a4 -; CHECK-NEXT: vl8re64.v v24, (a6) -; CHECK-NEXT: sltu a6, a2, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: sub a6, a5, a1 -; CHECK-NEXT: sltu a7, a5, a6 -; CHECK-NEXT: addi a7, a7, -1 ; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v16, a3 -; CHECK-NEXT: and a0, a7, a6 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t -; CHECK-NEXT: bltu a5, a1, .LBB8_2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: srli a5, a1, 3 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: add a6, a0, a4 +; CHECK-NEXT: sub a0, a2, a3 +; CHECK-NEXT: sltu a4, a2, a0 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a0, a4, a0 +; CHECK-NEXT: sub a4, a0, a1 +; CHECK-NEXT: sltu a7, a0, a4 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a4, a7, a4 +; CHECK-NEXT: srli a7, a1, 2 +; CHECK-NEXT: vl8re64.v v8, (a6) +; CHECK-NEXT: vslidedown.vx v16, v0, a7 +; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v25, v0, a5 +; CHECK-NEXT: vslidedown.vx v0, v16, a5 +; CHECK-NEXT: bltu a0, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v7, a3 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: bltu a2, a4, .LBB8_4 +; CHECK-NEXT: bltu a2, a3, .LBB8_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a2, a4 +; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB8_4: ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 ; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t @@ -203,9 +197,9 @@ define @vfptrunc_nxv32f32_nxv32f64( ; 
CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB8_6: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -213,7 +207,8 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 059408a1c9c3f..056c7557440e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -391,16 +391,16 @@ define @vfsub_vv_nxv32bf16( %va, @vfsub_vv_nxv32f16( %va, %a, %b, call void @llvm.riscv.vse( %3, ptr %p, iXLen %vl) ret void } + +; This function has a copy between two vrm2 virtual registers; make sure we can +; still reduce the VL across it. +define void @fadd_fcmp_select_copy( %v, %c, ptr %p, iXLen %vl) { +; NOVLOPT-LABEL: fadd_fcmp_select_copy: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: fmv.w.x fa5, zero +; NOVLOPT-NEXT: vmflt.vf v10, v8, fa5 +; NOVLOPT-NEXT: vmand.mm v10, v0, v10 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vse32.v v8, (a0) +; NOVLOPT-NEXT: vsm.v v10, (a0) +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: fadd_fcmp_select_copy: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: fmv.w.x fa5, zero +; VLOPT-NEXT: vmflt.vf v10, v8, fa5 +; VLOPT-NEXT: vmand.mm v10, v0, v10 +; VLOPT-NEXT: vse32.v v8, (a0) +; VLOPT-NEXT: vsm.v v10, (a0) +; VLOPT-NEXT: ret + %fadd = fadd %v, %v + %fcmp = fcmp olt %fadd, zeroinitializer + %select = select %c, %fcmp, zeroinitializer + call void @llvm.riscv.vse( %fadd, ptr %p, iXLen %vl) + call void @llvm.riscv.vsm( %select, ptr %p, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir index cb43a89ea3bc6..d42feeca9dbcc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir @@ -330,3 +330,41 @@ body: | %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ ... +--- +name: copy +body: | + bb.0: + ; CHECK-LABEL: name: copy + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vr = COPY %x + ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + %y:vr = COPY %x + %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ +...
+--- +name: copy_multiple_users +body: | + bb.0: + ; CHECK-LABEL: name: copy_multiple_users + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vr = COPY %x + ; CHECK-NEXT: %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + %y:vr = COPY %x + %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ + %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */ +... +--- +name: copy_user_invalid_sew +body: | + bb.0: + ; CHECK-LABEL: name: copy_user_invalid_sew + ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vr = COPY %x + ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */ + %y:vr = COPY %x + %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */ +... diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff.ll b/llvm/test/CodeGen/RISCV/rvv/vleff.ll index 39e0a0d02e88d..1f3959c1eac8e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vleff.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vleff.ll @@ -9,7 +9,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv1i64( ptr, iXLen); -define @intrinsic_vleff_v_nxv1i64_nxv1i64(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv1i64_nxv1i64(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv1i64_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma @@ -32,7 +32,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -43,7 +43,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1i64( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1i64_nxv1i64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1i64_nxv1i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -67,7 +67,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -77,7 +77,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv2i64( ptr, iXLen); -define @intrinsic_vleff_v_nxv2i64_nxv2i64(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv2i64_nxv2i64(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv2i64_nxv2i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m2, ta, ma @@ -100,7 +100,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -111,7 +111,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv2i64( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv2i64_nxv2i64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv2i64_nxv2i64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv2i64_nxv2i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -135,7 +135,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen 
%c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -145,7 +145,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv4i64( ptr, iXLen); -define @intrinsic_vleff_v_nxv4i64_nxv4i64(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv4i64_nxv4i64(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv4i64_nxv4i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma @@ -168,7 +168,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -179,7 +179,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv4i64( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv4i64_nxv4i64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv4i64_nxv4i64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv4i64_nxv4i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -203,7 +203,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -213,7 +213,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv8i64( ptr, iXLen); -define @intrinsic_vleff_v_nxv8i64_nxv8i64(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv8i64_nxv8i64(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv8i64_nxv8i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -236,7 +236,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -247,7 +247,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv8i64( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv8i64_nxv8i64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv8i64_nxv8i64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv8i64_nxv8i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -271,7 +271,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -281,7 +281,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv1f64( ptr, iXLen); -define @intrinsic_vleff_v_nxv1f64_nxv1f64(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv1f64_nxv1f64(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv1f64_nxv1f64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma @@ -304,7 +304,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -315,7 +315,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1f64( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1f64_nxv1f64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1f64_nxv1f64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1f64_nxv1f64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -339,7 +339,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -349,7 +349,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv2f64( ptr, iXLen); -define @intrinsic_vleff_v_nxv2f64_nxv2f64(ptr %0, iXLen %1, 
iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv2f64_nxv2f64(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv2f64_nxv2f64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m2, ta, ma @@ -372,7 +372,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -383,7 +383,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv2f64( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv2f64_nxv2f64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv2f64_nxv2f64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv2f64_nxv2f64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -407,7 +407,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -417,7 +417,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv4f64( ptr, iXLen); -define @intrinsic_vleff_v_nxv4f64_nxv4f64(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv4f64_nxv4f64(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv4f64_nxv4f64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma @@ -440,7 +440,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -451,7 +451,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv4f64( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv4f64_nxv4f64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv4f64_nxv4f64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv4f64_nxv4f64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -475,7 +475,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -485,7 +485,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv8f64( ptr, iXLen); -define @intrinsic_vleff_v_nxv8f64_nxv8f64(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv8f64_nxv8f64(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv8f64_nxv8f64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -508,7 +508,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -519,7 +519,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv8f64( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv8f64_nxv8f64( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv8f64_nxv8f64( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv8f64_nxv8f64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu @@ -543,7 +543,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -553,7 +553,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv1i32( ptr, iXLen); -define @intrinsic_vleff_v_nxv1i32_nxv1i32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv1i32_nxv1i32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv1i32_nxv1i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: 
vsetvli zero, a1, e32, mf2, ta, ma @@ -576,7 +576,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -587,7 +587,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1i32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1i32_nxv1i32( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1i32_nxv1i32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1i32_nxv1i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -611,7 +611,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -621,7 +621,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv2i32( ptr, iXLen); -define @intrinsic_vleff_v_nxv2i32_nxv2i32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv2i32_nxv2i32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv2i32_nxv2i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma @@ -644,7 +644,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -655,7 +655,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv2i32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv2i32_nxv2i32( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv2i32_nxv2i32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv2i32_nxv2i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -679,7 +679,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -689,7 +689,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv4i32( ptr, iXLen); -define @intrinsic_vleff_v_nxv4i32_nxv4i32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv4i32_nxv4i32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv4i32_nxv4i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma @@ -712,7 +712,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -723,7 +723,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv4i32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv4i32_nxv4i32( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv4i32_nxv4i32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv4i32_nxv4i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -747,7 +747,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -757,7 +757,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv8i32( ptr, iXLen); -define @intrinsic_vleff_v_nxv8i32_nxv8i32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv8i32_nxv8i32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv8i32_nxv8i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma @@ -780,7 +780,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, 
ptr %2 ret %b } @@ -791,7 +791,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv8i32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv8i32_nxv8i32( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv8i32_nxv8i32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv8i32_nxv8i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -815,7 +815,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -825,7 +825,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv16i32( ptr, iXLen); -define @intrinsic_vleff_v_nxv16i32_nxv16i32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv16i32_nxv16i32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv16i32_nxv16i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma @@ -848,7 +848,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -859,7 +859,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv16i32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv16i32_nxv16i32( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv16i32_nxv16i32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv16i32_nxv16i32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu @@ -883,7 +883,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -893,7 +893,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv1f32( ptr, iXLen); -define @intrinsic_vleff_v_nxv1f32_nxv1f32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv1f32_nxv1f32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv1f32_nxv1f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma @@ -916,7 +916,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -927,7 +927,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1f32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1f32_nxv1f32( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1f32_nxv1f32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1f32_nxv1f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -951,7 +951,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -961,7 +961,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv2f32( ptr, iXLen); -define @intrinsic_vleff_v_nxv2f32_nxv2f32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv2f32_nxv2f32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv2f32_nxv2f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma @@ -984,7 +984,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -995,7 +995,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv2f32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv2f32_nxv2f32( %0, ptr %1, %2, iXLen 
%3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv2f32_nxv2f32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv2f32_nxv2f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1019,7 +1019,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1029,7 +1029,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv4f32( ptr, iXLen); -define @intrinsic_vleff_v_nxv4f32_nxv4f32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv4f32_nxv4f32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv4f32_nxv4f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma @@ -1052,7 +1052,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1063,7 +1063,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv4f32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv4f32_nxv4f32( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv4f32_nxv4f32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv4f32_nxv4f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1087,7 +1087,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1097,7 +1097,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv8f32( ptr, iXLen); -define @intrinsic_vleff_v_nxv8f32_nxv8f32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv8f32_nxv8f32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv8f32_nxv8f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma @@ -1120,7 +1120,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1131,7 +1131,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv8f32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv8f32_nxv8f32( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv8f32_nxv8f32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv8f32_nxv8f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1155,7 +1155,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1165,7 +1165,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv16f32( ptr, iXLen); -define @intrinsic_vleff_v_nxv16f32_nxv16f32(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv16f32_nxv16f32(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv16f32_nxv16f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma @@ -1188,7 +1188,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1199,7 +1199,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv16f32( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv16f32_nxv16f32( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv16f32_nxv16f32( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: 
intrinsic_vleff_mask_v_nxv16f32_nxv16f32: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu @@ -1223,7 +1223,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1233,7 +1233,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv1i16( ptr, iXLen); -define @intrinsic_vleff_v_nxv1i16_nxv1i16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv1i16_nxv1i16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv1i16_nxv1i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma @@ -1256,7 +1256,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1267,7 +1267,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1i16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1i16_nxv1i16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1i16_nxv1i16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1i16_nxv1i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1291,7 +1291,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1301,7 +1301,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv2i16( ptr, iXLen); -define @intrinsic_vleff_v_nxv2i16_nxv2i16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv2i16_nxv2i16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv2i16_nxv2i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma @@ -1324,7 +1324,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1335,7 +1335,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv2i16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv2i16_nxv2i16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv2i16_nxv2i16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv2i16_nxv2i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1359,7 +1359,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1369,7 +1369,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv4i16( ptr, iXLen); -define @intrinsic_vleff_v_nxv4i16_nxv4i16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv4i16_nxv4i16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv4i16_nxv4i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma @@ -1392,7 +1392,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1403,7 +1403,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv4i16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv4i16_nxv4i16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv4i16_nxv4i16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv4i16_nxv4i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1427,7 +1427,7 @@ entry: iXLen %3, iXLen 1) 
%b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1437,7 +1437,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv8i16( ptr, iXLen); -define @intrinsic_vleff_v_nxv8i16_nxv8i16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv8i16_nxv8i16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv8i16_nxv8i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma @@ -1460,7 +1460,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1471,7 +1471,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv8i16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv8i16_nxv8i16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv8i16_nxv8i16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv8i16_nxv8i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1495,7 +1495,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1505,7 +1505,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv16i16( ptr, iXLen); -define @intrinsic_vleff_v_nxv16i16_nxv16i16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv16i16_nxv16i16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv16i16_nxv16i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma @@ -1528,7 +1528,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1539,7 +1539,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv16i16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv16i16_nxv16i16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv16i16_nxv16i16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv16i16_nxv16i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1563,7 +1563,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1573,7 +1573,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv32i16( ptr, iXLen); -define @intrinsic_vleff_v_nxv32i16_nxv32i16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv32i16_nxv32i16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv32i16_nxv32i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma @@ -1596,7 +1596,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1607,7 +1607,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv32i16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv32i16_nxv32i16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv32i16_nxv32i16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv32i16_nxv32i16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu @@ -1631,7 +1631,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ 
-1641,7 +1641,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv1bf16( ptr, iXLen); -define @intrinsic_vleff_v_nxv1half_nxv1bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv1half_nxv1bf16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv1half_nxv1bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma @@ -1664,7 +1664,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1675,7 +1675,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1bf16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1half_nxv1bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1half_nxv1bf16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1half_nxv1bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1699,7 +1699,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1709,7 +1709,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv2bf16( ptr, iXLen); -define @intrinsic_vleff_v_nxv2half_nxv2bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv2half_nxv2bf16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv2half_nxv2bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma @@ -1732,7 +1732,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1743,7 +1743,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv2bf16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv2half_nxv2bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv2half_nxv2bf16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv2half_nxv2bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1767,7 +1767,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1777,7 +1777,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv4bf16( ptr, iXLen); -define @intrinsic_vleff_v_nxv4half_nxv4bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv4half_nxv4bf16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv4half_nxv4bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma @@ -1800,7 +1800,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1811,7 +1811,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv4bf16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv4half_nxv4bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv4half_nxv4bf16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv4half_nxv4bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1835,7 +1835,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1845,7 +1845,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv8bf16( ptr, iXLen); -define 
@intrinsic_vleff_v_nxv8half_nxv8bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv8half_nxv8bf16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv8half_nxv8bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma @@ -1868,7 +1868,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1879,7 +1879,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv8bf16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv8half_nxv8bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv8half_nxv8bf16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv8half_nxv8bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1903,7 +1903,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1913,7 +1913,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv16bf16( ptr, iXLen); -define @intrinsic_vleff_v_nxv16half_nxv16bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv16half_nxv16bf16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv16half_nxv16bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma @@ -1936,7 +1936,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -1947,7 +1947,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv16bf16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv16half_nxv16bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv16half_nxv16bf16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv16half_nxv16bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1971,7 +1971,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -1981,7 +1981,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv32bf16( ptr, iXLen); -define @intrinsic_vleff_v_nxv32half_nxv32bf16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv32half_nxv32bf16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv32half_nxv32bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma @@ -2004,7 +2004,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2015,7 +2015,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv32bf16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv32half_nxv32bf16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv32half_nxv32bf16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv32half_nxv32bf16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu @@ -2039,7 +2039,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2049,7 +2049,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv1f16( ptr, iXLen); -define @intrinsic_vleff_v_nxv1bfloat_nxv1f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define 
@intrinsic_vleff_v_nxv1bfloat_nxv1f16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv1bfloat_nxv1f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma @@ -2072,7 +2072,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2083,7 +2083,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1f16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1bfloat_nxv1f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1bfloat_nxv1f16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1bfloat_nxv1f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -2107,7 +2107,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2117,7 +2117,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv2f16( ptr, iXLen); -define @intrinsic_vleff_v_nxv2bfloat_nxv2f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv2bfloat_nxv2f16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv2bfloat_nxv2f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma @@ -2140,7 +2140,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2151,7 +2151,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv2f16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv2bfloat_nxv2f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv2bfloat_nxv2f16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv2bfloat_nxv2f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -2175,7 +2175,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2185,7 +2185,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv4f16( ptr, iXLen); -define @intrinsic_vleff_v_nxv4bfloat_nxv4f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv4bfloat_nxv4f16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv4bfloat_nxv4f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma @@ -2208,7 +2208,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2219,7 +2219,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv4f16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv4bfloat_nxv4f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv4bfloat_nxv4f16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv4bfloat_nxv4f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -2243,7 +2243,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2253,7 +2253,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv8f16( ptr, iXLen); -define @intrinsic_vleff_v_nxv8bfloat_nxv8f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv8bfloat_nxv8f16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: 
intrinsic_vleff_v_nxv8bfloat_nxv8f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma @@ -2276,7 +2276,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2287,7 +2287,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv8f16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv8bfloat_nxv8f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv8bfloat_nxv8f16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv8bfloat_nxv8f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -2311,7 +2311,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2321,7 +2321,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv16f16( ptr, iXLen); -define @intrinsic_vleff_v_nxv16bfloat_nxv16f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv16bfloat_nxv16f16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv16bfloat_nxv16f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma @@ -2344,7 +2344,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2355,7 +2355,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv16f16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv16bfloat_nxv16f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv16bfloat_nxv16f16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv16bfloat_nxv16f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -2379,7 +2379,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2389,7 +2389,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv32f16( ptr, iXLen); -define @intrinsic_vleff_v_nxv32bfloat_nxv32f16(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv32bfloat_nxv32f16(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv32bfloat_nxv32f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma @@ -2412,7 +2412,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2423,7 +2423,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv32f16( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv32bfloat_nxv32f16( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv32bfloat_nxv32f16( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv32bfloat_nxv32f16: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu @@ -2447,7 +2447,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2457,7 +2457,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv1i8( ptr, iXLen); -define @intrinsic_vleff_v_nxv1i8_nxv1i8(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv1i8_nxv1i8(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv1i8_nxv1i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, 
mf8, ta, ma @@ -2480,7 +2480,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2491,7 +2491,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv1i8( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv1i8_nxv1i8( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv1i8_nxv1i8( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv1i8_nxv1i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -2515,7 +2515,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2525,7 +2525,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv2i8( ptr, iXLen); -define @intrinsic_vleff_v_nxv2i8_nxv2i8(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv2i8_nxv2i8(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv2i8_nxv2i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -2548,7 +2548,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2559,7 +2559,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv2i8( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv2i8_nxv2i8( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv2i8_nxv2i8( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv2i8_nxv2i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -2583,7 +2583,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2593,7 +2593,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv4i8( ptr, iXLen); -define @intrinsic_vleff_v_nxv4i8_nxv4i8(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv4i8_nxv4i8(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv4i8_nxv4i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -2616,7 +2616,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2627,7 +2627,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv4i8( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv4i8_nxv4i8( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv4i8_nxv4i8( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv4i8_nxv4i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -2651,7 +2651,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2661,7 +2661,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv8i8( ptr, iXLen); -define @intrinsic_vleff_v_nxv8i8_nxv8i8(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv8i8_nxv8i8(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv8i8_nxv8i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -2684,7 +2684,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2695,7 +2695,7 @@ 
declare { , iXLen } @llvm.riscv.vleff.mask.nxv8i8( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv8i8_nxv8i8( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv8i8_nxv8i8( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv8i8_nxv8i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -2719,7 +2719,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2729,7 +2729,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv16i8( ptr, iXLen); -define @intrinsic_vleff_v_nxv16i8_nxv16i8(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv16i8_nxv16i8(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv16i8_nxv16i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -2752,7 +2752,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2763,7 +2763,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv16i8( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv16i8_nxv16i8( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv16i8_nxv16i8( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv16i8_nxv16i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -2787,7 +2787,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2797,7 +2797,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv32i8( ptr, iXLen); -define @intrinsic_vleff_v_nxv32i8_nxv32i8(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv32i8_nxv32i8(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv32i8_nxv32i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -2820,7 +2820,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2831,7 +2831,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv32i8( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv32i8_nxv32i8( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define @intrinsic_vleff_mask_v_nxv32i8_nxv32i8( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv32i8_nxv32i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -2855,7 +2855,7 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } @@ -2865,7 +2865,7 @@ declare { , iXLen } @llvm.riscv.vleff.nxv64i8( ptr, iXLen); -define @intrinsic_vleff_v_nxv64i8_nxv64i8(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_v_nxv64i8_nxv64i8(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_v_nxv64i8_nxv64i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -2888,7 +2888,7 @@ entry: iXLen %1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %2 + store iXLen %c, ptr %2 ret %b } @@ -2899,7 +2899,7 @@ declare { , iXLen } @llvm.riscv.vleff.mask.nxv64i8( iXLen, iXLen); -define @intrinsic_vleff_mask_v_nxv64i8_nxv64i8( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define 
@intrinsic_vleff_mask_v_nxv64i8_nxv64i8( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_v_nxv64i8_nxv64i8: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e8, m8, ta, mu @@ -2923,13 +2923,13 @@ entry: iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 0 %c = extractvalue { , iXLen } %a, 1 - store iXLen %c, iXLen* %4 + store iXLen %c, ptr %4 ret %b } ; Test with the VL output unused -define @intrinsic_vleff_dead_vl(ptr %0, iXLen %1, iXLen* %2) nounwind { +define @intrinsic_vleff_dead_vl(ptr %0, iXLen %1, ptr %2) nounwind { ; CHECK-LABEL: intrinsic_vleff_dead_vl: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma @@ -2962,7 +2962,7 @@ entry: } ; Test with the loaded value unused -define void @intrinsic_vleff_dead_value(ptr %0, iXLen %1, iXLen* %2) nounwind { +define void @intrinsic_vleff_dead_value(ptr %0, iXLen %1, ptr %2) nounwind { ; RV32-LABEL: intrinsic_vleff_dead_value: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, ma @@ -2984,11 +2984,11 @@ entry: ptr %0, iXLen %1) %b = extractvalue { , iXLen } %a, 1 - store iXLen %b, iXLen* %2 + store iXLen %b, ptr %2 ret void } -define void @intrinsic_vleff_mask_dead_value( %0, ptr %1, %2, iXLen %3, iXLen* %4) nounwind { +define void @intrinsic_vleff_mask_dead_value( %0, ptr %1, %2, iXLen %3, ptr %4) nounwind { ; RV32-LABEL: intrinsic_vleff_mask_dead_value: ; RV32: # %bb.0: # %entry ; RV32-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -3011,13 +3011,13 @@ entry: %2, iXLen %3, iXLen 1) %b = extractvalue { , iXLen } %a, 1 - store iXLen %b, iXLen* %4 + store iXLen %b, ptr %4 ret void } ; Test with both outputs dead. Make sure the vleff isn't deleted. -define void @intrinsic_vleff_dead_all(ptr %0, iXLen %1, iXLen* %2) nounwind { +define void @intrinsic_vleff_dead_all(ptr %0, iXLen %1, ptr %2) nounwind { ; CHECK-LABEL: intrinsic_vleff_dead_all: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll index e481891dfd52f..00d27dd583c8a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll @@ -331,8 +331,9 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v8, v0 +; RV32-NEXT: slli a2, a1, 1 ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: li a2, -1 +; RV32-NEXT: li a1, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: csrr a3, vlenb @@ -340,21 +341,19 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV32-NEXT: vmerge.vim v11, v9, 1, v0 ; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v11, v11 -; RV32-NEXT: vwmaccu.vx v12, a2, v11 +; RV32-NEXT: vwmaccu.vx v12, a1, v11 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; RV32-NEXT: vslidedown.vx v11, v12, a3 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v11, 0 -; RV32-NEXT: add a2, a3, a3 +; RV32-NEXT: add a1, a3, a3 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; RV32-NEXT: vslideup.vx v10, v9, a3 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; 
RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: slli a2, a1, 1 -; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; RV32-NEXT: vle32.v v10, (a0), v0.t ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma @@ -676,6 +675,7 @@ define {, } @not_same_mask( ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v9, v0 ; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: li a2, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -698,10 +698,8 @@ define {, } @not_same_mask( ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma ; RV32-NEXT: vslideup.vx v10, v8, a3 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV32-NEXT: vle32.v v10, (a0), v0.t ; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index 1007d1ce649cc..68e7297605be2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -304,12 +304,12 @@ define @vpgather_baseidx_nxv32i8(ptr %base, @vpload_nxv17f64(ptr %ptr, ptr %out, %v, %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fminimum_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v11, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v10, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB22_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB22_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, %val, %m, i32 %evl) @@ -371,21 +372,22 @@ define float @vreduce_fminimum_nxv4f32(float %start, %val, define float @vreduce_fmaximum_nxv4f32(float %start, %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fmaximum_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v11, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v10, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB23_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB23_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t ; 
CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, %val, %m, i32 %evl) @@ -421,21 +423,22 @@ define float @vreduce_fmaximum_nnan_nxv4f32(float %start, % define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fminimum_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v8, v0.t +; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v9, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB26_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) @@ -445,21 +448,22 @@ define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fmaximum_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v8, v0.t +; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v9, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB27_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index fd5bf4ebcede8..32d24778d7327 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -285,58 +285,68 @@ define @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, This Inner Loop Header: Depth=1 ; RV32I-NEXT: addi a4, a3, 1 -; RV32I-NEXT: sw a4, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 0(a1) -; RV32I-NEXT: sw a3, 4(a1) +; RV32I-NEXT: sw a4, -1920(a0) +; RV32I-NEXT: sw a3, -1916(a0) +; RV32I-NEXT: sw a4, -1920(a1) +; RV32I-NEXT: sw a3, -1916(a1) ; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: blt a4, a2, .LBB1_1 ; RV32I-NEXT: .LBB1_2: # %while_end @@ -126,11 +124,10 @@ define void @test3(ptr %t) { ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: lui a1, 20 ; RV32I-NEXT: li a2, 2 -; RV32I-NEXT: addi a1, a1, -1920 ; RV32I-NEXT: 
add a0, a0, a1 ; RV32I-NEXT: li a1, 3 -; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a2, -1916(a0) +; RV32I-NEXT: sw a1, -1912(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: test3: diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll index e761fcb736a87..578f51a957a75 100644 --- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll +++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll @@ -1136,10 +1136,9 @@ define i64 @lrd_large_offset(ptr %a, i64 %b) { ; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3 ; RV32XTHEADMEMIDX-NEXT: add a0, a1, a0 ; RV32XTHEADMEMIDX-NEXT: lui a1, 23 -; RV32XTHEADMEMIDX-NEXT: addi a1, a1, 1792 ; RV32XTHEADMEMIDX-NEXT: add a1, a0, a1 -; RV32XTHEADMEMIDX-NEXT: lw a0, 0(a1) -; RV32XTHEADMEMIDX-NEXT: lw a1, 4(a1) +; RV32XTHEADMEMIDX-NEXT: lw a0, 1792(a1) +; RV32XTHEADMEMIDX-NEXT: lw a1, 1796(a1) ; RV32XTHEADMEMIDX-NEXT: ret ; ; RV64XTHEADMEMIDX-LABEL: lrd_large_offset: diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_bindless_images/bindless_images_generic.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_bindless_images/bindless_images_generic.ll new file mode 100644 index 0000000000000..3963180e51172 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_bindless_images/bindless_images_generic.ll @@ -0,0 +1,34 @@ +; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_bindless_images %s -o - | FileCheck %s + +; CHECK-ERROR: LLVM ERROR: OpConvertHandleTo[Image/Sampler/SampledImage]INTEL instruction +; CHECK-ERROR-SAME: require the following SPIR-V extension: SPV_INTEL_bindless_images + +; CHECK: OpCapability BindlessImagesINTEL +; CHECK: OpExtension "SPV_INTEL_bindless_images" + +; CHECK-DAG: %[[#VoidTy:]] = OpTypeVoid +; CHECK-DAG: %[[#Int64Ty:]] = OpTypeInt 64 +; CHECK-DAG: %[[#Const42:]] = OpConstant %[[#Int64Ty]] 42 +; CHECK-DAG: %[[#Const43:]] = OpConstant %[[#Int64Ty]] 43 +; CHECK-DAG: %[[#IntImgTy:]] = OpTypeImage %[[#Int64Ty]] +; CHECK-DAG: %[[#SamplerTy:]] = OpTypeSampler +; CHECK-DAG: %[[#IntSmpImgTy:]] = OpTypeImage %[[#Int64Ty]] +; CHECK-DAG: %[[#SampImageTy:]] = OpTypeSampledImage %[[#IntSmpImgTy]] +; CHECK: %[[#Input:]] = OpFunctionParameter %[[#Int64Ty]] +; CHECK: %[[#]] = OpConvertHandleToImageINTEL %[[#IntImgTy]] %[[#Input]] +; CHECK: %[[#]] = OpConvertHandleToSamplerINTEL %[[#SamplerTy]] %[[#Const42]] +; CHECK: %[[#]] = OpConvertHandleToSampledImageINTEL %[[#SampImageTy]] %[[#Const43]] + +define spir_func void @foo(i64 %in) { + %img = call spir_func target("spirv.Image", i64, 2, 0, 0, 0, 0, 0, 0) @_Z33__spirv_ConvertHandleToImageINTELl(i64 %in) + %samp = call spir_func target("spirv.Sampler") @_Z35__spirv_ConvertHandleToSamplerINTELl(i64 42) + %sampImage = call spir_func target("spirv.SampledImage", i64, 1, 0, 0, 0, 0, 0, 0) @_Z40__spirv_ConvertHandleToSampledImageINTELl(i64 43) + ret void +} + +declare spir_func target("spirv.Image", i64, 2, 0, 0, 0, 0, 0, 0) @_Z33__spirv_ConvertHandleToImageINTELl(i64) + +declare spir_func target("spirv.Sampler") @_Z35__spirv_ConvertHandleToSamplerINTELl(i64) + +declare spir_func target("spirv.SampledImage", i64, 1, 0, 0, 0, 0, 0, 0) @_Z40__spirv_ConvertHandleToSampledImageINTELl(i64) diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_long_composites/long-type-struct.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_long_composites/long-type-struct.ll new file mode 100644 index 0000000000000..9aa6b83df24e5 --- /dev/null +++ 
b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_long_composites/long-type-struct.ll @@ -0,0 +1,24 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_long_composites %s -o - | FileCheck %s +; TODO: enable back once spirv-val knows about OpTypeStructContinuedINTEL type +; RUNx: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_long_composites %s -o - -filetype=obj | spirv-val --max-struct-members 65535 %} +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - 2>&1 | FileCheck %s --check-prefix=ERR + +; ERR: LLVM ERROR: Continued instructions require the following SPIR-V extension: SPV_INTEL_long_composites + +; CHECK: Capability LongCompositesINTEL +; CHECK: Extension "SPV_INTEL_long_composites" +; CHECK: %[[#TInt:]] = OpTypeInt 8 0 +; CHECK: %[[#TIntPtr:]] = OpTypePointer Generic %[[#TInt]] +; CHECK: %[[#TArr:]] = OpTypeArray + +; CHECK: OpTypeStruct %[[#TIntPtr]] %[[#TIntPtr]] %[[#TArr]] %[[#TInt]] %[[#TInt]] %[[#TInt]] +; CHECK-NEXT: OpTypeStructContinuedINTEL %[[#TInt]] %[[#TInt]] %[[#TInt]] %[[#TInt]] %[[#TInt]] %[[#TInt]] %[[#TInt]] %[[#TInt]] %[[#TInt]]{{$}} + +%struct.A = type { ptr addrspace(4), ptr addrspace(4), [10 x float], i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, ... <run of several thousand repeated i8 elements elided>
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8 <!-- run of several thousand identical `i8` entries elided; the original span contained nothing but this token repeated -->
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, 
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } + +; Function Attrs: convergent norecurse +define spir_kernel void @test() { +entry: + %a = alloca %struct.A, align 8 + ret void +} diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index fd9b46e82e0b1..a84424bf7dea9 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -225,15 +225,25 @@ define <8 x i16> @test_masked_z_16xi16_to_8xi16_perm_mem_mask1(ptr %vp, <8 x i16 } define <8 x i16> @test_masked_16xi16_to_8xi16_perm_mem_mask2(ptr %vp, <8 x i16> %vec2, <8 x i16> %mask) { -; CHECK-LABEL: test_masked_16xi16_to_8xi16_perm_mem_mask2: -; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; CHECK-NEXT: vpmovsxbw {{.*#+}} xmm2 = [1,8,11,8,13,8,15,9] -; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1 -; CHECK-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; CHECK-FAST-LABEL: test_masked_16xi16_to_8xi16_perm_mem_mask2: +; CHECK-FAST: # %bb.0: +; CHECK-FAST-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; CHECK-FAST-NEXT: vpmovsxbw {{.*#+}} xmm2 = [1,8,11,8,13,8,15,9] +; CHECK-FAST-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; CHECK-FAST-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} +; CHECK-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-FAST-NEXT: vzeroupper +; CHECK-FAST-NEXT: retq +; +; CHECK-FAST-PERLANE-LABEL: test_masked_16xi16_to_8xi16_perm_mem_mask2: +; CHECK-FAST-PERLANE: # %bb.0: +; CHECK-FAST-PERLANE-NEXT: vpsrld $16, (%rdi), %xmm2 +; CHECK-FAST-PERLANE-NEXT: vmovdqa 16(%rdi), %xmm3 +; CHECK-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,0,1,6,7,0,1,10,11,0,1,14,15,2,3] +; CHECK-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7] +; CHECK-FAST-PERLANE-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; CHECK-FAST-PERLANE-NEXT: vmovdqu16 %xmm2, %xmm0 {%k1} +; CHECK-FAST-PERLANE-NEXT: retq %vec = load <16 x i16>, ptr %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -242,14 +252,24 @@ define <8 x i16> @test_masked_16xi16_to_8xi16_perm_mem_mask2(ptr %vp, <8 x i16> } define <8 x i16> @test_masked_z_16xi16_to_8xi16_perm_mem_mask2(ptr %vp, <8 x i16> %mask) { -; CHECK-LABEL: test_masked_z_16xi16_to_8xi16_perm_mem_mask2: -; CHECK: # %bb.0: -; CHECK-NEXT: vpmovsxbw {{.*#+}} xmm1 = [1,8,11,8,13,8,15,9] -; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; CHECK-FAST-LABEL: test_masked_z_16xi16_to_8xi16_perm_mem_mask2: +; CHECK-FAST: # %bb.0: +; CHECK-FAST-NEXT: vpmovsxbw {{.*#+}} xmm1 = [1,8,11,8,13,8,15,9] +; CHECK-FAST-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; CHECK-FAST-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} +; CHECK-FAST-NEXT: # kill: def 
$xmm0 killed $xmm0 killed $ymm0
+; CHECK-FAST-NEXT: vzeroupper
+; CHECK-FAST-NEXT: retq
+;
+; CHECK-FAST-PERLANE-LABEL: test_masked_z_16xi16_to_8xi16_perm_mem_mask2:
+; CHECK-FAST-PERLANE: # %bb.0:
+; CHECK-FAST-PERLANE-NEXT: vpsrld $16, (%rdi), %xmm1
+; CHECK-FAST-PERLANE-NEXT: vmovdqa 16(%rdi), %xmm2
+; CHECK-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,0,1,6,7,0,1,10,11,0,1,14,15,2,3]
+; CHECK-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; CHECK-FAST-PERLANE-NEXT: vptestnmw %xmm0, %xmm0, %k1
+; CHECK-FAST-PERLANE-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} {z}
+; CHECK-FAST-PERLANE-NEXT: retq
 %vec = load <16 x i16>, ptr %vp
 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <8 x i32>
 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/sext-vsetcc.ll b/llvm/test/CodeGen/X86/sext-vsetcc.ll
index 0990c0b12f79a..0f473bfbe4e47 100644
--- a/llvm/test/CodeGen/X86/sext-vsetcc.ll
+++ b/llvm/test/CodeGen/X86/sext-vsetcc.ll
@@ -57,7 +57,7 @@ define <8 x i16> @cmp_ne_load_const_volatile(ptr %x) nounwind {
 ; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: vpmovsxbw %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -107,7 +107,7 @@ define <8 x i16> @cmp_ne_load_const_extra_use1(ptr %x) nounwind {
 ; AVX512-NEXT: callq use_v8i8@PLT
 ; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT: vpcmpeqb (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: vpmovsxbw %xmm0, %xmm0
 ; AVX512-NEXT: addq $24, %rsp
 ; AVX512-NEXT: vzeroupper
@@ -159,7 +159,7 @@ define <8 x i16> @cmp_ne_load_const_extra_use2(ptr %x) nounwind {
 ; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: vmovdqu64 %zmm0, (%rsp) # 64-byte Spill
 ; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512-NEXT: vzeroupper
@@ -202,7 +202,7 @@ define <8 x i16> @cmp_ne_no_load_const(i64 %x) nounwind {
 ; AVX512-NEXT: vmovq %rdi, %xmm0
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: vpmovsxbw %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -267,7 +267,7 @@ define <3 x i32> @cmp_ult_load_const_bad_type(ptr %x) nounwind {
 ; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX512-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: vpmovsxbd %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -326,7 +326,7 @@ define <2 x i64> @cmp_ne_zextload(ptr %x, ptr %y) nounwind {
 ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
 ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
 ; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -414,7 +414,7 @@ define <8 x i32> @cmp_ne_zextload_from_legal_op(ptr %x, ptr %y) {
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
 ; AVX512-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
 ; AVX512-NEXT: retq
 %loadx = load <8 x i16>, ptr %x
@@ -679,7 +679,7 @@ define <8 x i32> @PR63946(<8 x i32> %a0, <8 x i32> %b0) nounwind {
 ; AVX512-NEXT: korw %k5, %k0, %k0
 ; AVX512-NEXT: korw %k6, %k0, %k0
 ; AVX512-NEXT: korw %k7, %k0, %k1
-; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT: retq
 entry:
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-6.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-6.ll
index 9d0183c816b12..feb75b21d5c8d 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-6.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-6.ll
@@ -293,8 +293,8 @@ define void @load_i16_stride6_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
 ; AVX512BW-FCP-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7]
 ; AVX512BW-FCP-NEXT: vpbroadcastw 4(%rdi), %xmm4
 ; AVX512BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
-; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm5 = [3,9,1,9,2,10,3,11]
-; AVX512BW-FCP-NEXT: vpermw (%rdi), %ymm5, %ymm5
+; AVX512BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[12,13,6,7,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512BW-FCP-NEXT: vpbroadcastw 20(%rdi), %xmm6
 ; AVX512BW-FCP-NEXT: vpbroadcastw 8(%rdi), %xmm7
 ; AVX512BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3]
@@ -307,7 +307,6 @@ define void @load_i16_stride6_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
 ; AVX512BW-FCP-NEXT: vmovd %xmm5, (%r8)
 ; AVX512BW-FCP-NEXT: vmovd %xmm6, (%r9)
 ; AVX512BW-FCP-NEXT: vmovd %xmm0, (%rax)
-; AVX512BW-FCP-NEXT: vzeroupper
 ; AVX512BW-FCP-NEXT: retq
 ;
 ; AVX512DQ-BW-LABEL: load_i16_stride6_vf2:
@@ -347,8 +346,8 @@ define void @load_i16_stride6_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
 ; AVX512DQ-BW-FCP-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7]
 ; AVX512DQ-BW-FCP-NEXT: vpbroadcastw 4(%rdi), %xmm4
 ; AVX512DQ-BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
-; AVX512DQ-BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm5 = [3,9,1,9,2,10,3,11]
-; AVX512DQ-BW-FCP-NEXT: vpermw (%rdi), %ymm5, %ymm5
+; AVX512DQ-BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[12,13,6,7,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512DQ-BW-FCP-NEXT: vpbroadcastw 20(%rdi), %xmm6
 ; AVX512DQ-BW-FCP-NEXT: vpbroadcastw 8(%rdi), %xmm7
 ; AVX512DQ-BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3]
@@ -361,7 +360,6 @@ define void @load_i16_stride6_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
 ; AVX512DQ-BW-FCP-NEXT: vmovd %xmm5, (%r8)
 ; AVX512DQ-BW-FCP-NEXT: vmovd %xmm6, (%r9)
 ; AVX512DQ-BW-FCP-NEXT: vmovd %xmm0, (%rax)
-; AVX512DQ-BW-FCP-NEXT: vzeroupper
 ; AVX512DQ-BW-FCP-NEXT: retq
 %wide.vec = load <12 x i16>, ptr %in.vec, align 64
 %strided.vec0 = shufflevector <12 x i16> %wide.vec, <12 x i16> poison, <2 x i32>
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
index 95b5ffde48564..038c73bd9fed2 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
@@ -321,23 +321,23 @@ define void @load_i16_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
 ; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,14,15,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512BW-FCP-NEXT: vpsrld $16, %xmm0, %xmm3
 ; AVX512BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; AVX512BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} xmm5 = xmm0[8,9,6,7,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512BW-FCP-NEXT: vpbroadcastw 8(%rdi), %xmm6
-; AVX512BW-FCP-NEXT: vpsrlq $48, %xmm1, %xmm7
-; AVX512BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
-; AVX512BW-FCP-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm3 = [6,13,5,13,6,14,7,15]
-; AVX512BW-FCP-NEXT: vpermw (%rdi), %ymm3, %ymm3
+; AVX512BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512BW-FCP-NEXT: vmovdqa {{.*#+}} xmm6 = [8,9,6,7,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512BW-FCP-NEXT: vpshufb %xmm6, %xmm5, %xmm7
+; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[12,13,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-FCP-NEXT: vpbroadcastw 8(%rdi), %xmm8
+; AVX512BW-FCP-NEXT: vpsrlq $48, %xmm1, %xmm9
+; AVX512BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3]
+; AVX512BW-FCP-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; AVX512BW-FCP-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX512BW-FCP-NEXT: vpshufb %xmm6, %xmm0, %xmm0
 ; AVX512BW-FCP-NEXT: vmovd %xmm2, (%rsi)
 ; AVX512BW-FCP-NEXT: vmovd %xmm4, (%rdx)
-; AVX512BW-FCP-NEXT: vmovd %xmm5, (%rcx)
-; AVX512BW-FCP-NEXT: vmovd %xmm0, (%r8)
-; AVX512BW-FCP-NEXT: vmovd %xmm6, (%r9)
-; AVX512BW-FCP-NEXT: vmovd %xmm1, (%r10)
-; AVX512BW-FCP-NEXT: vmovd %xmm3, (%rax)
-; AVX512BW-FCP-NEXT: vzeroupper
+; AVX512BW-FCP-NEXT: vmovd %xmm7, (%rcx)
+; AVX512BW-FCP-NEXT: vmovd %xmm5, (%r8)
+; AVX512BW-FCP-NEXT: vmovd %xmm8, (%r9)
+; AVX512BW-FCP-NEXT: vmovd %xmm3, (%r10)
+; AVX512BW-FCP-NEXT: vmovd %xmm0, (%rax)
 ; AVX512BW-FCP-NEXT: retq
 ;
 ; AVX512DQ-BW-LABEL: load_i16_stride7_vf2:
@@ -379,23 +379,23 @@ define void @load_i16_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
 ; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,14,15,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512DQ-BW-FCP-NEXT: vpsrld $16, %xmm0, %xmm3
 ; AVX512DQ-BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; AVX512DQ-BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} xmm5 = xmm0[8,9,6,7,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512DQ-BW-FCP-NEXT: vpbroadcastw 8(%rdi), %xmm6
-; AVX512DQ-BW-FCP-NEXT: vpsrlq $48, %xmm1, %xmm7
-; AVX512DQ-BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
-; AVX512DQ-BW-FCP-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; AVX512DQ-BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm3 = [6,13,5,13,6,14,7,15]
-; AVX512DQ-BW-FCP-NEXT: vpermw (%rdi), %ymm3, %ymm3
+; AVX512DQ-BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512DQ-BW-FCP-NEXT: vmovdqa {{.*#+}} xmm6 = [8,9,6,7,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm6, %xmm5, %xmm7
+; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[12,13,10,11,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-BW-FCP-NEXT: vpbroadcastw 8(%rdi), %xmm8
+; AVX512DQ-BW-FCP-NEXT: vpsrlq $48, %xmm1, %xmm9
+; AVX512DQ-BW-FCP-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3]
+; AVX512DQ-BW-FCP-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; AVX512DQ-BW-FCP-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm6, %xmm0, %xmm0
 ; AVX512DQ-BW-FCP-NEXT: vmovd %xmm2, (%rsi)
 ; AVX512DQ-BW-FCP-NEXT: vmovd %xmm4, (%rdx)
-; AVX512DQ-BW-FCP-NEXT: vmovd %xmm5, (%rcx)
-; AVX512DQ-BW-FCP-NEXT: vmovd %xmm0, (%r8)
-; AVX512DQ-BW-FCP-NEXT: vmovd %xmm6, (%r9)
-; AVX512DQ-BW-FCP-NEXT: vmovd %xmm1, (%r10)
-; AVX512DQ-BW-FCP-NEXT: vmovd %xmm3, (%rax)
-; AVX512DQ-BW-FCP-NEXT: vzeroupper
+; AVX512DQ-BW-FCP-NEXT: vmovd %xmm7, (%rcx)
+; AVX512DQ-BW-FCP-NEXT: vmovd %xmm5, (%r8)
+; AVX512DQ-BW-FCP-NEXT: vmovd %xmm8, (%r9)
+; AVX512DQ-BW-FCP-NEXT: vmovd %xmm3, (%r10)
+; AVX512DQ-BW-FCP-NEXT: vmovd %xmm0, (%rax)
 ; AVX512DQ-BW-FCP-NEXT: retq
 %wide.vec = load <14 x i16>, ptr %in.vec, align 64
 %strided.vec0 = shufflevector <14 x i16> %wide.vec, <14 x i16> poison, <2 x i32>
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll
index 71eb606a8665d..187a8102095ed 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll
@@ -123,9 +123,9 @@ define void @store_i16_stride4_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-NEXT: vmovdqa (%rdx), %xmm1
 ; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
-; AVX512BW-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,2,8,10,1,3,9,11]
-; AVX512BW-NEXT: vpermi2w %xmm1, %xmm0, %xmm2
-; AVX512BW-NEXT: vmovdqa %xmm2, (%r8)
+; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
+; AVX512BW-NEXT: vmovdqa %xmm0, (%r8)
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512BW-FCP-LABEL: store_i16_stride4_vf2:
@@ -145,9 +145,9 @@ define void @store_i16_stride4_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-NEXT: vmovdqa (%rdx), %xmm1
 ; AVX512DQ-BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; AVX512DQ-BW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
-; AVX512DQ-BW-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,2,8,10,1,3,9,11]
-; AVX512DQ-BW-NEXT: vpermi2w %xmm1, %xmm0, %xmm2
-; AVX512DQ-BW-NEXT: vmovdqa %xmm2, (%r8)
+; AVX512DQ-BW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
+; AVX512DQ-BW-NEXT: vmovdqa %xmm0, (%r8)
 ; AVX512DQ-BW-NEXT: retq
 ;
 ; AVX512DQ-BW-FCP-LABEL: store_i16_stride4_vf2:
diff --git a/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll b/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll
index 717d1e447e165..a768baae97add 100644
--- a/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll
+++ b/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll
@@ -12867,46 +12867,25 @@ define void @mask_replication_factor8_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
 ; AVX512DQ-NEXT: vzeroupper
 ; AVX512DQ-NEXT: retq
 ;
-; AVX512BW-ONLY-LABEL: mask_replication_factor8_vf8:
-; AVX512BW-ONLY: # %bb.0:
-; AVX512BW-ONLY-NEXT: kmovq (%rdi), %k0
-; AVX512BW-ONLY-NEXT: vpmovm2b %k0, %zmm0
-; AVX512BW-ONLY-NEXT: vpbroadcastq %xmm0, %zmm0
-; AVX512BW-ONLY-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,54,54,54,54,54,54,54,54,55,55,55,55,55,55,55,55]
-; AVX512BW-ONLY-NEXT: vpmovb2m %zmm0, %k1
-; AVX512BW-ONLY-NEXT: kshiftrq $16, %k1, %k2
-; AVX512BW-ONLY-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
-; AVX512BW-ONLY-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
-; AVX512BW-ONLY-NEXT: kshiftrq $48, %k1, %k2
-; AVX512BW-ONLY-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
-; AVX512BW-ONLY-NEXT: kshiftrq $32, %k1, %k1
-; AVX512BW-ONLY-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
-; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm3, 128(%rdx)
-; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm2, 192(%rdx)
-; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm1, (%rdx)
-; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm0, 64(%rdx)
-; AVX512BW-ONLY-NEXT: vzeroupper
-; AVX512BW-ONLY-NEXT: retq
-;
-; AVX512VBMI-ONLY-LABEL: mask_replication_factor8_vf8:
-; AVX512VBMI-ONLY: # %bb.0:
-; AVX512VBMI-ONLY-NEXT: kmovq (%rdi), %k0
-; AVX512VBMI-ONLY-NEXT: vpmovm2b %k0, %zmm0
-; AVX512VBMI-ONLY-NEXT: vpmovsxbq %xmm0, %zmm0
-; AVX512VBMI-ONLY-NEXT: vpmovb2m %zmm0, %k1
-; AVX512VBMI-ONLY-NEXT: kshiftrq $16, %k1, %k2
-; AVX512VBMI-ONLY-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
-; AVX512VBMI-ONLY-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
-; AVX512VBMI-ONLY-NEXT: kshiftrq $48, %k1, %k2
-; AVX512VBMI-ONLY-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
-; AVX512VBMI-ONLY-NEXT: kshiftrq $32, %k1, %k1
-; AVX512VBMI-ONLY-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
-; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm3, 128(%rdx)
-; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm2, 192(%rdx)
-; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm1, (%rdx)
-; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm0, 64(%rdx)
-; AVX512VBMI-ONLY-NEXT: vzeroupper
-; AVX512VBMI-ONLY-NEXT: retq
+; AVX512BW-LABEL: mask_replication_factor8_vf8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: kmovq (%rdi), %k0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT: vpmovsxbq %xmm0, %zmm0
+; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
+; AVX512BW-NEXT: kshiftrq $16, %k1, %k2
+; AVX512BW-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
+; AVX512BW-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
+; AVX512BW-NEXT: kshiftrq $48, %k1, %k2
+; AVX512BW-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
+; AVX512BW-NEXT: kshiftrq $32, %k1, %k1
+; AVX512BW-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vmovdqa64 %zmm3, 128(%rdx)
+; AVX512BW-NEXT: vmovdqa64 %zmm2, 192(%rdx)
+; AVX512BW-NEXT: vmovdqa64 %zmm1, (%rdx)
+; AVX512BW-NEXT: vmovdqa64 %zmm0, 64(%rdx)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
 %src.mask.padded = load <64 x i1>, ptr %in.maskvec, align 64
 %src.mask = shufflevector <64 x i1> %src.mask.padded, <64 x i1> poison, <8 x i32>
 %tgt.mask = shufflevector <8 x i1> %src.mask, <8 x i1> poison, <64 x i32>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 9fd8c11ba6c4d..b1c90aa8021b8 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -495,23 +495,11 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(
 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
-; AVX2-NEXT: retq
-;
-; AVX512VLBW-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VLVBMI-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
-; AVX512VLVBMI: # %bb.0:
-; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,19,18,17,16,23,22,21,20]
-; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
-; AVX512VLVBMI-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
+; AVX2OR512VL-NEXT: retq
 ;
 ; XOP-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
 ; XOP: # %bb.0:
@@ -853,23 +841,11 @@ define <16 x i8> @shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(
 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: retq
-;
-; AVX512VLBW-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VLVBMI-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
-; AVX512VLVBMI: # %bb.0:
-; AVX512VLVBMI-NEXT: vpbroadcastw {{.*#+}} xmm2 = [2,20,2,20,2,20,2,20,2,20,2,20,2,20,2,20]
-; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
-; AVX512VLVBMI-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2OR512VL-NEXT: retq ; ; XOP-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: ; XOP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index d73cfb379333b..212cde9fcd6b2 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -7,8 +7,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX2,AVX2-FAST ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX2,AVX2-FAST ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-FAST -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-FAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-FAST,AVX512VL-FAST-ALL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2OR512VL,AVX512VL,AVX512VL-FAST,AVX512VL-FAST-PERLANE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=AVX,XOP,XOPAVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=AVX,XOP,XOPAVX2 @@ -1155,8 +1155,8 @@ define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) { ; ; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba: ; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vpmovsxbw {{.*#+}} xmm2 = [1,0,9,8,3,2,11,10] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 +; AVX512VL-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11] ; AVX512VL-FAST-NEXT: retq ; ; XOP-LABEL: shuffle_v8i16_109832ba: @@ -1246,11 +1246,18 @@ define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) { ; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX512VL-SLOW-NEXT: retq ; -; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,2,1,3,12,14,13,15] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX512VL-FAST-ALL-LABEL: shuffle_v8i16_0213cedf: +; AVX512VL-FAST-ALL: # %bb.0: +; AVX512VL-FAST-ALL-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,2,1,3,12,14,13,15] +; AVX512VL-FAST-ALL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 +; AVX512VL-FAST-ALL-NEXT: retq +; +; AVX512VL-FAST-PERLANE-LABEL: 
shuffle_v8i16_0213cedf: +; AVX512VL-FAST-PERLANE: # %bb.0: +; AVX512VL-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,u,u,u,u,u,u,u,u] +; AVX512VL-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7] +; AVX512VL-FAST-PERLANE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-FAST-PERLANE-NEXT: retq ; ; XOP-LABEL: shuffle_v8i16_0213cedf: ; XOP: # %bb.0: @@ -1314,8 +1321,8 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) { ; ; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX: ; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vpmovsxbw {{.*#+}} xmm2 = [4,4,3,10,4,5,6,7] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 +; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15] ; AVX512VL-FAST-NEXT: retq ; ; XOP-LABEL: shuffle_v8i16_443aXXXX: @@ -1542,11 +1549,23 @@ define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) { ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] ; AVX2-NEXT: retq ; -; AVX512VL-LABEL: shuffle_v8i16_012dcde3: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,1,2,13,12,13,14,3] -; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 -; AVX512VL-NEXT: retq +; AVX512VL-SLOW-LABEL: shuffle_v8i16_012dcde3: +; AVX512VL-SLOW: # %bb.0: +; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] +; AVX512VL-SLOW-NEXT: retq +; +; AVX512VL-FAST-ALL-LABEL: shuffle_v8i16_012dcde3: +; AVX512VL-FAST-ALL: # %bb.0: +; AVX512VL-FAST-ALL-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,1,2,13,12,13,14,3] +; AVX512VL-FAST-ALL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 +; AVX512VL-FAST-ALL-NEXT: retq +; +; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i16_012dcde3: +; AVX512VL-FAST-PERLANE: # %bb.0: +; AVX512VL-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX512VL-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7] +; AVX512VL-FAST-PERLANE-NEXT: retq ; ; XOP-LABEL: shuffle_v8i16_012dcde3: ; XOP: # %bb.0: @@ -1645,11 +1664,18 @@ define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] ; AVX512VL-SLOW-NEXT: retq ; -; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vpmovsxbw {{.*#+}} xmm2 = [1,1,1,1,4,5,7,9] -; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 -; AVX512VL-FAST-NEXT: retq +; AVX512VL-FAST-ALL-LABEL: shuffle_v8i16_XXX1X579: +; AVX512VL-FAST-ALL: # %bb.0: +; AVX512VL-FAST-ALL-NEXT: vpmovsxbw {{.*#+}} xmm2 = [1,1,1,1,4,5,7,9] +; AVX512VL-FAST-ALL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0 +; AVX512VL-FAST-ALL-NEXT: retq +; +; AVX512VL-FAST-PERLANE-LABEL: shuffle_v8i16_XXX1X579: +; AVX512VL-FAST-PERLANE: # %bb.0: +; AVX512VL-FAST-PERLANE-NEXT: vpbroadcastd %xmm1, %xmm1 +; AVX512VL-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,8,9,10,11,14,15,u,u] +; AVX512VL-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] +; AVX512VL-FAST-PERLANE-NEXT: retq ; ; XOP-LABEL: shuffle_v8i16_XXX1X579: ; XOP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 176ba696e6540..d287fb6d5b834 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ 
-4641,11 +4641,25 @@ define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_ ; AVX512VLBW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VLBW-NEXT: retq ; -; AVX512VLVBMI-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: -; AVX512VLVBMI: # %bb.0: -; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16] -; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 -; AVX512VLVBMI-NEXT: retq +; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; AVX512VLVBMI-SLOW: # %bb.0: +; AVX512VLVBMI-SLOW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u] +; AVX512VLVBMI-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VLVBMI-SLOW-NEXT: retq +; +; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; AVX512VLVBMI-FAST-ALL: # %bb.0: +; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16] +; AVX512VLVBMI-FAST-ALL-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 +; AVX512VLVBMI-FAST-ALL-NEXT: retq +; +; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; AVX512VLVBMI-FAST-PERLANE: # %bb.0: +; AVX512VLVBMI-FAST-PERLANE-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u] +; AVX512VLVBMI-FAST-PERLANE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VLVBMI-FAST-PERLANE-NEXT: retq ; ; XOP-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: ; XOP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll index 4fe50a60b67fa..34f94bcfe3e6f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll @@ -494,9 +494,8 @@ define <64 x i8> @shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_ ; ; AVX512VBMI-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [63,65,61,67,59,69,57,71,55,73,53,75,51,77,49,79,47,81,45,83,43,85,41,87,39,89,37,91,35,93,33,95,31,97,29,99,27,101,25,103,23,105,21,107,19,109,17,111,15,113,13,115,11,117,9,119,7,121,5,123,3,125,1,127] -; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 +; AVX512VBMI-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1] +; AVX512VBMI-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zmm0[13],zero,zmm0[11],zero,zmm0[9],zero,zmm0[7],zero,zmm0[5],zero,zmm0[3],zero,zmm0[1],zero,zmm0[31],zero,zmm0[29],zero,zmm0[27],zero,zmm0[25],zero,zmm0[23],zero,zmm0[21],zero,zmm0[19],zero,zmm0[17],zero,zmm0[47],zero,zmm0[45],zero,zmm0[43],zero,zmm0[41],zero,zmm0[39],zero,zmm0[37],zero,zmm0[35],zero,zmm0[33],zero,zmm0[63],zero,zmm0[61],zero,zmm0[59],zero,zmm0[57],zero,zmm0[55],zero,zmm0[53],zero,zmm0[51],zero,zmm0[49],zero ; AVX512VBMI-NEXT: retq %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x 
i32> ret <64 x i8> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v192.ll b/llvm/test/CodeGen/X86/vector-shuffle-v192.ll index e83c1e8482773..fea59d9657612 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v192.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v192.ll @@ -69,7 +69,7 @@ define <64 x i8> @f1(ptr %p0) { ; AVX512F-NEXT: vpshufb {{.*#+}} ymm5 = ymm5[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u] ; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm5[0,1,2],ymm2[3,4,5,6,7],ymm5[8,9,10],ymm2[11,12,13,14,15] ; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7] -; AVX512F-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 +; AVX512F-NEXT: vpternlogq {{.*#+}} ymm2 = ymm0 ^ (mem & (ymm2 ^ ymm0)) ; AVX512F-NEXT: vmovdqa 80(%rdi), %xmm0 ; AVX512F-NEXT: vpshufb %xmm4, %xmm0, %xmm0 ; AVX512F-NEXT: vmovdqa 64(%rdi), %xmm4 @@ -83,7 +83,7 @@ define <64 x i8> @f1(ptr %p0) { ; AVX512F-NEXT: vpor %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3 ; AVX512F-NEXT: vpshufb {{.*#+}} ymm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm3[1,5,7,11,13,17,19,23,25,29,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm3 +; AVX512F-NEXT: vpternlogq {{.*#+}} ymm3 = ymm3 | (ymm1 & mem) ; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm3[0,1,2],ymm0[3,4,5,6,7],ymm3[8,9,10],ymm0[11,12,13,14,15] ; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 @@ -214,7 +214,7 @@ define <64 x i8> @f2(ptr %p0) { ; AVX512F-NEXT: vmovdqa 128(%rdi), %ymm4 ; AVX512F-NEXT: vpshufb {{.*#+}} ymm4 = ymm4[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 -; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2 +; AVX512F-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 | (zmm0 & mem) ; AVX512F-NEXT: vmovdqa 96(%rdi), %xmm0 ; AVX512F-NEXT: vpshufb %xmm5, %xmm0, %xmm0 ; AVX512F-NEXT: vmovdqa 112(%rdi), %xmm4 @@ -228,7 +228,7 @@ define <64 x i8> @f2(ptr %p0) { ; AVX512F-NEXT: vpor %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2)) ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: f2: @@ -344,7 +344,7 @@ define <64 x i8> @f3(ptr %p0) { ; AVX512F-NEXT: # ymm4 = mem[0,1,0,1] ; AVX512F-NEXT: vpshufb %ymm4, %ymm2, %ymm2 ; AVX512F-NEXT: vpmovsxwd {{.*#+}} ymm5 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] -; AVX512F-NEXT: vpternlogq $216, %ymm5, %ymm2, %ymm0 +; AVX512F-NEXT: vpternlogq {{.*#+}} ymm0 = ymm0 ^ (ymm5 & (ymm0 ^ ymm2)) ; AVX512F-NEXT: vmovdqa 112(%rdi), %xmm6 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [128,128,128,128,128,0,4,6,10,12,u,u,u,u,u,u] ; AVX512F-NEXT: vpshufb %xmm7, %xmm6, %xmm6 @@ -369,7 +369,7 @@ define <64 x i8> @f3(ptr %p0) { ; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm3 ; AVX512F-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4],xmm3[5,6,7] ; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-NEXT: vpternlogq $226, %ymm1, %ymm5, %ymm2 +; AVX512F-NEXT: vpternlogq {{.*#+}} ymm2 = ymm1 ^ (ymm5 & (ymm2 ^ ymm1)) ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; @@ -497,7 +497,7 @@ define 
<64 x i8> @f4(ptr %p0) { ; AVX512F-NEXT: vmovdqa 128(%rdi), %ymm4 ; AVX512F-NEXT: vpshufb {{.*#+}} ymm4 = ymm4[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 -; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2 +; AVX512F-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 | (zmm0 & mem) ; AVX512F-NEXT: vmovdqa 96(%rdi), %xmm0 ; AVX512F-NEXT: vpshufb %xmm5, %xmm0, %xmm0 ; AVX512F-NEXT: vmovdqa 112(%rdi), %xmm4 @@ -511,7 +511,7 @@ define <64 x i8> @f4(ptr %p0) { ; AVX512F-NEXT: vpor %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2)) ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: f4: diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll index ec7a708fc0b02..42521b809b102 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll @@ -5117,15 +5117,11 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i ; AVX512BW-FAST-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3: ; AVX512BW-FAST: # %bb.0: ; AVX512BW-FAST-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512BW-FAST-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,13,14,15,0,1,2,3] ; AVX512BW-FAST-NEXT: vpaddb (%rsi), %zmm0, %zmm0 -; AVX512BW-FAST-NEXT: vpermd %zmm0, %zmm1, %zmm1 -; AVX512BW-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5,6,7] -; AVX512BW-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7] -; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512BW-FAST-NEXT: vpaddb (%rdx), %zmm0, %zmm0 +; AVX512BW-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-FAST-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,13,14,15,0,21,22,23,0,25,26,27,28,29,30,31] +; AVX512BW-FAST-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 +; AVX512BW-FAST-NEXT: vpaddb (%rdx), %zmm2, %zmm0 ; AVX512BW-FAST-NEXT: vmovdqa64 %zmm0, (%rcx) ; AVX512BW-FAST-NEXT: vzeroupper ; AVX512BW-FAST-NEXT: retq @@ -6181,10 +6177,9 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63] -; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero +; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -6300,10 +6295,9 @@ define void 
@vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in. ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [0,33,34,35,0,37,38,39,0,41,42,43,0,45,46,47,0,49,50,51,0,53,54,55,0,57,58,59,0,61,62,63] -; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -6419,10 +6413,9 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [0,33,34,35,36,37,38,39,0,41,42,43,44,45,46,47,0,49,50,51,52,53,54,55,0,57,58,59,60,61,62,63] -; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -6900,9 +6893,10 @@ define void @vec512_i32_widen_to_i256_factor8_broadcast_to_v2i256_factor2(ptr %i ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512BW-NEXT: vpaddb (%rsi), %xmm0, %xmm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,18,19,4,21,22,23,0,25,26,27,4,29,30,31] +; AVX512BW-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -7101,9 +7095,10 @@ define void @vec512_i64_widen_to_i256_factor4_broadcast_to_v2i256_factor2(ptr %i ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512BW-NEXT: vpaddb (%rsi), %xmm0, %xmm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,2,11,0,13,2,15] +; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll index 14c2a60a5b998..a33c4a7e85954 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll @@ -4945,10 +4945,10 @@ 
define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i ; ; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm0 = [32,1,32,3,32,5,32,7,32,9,32,11,32,13,32,15,32,17,32,19,32,21,32,23,32,25,32,27,32,29,32,31] -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermt2w (%rdi), %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[0,1],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[16,17],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[32,33],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero,zmm0[48,49],zero,zero +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5048,10 +5048,10 @@ define void @vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8(ptr %in. ; ; AVX512BW-LABEL: vec512_i16_widen_to_i64_factor4_broadcast_to_v8i64_factor8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm0 = [32,1,2,3,32,5,6,7,32,9,10,11,32,13,14,15,32,17,18,19,32,21,22,23,32,25,26,27,32,29,30,31] -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermt2w (%rdi), %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[0,1],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[16,17],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[32,33],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero,zmm0[48,49],zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5152,10 +5152,10 @@ define void @vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512BW-LABEL: vec512_i16_widen_to_i128_factor8_broadcast_to_v4i128_factor4: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm0 = [32,1,2,3,4,5,6,7,32,9,10,11,12,13,14,15,32,17,18,19,20,21,22,23,32,25,26,27,28,29,30,31] -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpermt2w (%rdi), %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 +; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll index 4b2b5490bc2cb..e1a9084b583b9 100644 --- a/llvm/test/ThinLTO/X86/memprof-recursive.ll +++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll @@ -3,12 +3,15 @@ ;; See llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll for ;; information on how the test was created. 
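;;
;; For orientation, a hypothetical reconstruction of memprof_recursive.cc
;; (inferred from the remark lines checked below; the real source is not
;; shipped with this test, so the function bodies and exact line positions
;; are assumptions):
;;
;;   char *D() { return new char[10]; }  // line 5: the cold allocation
;;   char *C(int n) {                    // line 8
;;     if (!n)
;;       return D();                     // line 10: C calls D
;;     return B(n - 1);                  // line 12: recursive edge back to B
;;   }
;;   char *B(int n) {                    // line 14
;;     return C(n);                      // line 15: B calls C
;;   }
;;   int main() {
;;     char *x = B(1);                   // line 19: context through the cycle
;;     char *y = B(0);                   // line 20: non-recursive context
;;   }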
+;; -stats requires asserts +; REQUIRES: asserts + ; RUN: opt -thinlto-bc %s >%t.o ;; Check behavior when we enable cloning of contexts involved with recursive -;; cycles, but not through the cycle itself. I.e. until full support for -;; recursion is added, the cloned recursive call from C back to B (line 12) will -;; not be updated to call a clone. +;; cycles, but not through the cycle itself. I.e. with full support for cloning +;; recursive cycles off, the cloned recursive call from C back to B (line 12) +;; will not be updated to call a clone. ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ ; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,_Z1Dv,plx \ @@ -19,6 +22,7 @@ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: -memprof-allow-recursive-callsites=true \ +; RUN: -memprof-clone-recursive-contexts=false \ ; RUN: -o %t.out 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS @@ -39,7 +43,7 @@ ; RUN: --implicit-check-not="created clone" \ ; RUN: --implicit-check-not="marked with memprof allocation attribute cold" -;; Check the default behavior (enabled recursive callsites). +;; Check the default behavior (clone recursive callsites). ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ ; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,_Z1Dv,plx \ @@ -47,11 +51,11 @@ ; RUN: -r=%t.o,_Z1Bi,plx \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_Znam, \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: -o %t.out 2>&1 | FileCheck %s \ -; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ -; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS +; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS \ +; RUN: --check-prefix=CLONE-RECUR-CALLSITES ;; Skipping recursive contexts should prevent spurious call to cloned version of ;; B from the context starting at memprof_recursive.cc:19:13, which is actually @@ -67,6 +71,7 @@ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: -memprof-allow-recursive-callsites=true \ ; RUN: -memprof-allow-recursive-contexts=false \ +; RUN: -memprof-clone-recursive-contexts=false \ ; RUN: -o %t.out 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS @@ -76,6 +81,7 @@ ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1 +; CLONE-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Bi.memprof.1 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1 ;; We should only call the cold clone for the recursive context if we enabled @@ -83,6 +89,7 @@ ; ALLOW-RECUR-CONTEXTS: 
memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 ; SKIP-RECUR-CONTEXTS-NOT: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +; CLONE-RECUR-CALLSITES: 1 memprof-context-disambiguation - Number of backedges with deferred cloning target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index 6164f2adbf5b9..3b0d11d15331e 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -771,6 +771,40 @@ define i1 @captureICmpRev(ptr %x) { ret i1 %1 } +define i1 @captureICmpWrongPred(ptr %x) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; FNATTRS-LABEL: define i1 @captureICmpWrongPred +; FNATTRS-SAME: (ptr readnone [[X:%.*]]) #[[ATTR0]] { +; FNATTRS-NEXT: [[TMP1:%.*]] = icmp slt ptr [[X]], null +; FNATTRS-NEXT: ret i1 [[TMP1]] +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; ATTRIBUTOR-LABEL: define i1 @captureICmpWrongPred +; ATTRIBUTOR-SAME: (ptr nofree readnone [[X:%.*]]) #[[ATTR0]] { +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = icmp slt ptr [[X]], null +; ATTRIBUTOR-NEXT: ret i1 [[TMP1]] +; + %1 = icmp slt ptr %x, null + ret i1 %1 +} + +define i1 @captureICmpWrongPredDereferenceableOrNull(ptr dereferenceable_or_null(1) %x) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; FNATTRS-LABEL: define noundef i1 @captureICmpWrongPredDereferenceableOrNull +; FNATTRS-SAME: (ptr readnone dereferenceable_or_null(1) [[X:%.*]]) #[[ATTR0]] { +; FNATTRS-NEXT: [[TMP1:%.*]] = icmp slt ptr [[X]], null +; FNATTRS-NEXT: ret i1 [[TMP1]] +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; ATTRIBUTOR-LABEL: define i1 @captureICmpWrongPredDereferenceableOrNull +; ATTRIBUTOR-SAME: (ptr nofree readnone dereferenceable_or_null(1) [[X:%.*]]) #[[ATTR0]] { +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = icmp slt ptr [[X]], null +; ATTRIBUTOR-NEXT: ret i1 [[TMP1]] +; + %1 = icmp slt ptr %x, null + ret i1 %1 +} + define i1 @nocaptureInboundsGEPICmp(ptr %x) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; FNATTRS-LABEL: define i1 @nocaptureInboundsGEPICmp diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and.ll b/llvm/test/Transforms/InstCombine/select-icmp-and.ll index e49c2f6214114..16fb3f34047ee 100644 --- a/llvm/test/Transforms/InstCombine/select-icmp-and.ll +++ b/llvm/test/Transforms/InstCombine/select-icmp-and.ll @@ -900,3 +900,15 @@ define i8 @neg_select_trunc_bittest_to_shl_extra_use(i8 %x) { %ret = select i1 %trunc, i8 4, i8 0 ret i8 %ret } + +define i16 @select_trunc_nuw_bittest_or(i8 %x) { +; CHECK-LABEL: @select_trunc_nuw_bittest_or( +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP1]], i16 20, i16 4 +; CHECK-NEXT: ret i16 [[RES]] +; + %trunc = trunc nuw i8 %x to i1 + %select = select i1 %trunc, i16 16, i16 0 + %res = or i16 4, %select + ret i16 %res +} diff --git a/llvm/test/Transforms/InstCombine/store.ll b/llvm/test/Transforms/InstCombine/store.ll 
index 673395464c85a..0a2b0a5ee7987 100644 --- a/llvm/test/Transforms/InstCombine/store.ll +++ b/llvm/test/Transforms/InstCombine/store.ll @@ -345,6 +345,48 @@ define void @store_to_readonly_noalias(ptr readonly noalias %0) { ret void } +define void @store_select_with_null(i1 %cond, ptr %p) { +; CHECK-LABEL: @store_select_with_null( +; CHECK-NEXT: store i32 0, ptr [[SEL:%.*]], align 4 +; CHECK-NEXT: ret void +; + %sel = select i1 %cond, ptr %p, ptr null + store i32 0, ptr %sel, align 4 + ret void +} + +define void @store_select_with_null_commuted(i1 %cond, ptr %p) { +; CHECK-LABEL: @store_select_with_null_commuted( +; CHECK-NEXT: store i32 0, ptr [[SEL:%.*]], align 4 +; CHECK-NEXT: ret void +; + %sel = select i1 %cond, ptr null, ptr %p + store i32 0, ptr %sel, align 4 + ret void +} + +define void @store_select_with_null_null_is_valid(i1 %cond, ptr %p) null_pointer_is_valid { +; CHECK-LABEL: @store_select_with_null_null_is_valid( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], ptr [[P:%.*]], ptr null +; CHECK-NEXT: store i32 0, ptr [[SEL]], align 4 +; CHECK-NEXT: ret void +; + %sel = select i1 %cond, ptr %p, ptr null + store i32 0, ptr %sel, align 4 + ret void +} + +define void @store_select_with_unknown(i1 %cond, ptr %p, ptr %p2) { +; CHECK-LABEL: @store_select_with_unknown( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], ptr [[P:%.*]], ptr [[P2:%.*]] +; CHECK-NEXT: store i32 0, ptr [[SEL]], align 4 +; CHECK-NEXT: ret void +; + %sel = select i1 %cond, ptr %p, ptr %p2 + store i32 0, ptr %sel, align 4 + ret void +} + !0 = !{!4, !4, i64 0} !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll index ba073dc1590d1..ffc9e7c532b90 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll @@ -1,21 +1,335 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s -; CHECK: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: = -; CHECK-NOT: fadd -; CHECK-SAME: > - target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-ibm-linux-gnu" -define void @QLA_F3_r_veq_norm2_V(ptr noalias nocapture %r, ptr noalias nocapture readonly %a, i32 signext %n) #0 { +; We expect the loop with double reductions to be interleaved 8 times. 
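+;
+; As an illustration only (not part of the test input): interleaving the
+; reduction by 8 gives the loop eight independent partial sums, so the fadd
+; dependence chains do not serialize. Ignoring the additional 2-wide
+; vectorization (which widens each partial sum to a <2 x double>), a scalar
+; C++ sketch of the shape the CHECK lines verify, over a plain double array
+; rather than this test's complex-norm computation, would be:
+;
+;   double interleaved_sum(const double *x, long n) {
+;     double s[8] = {0};            // eight independent accumulators
+;     long i = 0;
+;     for (; i + 8 <= n; i += 8)    // interleaved main loop
+;       for (int j = 0; j < 8; ++j)
+;         s[j] += x[i + j];
+;     double sum = 0;
+;     for (int j = 0; j < 8; ++j)   // the middle-block reduction tree
+;       sum += s[j];
+;     for (; i < n; ++i)            // remainder handled by the epilogue
+;       sum += x[i];
+;     return sum;
+;   }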
+define void @QLA_F3_r_veq_norm2_V(ptr noalias %r, ptr noalias %a, i32 %n) { +; CHECK-LABEL: define void @QLA_F3_r_veq_norm2_V( +; CHECK-SAME: ptr noalias [[R:%.*]], ptr noalias [[A:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP24]], label %[[ITER_CHECK:.*]], label %[[FOR_END13:.*]] +; CHECK: [[ITER_CHECK]]: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP129:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP130:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP131:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP132:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP133:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP134:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP135:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP136:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 10 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 12 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 14 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP1]], i64 0, i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP2]], i64 0, i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP3]], i64 0, i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP4]], i64 0, i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP5]], i64 0, i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP6]], i64 0, i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP7]], i64 0, i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP8]], i64 0, i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr 
[[TMP9]], align 8 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC11:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC12:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC13:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[WIDE_VEC14:%.*]] = load <12 x float>, ptr [[TMP10]], align 8 +; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <12 x float> [[WIDE_VEC14]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <12 x float> [[WIDE_VEC14]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <12 x float> [[WIDE_VEC14]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <12 x float> [[WIDE_VEC14]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC19:%.*]] = shufflevector <12 x float> [[WIDE_VEC14]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <12 x float> [[WIDE_VEC14]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[WIDE_VEC21:%.*]] = load <12 x float>, ptr [[TMP11]], align 8 +; CHECK-NEXT: [[STRIDED_VEC22:%.*]] = shufflevector <12 x float> [[WIDE_VEC21]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC23:%.*]] = shufflevector <12 x float> [[WIDE_VEC21]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC24:%.*]] = shufflevector <12 x float> [[WIDE_VEC21]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC25:%.*]] = shufflevector <12 x float> [[WIDE_VEC21]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC26:%.*]] = shufflevector <12 x float> [[WIDE_VEC21]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC27:%.*]] = shufflevector <12 x float> [[WIDE_VEC21]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[WIDE_VEC28:%.*]] = load <12 x float>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[STRIDED_VEC29:%.*]] = shufflevector <12 x float> [[WIDE_VEC28]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC30:%.*]] = shufflevector <12 x float> [[WIDE_VEC28]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC31:%.*]] = shufflevector <12 x float> [[WIDE_VEC28]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC32:%.*]] = shufflevector <12 x float> [[WIDE_VEC28]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC33:%.*]] = shufflevector <12 x float> [[WIDE_VEC28]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC34:%.*]] = shufflevector <12 x float> [[WIDE_VEC28]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[WIDE_VEC35:%.*]] = load <12 x float>, ptr [[TMP13]], align 8 +; CHECK-NEXT: [[STRIDED_VEC36:%.*]] = shufflevector <12 x float> [[WIDE_VEC35]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC37:%.*]] = shufflevector <12 x float> [[WIDE_VEC35]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC38:%.*]] = shufflevector <12 x float> [[WIDE_VEC35]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC39:%.*]] = shufflevector <12 x float> [[WIDE_VEC35]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC40:%.*]] = shufflevector <12 x float> 
[[WIDE_VEC35]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC41:%.*]] = shufflevector <12 x float> [[WIDE_VEC35]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[WIDE_VEC42:%.*]] = load <12 x float>, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[STRIDED_VEC43:%.*]] = shufflevector <12 x float> [[WIDE_VEC42]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC44:%.*]] = shufflevector <12 x float> [[WIDE_VEC42]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC45:%.*]] = shufflevector <12 x float> [[WIDE_VEC42]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC46:%.*]] = shufflevector <12 x float> [[WIDE_VEC42]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC47:%.*]] = shufflevector <12 x float> [[WIDE_VEC42]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC48:%.*]] = shufflevector <12 x float> [[WIDE_VEC42]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[WIDE_VEC49:%.*]] = load <12 x float>, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[STRIDED_VEC50:%.*]] = shufflevector <12 x float> [[WIDE_VEC49]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC51:%.*]] = shufflevector <12 x float> [[WIDE_VEC49]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC52:%.*]] = shufflevector <12 x float> [[WIDE_VEC49]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC53:%.*]] = shufflevector <12 x float> [[WIDE_VEC49]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC54:%.*]] = shufflevector <12 x float> [[WIDE_VEC49]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC55:%.*]] = shufflevector <12 x float> [[WIDE_VEC49]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[WIDE_VEC56:%.*]] = load <12 x float>, ptr [[TMP16]], align 8 +; CHECK-NEXT: [[STRIDED_VEC57:%.*]] = shufflevector <12 x float> [[WIDE_VEC56]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC58:%.*]] = shufflevector <12 x float> [[WIDE_VEC56]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC59:%.*]] = shufflevector <12 x float> [[WIDE_VEC56]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC60:%.*]] = shufflevector <12 x float> [[WIDE_VEC56]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC61:%.*]] = shufflevector <12 x float> [[WIDE_VEC56]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC62:%.*]] = shufflevector <12 x float> [[WIDE_VEC56]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = fmul fast <2 x float> [[STRIDED_VEC]], [[STRIDED_VEC]] +; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x float> [[STRIDED_VEC15]], [[STRIDED_VEC15]] +; CHECK-NEXT: [[TMP19:%.*]] = fmul fast <2 x float> [[STRIDED_VEC22]], [[STRIDED_VEC22]] +; CHECK-NEXT: [[TMP20:%.*]] = fmul fast <2 x float> [[STRIDED_VEC29]], [[STRIDED_VEC29]] +; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <2 x float> [[STRIDED_VEC36]], [[STRIDED_VEC36]] +; CHECK-NEXT: [[TMP22:%.*]] = fmul fast <2 x float> [[STRIDED_VEC43]], [[STRIDED_VEC43]] +; CHECK-NEXT: [[TMP23:%.*]] = fmul fast <2 x float> [[STRIDED_VEC50]], [[STRIDED_VEC50]] +; CHECK-NEXT: [[TMP24:%.*]] = fmul fast <2 x float> [[STRIDED_VEC57]], [[STRIDED_VEC57]] +; CHECK-NEXT: [[TMP25:%.*]] = fmul fast <2 x float> [[STRIDED_VEC9]], [[STRIDED_VEC9]] +; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <2 x float> [[STRIDED_VEC16]], [[STRIDED_VEC16]] +; CHECK-NEXT: [[TMP27:%.*]] = fmul fast <2 x float> [[STRIDED_VEC23]], [[STRIDED_VEC23]] +; CHECK-NEXT: [[TMP28:%.*]] = fmul fast <2 x float> [[STRIDED_VEC30]], [[STRIDED_VEC30]] +; CHECK-NEXT: [[TMP29:%.*]] = fmul fast <2 x float> 
[[STRIDED_VEC37]], [[STRIDED_VEC37]] +; CHECK-NEXT: [[TMP30:%.*]] = fmul fast <2 x float> [[STRIDED_VEC44]], [[STRIDED_VEC44]] +; CHECK-NEXT: [[TMP31:%.*]] = fmul fast <2 x float> [[STRIDED_VEC51]], [[STRIDED_VEC51]] +; CHECK-NEXT: [[TMP32:%.*]] = fmul fast <2 x float> [[STRIDED_VEC58]], [[STRIDED_VEC58]] +; CHECK-NEXT: [[TMP33:%.*]] = fadd fast <2 x float> [[TMP25]], [[TMP17]] +; CHECK-NEXT: [[TMP34:%.*]] = fadd fast <2 x float> [[TMP26]], [[TMP18]] +; CHECK-NEXT: [[TMP35:%.*]] = fadd fast <2 x float> [[TMP27]], [[TMP19]] +; CHECK-NEXT: [[TMP36:%.*]] = fadd fast <2 x float> [[TMP28]], [[TMP20]] +; CHECK-NEXT: [[TMP37:%.*]] = fadd fast <2 x float> [[TMP29]], [[TMP21]] +; CHECK-NEXT: [[TMP38:%.*]] = fadd fast <2 x float> [[TMP30]], [[TMP22]] +; CHECK-NEXT: [[TMP39:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP23]] +; CHECK-NEXT: [[TMP40:%.*]] = fadd fast <2 x float> [[TMP32]], [[TMP24]] +; CHECK-NEXT: [[TMP41:%.*]] = fpext <2 x float> [[TMP33]] to <2 x double> +; CHECK-NEXT: [[TMP42:%.*]] = fpext <2 x float> [[TMP34]] to <2 x double> +; CHECK-NEXT: [[TMP43:%.*]] = fpext <2 x float> [[TMP35]] to <2 x double> +; CHECK-NEXT: [[TMP44:%.*]] = fpext <2 x float> [[TMP36]] to <2 x double> +; CHECK-NEXT: [[TMP45:%.*]] = fpext <2 x float> [[TMP37]] to <2 x double> +; CHECK-NEXT: [[TMP46:%.*]] = fpext <2 x float> [[TMP38]] to <2 x double> +; CHECK-NEXT: [[TMP47:%.*]] = fpext <2 x float> [[TMP39]] to <2 x double> +; CHECK-NEXT: [[TMP48:%.*]] = fpext <2 x float> [[TMP40]] to <2 x double> +; CHECK-NEXT: [[TMP49:%.*]] = fadd fast <2 x double> [[TMP41]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP50:%.*]] = fadd fast <2 x double> [[TMP42]], [[VEC_PHI2]] +; CHECK-NEXT: [[TMP51:%.*]] = fadd fast <2 x double> [[TMP43]], [[VEC_PHI3]] +; CHECK-NEXT: [[TMP52:%.*]] = fadd fast <2 x double> [[TMP44]], [[VEC_PHI4]] +; CHECK-NEXT: [[TMP53:%.*]] = fadd fast <2 x double> [[TMP45]], [[VEC_PHI5]] +; CHECK-NEXT: [[TMP54:%.*]] = fadd fast <2 x double> [[TMP46]], [[VEC_PHI6]] +; CHECK-NEXT: [[TMP55:%.*]] = fadd fast <2 x double> [[TMP47]], [[VEC_PHI7]] +; CHECK-NEXT: [[TMP56:%.*]] = fadd fast <2 x double> [[TMP48]], [[VEC_PHI8]] +; CHECK-NEXT: [[TMP57:%.*]] = fmul fast <2 x float> [[STRIDED_VEC10]], [[STRIDED_VEC10]] +; CHECK-NEXT: [[TMP58:%.*]] = fmul fast <2 x float> [[STRIDED_VEC17]], [[STRIDED_VEC17]] +; CHECK-NEXT: [[TMP59:%.*]] = fmul fast <2 x float> [[STRIDED_VEC24]], [[STRIDED_VEC24]] +; CHECK-NEXT: [[TMP60:%.*]] = fmul fast <2 x float> [[STRIDED_VEC31]], [[STRIDED_VEC31]] +; CHECK-NEXT: [[TMP61:%.*]] = fmul fast <2 x float> [[STRIDED_VEC38]], [[STRIDED_VEC38]] +; CHECK-NEXT: [[TMP62:%.*]] = fmul fast <2 x float> [[STRIDED_VEC45]], [[STRIDED_VEC45]] +; CHECK-NEXT: [[TMP63:%.*]] = fmul fast <2 x float> [[STRIDED_VEC52]], [[STRIDED_VEC52]] +; CHECK-NEXT: [[TMP64:%.*]] = fmul fast <2 x float> [[STRIDED_VEC59]], [[STRIDED_VEC59]] +; CHECK-NEXT: [[TMP65:%.*]] = fmul fast <2 x float> [[STRIDED_VEC11]], [[STRIDED_VEC11]] +; CHECK-NEXT: [[TMP66:%.*]] = fmul fast <2 x float> [[STRIDED_VEC18]], [[STRIDED_VEC18]] +; CHECK-NEXT: [[TMP67:%.*]] = fmul fast <2 x float> [[STRIDED_VEC25]], [[STRIDED_VEC25]] +; CHECK-NEXT: [[TMP68:%.*]] = fmul fast <2 x float> [[STRIDED_VEC32]], [[STRIDED_VEC32]] +; CHECK-NEXT: [[TMP69:%.*]] = fmul fast <2 x float> [[STRIDED_VEC39]], [[STRIDED_VEC39]] +; CHECK-NEXT: [[TMP70:%.*]] = fmul fast <2 x float> [[STRIDED_VEC46]], [[STRIDED_VEC46]] +; CHECK-NEXT: [[TMP71:%.*]] = fmul fast <2 x float> [[STRIDED_VEC53]], [[STRIDED_VEC53]] +; CHECK-NEXT: [[TMP72:%.*]] = fmul fast <2 x float> [[STRIDED_VEC60]], 
[[STRIDED_VEC60]] +; CHECK-NEXT: [[TMP73:%.*]] = fadd fast <2 x float> [[TMP65]], [[TMP57]] +; CHECK-NEXT: [[TMP74:%.*]] = fadd fast <2 x float> [[TMP66]], [[TMP58]] +; CHECK-NEXT: [[TMP75:%.*]] = fadd fast <2 x float> [[TMP67]], [[TMP59]] +; CHECK-NEXT: [[TMP76:%.*]] = fadd fast <2 x float> [[TMP68]], [[TMP60]] +; CHECK-NEXT: [[TMP77:%.*]] = fadd fast <2 x float> [[TMP69]], [[TMP61]] +; CHECK-NEXT: [[TMP78:%.*]] = fadd fast <2 x float> [[TMP70]], [[TMP62]] +; CHECK-NEXT: [[TMP79:%.*]] = fadd fast <2 x float> [[TMP71]], [[TMP63]] +; CHECK-NEXT: [[TMP80:%.*]] = fadd fast <2 x float> [[TMP72]], [[TMP64]] +; CHECK-NEXT: [[TMP81:%.*]] = fpext <2 x float> [[TMP73]] to <2 x double> +; CHECK-NEXT: [[TMP82:%.*]] = fpext <2 x float> [[TMP74]] to <2 x double> +; CHECK-NEXT: [[TMP83:%.*]] = fpext <2 x float> [[TMP75]] to <2 x double> +; CHECK-NEXT: [[TMP84:%.*]] = fpext <2 x float> [[TMP76]] to <2 x double> +; CHECK-NEXT: [[TMP85:%.*]] = fpext <2 x float> [[TMP77]] to <2 x double> +; CHECK-NEXT: [[TMP86:%.*]] = fpext <2 x float> [[TMP78]] to <2 x double> +; CHECK-NEXT: [[TMP87:%.*]] = fpext <2 x float> [[TMP79]] to <2 x double> +; CHECK-NEXT: [[TMP88:%.*]] = fpext <2 x float> [[TMP80]] to <2 x double> +; CHECK-NEXT: [[TMP89:%.*]] = fadd fast <2 x double> [[TMP81]], [[TMP49]] +; CHECK-NEXT: [[TMP90:%.*]] = fadd fast <2 x double> [[TMP82]], [[TMP50]] +; CHECK-NEXT: [[TMP91:%.*]] = fadd fast <2 x double> [[TMP83]], [[TMP51]] +; CHECK-NEXT: [[TMP92:%.*]] = fadd fast <2 x double> [[TMP84]], [[TMP52]] +; CHECK-NEXT: [[TMP93:%.*]] = fadd fast <2 x double> [[TMP85]], [[TMP53]] +; CHECK-NEXT: [[TMP94:%.*]] = fadd fast <2 x double> [[TMP86]], [[TMP54]] +; CHECK-NEXT: [[TMP95:%.*]] = fadd fast <2 x double> [[TMP87]], [[TMP55]] +; CHECK-NEXT: [[TMP96:%.*]] = fadd fast <2 x double> [[TMP88]], [[TMP56]] +; CHECK-NEXT: [[TMP97:%.*]] = fmul fast <2 x float> [[STRIDED_VEC12]], [[STRIDED_VEC12]] +; CHECK-NEXT: [[TMP98:%.*]] = fmul fast <2 x float> [[STRIDED_VEC19]], [[STRIDED_VEC19]] +; CHECK-NEXT: [[TMP99:%.*]] = fmul fast <2 x float> [[STRIDED_VEC26]], [[STRIDED_VEC26]] +; CHECK-NEXT: [[TMP100:%.*]] = fmul fast <2 x float> [[STRIDED_VEC33]], [[STRIDED_VEC33]] +; CHECK-NEXT: [[TMP101:%.*]] = fmul fast <2 x float> [[STRIDED_VEC40]], [[STRIDED_VEC40]] +; CHECK-NEXT: [[TMP102:%.*]] = fmul fast <2 x float> [[STRIDED_VEC47]], [[STRIDED_VEC47]] +; CHECK-NEXT: [[TMP103:%.*]] = fmul fast <2 x float> [[STRIDED_VEC54]], [[STRIDED_VEC54]] +; CHECK-NEXT: [[TMP104:%.*]] = fmul fast <2 x float> [[STRIDED_VEC61]], [[STRIDED_VEC61]] +; CHECK-NEXT: [[TMP105:%.*]] = fmul fast <2 x float> [[STRIDED_VEC13]], [[STRIDED_VEC13]] +; CHECK-NEXT: [[TMP106:%.*]] = fmul fast <2 x float> [[STRIDED_VEC20]], [[STRIDED_VEC20]] +; CHECK-NEXT: [[TMP107:%.*]] = fmul fast <2 x float> [[STRIDED_VEC27]], [[STRIDED_VEC27]] +; CHECK-NEXT: [[TMP108:%.*]] = fmul fast <2 x float> [[STRIDED_VEC34]], [[STRIDED_VEC34]] +; CHECK-NEXT: [[TMP109:%.*]] = fmul fast <2 x float> [[STRIDED_VEC41]], [[STRIDED_VEC41]] +; CHECK-NEXT: [[TMP110:%.*]] = fmul fast <2 x float> [[STRIDED_VEC48]], [[STRIDED_VEC48]] +; CHECK-NEXT: [[TMP111:%.*]] = fmul fast <2 x float> [[STRIDED_VEC55]], [[STRIDED_VEC55]] +; CHECK-NEXT: [[TMP112:%.*]] = fmul fast <2 x float> [[STRIDED_VEC62]], [[STRIDED_VEC62]] +; CHECK-NEXT: [[TMP113:%.*]] = fadd fast <2 x float> [[TMP105]], [[TMP97]] +; CHECK-NEXT: [[TMP114:%.*]] = fadd fast <2 x float> [[TMP106]], [[TMP98]] +; CHECK-NEXT: [[TMP115:%.*]] = fadd fast <2 x float> [[TMP107]], [[TMP99]] +; CHECK-NEXT: [[TMP116:%.*]] = fadd fast <2 x float> 
[[TMP108]], [[TMP100]] +; CHECK-NEXT: [[TMP117:%.*]] = fadd fast <2 x float> [[TMP109]], [[TMP101]] +; CHECK-NEXT: [[TMP118:%.*]] = fadd fast <2 x float> [[TMP110]], [[TMP102]] +; CHECK-NEXT: [[TMP119:%.*]] = fadd fast <2 x float> [[TMP111]], [[TMP103]] +; CHECK-NEXT: [[TMP120:%.*]] = fadd fast <2 x float> [[TMP112]], [[TMP104]] +; CHECK-NEXT: [[TMP121:%.*]] = fpext <2 x float> [[TMP113]] to <2 x double> +; CHECK-NEXT: [[TMP122:%.*]] = fpext <2 x float> [[TMP114]] to <2 x double> +; CHECK-NEXT: [[TMP123:%.*]] = fpext <2 x float> [[TMP115]] to <2 x double> +; CHECK-NEXT: [[TMP124:%.*]] = fpext <2 x float> [[TMP116]] to <2 x double> +; CHECK-NEXT: [[TMP125:%.*]] = fpext <2 x float> [[TMP117]] to <2 x double> +; CHECK-NEXT: [[TMP126:%.*]] = fpext <2 x float> [[TMP118]] to <2 x double> +; CHECK-NEXT: [[TMP127:%.*]] = fpext <2 x float> [[TMP119]] to <2 x double> +; CHECK-NEXT: [[TMP128:%.*]] = fpext <2 x float> [[TMP120]] to <2 x double> +; CHECK-NEXT: [[TMP129]] = fadd fast <2 x double> [[TMP121]], [[TMP89]] +; CHECK-NEXT: [[TMP130]] = fadd fast <2 x double> [[TMP122]], [[TMP90]] +; CHECK-NEXT: [[TMP131]] = fadd fast <2 x double> [[TMP123]], [[TMP91]] +; CHECK-NEXT: [[TMP132]] = fadd fast <2 x double> [[TMP124]], [[TMP92]] +; CHECK-NEXT: [[TMP133]] = fadd fast <2 x double> [[TMP125]], [[TMP93]] +; CHECK-NEXT: [[TMP134]] = fadd fast <2 x double> [[TMP126]], [[TMP94]] +; CHECK-NEXT: [[TMP135]] = fadd fast <2 x double> [[TMP127]], [[TMP95]] +; CHECK-NEXT: [[TMP136]] = fadd fast <2 x double> [[TMP128]], [[TMP96]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP137:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP137]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[TMP130]], [[TMP129]] +; CHECK-NEXT: [[BIN_RDX63:%.*]] = fadd fast <2 x double> [[TMP131]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX64:%.*]] = fadd fast <2 x double> [[TMP132]], [[BIN_RDX63]] +; CHECK-NEXT: [[BIN_RDX65:%.*]] = fadd fast <2 x double> [[TMP133]], [[BIN_RDX64]] +; CHECK-NEXT: [[BIN_RDX66:%.*]] = fadd fast <2 x double> [[TMP134]], [[BIN_RDX65]] +; CHECK-NEXT: [[BIN_RDX67:%.*]] = fadd fast <2 x double> [[TMP135]], [[BIN_RDX66]] +; CHECK-NEXT: [[BIN_RDX68:%.*]] = fadd fast <2 x double> [[TMP136]], [[BIN_RDX67]] +; CHECK-NEXT: [[TMP138:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX68]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_FOR_END13_CRIT_EDGE:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP138]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF69:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC70:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF69]] +; CHECK-NEXT: [[TMP139:%.*]] = insertelement <2 x double> zeroinitializer, double [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: br label 
%[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX71:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT80:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI72:%.*]] = phi <2 x double> [ [[TMP139]], %[[VEC_EPILOG_PH]] ], [ [[TMP156:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP140:%.*]] = add i64 [[INDEX71]], 0 +; CHECK-NEXT: [[TMP141:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP140]], i64 0, i32 0 +; CHECK-NEXT: [[WIDE_VEC73:%.*]] = load <12 x float>, ptr [[TMP141]], align 8 +; CHECK-NEXT: [[STRIDED_VEC74:%.*]] = shufflevector <12 x float> [[WIDE_VEC73]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC75:%.*]] = shufflevector <12 x float> [[WIDE_VEC73]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC76:%.*]] = shufflevector <12 x float> [[WIDE_VEC73]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC77:%.*]] = shufflevector <12 x float> [[WIDE_VEC73]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC78:%.*]] = shufflevector <12 x float> [[WIDE_VEC73]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[STRIDED_VEC79:%.*]] = shufflevector <12 x float> [[WIDE_VEC73]], <12 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP142:%.*]] = fmul fast <2 x float> [[STRIDED_VEC74]], [[STRIDED_VEC74]] +; CHECK-NEXT: [[TMP143:%.*]] = fmul fast <2 x float> [[STRIDED_VEC75]], [[STRIDED_VEC75]] +; CHECK-NEXT: [[TMP144:%.*]] = fadd fast <2 x float> [[TMP143]], [[TMP142]] +; CHECK-NEXT: [[TMP145:%.*]] = fpext <2 x float> [[TMP144]] to <2 x double> +; CHECK-NEXT: [[TMP146:%.*]] = fadd fast <2 x double> [[TMP145]], [[VEC_PHI72]] +; CHECK-NEXT: [[TMP147:%.*]] = fmul fast <2 x float> [[STRIDED_VEC76]], [[STRIDED_VEC76]] +; CHECK-NEXT: [[TMP148:%.*]] = fmul fast <2 x float> [[STRIDED_VEC77]], [[STRIDED_VEC77]] +; CHECK-NEXT: [[TMP149:%.*]] = fadd fast <2 x float> [[TMP148]], [[TMP147]] +; CHECK-NEXT: [[TMP150:%.*]] = fpext <2 x float> [[TMP149]] to <2 x double> +; CHECK-NEXT: [[TMP151:%.*]] = fadd fast <2 x double> [[TMP150]], [[TMP146]] +; CHECK-NEXT: [[TMP152:%.*]] = fmul fast <2 x float> [[STRIDED_VEC78]], [[STRIDED_VEC78]] +; CHECK-NEXT: [[TMP153:%.*]] = fmul fast <2 x float> [[STRIDED_VEC79]], [[STRIDED_VEC79]] +; CHECK-NEXT: [[TMP154:%.*]] = fadd fast <2 x float> [[TMP153]], [[TMP152]] +; CHECK-NEXT: [[TMP155:%.*]] = fpext <2 x float> [[TMP154]] to <2 x double> +; CHECK-NEXT: [[TMP156]] = fadd fast <2 x double> [[TMP155]], [[TMP151]] +; CHECK-NEXT: [[INDEX_NEXT80]] = add nuw i64 [[INDEX71]], 2 +; CHECK-NEXT: [[TMP157:%.*]] = icmp eq i64 [[INDEX_NEXT80]], [[N_VEC70]] +; CHECK-NEXT: br i1 [[TMP157]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP158:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[TMP156]]) +; CHECK-NEXT: [[CMP_N81:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC70]] +; CHECK-NEXT: br i1 [[CMP_N81]], label %[[FOR_COND_FOR_END13_CRIT_EDGE]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC70]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX82:%.*]] = phi double [ [[TMP158]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP138]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ] +; CHECK-NEXT: br label 
%[[FOR_COND1_PREHEADER:.*]] +; CHECK: [[FOR_COND1_PREHEADER]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_COND1_PREHEADER]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_026:%.*]] = phi double [ [[ADD10_2:%.*]], %[[FOR_COND1_PREHEADER]] ], [ [[BC_MERGE_RDX82]], %[[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX5_REALP:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[INDVARS_IV]], i64 0, i32 0 +; CHECK-NEXT: [[ARRAYIDX5_REAL:%.*]] = load float, ptr [[ARRAYIDX5_REALP]], align 8 +; CHECK-NEXT: [[ARRAYIDX5_IMAGP:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[INDVARS_IV]], i64 0, i32 1 +; CHECK-NEXT: [[ARRAYIDX5_IMAG:%.*]] = load float, ptr [[ARRAYIDX5_IMAGP]], align 8 +; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[ARRAYIDX5_REAL]], [[ARRAYIDX5_REAL]] +; CHECK-NEXT: [[MUL9:%.*]] = fmul fast float [[ARRAYIDX5_IMAG]], [[ARRAYIDX5_IMAG]] +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[MUL9]], [[MUL]] +; CHECK-NEXT: [[CONV:%.*]] = fpext float [[ADD]] to double +; CHECK-NEXT: [[ADD10:%.*]] = fadd fast double [[CONV]], [[SUM_026]] +; CHECK-NEXT: [[ARRAYIDX5_REALP_1:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[INDVARS_IV]], i64 1, i32 0 +; CHECK-NEXT: [[ARRAYIDX5_REAL_1:%.*]] = load float, ptr [[ARRAYIDX5_REALP_1]], align 8 +; CHECK-NEXT: [[ARRAYIDX5_IMAGP_1:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[INDVARS_IV]], i64 1, i32 1 +; CHECK-NEXT: [[ARRAYIDX5_IMAG_1:%.*]] = load float, ptr [[ARRAYIDX5_IMAGP_1]], align 8 +; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[ARRAYIDX5_REAL_1]], [[ARRAYIDX5_REAL_1]] +; CHECK-NEXT: [[MUL9_1:%.*]] = fmul fast float [[ARRAYIDX5_IMAG_1]], [[ARRAYIDX5_IMAG_1]] +; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[MUL9_1]], [[MUL_1]] +; CHECK-NEXT: [[CONV_1:%.*]] = fpext float [[ADD_1]] to double +; CHECK-NEXT: [[ADD10_1:%.*]] = fadd fast double [[CONV_1]], [[ADD10]] +; CHECK-NEXT: [[ARRAYIDX5_REALP_2:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[INDVARS_IV]], i64 2, i32 0 +; CHECK-NEXT: [[ARRAYIDX5_REAL_2:%.*]] = load float, ptr [[ARRAYIDX5_REALP_2]], align 8 +; CHECK-NEXT: [[ARRAYIDX5_IMAGP_2:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[INDVARS_IV]], i64 2, i32 1 +; CHECK-NEXT: [[ARRAYIDX5_IMAG_2:%.*]] = load float, ptr [[ARRAYIDX5_IMAGP_2]], align 8 +; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[ARRAYIDX5_REAL_2]], [[ARRAYIDX5_REAL_2]] +; CHECK-NEXT: [[MUL9_2:%.*]] = fmul fast float [[ARRAYIDX5_IMAG_2]], [[ARRAYIDX5_IMAG_2]] +; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[MUL9_2]], [[MUL_2]] +; CHECK-NEXT: [[CONV_2:%.*]] = fpext float [[ADD_2]] to double +; CHECK-NEXT: [[ADD10_2]] = fadd fast double [[CONV_2]], [[ADD10_1]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_FOR_END13_CRIT_EDGE]], label %[[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[FOR_COND_FOR_END13_CRIT_EDGE]]: +; CHECK-NEXT: [[ADD10_2_LCSSA:%.*]] = phi double [ [[ADD10_2]], %[[FOR_COND1_PREHEADER]] ], [ [[TMP138]], %[[MIDDLE_BLOCK]] ], [ [[TMP158]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[PHITMP:%.*]] = fptrunc double [[ADD10_2_LCSSA]] to float +; CHECK-NEXT: br label %[[FOR_END13]] +; CHECK: [[FOR_END13]]: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = 
phi float [ [[PHITMP]], %[[FOR_COND_FOR_END13_CRIT_EDGE]] ], [ 0.000000e+00, %[[ENTRY]] ] +; CHECK-NEXT: store float [[SUM_0_LCSSA]], ptr [[R]], align 4 +; CHECK-NEXT: ret void +; entry: %cmp24 = icmp sgt i32 %n, 0 br i1 %cmp24, label %for.cond1.preheader.preheader, label %for.end13 @@ -69,3 +383,10 @@ for.end13: ; preds = %for.cond.for.end13_ ret void } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll index ce83ab460a6c2..5cd68347c168f 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll @@ -1,21 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s -; CHECK: vector.body: -; CHECK: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NEXT: fadd -; CHECK-NOT: fadd -; CHECK: middle.block - target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-ibm-linux-gnu" -define void @test(ptr nocapture readonly %arr, i32 signext %len) #0 { +; We expect the loop with double reductions to be interleaved 8 times. +define void @test(ptr %arr, i32 %len) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[ARR:%.*]], i32 [[LEN:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[LEN]], 0 +; CHECK-NEXT: br i1 [[CMP4]], label %[[ITER_CHECK:.*]], label %[[FOR_END:.*]] +; CHECK: [[ITER_CHECK]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[LEN]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP1]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <2 x 
double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[ARR]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 10 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 12 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 14 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x double>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x double>, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <2 x double>, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <2 x double>, ptr [[TMP10]], align 8 +; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <2 x double>, ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP12]] = fadd fast <2 x double> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP13]] = fadd fast <2 x double> [[WIDE_LOAD9]], [[VEC_PHI2]] +; CHECK-NEXT: [[TMP14]] = fadd fast <2 x double> [[WIDE_LOAD10]], [[VEC_PHI3]] +; CHECK-NEXT: [[TMP15]] = fadd fast <2 x double> [[WIDE_LOAD11]], [[VEC_PHI4]] +; CHECK-NEXT: [[TMP16]] = fadd fast <2 x double> [[WIDE_LOAD12]], [[VEC_PHI5]] +; CHECK-NEXT: [[TMP17]] = fadd fast <2 x double> [[WIDE_LOAD13]], [[VEC_PHI6]] +; CHECK-NEXT: [[TMP18]] = fadd fast <2 x double> [[WIDE_LOAD14]], [[VEC_PHI7]] +; CHECK-NEXT: [[TMP19]] = fadd fast <2 x double> [[WIDE_LOAD15]], [[VEC_PHI8]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <2 x double> [[TMP14]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX17:%.*]] = fadd fast <2 x double> [[TMP15]], [[BIN_RDX16]] +; CHECK-NEXT: [[BIN_RDX18:%.*]] = fadd fast <2 x double> [[TMP16]], [[BIN_RDX17]] +; CHECK-NEXT: [[BIN_RDX19:%.*]] = fadd fast <2 x double> [[TMP17]], [[BIN_RDX18]] +; CHECK-NEXT: [[BIN_RDX20:%.*]] = fadd fast <2 x double> [[TMP18]], [[BIN_RDX19]] +; CHECK-NEXT: [[BIN_RDX21:%.*]] = fadd fast <2 x double> [[TMP19]], [[BIN_RDX20]] +; CHECK-NEXT: [[TMP21:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX21]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 
[[N_VEC_REMAINING]], 2 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP21]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF22:%.*]] = urem i64 [[TMP1]], 2 +; CHECK-NEXT: [[N_VEC23:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF22]] +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> zeroinitializer, double [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT27:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI25:%.*]] = phi <2 x double> [ [[TMP22]], %[[VEC_EPILOG_PH]] ], [ [[TMP26:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX24]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, ptr [[ARR]], i64 [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <2 x double>, ptr [[TMP25]], align 8 +; CHECK-NEXT: [[TMP26]] = fadd fast <2 x double> [[WIDE_LOAD26]], [[VEC_PHI25]] +; CHECK-NEXT: [[INDEX_NEXT27]] = add nuw i64 [[INDEX24]], 2 +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT27]], [[N_VEC23]] +; CHECK-NEXT: br i1 [[TMP27]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP28:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[TMP26]]) +; CHECK-NEXT: [[CMP_N28:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC23]] +; CHECK-NEXT: br i1 [[CMP_N28]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX29:%.*]] = phi double [ [[TMP28]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP21]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[REDX_05:%.*]] = phi double [ [[BC_MERGE_RDX29]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[ARR]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP29:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD]] = fadd fast double [[TMP29]], [[REDX_05]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi double [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[TMP28]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[REDX_0_LCSSA:%.*]] = phi 
double [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_LCSSA]], %[[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret void +; entry: %cmp4 = icmp sgt i32 %len, 0 br i1 %cmp4, label %for.body.lr.ph, label %for.end @@ -43,3 +147,10 @@ for.end: ; preds = %for.end.loopexit, % %redx.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.end.loopexit ] ret void } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 17809b3caf210..cf66264486095 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -114,44 +114,8 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 54 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] -; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG2]], i64 8 -; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 12 -; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]]) -; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]] -; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[ARG2]], i64 4 -; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]]) -; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT7]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP5]] -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW8]] -; CHECK-NEXT: [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]]) -; CHECK-NEXT: [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT10]] -; CHECK-NEXT: 
[[TMP14:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[MUL_RESULT10]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[ARG2]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW11]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP12]] -; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP19]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[ARG1]], 4 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 16 @@ -171,9 +135,9 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP25:%.*]] = shl i64 [[TMP24]], 5 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP24]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 [[TMP27]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP26]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> @@ -203,44 +167,44 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[SHL_IV_5:%.*]] = shl i64 [[IV]], 5 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[SHL_IV_5]] +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[SHL_IV_5]] ; CHECK-NEXT: [[ADD_5:%.*]] = or disjoint i64 [[SHL_IV_5]], 16 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[ADD_5]] ; CHECK-NEXT: [[SHL_IV_4:%.*]] = shl i64 [[IV]], 4 -; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[SHL_IV_4]] +; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 [[SHL_IV_4]] ; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4 ; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_2]], align 4 ; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_1]], [[L_2]] ; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[ADD_1]], 0.000000e+00 ; CHECK-NEXT: store float [[MUL_1]], ptr [[GEP_3]], align 4 -; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 4 +; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i8, ptr [[GEP_1]], i64 4 ; CHECK-NEXT: [[L_3:%.*]] = load float, ptr [[GEP_4]], align 4 -; CHECK-NEXT: [[GEP_5:%.*]] 
= getelementptr i8, ptr [[GEP_2]], i64 4 +; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, ptr [[GEP_2]], i64 4 ; CHECK-NEXT: [[L_4:%.*]] = load float, ptr [[GEP_5]], align 4 ; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[L_3]], [[L_4]] ; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[ADD_2]], 0.000000e+00 -; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 4 +; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i8, ptr [[GEP_3]], i64 4 ; CHECK-NEXT: store float [[MUL_2]], ptr [[GEP_6]], align 4 -; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 8 +; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i8, ptr [[GEP_1]], i64 8 ; CHECK-NEXT: [[L_5:%.*]] = load float, ptr [[GEP_7]], align 4 -; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 8 +; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, ptr [[GEP_2]], i64 8 ; CHECK-NEXT: [[L_6:%.*]] = load float, ptr [[GEP_8]], align 4 ; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[L_5]], [[L_6]] ; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[ADD_3]], 0.000000e+00 -; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 8 +; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i8, ptr [[GEP_3]], i64 8 ; CHECK-NEXT: store float [[MUL_3]], ptr [[GEP_9]], align 4 -; CHECK-NEXT: [[I27:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 12 +; CHECK-NEXT: [[I27:%.*]] = getelementptr inbounds i8, ptr [[GEP_1]], i64 12 ; CHECK-NEXT: [[L_7:%.*]] = load float, ptr [[I27]], align 4 -; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 12 +; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i8, ptr [[GEP_2]], i64 12 ; CHECK-NEXT: [[L_8:%.*]] = load float, ptr [[GEP_10]], align 4 ; CHECK-NEXT: [[ADD_4:%.*]] = fadd float [[L_7]], [[L_8]] ; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[ADD_4]], 0.000000e+00 -; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 12 +; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds i8, ptr [[GEP_3]], i64 12 ; CHECK-NEXT: store float [[MUL_4]], ptr [[GEP_11]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[ARG1]] @@ -254,39 +218,39 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %shl.iv.5 = shl i64 %iv, 5 - %gep.1 = getelementptr i8, ptr %arg, i64 %shl.iv.5 + %gep.1 = getelementptr inbounds i8, ptr %arg, i64 %shl.iv.5 %add.5 = or disjoint i64 %shl.iv.5, 16 %gep.2 = getelementptr i8, ptr %arg, i64 %add.5 %shl.iv.4 = shl i64 %iv, 4 - %gep.3 = getelementptr i8, ptr %arg2, i64 %shl.iv.4 + %gep.3 = getelementptr inbounds i8, ptr %arg2, i64 %shl.iv.4 %l.1 = load float, ptr %gep.1, align 4 %l.2 = load float, ptr %gep.2, align 4 %add.1 = fadd float %l.1, %l.2 %mul.1 = fmul float %add.1, 0.000000e+00 store float %mul.1, ptr %gep.3, align 4 - %gep.4 = getelementptr i8, ptr %gep.1, i64 4 + %gep.4 = getelementptr inbounds i8, ptr %gep.1, i64 4 %l.3 = load float, ptr %gep.4, align 4 - %gep.5 = getelementptr i8, ptr %gep.2, i64 4 + %gep.5 = getelementptr inbounds i8, ptr %gep.2, i64 4 %l.4 = load float, ptr %gep.5, align 4 %add.2 = fadd float %l.3, %l.4 %mul.2 = fmul float %add.2, 0.000000e+00 - %gep.6 = getelementptr i8, ptr %gep.3, i64 4 + %gep.6 = getelementptr inbounds i8, ptr %gep.3, i64 4 store float %mul.2, ptr %gep.6, align 4 - %gep.7 = getelementptr i8, ptr %gep.1, i64 8 + %gep.7 = getelementptr inbounds i8, ptr %gep.1, i64 8 %l.5 = load float, ptr %gep.7, align 4 - %gep.8 = getelementptr i8, ptr %gep.2, i64 8 + %gep.8 = getelementptr inbounds i8, ptr %gep.2, i64 8 %l.6 = load float, ptr 
%gep.8, align 4 %add.3 = fadd float %l.5, %l.6 %mul.3 = fmul float %add.3, 0.000000e+00 - %gep.9 = getelementptr i8, ptr %gep.3, i64 8 + %gep.9 = getelementptr inbounds i8, ptr %gep.3, i64 8 store float %mul.3, ptr %gep.9, align 4 - %i27 = getelementptr i8, ptr %gep.1, i64 12 + %i27 = getelementptr inbounds i8, ptr %gep.1, i64 12 %l.7 = load float, ptr %i27, align 4 - %gep.10 = getelementptr i8, ptr %gep.2, i64 12 + %gep.10 = getelementptr inbounds i8, ptr %gep.2, i64 12 %l.8 = load float, ptr %gep.10, align 4 %add.4 = fadd float %l.7, %l.8 %mul.4 = fmul float %add.4, 0.000000e+00 - %gep.11 = getelementptr i8, ptr %gep.3, i64 12 + %gep.11 = getelementptr inbounds i8, ptr %gep.3, i64 12 store float %mul.4, ptr %gep.11, align 4 %iv.next = add i64 %iv, 1 %ec = icmp eq i64 %iv, %arg1 diff --git a/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll b/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll new file mode 100644 index 0000000000000..9339aed927960 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=loop-vectorize -force-vector-width=2 < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define i16 @multiple_exit_one_with_constant_condition(ptr %dst, i64 %x) { +; CHECK-LABEL: @multiple_exit_one_with_constant_condition( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[X:%.*]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP0]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 2, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP2]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] +; CHECK-NEXT: store i64 0, ptr [[TMP5]], align 8 +; CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8 +; CHECK-NEXT: br i1 true, label [[LOOP_THEN:%.*]], label [[EXIT_2:%.*]] +; CHECK: loop.then: +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i64 [[IV]], [[X]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_LATCH]], label [[EXIT_1:%.*]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: br 
label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: exit.1: +; CHECK-NEXT: ret i16 0 +; CHECK: exit.2: +; CHECK-NEXT: ret i16 1 +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep = getelementptr inbounds i32, ptr %dst, i64 %iv + store i64 0, ptr %gep + br i1 true, label %loop.then, label %exit.2 + +loop.then: + %cmp3 = icmp ne i64 %iv, %x + br i1 %cmp3, label %loop.latch, label %exit.1 + +loop.latch: + %iv.next = add i64 %iv, 1 + br label %loop.header + +exit.1: + ret i16 0 + +exit.2: + ret i16 1 +} diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll index 061b85357b771..89d8eb1ee6711 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -907,6 +907,23 @@ define void @memcpy_immut_escape_after(ptr align 4 noalias %val) { ret void } +declare void @two_args(ptr, ptr) + +; Should not perform call slot optimization: The function accepts the +; destination as an argument and may read/write it. +define void @test(ptr noalias writable dereferenceable(4) %p) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RET:%.*]] = call ptr @two_args(ptr [[A]], ptr captures(ret: address, provenance) [[P:%.*]]) #[[ATTR2]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P]], ptr [[A]], i64 4, i1 false) +; CHECK-NEXT: ret void +; + %a = alloca i32 + %ret = call ptr @two_args(ptr %a, ptr captures(ret: address, provenance) %p) nounwind + call void @llvm.memcpy(ptr align 4 %p, ptr %a, i64 4, i1 false) + ret void +} + !0 = !{!0} !1 = !{!1, !0} !2 = !{!1} diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll index d234dedc5a57a..1d09b1c1a0cb3 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll @@ -1,5 +1,8 @@ ;; Test recursion handling during cloning. -;; + +;; -stats requires asserts +; REQUIRES: asserts + ;; Original code looks like: ;; ;; #include @@ -35,13 +38,14 @@ ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. ;; Check behavior when we enable cloning of contexts involved with recursive -;; cycles, but not through the cycle itself. I.e. until full support for -;; recursion is added, the cloned recursive call from C back to B (line 12) will -;; not be updated to call a clone. +;; cycles, but not through the cycle itself. I.e. with full support for cloning +;; recursive cycles off, the cloned recursive call from C back to B (line 12) +;; will not be updated to call a clone. ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: -memprof-allow-recursive-callsites=true \ +; RUN: -memprof-clone-recursive-contexts=false \ ; RUN: %s -S 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS @@ -57,13 +61,13 @@ ; RUN: --implicit-check-not="marked with memprof allocation attribute cold" \ ; RUN: --check-prefix=ALL -;; Check the default behavior (enabled recursive callsites). +;; Check the default behavior (clone recursive callsites). 
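;; With -memprof-clone-recursive-contexts left at its default (true), cloning
;; is also expected to proceed through the recursive cycle itself, so the
;; recursive call from C back to B (line 12) should be redirected to a clone;
;; the CLONE-RECUR-CALLSITES prefix below checks for exactly that remark and
;; for the "backedges with deferred cloning" statistic.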
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: %s -S 2>&1 | FileCheck %s \ -; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ -; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS +; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS \ +; RUN: --check-prefix=CLONE-RECUR-CALLSITES ;; Skipping recursive contexts should prevent spurious call to cloned version of ;; B from the context starting at memprof_recursive.cc:19:13, which is actually @@ -73,6 +77,7 @@ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: -memprof-allow-recursive-callsites=true \ ; RUN: -memprof-allow-recursive-contexts=false \ +; RUN: -memprof-clone-recursive-contexts=false \ ; RUN: %s -S 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS @@ -84,6 +89,7 @@ ;; We should only call the cold clone for the recursive context if we enabled ;; recursive contexts via -memprof-allow-recursive-contexts=true (default). ; ALLOW-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1 +; CLONE-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Bi.memprof.1 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold @@ -95,6 +101,7 @@ ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi assigned to call function clone _Z1Ci ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci assigned to call function clone _Z1Dv ; ALL: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold +; CLONE-RECUR-CALLSITES: 1 memprof-context-disambiguation - Number of backedges with deferred cloning target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/PhaseOrdering/load-store-sameval.ll b/llvm/test/Transforms/PhaseOrdering/load-store-sameval.ll new file mode 100644 index 0000000000000..d8ef0723cf09e --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/load-store-sameval.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='instcombine,early-cse' -S %s | FileCheck %s + +; FIXME: We can remove the store instruction in the exit block +define i32 @load_store_sameval(ptr %p, i1 %cond1, i1 %cond2) { +; CHECK-LABEL: define i32 @load_store_sameval( +; CHECK-SAME: ptr [[P:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND1]], ptr null, ptr [[P]] +; CHECK-NEXT: [[PRE:%.*]] = load i32, ptr [[SPEC_SELECT]], align 4 +; CHECK-NEXT: br label %[[BLOCK:.*]] +; CHECK: 
[[BLOCK]]: +; CHECK-NEXT: br label %[[BLOCK2:.*]] +; CHECK: [[BLOCK2]]: +; CHECK-NEXT: br i1 [[COND2]], label %[[BLOCK3:.*]], label %[[EXIT:.*]] +; CHECK: [[BLOCK3]]: +; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[SPEC_SELECT]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[LOAD]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label %[[BLOCK]], label %[[BLOCK2]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: store i32 [[PRE]], ptr [[P]], align 4 +; CHECK-NEXT: ret i32 0 +; +entry: + %spec.select = select i1 %cond1, ptr null, ptr %p + %pre = load i32, ptr %spec.select, align 4 + br label %block + +block: + br label %block2 + +block2: + br i1 %cond2, label %block3, label %exit + +block3: + %load = load double, ptr %spec.select, align 8 + %cmp = fcmp une double %load, 0.000000e+00 + br i1 %cmp, label %block, label %block2 + +exit: + store i32 %pre, ptr %spec.select, align 4 + ret i32 0 +} + diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_none_meta.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_none_meta.ll new file mode 100644 index 0000000000000..b9fd8a47df14b --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_none_meta.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt < %s -S | FileCheck %s + +; Function Attrs: convergent noinline nounwind +declare hidden spir_func void @__cxx_global_var_init() #0 + +; Function Attrs: convergent noinline nounwind +define hidden spir_kernel void @_GLOBAL__sub_I_global_init.clcpp() #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 { +; CHECK-LABEL: define hidden spir_kernel void @_GLOBAL__sub_I_global_init.clcpp( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {{.*}}{ +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call spir_func void @__cxx_global_var_init() +; CHECK-NEXT: ret void +; +entry: + call spir_func void @__cxx_global_var_init() + ret void +} + +attributes #0 = { convergent noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.cxx.version = !{!2} +!opencl.spir.version = !{!1} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 2, i32 0} +!2 = !{i32 1, i32 0} +!3 = !{!"clang version 20.0.0git"} +!4 = !{} + diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_none_meta.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_none_meta.ll.expected new file mode 100644 index 0000000000000..b9fd8a47df14b --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/global_none_meta.ll.expected @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt < %s -S | FileCheck %s + +; Function Attrs: convergent noinline nounwind +declare hidden spir_func void @__cxx_global_var_init() #0 + +; Function Attrs: convergent noinline nounwind +define hidden spir_kernel void @_GLOBAL__sub_I_global_init.clcpp() #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !4 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !4 { +; CHECK-LABEL: define hidden spir_kernel void @_GLOBAL__sub_I_global_init.clcpp( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {{.*}}{ +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call spir_func void @__cxx_global_var_init() +; CHECK-NEXT: ret 
void +; +entry: + call spir_func void @__cxx_global_var_init() + ret void +} + +attributes #0 = { convergent noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0} +!opencl.ocl.version = !{!1} +!opencl.cxx.version = !{!2} +!opencl.spir.version = !{!1} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 2, i32 0} +!2 = !{i32 1, i32 0} +!3 = !{!"clang version 20.0.0git"} +!4 = !{} + diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/global_none_meta.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/global_none_meta.test new file mode 100644 index 0000000000000..11a4665c44f92 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/global_none_meta.test @@ -0,0 +1,3 @@ +## Basic test checking that update_test_checks.py --check-globals none works correctly +# RUN: cp -f %S/Inputs/global_none_meta.ll %t.ll && %update_test_checks %t.ll --check-globals none +# RUN: diff -u %t.ll %S/Inputs/global_none_meta.ll.expected diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness-json-output.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness-json-output.s new file mode 100644 index 0000000000000..c4b7ffe1d2d8e --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness-json-output.s @@ -0,0 +1,172 @@ +# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj -o - | not llvm-dwarfdump -verify --verify-json=%t.json - +# RUN: FileCheck %s --input-file %t.json + +# CHECK: {"error-categories":{"Name Index DIE entry missing name":{"count":10}},"error-count":10} +# CHECK-NOT: error: Name Index @ 0x0: Entry for DIE @ {{.*}} (DW_TAG_variable) with name var_block_addr missing. + + .section .debug_loc,"",@progbits +.Ldebug_loc0: + .quad 0 + .quad 1 + .short .Lloc0_end-.Lloc0_start # Loc expr size +.Lloc0_start: + .byte 3 # DW_OP_addr + .quad 0x47 +.Lloc0_end: + .quad 0 + .quad 0 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 110 # DW_AT_linkage_name + .byte 8 # DW_FORM_string + .byte 82 # DW_AT_entry_pc + .byte 1 # DW_FORM_addr + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 4 # Abbreviation Code + .byte 57 # DW_TAG_namespace + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 5 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 2 # DW_AT_location + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 6 # Abbreviation Code + .byte 57 # DW_TAG_namespace + .byte 1 # DW_CHILDREN_yes + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 7 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 17 # DW_AT_low_pc + .byte 1 # 
DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 1 # DW_FORM_addr + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 8 # Abbreviation Code + .byte 10 # DW_TAG_label + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 82 # DW_AT_entry_pc + .byte 1 # DW_FORM_addr + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 0 # EOM(3) + .section .debug_info,"",@progbits + +.Lcu_begin0: + .long .Lcu_end0-.Lcu_start0 # Length of Unit +.Lcu_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .asciz "hand-written DWARF" # DW_AT_producer + .quad 0x0 # DW_AT_low_pc + .long 0x100 # DW_AT_high_pc + + .byte 4 # Abbrev [4] DW_TAG_namespace + .asciz "namesp" # DW_AT_name + .byte 2 # Abbrev [2] DW_TAG_variable + .asciz "var_block_addr" # DW_AT_name + .byte 9 # DW_AT_location + .byte 3 # DW_OP_addr + .quad 0x47 + .byte 0 # End Of Children Mark + + .byte 6 # Abbrev [6] DW_TAG_namespace + .byte 5 # Abbrev [5] DW_TAG_variable + .asciz "var_loc_addr" # DW_AT_name + .long .Ldebug_loc0 # DW_AT_location + .byte 0 # End Of Children Mark + + .byte 2 # Abbrev [2] DW_TAG_variable + .asciz "var_loc_tls" # DW_AT_name + .byte 1 # DW_AT_location + .byte 0x9b # DW_OP_form_tls_address + + .byte 2 # Abbrev [2] DW_TAG_variable + .asciz "var_loc_gnu_tls" # DW_AT_name + .byte 1 # DW_AT_location + .byte 0xe0 # DW_OP_GNU_push_tls_address + + .byte 3 # Abbrev [3] DW_TAG_subprogram + .asciz "fun_name" # DW_AT_name + .asciz "_Z8fun_name" # DW_AT_linkage_name + .quad 0x47 # DW_AT_entry_pc + .byte 7 # Abbrev [7] DW_TAG_inlined_subroutine + .asciz "fun_inline" # DW_AT_name + .quad 0x48 # DW_AT_low_pc + .quad 0x49 # DW_AT_high_pc + .byte 8 # Abbrev [8] DW_TAG_label + .asciz "label" # DW_AT_name + .quad 0x4a # DW_AT_entry_pc + .byte 0 # End Of Children Mark + + .byte 0 # End Of Children Mark +.Lcu_end0: + + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: contribution length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 0 # Header: bucket count + .long 0 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 0 # Header: augmentation length + .long .Lcu_begin0 # Compilation unit 0 +.Lnames_abbrev_start0: + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames_end0: diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-cu-lists-json-output.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-cu-lists-json-output.s new file mode 100644 index 0000000000000..dfbfe04943a83 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-cu-lists-json-output.s @@ -0,0 +1,111 @@ +# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj | \ +# RUN: not llvm-dwarfdump -verify -verify-json=%t.json - +# RUN: FileCheck %s --input-file %t.json + +# CHECK: {"error-categories":{"Duplicate Name Index":{"count":1},"Name Index doesn't index any CU":{"count":1},"Name Index references non-existing CU":{"count":1}},"error-count":3} +# CHECK-NOT: error: Name Index @ 0x58 references a CU @ 0x0, but this CU is already indexed by Name Index @ 0x28 +# CHECK-NOT: warning: CU @ 0x13 not covered by any Name Index + + + .section .debug_str,"MS",@progbits,1 +.Lstring_foo: + .asciz "foo" +.Lstring_foo_mangled: + .asciz "_Z3foov"
+.Lstring_bar: + .asciz "bar" +.Lstring_producer: + .asciz "Hand-written dwarf" + + .section .debug_abbrev,"",@progbits +.Lsection_abbrev: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Lcu_end0-.Lcu_start0 # Length of Unit +.Lcu_start0: + .short 4 # DWARF version number + .long .Lsection_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .long .Lstring_producer # DW_AT_producer + .short 12 # DW_AT_language + .byte 0 # End Of Children Mark +.Lcu_end0: + +.Lcu_begin1: + .long .Lcu_end1-.Lcu_start1 # Length of Unit +.Lcu_start1: + .short 4 # DWARF version number + .long .Lsection_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .long .Lstring_producer # DW_AT_producer + .short 12 # DW_AT_language + .byte 0 # End Of Children Mark +.Lcu_end1: + + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: contribution length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 0 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 0 # Header: bucket count + .long 0 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 0 # Header: augmentation length +.Lnames_abbrev_start0: + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.p2align 2 +.Lnames_end0: + + .long .Lnames_end1-.Lnames_start1 # Header: contribution length +.Lnames_start1: + .short 5 # Header: version + .short 0 # Header: padding + .long 2 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 0 # Header: bucket count + .long 0 # Header: name count + .long .Lnames_abbrev_end1-.Lnames_abbrev_start1 # Header: abbreviation table size + .long 0 # Header: augmentation length + .long .Lcu_begin0 # Compilation unit 0 + .long .Lcu_begin0+1 # Compilation unit 0 +.Lnames_abbrev_start1: + .byte 0 # End of abbrev list +.Lnames_abbrev_end1: +.p2align 2 +.Lnames_end1: + + .long .Lnames_end2-.Lnames_start2 # Header: contribution length +.Lnames_start2: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 0 # Header: bucket count + .long 0 # Header: name count + .long .Lnames_abbrev_end2-.Lnames_abbrev_start2 # Header: abbreviation table size + .long 0 # Header: augmentation length + .long .Lcu_begin0 # Compilation unit 0 +.Lnames_abbrev_start2: + .byte 0 # End of abbrev list +.Lnames_abbrev_end2: +.p2align 2 +.Lnames_end2: diff --git a/llvm/unittests/ADT/SetOperationsTest.cpp b/llvm/unittests/ADT/SetOperationsTest.cpp index f99f5c9b2af10..6d191160f774a 100644 --- a/llvm/unittests/ADT/SetOperationsTest.cpp +++ b/llvm/unittests/ADT/SetOperationsTest.cpp @@ -273,4 +273,21 @@ TEST(SetOperationsTest, SetIsSubset) { EXPECT_FALSE(set_is_subset(Set1, Set2)); } +TEST(SetOperationsTest, SetIntersects) { + std::set Set1 = {1, 2, 3, 4}; + std::set Set2 = {3, 4, 5}; + EXPECT_TRUE(set_intersects(Set1, Set2)); + EXPECT_TRUE(set_intersects(Set2, Set1)); + + Set2 = {5, 6, 
7}; + EXPECT_FALSE(set_intersects(Set1, Set2)); + EXPECT_FALSE(set_intersects(Set2, Set1)); + + // Check that intersecting with a null set returns false. + Set1.clear(); + EXPECT_FALSE(set_intersects(Set1, Set2)); + EXPECT_FALSE(set_intersects(Set2, Set1)); + EXPECT_FALSE(set_intersects(Set1, Set1)); +} + } // namespace diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 2ad33659c609b..088264e0429fd 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -6166,3 +6166,26 @@ define void @bar() { // This should not crash, even though there is already a value for LLVMBar. Ctx.createFunction(&LLVMBar); } + +TEST_F(SandboxIRTest, OpaqueValue) { + parseIR(C, R"IR( +declare void @bar(metadata) +define void @foo() { + call void @bar(metadata !1) + call void asm "asm", ""() + ret void +} +!1 = !{} +)IR"); + Function &LLVMFoo = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + auto *F = Ctx.createFunction(&LLVMFoo); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *Call = cast(&*It++); + auto *Op0 = Call->getOperand(0); + EXPECT_TRUE(isa(Op0)); + auto *Asm = cast(&*It++); + auto *AsmOp0 = Asm->getOperand(0); + EXPECT_TRUE(isa(AsmOp0)); +} diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 6611dc2fd3ced..0700486b61ec5 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -983,6 +983,7 @@ def get_failed_prefixes(self): ##### Generator of LLVM IR CHECK lines SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*") +SCRUB_IR_FUNC_META_RE = re.compile(r"((?:\!(?!dbg\b)[a-zA-Z_]\w*(?:\s+![0-9]+)?)\s*)+") # TODO: We should also derive check lines for global, debug, loop declarations, etc.. @@ -1087,6 +1088,7 @@ def __init__( nameless_values: List[NamelessValue], regexp_prefix, regexp_suffix, + no_meta_details=False, ): self._version = version self._mode = mode @@ -1094,6 +1096,7 @@ def __init__( self._regexp_prefix = regexp_prefix self._regexp_suffix = regexp_suffix + self._no_meta_details = no_meta_details self._regexp, _ = self._build_regexp(False, False) ( @@ -1147,6 +1150,9 @@ def get_regexp(self): def get_unstable_globals_regexp(self): return self._unstable_globals_regexp + def no_meta_details(self): + return self._no_meta_details + # The entire match is group 0, the prefix has one group (=1), the entire # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start. FIRST_NAMELESS_GROUP_IN_MATCH = 3 @@ -1175,7 +1181,7 @@ def get_nameless_value_from_match(self, match) -> NamelessValue: return self.get_match_info(match)[1] -def make_ir_generalizer(version): +def make_ir_generalizer(version, no_meta_details): values = [] if version >= 5: @@ -1224,7 +1230,9 @@ def make_ir_generalizer(version): # not (unstable_ids_only and nameless_value.match_literally) # ] - return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix) + return GeneralizerInfo( + version, GeneralizerInfo.MODE_IR, values, prefix, suffix, no_meta_details + ) def make_asm_generalizer(version): @@ -1726,6 +1734,7 @@ def generalize_check_lines( original_check_lines=None, *, unstable_globals_only=False, + no_meta_details=False, ): if unstable_globals_only: regexp = ginfo.get_unstable_globals_regexp() @@ -1755,6 +1764,9 @@ def escape_braces(match_obj): break # Ignore any comments, since the check lines will too. 
scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line) + # Ignore the metadata details if --check-globals is none + if no_meta_details: + scrubbed_line = SCRUB_IR_FUNC_META_RE.sub(r"{{.*}}", scrubbed_line) lines[i] = scrubbed_line if not preserve_names: @@ -1986,6 +1998,7 @@ def add_checks( global_vars_seen, preserve_names, original_check_lines=[], + no_meta_details=ginfo.no_meta_details(), )[0] func_name_separator = func_dict[checkprefix][func_name].func_name_separator if "[[" in args_and_sig: diff --git a/llvm/utils/gn/secondary/libunwind/src/BUILD.gn b/llvm/utils/gn/secondary/libunwind/src/BUILD.gn index 2396300f94717..f63b22822ca96 100644 --- a/llvm/utils/gn/secondary/libunwind/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libunwind/src/BUILD.gn @@ -40,11 +40,11 @@ unwind_sources = [ "UnwindRegistersRestore.S", "UnwindRegistersSave.S", "assembly.h", - "cet_unwind.h", "config.h", "dwarf2.h", "libunwind.cpp", "libunwind_ext.h", + "shadow_stack_unwind.h", ] if (current_os == "aix") { unwind_sources += [ "Unwind_AIXExtras.cpp" ] diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index e3095e2f3df26..f18e40a2a5744 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -118,6 +118,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVDeadRegisterDefinitions.cpp", "RISCVExpandAtomicPseudoInsts.cpp", "RISCVExpandPseudoInsts.cpp", + "RISCVFoldMemOffset.cpp", "RISCVFrameLowering.cpp", "RISCVGatherScatterLowering.cpp", "RISCVISelDAGToDAG.cpp", diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 5f762ec7f3514..e40a422d2db6c 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -169,8 +169,12 @@ def __init__(self, lit_config, config): features.add("target-aarch64") elif re.match(r"^arm.*", target_triple): features.add("target-arm") - if re.match(r'^ppc64le.*-linux', target_triple): - features.add('target=powerpc64le-linux') + elif re.match(r"^ppc64le.*-linux", target_triple): + features.add("target=powerpc64le-linux") + elif re.match(r"^riscv64-.*-elf", target_triple): + features.add("target-riscv64") + elif re.match(r"^riscv32-.*-elf.", target_triple): + features.add("target-riscv32") if not user_is_root(): features.add("non-root-user") diff --git a/llvm/utils/release/build_llvm_release.bat b/llvm/utils/release/build_llvm_release.bat index dd041d7d384ec..1c30673cf88bd 100755 --- a/llvm/utils/release/build_llvm_release.bat +++ b/llvm/utils/release/build_llvm_release.bat @@ -150,7 +150,7 @@ set common_cmake_flags=^ -DCMAKE_BUILD_TYPE=Release ^ -DLLVM_ENABLE_ASSERTIONS=OFF ^ -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON ^ - -DLLVM_TARGETS_TO_BUILD="AArch64;ARM;X86" ^ + -DLLVM_TARGETS_TO_BUILD="AArch64;ARM;X86;BPF;WebAssembly;RISCV;NVPTX" ^ -DLLVM_BUILD_LLVM_C_DYLIB=ON ^ -DCMAKE_INSTALL_UCRT_LIBRARIES=ON ^ -DPython3_FIND_REGISTRY=NEVER ^ diff --git a/llvm/utils/update_cc_test_checks.py b/llvm/utils/update_cc_test_checks.py index 7a4796eaabb3b..7529d480b35dd 100755 --- a/llvm/utils/update_cc_test_checks.py +++ b/llvm/utils/update_cc_test_checks.py @@ -368,7 +368,9 @@ def main(): # Store only filechecked runlines.
filecheck_run_list = [i for i in run_list if i[0]] - ginfo = common.make_ir_generalizer(version=ti.args.version) + ginfo = common.make_ir_generalizer( + ti.args.version, ti.args.check_globals == "none" + ) builder = common.FunctionTestBuilder( run_list=filecheck_run_list, flags=ti.args, diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py index b413c253e3975..4f83c02f19f6f 100755 --- a/llvm/utils/update_test_checks.py +++ b/llvm/utils/update_test_checks.py @@ -153,7 +153,9 @@ def main(): # now, we just ignore all but the last. prefix_list.append((check_prefixes, tool_cmd_args, preprocess_cmd)) - ginfo = common.make_ir_generalizer(ti.args.version) + ginfo = common.make_ir_generalizer( + ti.args.version, ti.args.check_globals == "none" + ) global_vars_seen_dict = {} builder = common.FunctionTestBuilder( run_list=prefix_list, diff --git a/mlir/docs/Dialects/Linalg/_index.md b/mlir/docs/Dialects/Linalg/_index.md index 976f0fd3c7e91..b519e4159f186 100644 --- a/mlir/docs/Dialects/Linalg/_index.md +++ b/mlir/docs/Dialects/Linalg/_index.md @@ -695,3 +695,4 @@ the same IR. ## Operations [include "Dialects/LinalgOps.md"] +[include "Dialects/LinalgRelayoutOps.md"] diff --git a/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h index f99eae379596b..b9e5573d6ad40 100644 --- a/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h +++ b/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h @@ -71,6 +71,18 @@ class IntegerRangeAnalysis unsigned firstIndex) override; }; +/// Succeeds if an op can be converted to its unsigned equivalent without +/// changing its semantics. This is the case when none of its operands or +/// results can be below 0 when analyzed from a signed perspective. +LogicalResult staticallyNonNegative(DataFlowSolver &solver, Operation *op); + +/// Succeeds when a value is statically non-negative in that it has a lower +/// bound on its value (if it is treated as signed) and that bound is +/// non-negative. +/// Note: the results of this query may not be accurate for `index` if you plan +/// to use a non-64-bit index.
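+///
+/// A minimal usage sketch (the exact set of analyses to load may vary, and
+/// `rootOp`/`value` are placeholders):
+///   DataFlowSolver solver;
+///   solver.load<DeadCodeAnalysis>();
+///   solver.load<IntegerRangeAnalysis>();
+///   if (failed(solver.initializeAndRun(rootOp)))
+///     return;
+///   bool nonNegative = succeeded(staticallyNonNegative(solver, value));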
+LogicalResult staticallyNonNegative(DataFlowSolver &solver, Value v); + } // end namespace dataflow } // end namespace mlir diff --git a/mlir/include/mlir/Dialect/EmitC/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/EmitC/IR/CMakeLists.txt index ac8c651cdced8..610170f5944eb 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/EmitC/IR/CMakeLists.txt @@ -1,5 +1,5 @@ add_mlir_dialect(EmitC emitc) -add_mlir_doc(EmitC EmitC Dialects/ -gen-dialect-doc) +add_mlir_doc(EmitC EmitC Dialects/ -gen-dialect-doc -dialect emitc) set(LLVM_TARGET_DEFINITIONS EmitCAttributes.td) mlir_tablegen(EmitCEnums.h.inc -gen-enum-decls) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index 72fae1bdbf461..c270b0898f865 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -170,6 +170,10 @@ def LLVM_SinOp : LLVM_UnaryIntrOpF<"sin">; def LLVM_CosOp : LLVM_UnaryIntrOpF<"cos">; def LLVM_TanOp : LLVM_UnaryIntrOpF<"tan">; +def LLVM_ASinOp : LLVM_UnaryIntrOpF<"asin">; +def LLVM_ACosOp : LLVM_UnaryIntrOpF<"acos">; +def LLVM_ATanOp : LLVM_UnaryIntrOpF<"atan">; + def LLVM_SinhOp : LLVM_UnaryIntrOpF<"sinh">; def LLVM_CoshOp : LLVM_UnaryIntrOpF<"cosh">; def LLVM_TanhOp : LLVM_UnaryIntrOpF<"tanh">; diff --git a/mlir/include/mlir/Dialect/Math/IR/MathOps.td b/mlir/include/mlir/Dialect/Math/IR/MathOps.td index 16ce4e2366c76..56370388dea87 100644 --- a/mlir/include/mlir/Dialect/Math/IR/MathOps.td +++ b/mlir/include/mlir/Dialect/Math/IR/MathOps.td @@ -736,6 +736,7 @@ def Math_IsFiniteOp : Math_FloatClassificationOp<"isfinite"> { %f = math.isfinite %a : f32 ``` }]; + let hasFolder = 1; } //===----------------------------------------------------------------------===// @@ -754,6 +755,7 @@ def Math_IsInfOp : Math_FloatClassificationOp<"isinf"> { %f = math.isinf %a : f32 ``` }]; + let hasFolder = 1; } //===----------------------------------------------------------------------===// @@ -772,6 +774,7 @@ def Math_IsNaNOp : Math_FloatClassificationOp<"isnan"> { %f = math.isnan %a : f32 ``` }]; + let hasFolder = 1; } @@ -791,6 +794,7 @@ def Math_IsNormalOp : Math_FloatClassificationOp<"isnormal"> { %f = math.isnormal %a : f32 ``` }]; + let hasFolder = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index c821e7b1527b4..fbbf817ecff98 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -644,11 +644,13 @@ def Vector_ExtractElementOp : Results<(outs AnyType:$result)> { let summary = "extractelement operation"; let description = [{ + Note: This operation is deprecated. Please use vector.extract instead. + Takes a 0-D or 1-D vector and an optional dynamic index position and extracts the scalar at that position. Note that this instruction resembles vector.extract, but is restricted to - 0-D and 1-D vectors and relaxed to dynamic indices. + 0-D and 1-D vectors. If the vector is 0-D, the position must be std::nullopt. @@ -834,11 +836,13 @@ def Vector_InsertElementOp : Results<(outs AnyVectorOfAnyRank:$result)> { let summary = "insertelement operation"; let description = [{ + Note: This operation is deprecated. Please use vector.insert instead.
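+
+    A 1-D migration might look like the following sketch (using the current
+    `vector.insert` syntax):
+
+    ```mlir
+    // Deprecated:
+    %r = vector.insertelement %s, %v[%i : index] : vector<4xf32>
+    // Preferred:
+    %r = vector.insert %s, %v[%i] : f32 into vector<4xf32>
+    ```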
+ Takes a scalar source, a 0-D or 1-D destination vector and a dynamic index position and inserts the source into the destination at the proper position. Note that this instruction resembles vector.insert, but is restricted to 0-D - and 1-D vectors and relaxed to dynamic indices. + and 1-D vectors. It is meant to be closer to LLVM's version: https://llvm.org/docs/LangRef.html#insertelement-instruction diff --git a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp index 722f4df18e981..c7a950d9a8871 100644 --- a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp @@ -37,6 +37,24 @@ using namespace mlir; using namespace mlir::dataflow; +namespace mlir::dataflow { +LogicalResult staticallyNonNegative(DataFlowSolver &solver, Value v) { + auto *result = solver.lookupState<IntegerValueRangeLattice>(v); + if (!result || result->getValue().isUninitialized()) + return failure(); + const ConstantIntRanges &range = result->getValue().getValue(); + return success(range.smin().isNonNegative()); +} + +LogicalResult staticallyNonNegative(DataFlowSolver &solver, Operation *op) { + auto nonNegativePred = [&solver](Value v) -> bool { + return succeeded(staticallyNonNegative(solver, v)); + }; + return success(llvm::all_of(op->getOperands(), nonNegativePred) && + llvm::all_of(op->getResults(), nonNegativePred)); +} +} // namespace mlir::dataflow + void IntegerValueRangeLattice::onUpdate(DataFlowSolver *solver) const { Lattice::onUpdate(solver); diff --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp index b5f1d2e27c9ba..2982757a6c5ce 100644 --- a/mlir/lib/AsmParser/Parser.cpp +++ b/mlir/lib/AsmParser/Parser.cpp @@ -820,6 +820,12 @@ class OperationParser : public Parser { /// their first reference, to allow checking for use of undefined values. DenseMap<Value, SMLoc> forwardRefPlaceholders; + /// Operations that define the placeholders. These are kept until the end + /// of the lifetime of the parser because some custom parsers may store + /// references to them in local state and use them after forward references + /// have been resolved. + DenseSet<Operation *> forwardRefOps; + /// Deffered locations: when parsing `loc(#loc42)` we add an entry to this /// map. After parsing the definition `#loc42 = ...` we'll patch back users /// of this location. @@ -847,11 +853,11 @@ OperationParser::OperationParser(ParserState &state, ModuleOp topLevelOp) } OperationParser::~OperationParser() { - for (auto &fwd : forwardRefPlaceholders) { + for (Operation *op : forwardRefOps) { // Drop all uses of undefined forward declared reference and destroy // defining operation. - fwd.first.dropAllUses(); - fwd.first.getDefiningOp()->destroy(); + op->dropAllUses(); + op->destroy(); } for (const auto &scope : forwardRef) { for (const auto &fwd : scope) { @@ -1007,7 +1013,6 @@ ParseResult OperationParser::addDefinition(UnresolvedOperand useInfo, // the actual definition instead, delete the forward ref, and remove it // from our set of forward references we track.
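+    // Note that the placeholder op itself is no longer destroyed here: it
+    // stays in forwardRefOps and is destroyed in ~OperationParser(), since a
+    // custom parser may still hold a reference to it.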
existing.replaceAllUsesWith(value); - existing.getDefiningOp()->destroy(); forwardRefPlaceholders.erase(existing); // If a definition of the value already exists, replace it in the assembly @@ -1194,6 +1199,7 @@ Value OperationParser::createForwardRefPlaceholder(SMLoc loc, Type type) { /*attributes=*/std::nullopt, /*properties=*/nullptr, /*successors=*/{}, /*numRegions=*/0); forwardRefPlaceholders[op->getResult(0)] = loc; + forwardRefOps.insert(op); return op->getResult(0); } diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index cfa434699cdef..c3b3a78abe7f7 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -9,6 +9,7 @@ #include "GPUOpsLowering.h" #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" +#include "mlir/Conversion/LLVMCommon/VectorPattern.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" @@ -586,22 +587,15 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite( return success(); } -/// Unrolls op if it's operating on vectors. -LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands, - ConversionPatternRewriter &rewriter, - const LLVMTypeConverter &converter) { +/// Helper for impl::scalarizeVectorOp. Scalarizes vectors to elements. +/// Used either directly (for ops on 1D vectors) or as the callback passed to +/// detail::handleMultidimensionalVectors (for ops on higher-rank vectors). +static Value scalarizeVectorOpHelper(Operation *op, ValueRange operands, + Type llvm1DVectorTy, + ConversionPatternRewriter &rewriter, + const LLVMTypeConverter &converter) { TypeRange operandTypes(operands); - if (llvm::none_of(operandTypes, llvm::IsaPred)) { - return rewriter.notifyMatchFailure(op, "expected vector operand"); - } - if (op->getNumRegions() != 0 || op->getNumSuccessors() != 0) - return rewriter.notifyMatchFailure(op, "expected no region/successor"); - if (op->getNumResults() != 1) - return rewriter.notifyMatchFailure(op, "expected single result"); - VectorType vectorType = dyn_cast(op->getResult(0).getType()); - if (!vectorType) - return rewriter.notifyMatchFailure(op, "expected vector result"); - + VectorType vectorType = cast(llvm1DVectorTy); Location loc = op->getLoc(); Value result = rewriter.create(loc, vectorType); Type indexType = converter.convertType(rewriter.getIndexType()); @@ -621,9 +615,32 @@ LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands, result = rewriter.create( loc, result, scalarOp->getResult(0), index); } + return result; +} - rewriter.replaceOp(op, result); - return success(); +/// Unrolls op to array/vector elements. 
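+/// 1-D vector operands are scalarized directly; higher-rank vectors, which
+/// the LLVM type converter represents as nested !llvm.array of 1-D vectors,
+/// are unrolled by passing scalarizeVectorOpHelper to
+/// LLVM::detail::handleMultidimensionalVectors as the per-1-D-vector callback.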
+LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands, + ConversionPatternRewriter &rewriter, + const LLVMTypeConverter &converter) { + TypeRange operandTypes(operands); + if (llvm::any_of(operandTypes, llvm::IsaPred)) { + VectorType vectorType = cast(op->getResultTypes()[0]); + rewriter.replaceOp(op, scalarizeVectorOpHelper(op, operands, vectorType, + rewriter, converter)); + return success(); + } + + if (llvm::any_of(operandTypes, llvm::IsaPred)) { + return LLVM::detail::handleMultidimensionalVectors( + op, operands, converter, + [&](Type llvm1DVectorTy, ValueRange operands) -> Value { + return scalarizeVectorOpHelper(op, operands, llvm1DVectorTy, rewriter, + converter); + }, + rewriter); + } + + return rewriter.notifyMatchFailure(op, "no llvm.array or vector to unroll"); } static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space) { diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index e73a74845d2b6..bd2fd020f684b 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -172,13 +172,13 @@ struct GPUReturnOpLowering : public ConvertOpToLLVMPattern { }; namespace impl { -/// Unrolls op if it's operating on vectors. +/// Unrolls op to array/vector elements. LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter); } // namespace impl -/// Rewriting that unrolls SourceOp to scalars if it's operating on vectors. +/// Unrolls SourceOp to array/vector elements. template struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern { public: @@ -191,6 +191,7 @@ struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern { *this->getTypeConverter()); } }; + } // namespace mlir #endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index 4e02559a08949..82b96e9876a6f 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -2001,8 +2001,15 @@ static LogicalResult generateCopy( } SmallVector lbMaps(rank), ubMaps(rank); - for (unsigned i = 0; i < rank; ++i) + for (unsigned i = 0; i < rank; ++i) { region.getLowerAndUpperBound(i, lbMaps[i], ubMaps[i]); + if (lbMaps[i].getNumResults() == 0 || ubMaps[i].getNumResults() == 0) { + LLVM_DEBUG(llvm::dbgs() + << "Missing lower or upper bound for region along dimension: " + << i << '\n'); + return failure(); + } + } const FlatAffineValueConstraints *cst = region.getConstraints(); // 'regionSymbols' hold values that this memory region is symbolic/parametric diff --git a/mlir/lib/Dialect/Arith/Transforms/UnsignedWhenEquivalent.cpp b/mlir/lib/Dialect/Arith/Transforms/UnsignedWhenEquivalent.cpp index 8922e93e399f9..dabfffda390bb 100644 --- a/mlir/lib/Dialect/Arith/Transforms/UnsignedWhenEquivalent.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/UnsignedWhenEquivalent.cpp @@ -27,32 +27,6 @@ using namespace mlir; using namespace mlir::arith; using namespace mlir::dataflow; -/// Succeeds when a value is statically non-negative in that it has a lower -/// bound on its value (if it is treated as signed) and that bound is -/// non-negative. -// TODO: IntegerRangeAnalysis internally assumes index is 64bit and this pattern -// relies on this. These transformations may not be valid for 32bit index, -// need more investigation. 
-static LogicalResult staticallyNonNegative(DataFlowSolver &solver, Value v) { - auto *result = solver.lookupState(v); - if (!result || result->getValue().isUninitialized()) - return failure(); - const ConstantIntRanges &range = result->getValue().getValue(); - return success(range.smin().isNonNegative()); -} - -/// Succeeds if an op can be converted to its unsigned equivalent without -/// changing its semantics. This is the case when none of its openands or -/// results can be below 0 when analyzed from a signed perspective. -static LogicalResult staticallyNonNegative(DataFlowSolver &solver, - Operation *op) { - auto nonNegativePred = [&solver](Value v) -> bool { - return succeeded(staticallyNonNegative(solver, v)); - }; - return success(llvm::all_of(op->getOperands(), nonNegativePred) && - llvm::all_of(op->getResults(), nonNegativePred)); -} - /// Succeeds when the comparison predicate is a signed operation and all the /// operands are non-negative, indicating that the cmpi operation `op` can have /// its predicate changed to an unsigned equivalent. @@ -103,6 +77,10 @@ class DataFlowListener : public RewriterBase::Listener { DataFlowSolver &s; }; +// TODO: IntegerRangeAnalysis internally assumes index is 64bit and this pattern +// (via staticallyNonNegative) relies on this. These transformations may not be +// valid for 32bit index, need more investigation. + template struct ConvertOpToUnsigned final : OpRewritePattern { ConvertOpToUnsigned(MLIRContext *context, DataFlowSolver &s) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index bfcba40555a7c..ccf8f72b2b230 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -1932,6 +1932,18 @@ OpFoldResult LLVM::ExtractValueOp::fold(FoldAdaptor adaptor) { getContainerMutable().set(extractValueOp.getContainer()); return getResult(); } + + { + DenseElementsAttr constval; + matchPattern(getContainer(), m_Constant(&constval)); + if (constval && constval.getElementType() == getType()) { + if (isa(constval)) + return constval.getSplatValue(); + if (getPosition().size() == 1) + return constval.getValues()[getPosition()[0]]; + } + } + auto insertValueOp = getContainer().getDefiningOp(); OpFoldResult result = {}; while (insertValueOp) { diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp index d7af282381627..53cc23d7967bf 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -1547,10 +1547,9 @@ static Value getCollapsedOpOperand(Location loc, LinalgOp op, /// Modify the `linalg.index` operations in the original generic op, to its /// value in the collapsed operation. 
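/// For a group of folded dimensions (d0, d1) with loop sizes (N0, N1), the
/// original indices are recovered from the collapsed index %d by repeated
/// remainder/division over the folded loop bounds, roughly:
///   %idx1 = arith.remui %d, %N1
///   %idx0 = arith.divui %d, %N1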
-void generateCollapsedIndexingRegion(Location loc, Block *block, - const CollapsingInfo &collapsingInfo, - ValueRange loopRange, - RewriterBase &rewriter) { +static void generateCollapsedIndexingRegion( + Location loc, Block *block, const CollapsingInfo &collapsingInfo, + ArrayRef loopRange, RewriterBase &rewriter) { OpBuilder::InsertionGuard g(rewriter); rewriter.setInsertionPointToStart(block); @@ -1571,10 +1570,12 @@ void generateCollapsedIndexingRegion(Location loc, Block *block, Value newIndexVal = rewriter.create(loc, foldedDims.index()); for (auto dim : llvm::reverse(foldedDimsRef.drop_front())) { + Value loopDim = + getValueOrCreateConstantIndexOp(rewriter, loc, loopRange[dim]); indexReplacementVals[dim] = - rewriter.create(loc, newIndexVal, loopRange[dim]); + rewriter.createOrFold(loc, newIndexVal, loopDim); newIndexVal = - rewriter.create(loc, newIndexVal, loopRange[dim]); + rewriter.createOrFold(loc, newIndexVal, loopDim); } indexReplacementVals[foldedDims.value().front()] = newIndexVal; } @@ -1721,14 +1722,13 @@ FailureOr mlir::linalg::collapseOpIterationDims( LinalgOp collapsedOp = createCollapsedOp(op, collapsingInfo, rewriter); Location loc = op->getLoc(); + SmallVector loopBound = + llvm::map_to_vector(loopRanges, [](Range range) { return range.size; }); + if (collapsedOp.hasIndexSemantics()) { // Collect the loop range of the generic op. OpBuilder::InsertionGuard g(rewriter); rewriter.setInsertionPoint(collapsedOp); - SmallVector loopBound = - llvm::map_to_vector(loopRanges, [&](Range range) { - return getValueOrCreateConstantIndexOp(rewriter, loc, range.size); - }); generateCollapsedIndexingRegion(loc, &collapsedOp->getRegion(0).front(), collapsingInfo, loopBound, rewriter); } @@ -1746,15 +1746,22 @@ FailureOr mlir::linalg::collapseOpIterationDims( op.getIndexingMapMatchingResult(originalResult.value()); SmallVector reassociation = getOperandReassociation(indexingMap, collapsingInfo); + assert( + indexingMap.isProjectedPermutation() && + "Expected indexing map to be a projected permutation for collapsing"); + SmallVector resultShape = + applyPermutationMap(indexingMap, ArrayRef(loopBound)); Value result; if (isa(collapsedOpResult.getType())) { MemRefType expandShapeResultType = MemRefType::get( originalResultType.getShape(), originalResultType.getElementType()); result = rewriter.create( - loc, expandShapeResultType, collapsedOpResult, reassociation); + loc, expandShapeResultType, collapsedOpResult, reassociation, + resultShape); } else { result = rewriter.create( - loc, originalResultType, collapsedOpResult, reassociation); + loc, originalResultType, collapsedOpResult, reassociation, + resultShape); } results.push_back(result); } else { diff --git a/mlir/lib/Dialect/Math/IR/MathOps.cpp b/mlir/lib/Dialect/Math/IR/MathOps.cpp index 9c4d88e2191ce..26441a9d78658 100644 --- a/mlir/lib/Dialect/Math/IR/MathOps.cpp +++ b/mlir/lib/Dialect/Math/IR/MathOps.cpp @@ -579,6 +579,70 @@ OpFoldResult math::ExpM1Op::fold(FoldAdaptor adaptor) { }); } +//===----------------------------------------------------------------------===// +// IsFiniteOp folder +//===----------------------------------------------------------------------===// + +OpFoldResult math::IsFiniteOp::fold(FoldAdaptor adaptor) { + if (auto val = dyn_cast_or_null(adaptor.getOperand())) { + return BoolAttr::get(val.getContext(), val.getValue().isFinite()); + } + if (auto splat = dyn_cast_or_null(adaptor.getOperand())) { + return DenseElementsAttr::get( + cast(getType()), + APInt(1, splat.getSplatValue().isFinite())); + } + 
+  return {};
+}
+
+//===----------------------------------------------------------------------===//
+// IsInfOp folder
+//===----------------------------------------------------------------------===//
+
+OpFoldResult math::IsInfOp::fold(FoldAdaptor adaptor) {
+  if (auto val = dyn_cast_or_null<FloatAttr>(adaptor.getOperand())) {
+    return BoolAttr::get(val.getContext(), val.getValue().isInfinity());
+  }
+  if (auto splat = dyn_cast_or_null<SplatElementsAttr>(adaptor.getOperand())) {
+    return DenseElementsAttr::get(
+        cast<ShapedType>(getType()),
+        APInt(1, splat.getSplatValue<APFloat>().isInfinity()));
+  }
+  return {};
+}
+
+//===----------------------------------------------------------------------===//
+// IsNaNOp folder
+//===----------------------------------------------------------------------===//
+
+OpFoldResult math::IsNaNOp::fold(FoldAdaptor adaptor) {
+  if (auto val = dyn_cast_or_null<FloatAttr>(adaptor.getOperand())) {
+    return BoolAttr::get(val.getContext(), val.getValue().isNaN());
+  }
+  if (auto splat = dyn_cast_or_null<SplatElementsAttr>(adaptor.getOperand())) {
+    return DenseElementsAttr::get(
+        cast<ShapedType>(getType()),
+        APInt(1, splat.getSplatValue<APFloat>().isNaN()));
+  }
+  return {};
+}
+
+//===----------------------------------------------------------------------===//
+// IsNormalOp folder
+//===----------------------------------------------------------------------===//
+
+OpFoldResult math::IsNormalOp::fold(FoldAdaptor adaptor) {
+  if (auto val = dyn_cast_or_null<FloatAttr>(adaptor.getOperand())) {
+    return BoolAttr::get(val.getContext(), val.getValue().isNormal());
+  }
+  if (auto splat = dyn_cast_or_null<SplatElementsAttr>(adaptor.getOperand())) {
+    return DenseElementsAttr::get(
+        cast<ShapedType>(getType()),
+        APInt(1, splat.getSplatValue<APFloat>().isNormal()));
+  }
+  return {};
+}
+
 //===----------------------------------------------------------------------===//
 // TanOp folder
 //===----------------------------------------------------------------------===//
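The four new folders above are structurally identical: a scalar FloatAttr constant folds to a BoolAttr, and a splat vector constant folds to an i1 splat carrying the same classification bit. A hedged sketch of that shared skeleton, with a generic predicate standing in for isFinite/isInfinity/isNaN/isNormal (illustrative only; the patch deliberately spells out each folder):

    // `pred` receives the constant APFloat and returns the classification bit.
    template <typename PredT>
    static OpFoldResult foldFloatClassification(Attribute operand,
                                                Type resultType, PredT pred) {
      if (auto val = dyn_cast_or_null<FloatAttr>(operand))
        return BoolAttr::get(val.getContext(), pred(val.getValue()));
      if (auto splat = dyn_cast_or_null<SplatElementsAttr>(operand))
        return DenseElementsAttr::get(
            cast<ShapedType>(resultType),
            APInt(1, pred(splat.getSplatValue<APFloat>())));
      return {};
    }

A call site would then read foldFloatClassification(adaptor.getOperand(), getType(), [](const APFloat &v) { return v.isNaN(); }).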
diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp
index 448141735ba7f..1cfb866db0b51 100644
--- a/mlir/lib/Dialect/SCF/IR/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp
@@ -499,15 +499,27 @@ ParseResult ForOp::parse(OpAsmParser &parser, OperationState &result) {
   else if (parser.parseType(type))
     return failure();
 
-  // Resolve input operands.
+  // Set block argument types, so that they are known when parsing the region.
   regionArgs.front().type = type;
+  for (auto [iterArg, type] :
+       llvm::zip_equal(llvm::drop_begin(regionArgs), result.types))
+    iterArg.type = type;
+
+  // Parse the body region.
+  Region *body = result.addRegion();
+  if (parser.parseRegion(*body, regionArgs))
+    return failure();
+  ForOp::ensureTerminator(*body, builder, result.location);
+
+  // Resolve input operands. This should be done after parsing the region to
+  // catch invalid IR where operands were defined inside of the region.
   if (parser.resolveOperand(lb, type, result.operands) ||
       parser.resolveOperand(ub, type, result.operands) ||
       parser.resolveOperand(step, type, result.operands))
     return failure();
   if (hasIterArgs) {
-    for (auto argOperandType :
-         llvm::zip(llvm::drop_begin(regionArgs), operands, result.types)) {
+    for (auto argOperandType : llvm::zip_equal(llvm::drop_begin(regionArgs),
+                                               operands, result.types)) {
       Type type = std::get<2>(argOperandType);
       std::get<0>(argOperandType).type = type;
       if (parser.resolveOperand(std::get<1>(argOperandType), type,
@@ -516,13 +528,6 @@ ParseResult ForOp::parse(OpAsmParser &parser, OperationState &result) {
     }
   }
 
-  // Parse the body region.
-  Region *body = result.addRegion();
-  if (parser.parseRegion(*body, regionArgs))
-    return failure();
-
-  ForOp::ensureTerminator(*body, builder, result.location);
-
   // Parse the optional attribute list.
   if (parser.parseOptionalAttrDict(result.attributes))
     return failure();
diff --git a/mlir/lib/Interfaces/InferIntRangeInterface.cpp b/mlir/lib/Interfaces/InferIntRangeInterface.cpp
index 63658518dd4a3..1801e3f7c52fd 100644
--- a/mlir/lib/Interfaces/InferIntRangeInterface.cpp
+++ b/mlir/lib/Interfaces/InferIntRangeInterface.cpp
@@ -125,8 +125,11 @@ std::optional<APInt> ConstantIntRanges::getConstantValue() const {
 }
 
 raw_ostream &mlir::operator<<(raw_ostream &os, const ConstantIntRanges &range) {
-  return os << "unsigned : [" << range.umin() << ", " << range.umax()
-            << "] signed : [" << range.smin() << ", " << range.smax() << "]";
+  os << "unsigned : [";
+  range.umin().print(os, /*isSigned*/ false);
+  os << ", ";
+  range.umax().print(os, /*isSigned*/ false);
+  return os << "] signed : [" << range.smin() << ", " << range.smax() << "]";
 }
 
 IntegerValueRange IntegerValueRange::getMaxRange(Value value) {
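The printing change above exists because LLVM's stream operator for APInt formats values as signed, so a large unsigned bound used to render as a negative number. A small standalone illustration (assumed values, not from the patch):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"

    void printBound(llvm::raw_ostream &os) {
      llvm::APInt umax(/*numBits=*/8, 0xFF);
      os << umax << "\n";                 // operator<< prints signed: -1
      umax.print(os, /*isSigned=*/false); // prints 255, the intended bound
      os << "\n";
    }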
diff --git a/mlir/python/mlir/dialects/LinalgOps.td b/mlir/python/mlir/dialects/LinalgOps.td
index b7658c85a9c44..89fb3f219e858 100644
--- a/mlir/python/mlir/dialects/LinalgOps.td
+++ b/mlir/python/mlir/dialects/LinalgOps.td
@@ -11,5 +11,6 @@
 include "mlir/Dialect/Linalg/IR/LinalgOps.td"
 include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.td"
+include "mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td"
 
 #endif
diff --git a/mlir/python/mlir/dialects/linalg/__init__.py b/mlir/python/mlir/dialects/linalg/__init__.py
index c5fbb833ee399..63586a5bb8bbb 100644
--- a/mlir/python/mlir/dialects/linalg/__init__.py
+++ b/mlir/python/mlir/dialects/linalg/__init__.py
@@ -58,7 +58,11 @@
 from .opdsl.ops.core_named_ops import *
 
 from ...ir import *
-from .._ods_common import get_op_result_or_value as _get_op_result_or_value
+from .._ods_common import (
+    get_op_result_or_value as _get_op_result_or_value,
+    get_op_result_or_op_results as _get_op_result_or_op_results,
+    _dispatch_mixed_values,
+)
 from ...extras.meta import region_op
@@ -149,7 +153,7 @@ def __init__(
 generic = region_op(GenericOp_, terminator=YieldOp)
 
 
-def create_op(
+def _create_matmul_like_op(
     op_type,
     *ins: Union[Operation, OpView, Value],
     outs: Sequence[Union[Operation, OpView, Value]],
@@ -179,7 +183,11 @@ def matmul(
     indexing_maps: Optional[Sequence[AffineMapAttr]] = None,
     cast: Optional[Union[TypeFn, Attribute]] = None,
 ):
-    return create_op(MatmulOp, *ins, outs=outs, indexing_maps=indexing_maps, cast=cast)
+    return _get_op_result_or_op_results(
+        _create_matmul_like_op(
+            MatmulOp, *ins, outs=outs, indexing_maps=indexing_maps, cast=cast
+        )
+    )
 
 
 def batch_matmul(
@@ -188,8 +196,10 @@ def batch_matmul(
     *ins: Union[Operation, OpView, Value],
     outs: Sequence[Union[Operation, OpView, Value]],
     indexing_maps: Optional[Sequence[AffineMapAttr]] = None,
     cast: Optional[Union[TypeFn, Attribute]] = None,
 ):
-    return create_op(
-        BatchMatmulOp, *ins, outs=outs, indexing_maps=indexing_maps, cast=cast
+    return _get_op_result_or_op_results(
+        _create_matmul_like_op(
+            BatchMatmulOp, *ins, outs=outs, indexing_maps=indexing_maps, cast=cast
+        )
     )
 
 
@@ -199,6 +209,72 @@ def contract(
     *ins: Union[Operation, OpView, Value],
     outs: Sequence[Union[Operation, OpView, Value]],
     indexing_maps: Sequence[AffineMapAttr],
     cast: Optional[Union[TypeFn, Attribute]] = None,
 ):
-    return create_op(
-        ContractOp, *ins, outs=outs, indexing_maps=indexing_maps, cast=cast
+    return _get_op_result_or_op_results(
+        _create_matmul_like_op(
+            ContractOp, *ins, outs=outs, indexing_maps=indexing_maps, cast=cast
+        )
+    )
+
+
+def pack(
+    source,
+    dest,
+    inner_dims_pos,
+    inner_tiles,
+    *,
+    padding_value=None,
+    outer_dims_perm=None,
+    loc=None,
+    ip=None,
+) -> ir.Value:
+    (
+        dynamic_inner_tiles,
+        # packed here means %1:2 packing (results packing)
+        _inner_tiles,
+        static_inner_tiles,
+    ) = _dispatch_mixed_values(inner_tiles)
+
+    return _get_op_result_or_op_results(
+        PackOp(
+            source=source,
+            dest=dest,
+            inner_dims_pos=inner_dims_pos,
+            inner_tiles=dynamic_inner_tiles,
+            static_inner_tiles=static_inner_tiles,
+            padding_value=padding_value,
+            outer_dims_perm=outer_dims_perm,
+            loc=loc,
+            ip=ip,
+        )
+    )
+
+
+def unpack(
+    source,
+    dest,
+    inner_dims_pos,
+    inner_tiles,
+    *,
+    outer_dims_perm=None,
+    loc=None,
+    ip=None,
+) -> ir.Value:
+    (
+        dynamic_inner_tiles,
+        # packed here means %1:2 packing (results packing)
+        _inner_tiles,
+        static_inner_tiles,
+    ) = _dispatch_mixed_values(inner_tiles)
+
+    return _get_op_result_or_op_results(
+        UnPackOp(
+            source=source,
+            dest=dest,
+            inner_dims_pos=inner_dims_pos,
+            inner_tiles=dynamic_inner_tiles,
+            static_inner_tiles=static_inner_tiles,
+            outer_dims_perm=outer_dims_perm,
+            loc=loc,
+            ip=ip,
+        )
     )
diff --git a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
index e4b2f01d6544a..9448304f11dbd 100644
--- a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
+++ b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
@@ -513,3 +513,54 @@ module {
     "test.possible_terminator"() : () -> ()
   }) : () -> ()
 }
+
+// -----
+
+module @test_module {
+  // CHECK: llvm.func @__ocml_sin_f16(f16) -> f16
+  // CHECK-LABEL: func @math_sin_vector_1d
+  func.func @math_sin_vector_1d(%arg : vector<4xf16>) -> vector<4xf16> {
+    // CHECK: llvm.extractelement {{.*}} : vector<4xf16>
+    // CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
+    // CHECK: llvm.insertelement {{.*}} : vector<4xf16>
+    // CHECK: llvm.extractelement {{.*}} : vector<4xf16>
+    // CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
+    // CHECK: llvm.insertelement {{.*}} : vector<4xf16>
+    // CHECK: llvm.extractelement {{.*}} : vector<4xf16>
+    // CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
+    // CHECK: llvm.insertelement {{.*}} : vector<4xf16>
+    // CHECK: llvm.extractelement {{.*}} : vector<4xf16>
+    // CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
+    // CHECK: llvm.insertelement {{.*}} : vector<4xf16>
+    %result = math.sin %arg : vector<4xf16>
+    func.return %result : vector<4xf16>
+  }
+}
+
+// -----
+
+module @test_module {
+  // CHECK: llvm.func @__ocml_sin_f16(f16) -> f16
+  // CHECK-LABEL: func @math_sin_vector_2d
+  func.func @math_sin_vector_2d(%arg : vector<2x2xf16>) -> vector<2x2xf16> {
+    // CHECK: builtin.unrealized_conversion_cast {{.*}} : vector<2x2xf16> to !llvm.array<2 x vector<2xf16>>
+    // CHECK: llvm.extractvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
+    // CHECK: llvm.extractelement {{.*}} : vector<2xf16>
+    // CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
+    // CHECK: llvm.insertelement {{.*}} : vector<2xf16>
+    // CHECK: llvm.extractelement {{.*}} : vector<2xf16>
+    // CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
+    // CHECK: llvm.insertelement {{.*}} : vector<2xf16>
+    // CHECK: llvm.insertvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
+    // CHECK: llvm.extractvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
+    // CHECK: llvm.extractelement {{.*}} : vector<2xf16>
+    // CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
+    // CHECK: llvm.insertelement {{.*}} : vector<2xf16>
+    // CHECK: llvm.extractelement {{.*}} : vector<2xf16>
+    // CHECK: llvm.call @__ocml_sin_f16(%{{.*}}) : (f16) -> f16
+    // CHECK: llvm.insertelement {{.*}} : vector<2xf16>
+    // CHECK: llvm.insertvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
+    %result = math.sin %arg : vector<2x2xf16>
+    func.return %result : vector<2x2xf16>
+  }
+}
diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir
index 330cf92bafba4..5615acae5ecc4 100644
--- a/mlir/test/Dialect/Affine/affine-data-copy.mlir
+++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir
@@ -300,14 +300,15 @@ func.func @affine_parallel(%85:memref<2x5x4x2xi64>) {
       }
     }
   }
-  // CHECK: affine.for
-  // CHECK-NEXT: affine.for %{{.*}} = 0 to 5
-  // CHECK-NEXT: affine.for %{{.*}} = 0 to 4
-  // CHECK-NEXT: affine.for %{{.*}} = 0 to 2
-
+  // Lower and upper bounds for the region can't be determined for the outermost
+  // dimension. No fast buffer generation.
   // CHECK: affine.for
   // CHECK-NEXT: affine.parallel
   // CHECK-NEXT: affine.parallel
+  // CHECK-NEXT: affine.for
+  // CHECK-NOT: affine.for
+
+  return
 }
diff --git a/mlir/test/Dialect/LLVMIR/canonicalize.mlir b/mlir/test/Dialect/LLVMIR/canonicalize.mlir
index c509cd82227c2..a793caca064ec 100644
--- a/mlir/test/Dialect/LLVMIR/canonicalize.mlir
+++ b/mlir/test/Dialect/LLVMIR/canonicalize.mlir
@@ -96,6 +96,28 @@ llvm.func @fold_extract_extractvalue(%arr: !llvm.struct<(i64, array<1 x ptr<1>>)
 
 // -----
 
+// CHECK-LABEL: fold_extract_const
+// CHECK-NOT: extractvalue
+// CHECK: llvm.mlir.constant(5.000000e-01 : f64)
+llvm.func @fold_extract_const() -> f64 {
+  %a = llvm.mlir.constant(dense<[-8.900000e+01, 5.000000e-01]> : tensor<2xf64>) : !llvm.array<2 x f64>
+  %b = llvm.extractvalue %a[1] : !llvm.array<2 x f64>
+  llvm.return %b : f64
+}
+
+// -----
+
+// CHECK-LABEL: fold_extract_splat
+// CHECK-NOT: extractvalue
+// CHECK: llvm.mlir.constant(-8.900000e+01 : f64)
+llvm.func @fold_extract_splat() -> f64 {
+  %a = llvm.mlir.constant(dense<-8.900000e+01> : tensor<2xf64>) : !llvm.array<2 x f64>
+  %b = llvm.extractvalue %a[1] : !llvm.array<2 x f64>
+  llvm.return %b : f64
+}
+
+// -----
+
 // CHECK-LABEL: fold_bitcast
 // CHECK-SAME: %[[ARG:[[:alnum:]]+]]
 // CHECK-NEXT: llvm.return %[[ARG]]
diff --git a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir
index 7db997cd4c0b5..89734e7542801 100644
--- a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir
+++ b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir
@@ -225,6 +225,38 @@ func.func @fuse_by_collapsing_dynamic(%arg0 : tensor,
 
 // -----
 
+#map0 = affine_map<(d0, d1) -> (d0, d1)>
+func.func @fuse_by_collapsing_dynamic_2(%arg0 : tensor<?xf32>, %sz0: index, %sz1: index) -> tensor<?x?xf32> {
+  %0 = tensor.expand_shape %arg0 [[0, 1]] output_shape [%sz0, %sz1] : tensor<?xf32> into tensor<?x?xf32>
+  %init = tensor.empty(%sz1, %sz0) : tensor<?x?xf32>
+  %1 = linalg.generic {
+      indexing_maps = [#map0, #map0],
+      iterator_types = ["parallel", "parallel"]}
+      ins(%0 : tensor<?x?xf32>)
+      outs(%init : tensor<?x?xf32>) {
+    ^bb0(%b0 : f32, %b1 : f32):
+      %out = arith.negf %b0 : f32
+      linalg.yield %out : f32
+  } -> tensor<?x?xf32>
+  return %1 : tensor<?x?xf32>
+}
+
+// CHECK-LABEL: func @fuse_by_collapsing_dynamic_2
+// CHECK-SAME: %[[ARG0:.+]]: tensor<?xf32>
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]]
+// CHECK-DAG: %[[DIM0:.+]] = tensor.dim %[[EXPANDED]], %[[C0]]
+// CHECK-DAG: %[[DIM1:.+]] = tensor.dim %[[EXPANDED]], %[[C1]]
+// CHECK: %[[OUT:.+]] = linalg.generic
+// CHECK-SAME: ins(%[[ARG0]] : tensor<?xf32>)
+// CHECK-SAME: outs(%{{.*}} : tensor<?xf32>)
+// CHECK: %[[EXPANDED_1:.+]] = tensor.expand_shape %[[OUT]]
+// CHECK-SAME: output_shape [%[[DIM0]], %[[DIM1]]]
+// CHECK: return %[[EXPANDED_1]]
+
+// -----
+
 #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3)>
 func.func @fuse_reductions(%arg0 : tensor<2x?x5xf32>, %arg1 : tensor<2x5xf32>, %sz0: index) -> tensor<2x5xf32> {
@@ -425,10 +457,11 @@ func.func @fuse_only_one_reassociation(%arg0 : tensor<?x?xf32>, %arg1 : tensor<4
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
 // CHECK: func @fuse_only_one_reassociation
 // CHECK-SAME: (%[[ARG0:.+]]: tensor<?x?xf32>, %[[ARG1:.+]]: tensor<4x?x?x8xf32>, %[[SZ0:.+]]: index, %[[SZ1:.+]]: index)
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
 // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
 // CHECK-DAG: %[[EXPAND_ARG0:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2, 3]{{\]}} output_shape [%[[SZ0]], 4, %[[SZ1]], 8]
+// CHECK-DAG: %[[DIM:.+]] = tensor.dim %[[EXPAND_ARG0]], %[[C0]] : tensor<?x4x?x8xf32>
+// CHECK-DAG: %[[DIM_2:.+]] = tensor.dim %[[EXPAND_ARG0]], %[[C2]] : tensor<?x4x?x8xf32>
 // CHECK-DAG: %[[COLLAPSE_ARG0:.+]] = tensor.collapse_shape %[[EXPAND_ARG0]] {{\[}}[0], [1], [2, 3]{{\]}}
 // CHECK-DAG: %[[COLLAPSE_ARG1_0:.+]] = tensor.collapse_shape %[[ARG1]] {{\[}}[0], [1], [2, 3]{{\]}}
 // CHECK-DAG: %[[COLLAPSE_ARG1_1:.+]] = tensor.collapse_shape %[[ARG1]] {{\[}}[0], [1], [2, 3]{{\]}}
@@ -437,10 +470,7 @@ func.func @fuse_only_one_reassociation(%arg0 : tensor<?x?xf32>, %arg1 : tensor<4
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]
 // CHECK-SAME: ins(%[[COLLAPSE_ARG0]], %[[COLLAPSE_ARG1_0]] :
 // CHECK-SAME: outs(%[[COLLAPSE_ARG1_1]] :
-// CHECK: %[[DIM:.+]] = tensor.dim %[[GENERIC]], %[[C1]] : tensor<4x?x?xf32>
-// CHECK: %[[DIM_2:.+]] = tensor.dim %[[GENERIC]], %[[C2]] : tensor<4x?x?xf32>
-// CHECK: %[[VAL_1:.+]] = arith.divsi %[[DIM_2]], %[[C8]] : index
-// CHECK: %[[EXPANDED_3:.+]] = tensor.expand_shape %[[GENERIC]] {{\[\[}}0], [1], [2, 3]] output_shape [4, %[[DIM]], %[[VAL_1]], 8] : tensor<4x?x?xf32> into tensor<4x?x?x8xf32>
+// CHECK: %[[EXPANDED_3:.+]] = tensor.expand_shape %[[GENERIC]] {{\[\[}}0], [1], [2, 3]] output_shape [4, %[[DIM]], %[[DIM_2]], 8] : tensor<4x?x?xf32> into tensor<4x?x?x8xf32>
 // CHECK: return %[[EXPANDED_3]]
 
 // -----
 
@@ -475,15 +505,16 @@ func.func @fold_non_consecutive_dims(%arg0 : tensor<?x?xi32>, %sz0: index, %sz1:
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d1, d0)>
 // CHECK: func @fold_non_consecutive_dims(
 // CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xi32>, %[[SZ0:.+]]: index, %[[SZ1:.+]]: index)
-// CHECK: %[[C1:.+]] = arith.constant 1 : index
-// CHECK: %[[C4:.+]] = arith.constant 4 : index
-// CHECK: %[[C8:.+]] = arith.constant 8 : index
-// CHECK: %[[C0:.+]] = arith.constant 0 : index
-// CHECK: %[[C2:.+]] = arith.constant 2 : index
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
+// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
 // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[\[}}0, 1], [2, 3]] output_shape [%[[SZ0]], 4, %[[SZ1]], 8] : tensor<?x?xi32> into tensor<?x4x?x8xi32>
-// CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]]
-// CHECK: %[[DIM_0:.+]] = tensor.dim %[[EXPANDED]], %[[C2]]
+// CHECK-DAG: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]]
+// CHECK-DAG: %[[DIM_0:.+]] = tensor.dim %[[EXPANDED]], %[[C2]]
 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM_0]], %[[DIM]])
+// CHECK-DAG: %[[DIM_1:.+]] = tensor.dim %[[EXPANDED]], %[[C0]]
+// CHECK-DAG: %[[DIM_2:.+]] = tensor.dim %[[EXPANDED]], %[[C2]]
 // CHECK: %[[COLLAPSE_INIT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0, 1], [2, 3]{{\]}}
 // CHECK: %[[GENERIC:.+]] = linalg.generic
 // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
@@ -502,11 +533,7 @@ func.func @fold_non_consecutive_dims(%arg0 : tensor<?x?xi32>, %sz0: index, %sz1:
 // CHECK-DAG: %[[T6:.+]] = arith.addi %[[T5]], %[[T3]]
 // CHECK-DAG: %[[T7:.+]] = arith.index_cast %[[T6]]
 // CHECK: linalg.yield %[[T7]]
-// CHECK: %[[DIM_1:.+]] = tensor.dim %[[GENERIC]], %[[C0]] : tensor<?x?xi32>
-// CHECK: %[[DIM_2:.+]] = tensor.dim %[[GENERIC]], %[[C1]] : tensor<?x?xi32>
-// CHECK: %[[VAL_2:.+]] = arith.divsi %[[DIM_1]], %[[C8]] : index
-// CHECK: %[[VAL_3:.+]] = arith.divsi %[[DIM_2]], %[[C4]] : index
-// CHECK: %[[EXPANDED_3:.+]] = tensor.expand_shape %[[GENERIC]] {{\[\[}}0, 1], [2, 3]] output_shape [%[[VAL_2]], 8, %[[VAL_3]], 4] : tensor<?x?xi32> into tensor<?x8x?x4xi32>
+// CHECK: %[[EXPANDED_3:.+]] = tensor.expand_shape %[[GENERIC]] {{\[\[}}0, 1], [2, 3]] output_shape [%[[DIM_2]], 8, %[[DIM_1]], 4] : tensor<?x?xi32> into tensor<?x8x?x4xi32>
 // CHECK: return %[[EXPANDED_3]]
 
 // -----
diff --git a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
index 7acbd843cd1e7..fd3c321722508 100644
--- a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
@@ -5,15 +5,14 @@
 // CHECK-LABEL: func @reshape
 // CHECK-SAME: (%[[A:.*]]: tensor<?x16xf32>, %[[B:.*]]: tensor<16xf32>, %[[INIT:.*]]: tensor<?x112x16xf32>, %[[SZ0:.*]]: index)
-// CHECK: %[[C112:.*]] = arith.constant 112 : index
 // CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[EXPANDED:.*]] = tensor.expand_shape %[[A]]
+// CHECK: %[[DIM:.*]] = tensor.dim %[[EXPANDED]], %[[C0]]
 // CHECK: %[[RI:.*]] = tensor.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]] : tensor<?x112x16xf32> into tensor<?x16xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP3]], #[[$MAP2]]],
 // CHECK-SAME: iterator_types = ["parallel", "parallel"]}
 // CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x16xf32>, tensor<16xf32>) outs(%[[RI]] : tensor<?x16xf32>)
-// CHECK: %[[DIM:.*]] = tensor.dim %[[R]], %[[C0]] : tensor<?x16xf32>
-// CHECK: %[[VAL_1:.*]] = arith.divsi %[[DIM]], %[[C112]] : index
-// CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[\[}}0, 1], [2]] output_shape [%[[VAL_1]], 112, 16] : tensor<?x16xf32> into tensor<?x112x16xf32>
+// CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[\[}}0, 1], [2]] output_shape [%[[DIM]], 112, 16] : tensor<?x16xf32> into tensor<?x112x16xf32>
 // CHECK: return %[[RR]] : tensor<?x112x16xf32>
 func.func @reshape(%A: tensor<?x16xf32>, %B: tensor<16xf32>, %init: tensor<?x112x16xf32>, %sz0: index) -> tensor<?x112x16xf32> {
   %0 = tensor.expand_shape %A [[0, 1], [2]] output_shape [%sz0, 112, 16]
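The linalg test updates above show the effect of the resultShape change in collapseOpIterationDims: expanded result sizes are now taken from the loop bounds (surfacing as tensor.dim on the expanded operand) rather than re-derived with arith.divsi on the collapsed result. A self-contained analogue of that shape projection, assuming the indexing map is a projected permutation as the new assert requires:

    #include <cstdint>
    #include <vector>

    // For a result whose projected-permutation map selects loop dimensions
    // `resultDims`, the expand_shape output sizes are simply the matching
    // loop bounds; no division by the static inner sizes is needed.
    static std::vector<int64_t>
    projectShape(const std::vector<int64_t> &loopBounds,
                 const std::vector<unsigned> &resultDims) {
      std::vector<int64_t> shape;
      shape.reserve(resultDims.size());
      for (unsigned d : resultDims)
        shape.push_back(loopBounds[d]);
      return shape;
    }

    // projectShape({7, 4, 9, 8}, {0, 2}) == {7, 9}, mirroring what
    // applyPermutationMap computes for the OpFoldResult loop bounds above.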
diff --git a/mlir/test/Dialect/Math/canonicalize.mlir b/mlir/test/Dialect/Math/canonicalize.mlir
index d24f7649269fe..3743768d901e3 100644
--- a/mlir/test/Dialect/Math/canonicalize.mlir
+++ b/mlir/test/Dialect/Math/canonicalize.mlir
@@ -492,3 +492,75 @@ func.func @abs_poison() -> f32 {
   %1 = math.absf %0 : f32
   return %1 : f32
 }
+
+// CHECK-LABEL: @isfinite_fold
+// CHECK: %[[cst:.+]] = arith.constant true
+// CHECK: return %[[cst]]
+func.func @isfinite_fold() -> i1 {
+  %c = arith.constant 2.0 : f32
+  %r = math.isfinite %c : f32
+  return %r : i1
+}
+
+// CHECK-LABEL: @isfinite_fold_vec
+// CHECK: %[[cst:.+]] = arith.constant dense<true> : vector<4xi1>
+// CHECK: return %[[cst]]
+func.func @isfinite_fold_vec() -> (vector<4xi1>) {
+  %v1 = arith.constant dense<2.0> : vector<4xf32>
+  %0 = math.isfinite %v1 : vector<4xf32>
+  return %0 : vector<4xi1>
+}
+
+// CHECK-LABEL: @isinf_fold
+// CHECK: %[[cst:.+]] = arith.constant false
+// CHECK: return %[[cst]]
+func.func @isinf_fold() -> i1 {
+  %c = arith.constant 2.0 : f32
+  %r = math.isinf %c : f32
+  return %r : i1
+}
+
+// CHECK-LABEL: @isinf_fold_vec
+// CHECK: %[[cst:.+]] = arith.constant dense<false> : vector<4xi1>
+// CHECK: return %[[cst]]
+func.func @isinf_fold_vec() -> (vector<4xi1>) {
+  %v1 = arith.constant dense<2.0> : vector<4xf32>
+  %0 = math.isinf %v1 : vector<4xf32>
+  return %0 : vector<4xi1>
+}
+
+// CHECK-LABEL: @isnan_fold
+// CHECK: %[[cst:.+]] = arith.constant false
+// CHECK: return %[[cst]]
+func.func @isnan_fold() -> i1 {
+  %c = arith.constant 2.0 : f32
+  %r = math.isnan %c : f32
+  return %r : i1
+}
+
+// CHECK-LABEL: @isnan_fold_vec
+// CHECK: %[[cst:.+]] = arith.constant dense<false> : vector<4xi1>
+// CHECK: return %[[cst]]
+func.func @isnan_fold_vec() -> (vector<4xi1>) {
+  %v1 = arith.constant dense<2.0> : vector<4xf32>
+  %0 = math.isnan %v1 : vector<4xf32>
+  return %0 : vector<4xi1>
+}
+
+// CHECK-LABEL: @isnormal_fold
+// CHECK: %[[cst:.+]] = arith.constant true
+// CHECK: return %[[cst]]
+func.func @isnormal_fold() -> i1 {
+  %c = arith.constant 2.0 : f32
+  %r = math.isnormal %c : f32
+  return %r : i1
+}
+
+// CHECK-LABEL: @isnormal_fold_vec
+// CHECK: %[[cst:.+]] = arith.constant dense<true> : vector<4xi1>
+// CHECK: return %[[cst]]
+func.func @isnormal_fold_vec() -> (vector<4xi1>) {
+  %v1 = arith.constant dense<2.0> : vector<4xf32>
+  %0 = math.isnormal %v1 : vector<4xf32>
+  return %0 : vector<4xi1>
+}
diff --git a/mlir/test/Dialect/SCF/invalid.mlir b/mlir/test/Dialect/SCF/invalid.mlir
index 80576be880127..76c785f3e6166 100644
--- a/mlir/test/Dialect/SCF/invalid.mlir
+++ b/mlir/test/Dialect/SCF/invalid.mlir
@@ -747,3 +747,13 @@ func.func @parallel_missing_terminator(%0 : index) {
   return
 }
 
+// -----
+
+func.func @invalid_reference(%a: index) {
+  // expected-error @below{{use of undeclared SSA value name}}
+  scf.for %x = %a to %a step %a iter_args(%var = %foo) -> tensor<f32> {
+    %foo = "test.inner"() : () -> (tensor<f32>)
+    scf.yield %foo : tensor<f32>
+  }
+  return
+}
diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
index 249a0552c87f3..569b0def37856 100644
--- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll
+++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
@@ -120,6 +120,25 @@ define void @trig_test(float %0, <8 x float> %1) {
   ret void
 }
 
+; CHECK-LABEL: llvm.func @inv_trig_test
+define void @inv_trig_test(float %0, <8 x float> %1) {
+  ; CHECK: llvm.intr.asin(%{{.*}}) : (f32) -> f32
+  %3 = call float @llvm.asin.f32(float %0)
+  ; CHECK: llvm.intr.asin(%{{.*}}) : (vector<8xf32>) -> vector<8xf32>
+  %4 = call <8 x float> @llvm.asin.v8f32(<8 x float> %1)
+
+  ; CHECK: llvm.intr.acos(%{{.*}}) : (f32) -> f32
+  %5 = call float @llvm.acos.f32(float %0)
+  ; CHECK: llvm.intr.acos(%{{.*}}) : (vector<8xf32>) -> vector<8xf32>
+  %6 = call <8 x float> @llvm.acos.v8f32(<8 x float> %1)
+
+  ; CHECK: llvm.intr.atan(%{{.*}}) : (f32) -> f32
+  %7 = call float @llvm.atan.f32(float %0)
+  ; CHECK: llvm.intr.atan(%{{.*}}) : (vector<8xf32>) -> vector<8xf32>
+  %8 = call <8 x float> @llvm.atan.v8f32(<8 x float> %1)
+
+  ret void
+}
 ; CHECK-LABEL: llvm.func @hyperbolic_trig_test
 define void @hyperbolic_trig_test(float %0, <8 x float> %1) {
   ; CHECK: llvm.intr.sinh(%{{.*}}) : (f32) -> f32
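The invalid.mlir case above pins down the behavioral half of the ForOp::parse reordering in SCF.cpp: because the loop bounds and iter_args are resolved only after the body region has been parsed, a reference to a value defined inside the region (%foo here) is now reported as an undeclared SSA name instead of producing invalid IR. A reduced sketch of that ordering, assuming the usual OpAsmParser API rather than the full ForOp parser:

    // Parse the region first, then resolve operands. Names introduced inside
    // the region are scoped to it, so a forward reference from the operand
    // list into the body fails resolution with a parse error.
    static ParseResult parseLoopLike(OpAsmParser &parser, OperationState &result,
                                     OpAsmParser::UnresolvedOperand initOperand,
                                     Type type,
                                     ArrayRef<OpAsmParser::Argument> regionArgs) {
      Region *body = result.addRegion();
      if (parser.parseRegion(*body, regionArgs))
        return failure();
      return parser.resolveOperand(initOperand, type, result.operands);
    }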
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
index 2c208789e36dd..3616a2e3c7b21 100644
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -122,6 +122,26 @@ llvm.func @trig_test(%arg0: f32, %arg1: vector<8xf32>) {
   llvm.return
 }
 
+// CHECK-LABEL: @inv_trig_test
+llvm.func @inv_trig_test(%arg0: f32, %arg1: vector<8xf32>) {
+  // CHECK: call float @llvm.asin.f32
+  llvm.intr.asin(%arg0) : (f32) -> f32
+  // CHECK: call <8 x float> @llvm.asin.v8f32
+  llvm.intr.asin(%arg1) : (vector<8xf32>) -> vector<8xf32>
+
+  // CHECK: call float @llvm.acos.f32
+  llvm.intr.acos(%arg0) : (f32) -> f32
+  // CHECK: call <8 x float> @llvm.acos.v8f32
+  llvm.intr.acos(%arg1) : (vector<8xf32>) -> vector<8xf32>
+
+  // CHECK: call float @llvm.atan.f32
+  llvm.intr.atan(%arg0) : (f32) -> f32
+  // CHECK: call <8 x float> @llvm.atan.v8f32
+  llvm.intr.atan(%arg1) : (vector<8xf32>) -> vector<8xf32>
+
+  llvm.return
+}
+
 // CHECK-LABEL: @hyperbolic_trig_test
 llvm.func @hyperbolic_trig_test(%arg0: f32, %arg1: vector<8xf32>) {
   // CHECK: call float @llvm.sinh.f32
diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py
index 307a88709ad52..e32a911b24b11 100644
--- a/mlir/test/python/dialects/linalg/ops.py
+++ b/mlir/test/python/dialects/linalg/ops.py
@@ -566,3 +566,43 @@ def batch_matmul_op(A, Amem, B, Bmem, Btransposed, Btransposedmem, C, Cmem):
     )
     print(module)
+
+
+# CHECK-LABEL: TEST: testPackUnPackOp
+@run
+def testPackUnPackOp():
+    with Context(), Location.unknown():
+        module = Module.create()
+        f32 = F32Type.get()
+        with InsertionPoint(module.body):
+
+            @func.FuncOp.from_py_func(
+                RankedTensorType.get((128, 128), f32),
+                RankedTensorType.get((16, 16, 8, 8), f32),
+            )
+            def tensor_pack(src, dst):
+                packed = linalg.pack(
+                    src,
+                    dst,
+                    inner_dims_pos=[1, 0],
+                    inner_tiles=[8, 8],
+                    padding_value=arith.constant(f32, 0.0),
+                )
+
+                unpacked = linalg.unpack(
+                    packed,
+                    src,
+                    inner_dims_pos=[0, 1],
+                    inner_tiles=[8, 8],
+                )
+
+                return unpacked
+
+    # CHECK-LABEL: func.func @tensor_pack(
+    # CHECK-SAME: %[[VAL_0:.*]]: tensor<128x128xf32>, %[[VAL_1:.*]]: tensor<16x16x8x8xf32>) -> tensor<128x128xf32> {
+    # CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32
+    # CHECK: %[[VAL_3:.*]] = linalg.pack %[[VAL_0]] padding_value(%[[VAL_2]] : f32) inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %[[VAL_1]] : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
+    # CHECK: %[[VAL_4:.*]] = linalg.unpack %[[VAL_3]] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %[[VAL_0]] : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
+    # CHECK: return %[[VAL_4]] : tensor<128x128xf32>
+    # CHECK: }
+    print(module)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 04bf6c3b34dac..36656325be4bd 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -521,13 +521,6 @@ enum library_type {
   library_throughput
 };
 
-#if KMP_OS_LINUX
-enum clock_function_type {
-  clock_function_gettimeofday,
-  clock_function_clock_gettime
-};
-#endif /* KMP_OS_LINUX */
-
 #if KMP_MIC_SUPPORTED
 enum mic_type { non_mic, mic1, mic2, mic3, dummy };
 #endif
@@ -3415,8 +3408,6 @@ extern kmp_bootstrap_lock_t __kmp_threads expansion to co-exist */
 extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
-extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
-extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
 
 extern enum library_type __kmp_library;
@@ -3545,11 +3536,6 @@ extern int __kmp_hot_teams_mode;
 extern int __kmp_hot_teams_max_level;
 #endif
 
-#if KMP_OS_LINUX
-extern enum clock_function_type __kmp_clock_function;
-extern int __kmp_clock_function_param;
-#endif /* KMP_OS_LINUX */
-
 #if KMP_MIC_SUPPORTED
 extern enum mic_type __kmp_mic_type;
 #endif
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 5017cd3de4be5..52e0fdbdfb1da 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -242,11 +242,6 @@ enum sched_type __kmp_sch_map[kmp_sched_upper - kmp_sched_lower_ext +
     // of public intel extension schedules
 };
 
-#if KMP_OS_LINUX
-enum clock_function_type __kmp_clock_function;
-int __kmp_clock_function_param;
-#endif /* KMP_OS_LINUX */
-
 #if KMP_MIC_SUPPORTED
 enum mic_type __kmp_mic_type = non_mic;
 #endif
@@ -492,10 +487,6 @@ KMP_BOOTSTRAP_LOCK_INIT(__kmp_tp_cached_lock);
 KMP_ALIGN_CACHE_INTERNODE
 KMP_LOCK_INIT(__kmp_global_lock); /* Control OS/global access */
-KMP_ALIGN_CACHE_INTERNODE
-kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */
-KMP_ALIGN_CACHE_INTERNODE
-KMP_LOCK_INIT(__kmp_debug_lock); /* Control I/O access for KMP_DEBUG */
 #else
 KMP_ALIGN_CACHE
@@ -512,10 +503,6 @@ KMP_BOOTSTRAP_LOCK_INIT(__kmp_tp_cached_lock);
 KMP_ALIGN(128)
 KMP_LOCK_INIT(__kmp_global_lock); /* Control OS/global access */
-KMP_ALIGN(128)
-kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */
-KMP_ALIGN(128)
-KMP_LOCK_INIT(__kmp_debug_lock); /* Control I/O access for KMP_DEBUG */
 #endif
 
 /* ----------------------------------------------- */
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 2c8d9304c46bc..3e5d671cb7a48 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -7143,8 +7143,6 @@ static void __kmp_do_serial_initialize(void) {
   __kmp_stats_init();
 #endif
   __kmp_init_lock(&__kmp_global_lock);
-  __kmp_init_queuing_lock(&__kmp_dispatch_lock);
-  __kmp_init_lock(&__kmp_debug_lock);
   __kmp_init_atomic_lock(&__kmp_atomic_lock);
   __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
   __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);