diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml index 49753c0746cbe..2d366028c2337 100644 --- a/.github/workflows/premerge.yaml +++ b/.github/workflows/premerge.yaml @@ -14,8 +14,6 @@ on: # do this is that it allows us to take advantage of concurrency groups # to cancel in progress CI jobs whenever the PR is closed. - closed - paths: - - .github/workflows/premerge.yaml push: branches: - 'main' diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index bc45caf3ec8b7..1e427b2df11cf 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -498,6 +498,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) { if (!IslandOffset) return; + // Print label if it exists at this offset. + if (const BinaryData *BD = + BC.getBinaryDataAtAddress(getAddress() + *IslandOffset)) + OS << BD->getName() << ":\n"; + const size_t IslandSize = getSizeOfDataInCodeAt(*IslandOffset); BC.printData(OS, BC.extractData(getAddress() + *IslandOffset, IslandSize), *IslandOffset); @@ -1066,7 +1071,7 @@ size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const { auto Iter = Islands->CodeOffsets.upper_bound(Offset); if (Iter != Islands->CodeOffsets.end()) return *Iter - Offset; - return getSize() - Offset; + return getMaxSize() - Offset; } std::optional diff --git a/bolt/test/AArch64/data-in-code.s b/bolt/test/AArch64/data-in-code.s index 8d3179a0c3350..1df5d4568542f 100644 --- a/bolt/test/AArch64/data-in-code.s +++ b/bolt/test/AArch64/data-in-code.s @@ -7,7 +7,8 @@ ## Check disassembly of BOLT input. 
# RUN: llvm-objdump %t.exe -d | FileCheck %s -# RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm | FileCheck %s +# RUN: llvm-bolt %t.exe -o %t.bolt --print-disasm \ +# RUN: | FileCheck %s --check-prefixes CHECK,CHECK-BOLT-ONLY .text .balign 4 @@ -16,16 +17,21 @@ .type _start, %function _start: mov x0, #0x0 + ldr x1, .L1 .word 0x4f82e010 ret +.size _start, .-_start +.L1: .byte 0x0, 0xff, 0x42 # CHECK-LABEL: _start # CHECK: mov x0, #0x0 +# CHECK-NEXT: ldr x1 +# CHECK-BOLT-ONLY-SAME: ISLANDat[[ADDR:]] # CHECK-NEXT: .word 0x4f82e010 # CHECK-NEXT: ret +# CHECK-BOLT-ONLY-NEXT: ISLANDat[[ADDR]] # CHECK-NEXT: .short 0xff00 # CHECK-NEXT: .byte 0x42 -.size _start, .-_start ## Force relocation mode. .reloc 0, R_AARCH64_NONE diff --git a/clang-tools-extra/clangd/unittests/ASTTests.cpp b/clang-tools-extra/clangd/unittests/ASTTests.cpp index 32c8e8a63a215..d0bc3c4d7db98 100644 --- a/clang-tools-extra/clangd/unittests/ASTTests.cpp +++ b/clang-tools-extra/clangd/unittests/ASTTests.cpp @@ -329,7 +329,7 @@ TEST(ClangdAST, GetContainedAutoParamType) { auto &&d, auto *&e, auto (*f)(int) - ){}; + ){ return 0; }; int withoutAuto( int a, @@ -338,7 +338,7 @@ TEST(ClangdAST, GetContainedAutoParamType) { int &&d, int *&e, int (*f)(int) - ){}; + ){ return 0; }; )cpp"); TU.ExtraArgs.push_back("-std=c++20"); auto AST = TU.build(); diff --git a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp index 4276a44275f53..282859c51a66f 100644 --- a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp @@ -113,7 +113,7 @@ TEST(WorkspaceSymbols, Unnamed) { TEST(WorkspaceSymbols, InMainFile) { TestTU TU; TU.Code = R"cpp( - int test() {} + int test() { return 0; } static void test2() {} )cpp"; EXPECT_THAT(getSymbols(TU, "test"), @@ -537,12 +537,14 @@ TEST(DocumentSymbols, InHeaderFile) { TestTU TU; TU.AdditionalFiles["bar.h"] = R"cpp( int foo() { + return 0; } )cpp"; TU.Code = 
R"cpp( int i; // declaration to finish preamble #include "bar.h" int test() { + return 0; } )cpp"; EXPECT_THAT(getSymbols(TU.build()), @@ -780,7 +782,7 @@ TEST(DocumentSymbols, FuncTemplates) { TestTU TU; Annotations Source(R"cpp( template - T foo() {} + T foo() { return T{}; } auto x = foo(); auto y = foo(); diff --git a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp index 6ee641caeefe3..f9752d5d44f97 100644 --- a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp +++ b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp @@ -251,7 +251,7 @@ TEST(ParsedASTTest, NoCrashOnTokensWithTidyCheck) { // this check runs the preprocessor, we need to make sure it does not break // our recording logic. TU.ClangTidyProvider = addTidyChecks("modernize-use-trailing-return-type"); - TU.Code = "inline int foo() {}"; + TU.Code = "inline int foo() { return 0; }"; auto AST = TU.build(); const syntax::TokenBuffer &T = AST.getTokens(); diff --git a/clang-tools-extra/clangd/unittests/QualityTests.cpp b/clang-tools-extra/clangd/unittests/QualityTests.cpp index 576779fa3270a..619ea32115357 100644 --- a/clang-tools-extra/clangd/unittests/QualityTests.cpp +++ b/clang-tools-extra/clangd/unittests/QualityTests.cpp @@ -108,7 +108,7 @@ TEST(QualityTests, SymbolRelevanceSignalExtraction) { using flags::FLAGS_FOO; - int ::header_main() {} + int ::header_main() { return 0; } int main(); [[deprecated]] diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 142ed171d1a1c..15866f43affa0 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -214,7 +214,7 @@ TEST(RenameTest, WithinFileRename) { template class Foo { public: - static T [[f^oo]]() {} + static T [[f^oo]]() { return T(); } }; void bar() { @@ -225,7 +225,7 @@ TEST(RenameTest, WithinFileRename) { template class Foo { public: - T [[f^oo]]() 
{} + T [[f^oo]]() { return T(); } }; void bar() { @@ -827,7 +827,7 @@ TEST(RenameTest, WithinFileRename) { // Issue 170: Rename symbol introduced by UsingDecl R"cpp( - namespace ns { void [[f^oo]](); } + namespace ns { void [[f^oo]](); } using ns::[[f^oo]]; @@ -1307,7 +1307,7 @@ TEST(RenameTest, Renameable) { "no symbol", false}, {R"cpp(// FIXME we probably want to rename both overloads here, - // but renaming currently assumes there's only a + // but renaming currently assumes there's only a // single canonical declaration. namespace ns { int foo(int); char foo(char); } using ns::^foo; @@ -1776,7 +1776,7 @@ TEST(CrossFileRenameTests, WithUpToDateIndex) { void [[foo]]() override {}; }; - void func(Base* b, Derived1* d1, + void func(Base* b, Derived1* d1, Derived2* d2, NotDerived* nd) { b->[[foo]](); d1->[[foo]](); diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp index 1ec51d862d0a6..94cecce1f038c 100644 --- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp @@ -741,6 +741,7 @@ sizeof...($TemplateParameter[[Elements]]); $Class[[Foo]].$Field_static[[sharedInstance]].$Field[[someProperty]] $Operator[[=]] 1; self.$Field[[someProperty]] $Operator[[=]] self.$Field[[someProperty]] $Operator[[+]] self.$Field[[otherMethod]] $Operator[[+]] 1; self->$Field[[_someProperty]] $Operator[[=]] $Field[[_someProperty]] $Operator[[+]] 1; + return 0; } @end )cpp", diff --git a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp index 7faef6f95d8f9..7ede19c321bc6 100644 --- a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp @@ -201,6 +201,7 @@ TEST(FoldingRanges, ASTAll) { R"cpp( #define FOO int foo() {\ int Variable = 42; \ + return 0; \ } // Do not 
generate folding range for braces within macro expansion. @@ -336,18 +337,18 @@ TEST(FoldingRanges, PseudoParserWithoutLineFoldings) { ]]}; )cpp", R"cpp( - /*[[ Multi + /*[[ Multi * line - * comment + * comment ]]*/ )cpp", R"cpp( //[[ Comment // 1]] - + //[[ Comment // 2]] - + // No folding for single line comment. /*[[ comment 3 diff --git a/clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp b/clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp index 6c91f3783a622..95b6eaedce97c 100644 --- a/clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp @@ -36,6 +36,7 @@ TEST(SymbolInfoTests, All) { void $decl[[foo]](); int bar() { fo^o(); + return 0; } )cpp", {ExpectedSymbolDetails{"foo", "", "c:@F@foo#", "decl"}}}, @@ -44,6 +45,7 @@ TEST(SymbolInfoTests, All) { void $def[[foo]]() {} int bar() { fo^o(); + return 0; } )cpp", {ExpectedSymbolDetails{"foo", "", "c:@F@foo#", "def", "def"}}}, @@ -53,6 +55,7 @@ TEST(SymbolInfoTests, All) { void $def[[foo]]() {} int bar() { fo^o(); + return 0; } )cpp", {ExpectedSymbolDetails{"foo", "", "c:@F@foo#", "decl", "def"}}}, @@ -83,6 +86,7 @@ TEST(SymbolInfoTests, All) { void $decl[[foo]](); int baz() { fo^o(); + return 0; } } )cpp", @@ -96,6 +100,7 @@ TEST(SymbolInfoTests, All) { namespace barbar { int baz() { bar::fo^o(); + return 0; } } )cpp", @@ -108,6 +113,7 @@ TEST(SymbolInfoTests, All) { namespace Nbaz { int baz() { ::fo^o(); + return 0; } } } @@ -121,6 +127,7 @@ TEST(SymbolInfoTests, All) { namespace barbar { int baz() { fo^o(); + return 0; } } )cpp", @@ -136,6 +143,7 @@ TEST(SymbolInfoTests, All) { int baz() { bar::BarType b; fo^o(b); + return 0; } } )cpp", diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index 475b56b1dc230..e12d7691c58fb 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -95,7 +95,7 @@ TEST(HighlightsTest, 
All) { )cpp", R"cpp(// Function - int [[^foo]](int) {} + int [[^foo]](int) { return 0; } int main() { [[foo]]([[foo]](42)); auto *X = &[[foo]]; @@ -2140,7 +2140,7 @@ TEST(FindReferences, WithinAST) { )cpp", R"cpp(// Function - int $def[[foo]](int) {} + int $def[[foo]](int) { return 0; } int main() { auto *X = &$(main)[[^foo]]; $(main)[[foo]](42); @@ -2160,7 +2160,7 @@ TEST(FindReferences, WithinAST) { R"cpp(// Method call struct Foo { int $decl(Foo)[[foo]](); }; - int Foo::$def(Foo)[[foo]]() {} + int Foo::$def(Foo)[[foo]]() { return 0; } int main() { Foo f; f.$(main)[[^foo]](); @@ -2258,7 +2258,7 @@ TEST(FindReferences, WithinAST) { )cpp", R"cpp(// Dependent code template void $decl[[foo]](T t); - template void bar(T t) { $(bar)[[foo]](t); } // foo in bar is uninstantiated. + template void bar(T t) { $(bar)[[foo]](t); } // foo in bar is uninstantiated. void baz(int x) { $(baz)[[f^oo]](x); } )cpp", R"cpp( @@ -2508,6 +2508,7 @@ TEST(FindReferences, ExplicitSymbols) { X $def(test)[[a]]; $(test)[[a]].operator bool(); if ($(test)[[a^]]) {} // ignore implicit conversion-operator AST node + return 0; } )cpp", }; @@ -2543,7 +2544,7 @@ TEST(FindReferences, UsedSymbolsFromInclude) { #define BAR 5 int bar1(); int bar2(); - class Bar {}; + class Bar {}; )cpp"); TU.AdditionalFiles["system/vector"] = guard(R"cpp( namespace std { @@ -2560,7 +2561,7 @@ TEST(FindReferences, UsedSymbolsFromInclude) { std::vector> ExpectedLocations; for (const auto &R : T.ranges()) ExpectedLocations.push_back(AllOf(rangeIs(R), attrsAre(0u))); - for (const auto &P : T.points()) + for (const auto &P : T.points()) EXPECT_THAT(findReferences(AST, P, 0).References, UnorderedElementsAreArray(ExpectedLocations)) << "Failed for Refs at " << P << "\n" @@ -2635,6 +2636,7 @@ TEST(FindReferences, NeedsIndexForMacro) { Annotations IndexedMain(R"cpp( int indexed_main() { int a = [[MACRO]](1); + return 0; } )cpp"); diff --git a/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp 
b/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp index 8d496b2a3ee73..5ec12396ae927 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp +++ b/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp @@ -935,10 +935,11 @@ TEST_F(DefineInlineTest, AddInline) { // Check we put inline before cv-qualifiers. ExtraFiles["a.h"] = "const int foo();"; apply(R"cpp(#include "a.h" - const int fo^o() {})cpp", + const int fo^o() { return 0; })cpp", &EditedFiles); - EXPECT_THAT(EditedFiles, testing::ElementsAre(FileWithContents( - testPath("a.h"), "inline const int foo(){}"))); + EXPECT_THAT(EditedFiles, + testing::ElementsAre(FileWithContents( + testPath("a.h"), "inline const int foo(){ return 0; }"))); // No double inline. ExtraFiles["a.h"] = "inline void foo();"; diff --git a/clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp index 3730ab4a87136..8da394d74b54d 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp +++ b/clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp @@ -69,8 +69,8 @@ TEST_F(ExpandDeducedTypeTest, Test) { EXPECT_THAT(apply(R"cpp(au^to s = &"foobar";)cpp"), StartsWith("fail: Could not expand type")); - EXPECT_EQ(apply("ns::Class * foo() { au^to c = foo(); }"), - "ns::Class * foo() { ns::Class * c = foo(); }"); + EXPECT_EQ(apply("ns::Class * foo() { au^to c = foo(); return nullptr; }"), + "ns::Class * foo() { ns::Class * c = foo(); return nullptr; }"); EXPECT_EQ( apply("void ns::Func() { au^to x = new ns::Class::Nested{}; }"), "void ns::Func() { ns::Class::Nested * x = new ns::Class::Nested{}; }"); diff --git a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp index 552e693c0363a..3c65a58d6c945 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp +++ 
b/clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp @@ -116,6 +116,7 @@ TEST_F(ExtractVariableTest, Test) { struct T { int bar(int a = [[1]]) { int b = [[z]]; + return 0; } int z = [[1]]; } t; diff --git a/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/strings/internal-file.h b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/strings/internal-file.h index 31798661a80fc..b9ce1c875ed13 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/strings/internal-file.h +++ b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/strings/internal-file.h @@ -10,7 +10,7 @@ std::string StringsFunction(std::string s1) { return s1; } class SomeContainer {}; namespace strings_internal { void InternalFunction() {} -template P InternalTemplateFunction(P a) {} +template void InternalTemplateFunction(P a) { int; } } // namespace strings_internal namespace container_internal { diff --git a/clang-tools-extra/test/clang-tidy/checkers/boost/use-to-string.cpp b/clang-tools-extra/test/clang-tidy/checkers/boost/use-to-string.cpp index 44ba172c2ff0b..f888c430e6883 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/boost/use-to-string.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/boost/use-to-string.cpp @@ -18,7 +18,7 @@ T lexical_cast(const V &) { struct my_weird_type {}; -std::string fun(const std::string &) {} +std::string fun(const std::string &) { return {}; } void test_to_string1() { @@ -75,7 +75,7 @@ void test_to_string2() { fun(boost::lexical_cast(j)); } -std::string fun(const std::wstring &) {} +std::string fun(const std::wstring &); void test_to_wstring() { int a; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-coro.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-coro.cpp index 222577b124dce..aff13d19fd209 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-coro.cpp +++ 
b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-coro.cpp @@ -1,5 +1,5 @@ // RUN: %check_clang_tidy -std=c++20 %s bugprone-exception-escape %t -- \ -// RUN: -- -fexceptions +// RUN: -- -fexceptions -Wno-error=return-type namespace std { diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-rethrow.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-rethrow.cpp index b20333d5b0b3b..6f961a247b9d2 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-rethrow.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-rethrow.cpp @@ -20,6 +20,7 @@ int throwsAndCallsRethrower() noexcept { } catch(...) { rethrower(); } + return 1; } int throwsAndCallsCallsRethrower() noexcept { @@ -29,6 +30,7 @@ int throwsAndCallsCallsRethrower() noexcept { } catch(...) { callsRethrower(); } + return 1; } void rethrowerNoexcept() noexcept { diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape.cpp index 26c443b139629..aae957dd7e090 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape.cpp @@ -665,6 +665,7 @@ int indirectly_recursive(int n) noexcept; int recursion_helper(int n) { indirectly_recursive(n); + return 0; } int indirectly_recursive(int n) noexcept { diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/fold-init-type.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/fold-init-type.cpp index 2a49960e02895..c813213c3dd0f 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/fold-init-type.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/fold-init-type.cpp @@ -8,24 +8,25 @@ T accumulate(InputIt first, InputIt last, T init) { // is instantiated. 
In practice this happens somewhere in the implementation // of `accumulate`. For tests, do it here. (void)*first; + return init; } template -T reduce(InputIt first, InputIt last, T init) { (void)*first; } +T reduce(InputIt first, InputIt last, T init) { (void)*first; return init; } template T reduce(ExecutionPolicy &&policy, - InputIt first, InputIt last, T init) { (void)*first; } + InputIt first, InputIt last, T init) { (void)*first; return init; } struct parallel_execution_policy {}; constexpr parallel_execution_policy par{}; template T inner_product(InputIt1 first1, InputIt1 last1, - InputIt2 first2, T value) { (void)*first1; (void)*first2; } + InputIt2 first2, T value) { (void)*first1; (void)*first2; return value; } template T inner_product(ExecutionPolicy &&policy, InputIt1 first1, InputIt1 last1, - InputIt2 first2, T value) { (void)*first1; (void)*first2; } + InputIt2 first2, T value) { (void)*first1; (void)*first2; return value; } } // namespace std diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/inc-dec-in-conditions-bitint-no-crash.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/inc-dec-in-conditions-bitint-no-crash.c index cfb64c10fe46c..5cfa264e42d68 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/inc-dec-in-conditions-bitint-no-crash.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/inc-dec-in-conditions-bitint-no-crash.c @@ -5,5 +5,6 @@ _BitInt(8) v_401_0() { _BitInt(5) y = 0; 16777215wb ?: ++y; }); + return 0; } -// CHECK-MESSAGES: warning +// CHECK-MESSAGES: warning diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.c index 8b84474d3f2d3..36b1215978603 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.c @@ -12,9 +12,9 @@ typedef struct cnd_t { 
} cnd_t; struct timespec {}; -int cnd_wait(cnd_t *cond, mtx_t *mutex){}; +int cnd_wait(cnd_t *cond, mtx_t *mutex){ return 0; }; int cnd_timedwait(cnd_t *cond, mtx_t *mutex, - const struct timespec *time_point){}; + const struct timespec *time_point){ return 0; }; struct Node1 list_c; static mtx_t lock; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.cpp index 6db92ef939fa3..d7508009e19ad 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.cpp @@ -90,18 +90,18 @@ class condition_variable { void wait(unique_lock &lock, Predicate pred); template cv_status wait_until(unique_lock &lock, - const chrono::time_point &abs_time){}; + const chrono::time_point &abs_time){ return cv_status::no_timeout; }; template bool wait_until(unique_lock &lock, const chrono::time_point &abs_time, - Predicate pred){}; + Predicate pred){ return false; }; template cv_status wait_for(unique_lock &lock, - const chrono::duration &rel_time){}; + const chrono::duration &rel_time){ return cv_status::no_timeout; }; template bool wait_for(unique_lock &lock, const chrono::duration &rel_time, - Predicate pred){}; + Predicate pred){ return false; }; }; } // namespace std diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/stringview-nullptr.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/stringview-nullptr.cpp index 02fcab31dcf3e..ff5b256e71781 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/stringview-nullptr.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/stringview-nullptr.cpp @@ -27,7 +27,7 @@ class basic_string_view { constexpr basic_string_view(const basic_string_view &) {} - constexpr basic_string_view &operator=(const basic_string_view &) {} + constexpr basic_string_view 
&operator=(const basic_string_view &) { return *this; } }; template diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-string-compare.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-string-compare.cpp index 7e1dd6b444393..c14b094f3fca3 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-string-compare.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-string-compare.cpp @@ -89,6 +89,8 @@ int test_warning_patterns() { if (strcmp(A, "a") < 0.) return 0; // CHECK-MESSAGES: [[@LINE-2]]:7: warning: function 'strcmp' has suspicious implicit cast + + return 1; } int test_valid_patterns() { diff --git a/clang-tools-extra/test/clang-tidy/checkers/fuchsia/default-arguments-calls.cpp b/clang-tools-extra/test/clang-tidy/checkers/fuchsia/default-arguments-calls.cpp index 50b6d4c5676c3..ed7bcc7dacc30 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/fuchsia/default-arguments-calls.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/fuchsia/default-arguments-calls.cpp @@ -2,7 +2,7 @@ int foo(int value = 5) { return value; } -int f() { +void f() { foo(); // CHECK-NOTES: [[@LINE-1]]:3: warning: calling a function that uses a default argument is disallowed [fuchsia-default-arguments-calls] // CHECK-NOTES: [[@LINE-5]]:9: note: default parameter was declared here @@ -10,7 +10,7 @@ int f() { int bar(int value) { return value; } -int n() { +void n() { foo(0); bar(0); } diff --git a/clang-tools-extra/test/clang-tidy/checkers/fuchsia/multiple-inheritance.cpp b/clang-tools-extra/test/clang-tidy/checkers/fuchsia/multiple-inheritance.cpp index 6ce9ce8e65536..d53b3fde7736b 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/fuchsia/multiple-inheritance.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/fuchsia/multiple-inheritance.cpp @@ -144,7 +144,7 @@ struct WithTemplBase : T { WithTemplBase(); }; -int test_no_crash() { +void test_no_crash() { auto foo = []() {}; WithTemplBase(); } diff 
--git a/clang-tools-extra/test/clang-tidy/checkers/google/runtime-int-std.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/runtime-int-std.cpp index 30f9b3cf1e90c..cd65de51a5ce9 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/google/runtime-int-std.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/google/runtime-int-std.cpp @@ -54,4 +54,5 @@ short bar(const short, unsigned short) { tmpl(); // CHECK-MESSAGES: [[@LINE-1]]:8: warning: consider replacing 'short' with 'std::int16_t' + return 0; } diff --git a/clang-tools-extra/test/clang-tidy/checkers/google/upgrade-googletest-case.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/upgrade-googletest-case.cpp index ce70e79183521..39ff9b7f39634 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/google/upgrade-googletest-case.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/google/upgrade-googletest-case.cpp @@ -221,9 +221,9 @@ class FooTestInfo : public testing::TestInfo { // CHECK-FIXES: const char *test_suite_name() const; }; -const char *FooTestInfo::test_case_name() const {} +const char *FooTestInfo::test_case_name() const { return nullptr; } // CHECK-MESSAGES: [[@LINE-1]]:26: warning: Google Test APIs named with 'case' -// CHECK-FIXES: const char *FooTestInfo::test_suite_name() const {} +// CHECK-FIXES: const char *FooTestInfo::test_suite_name() const { return nullptr; } class BarTestInfo : public testing::TestInfo { public: @@ -491,26 +491,26 @@ class FooUnitTest : public testing::UnitTest { // CHECK-FIXES: const testing::TestSuite *GetTestSuite(int) const; }; -testing::TestCase *FooUnitTest::current_test_case() const {} +testing::TestCase *FooUnitTest::current_test_case() const { return nullptr; } // CHECK-MESSAGES: [[@LINE-1]]:10: warning: Google Test APIs named with 'case' // CHECK-MESSAGES: [[@LINE-2]]:33: warning: Google Test APIs named with 'case' -// CHECK-FIXES: testing::TestSuite *FooUnitTest::current_test_suite() const {} -int FooUnitTest::successful_test_case_count() const 
{} +// CHECK-FIXES: testing::TestSuite *FooUnitTest::current_test_suite() const { return nullptr; } +int FooUnitTest::successful_test_case_count() const { return 0; } // CHECK-MESSAGES: [[@LINE-1]]:18: warning: Google Test APIs named with 'case' -// CHECK-FIXES: int FooUnitTest::successful_test_suite_count() const {} -int FooUnitTest::failed_test_case_count() const {} +// CHECK-FIXES: int FooUnitTest::successful_test_suite_count() const { return 0; } +int FooUnitTest::failed_test_case_count() const { return 0; } // CHECK-MESSAGES: [[@LINE-1]]:18: warning: Google Test APIs named with 'case' -// CHECK-FIXES: int FooUnitTest::failed_test_suite_count() const {} -int FooUnitTest::total_test_case_count() const {} +// CHECK-FIXES: int FooUnitTest::failed_test_suite_count() const { return 0; } +int FooUnitTest::total_test_case_count() const { return 0; } // CHECK-MESSAGES: [[@LINE-1]]:18: warning: Google Test APIs named with 'case' -// CHECK-FIXES: int FooUnitTest::total_test_suite_count() const {} -int FooUnitTest::test_case_to_run_count() const {} +// CHECK-FIXES: int FooUnitTest::total_test_suite_count() const { return 0; } +int FooUnitTest::test_case_to_run_count() const { return 0; } // CHECK-MESSAGES: [[@LINE-1]]:18: warning: Google Test APIs named with 'case' -// CHECK-FIXES: int FooUnitTest::test_suite_to_run_count() const {} -const testing::TestCase *FooUnitTest::GetTestCase(int) const {} +// CHECK-FIXES: int FooUnitTest::test_suite_to_run_count() const { return 0; } +const testing::TestCase *FooUnitTest::GetTestCase(int) const { return 0; } // CHECK-MESSAGES: [[@LINE-1]]:16: warning: Google Test APIs named with 'case' // CHECK-MESSAGES: [[@LINE-2]]:39: warning: Google Test APIs named with 'case' -// CHECK-FIXES: const testing::TestSuite *FooUnitTest::GetTestSuite(int) const {} +// CHECK-FIXES: const testing::TestSuite *FooUnitTest::GetTestSuite(int) const { return 0; } // Type derived from testing::TestCase class BarUnitTest : public testing::UnitTest { diff 
--git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp index 9a4eb010609b4..109eddc195558 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp @@ -54,8 +54,8 @@ void template_instantiation() { struct ConstNonConstClass { ConstNonConstClass(); ConstNonConstClass(double &np_local0); - double nonConstMethod() {} - double constMethod() const {} + double nonConstMethod() { return 0; } + double constMethod() const { return 0; } double modifyingMethod(double &np_arg0) const; double NonConstMember; diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp index 0d1ff0db58371..5efb64bca2374 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp @@ -283,8 +283,8 @@ void template_instantiation() { struct ConstNonConstClass { ConstNonConstClass(); ConstNonConstClass(double &np_local0); - double nonConstMethod() {} - double constMethod() const {} + double nonConstMethod() { return 0; } + double constMethod() const { return 0; } double modifyingMethod(double &np_arg0) const; double NonConstMember; diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp index 524de45463e36..9b3dd070405b5 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp @@ -33,9 +33,9 @@ void f(void (*fn)()) {;} // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: parameter 'fn' is unused [misc-unused-parameters] // CHECK-FIXES: 
{{^}}void f(void (* /*fn*/)()) {;}{{$}} -int *k([[clang::lifetimebound]] int *i) {;} +int *k([[clang::lifetimebound]] int *i) { return nullptr; } // CHECK-MESSAGES: :[[@LINE-1]]:38: warning: parameter 'i' is unused [misc-unused-parameters] -// CHECK-FIXES: {{^}}int *k({{\[\[clang::lifetimebound\]\]}} int * /*i*/) {;}{{$}} +// CHECK-FIXES: {{^}}int *k({{\[\[clang::lifetimebound\]\]}} int * /*i*/) { return nullptr; }{{$}} #define ATTR_BEFORE(x) [[clang::lifetimebound]] x int* m(ATTR_BEFORE(const int *i)) { return nullptr; } diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp index 68951fcf0aaac..abf95b857c192 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp @@ -17,25 +17,25 @@ void func_cpp_inc() {} // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc' // CHECK-FIXES: static void func_cpp_inc() {} -int* func_cpp_inc_return_ptr() {} +int* func_cpp_inc_return_ptr() { return nullptr; } // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc_return_ptr' -// CHECK-FIXES: static int* func_cpp_inc_return_ptr() {} +// CHECK-FIXES: static int* func_cpp_inc_return_ptr() { return nullptr; } -const int* func_cpp_inc_return_const_ptr() {} +const int* func_cpp_inc_return_const_ptr() { return nullptr; } // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_const_ptr' -// CHECK-FIXES: static const int* func_cpp_inc_return_const_ptr() {} +// CHECK-FIXES: static const int* func_cpp_inc_return_const_ptr() { return nullptr; } -int const* func_cpp_inc_return_ptr_const() {} +int const* func_cpp_inc_return_ptr_const() { return nullptr; } // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_ptr_const' -// CHECK-FIXES: static int const* func_cpp_inc_return_ptr_const() {} +// 
CHECK-FIXES: static int const* func_cpp_inc_return_ptr_const() { return nullptr; } -int * const func_cpp_inc_return_const() {} +int * const func_cpp_inc_return_const() { return nullptr; } // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: function 'func_cpp_inc_return_const' -// CHECK-FIXES: static int * const func_cpp_inc_return_const() {} +// CHECK-FIXES: static int * const func_cpp_inc_return_const() { return nullptr; } -volatile const int* func_cpp_inc_return_volatile_const_ptr() {} +volatile const int* func_cpp_inc_return_volatile_const_ptr() { return nullptr; } // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: function 'func_cpp_inc_return_volatile_const_ptr' -// CHECK-FIXES: static volatile const int* func_cpp_inc_return_volatile_const_ptr() {} +// CHECK-FIXES: static volatile const int* func_cpp_inc_return_volatile_const_ptr() { return nullptr; } [[nodiscard]] void func_nodiscard() {} // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: function 'func_nodiscard' diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-auto/containers.h b/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-auto/containers.h index c99b7a4407d5c..2c90d762f5d8a 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-auto/containers.h +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-auto/containers.h @@ -214,8 +214,8 @@ class map : public bidirectional_iterable>> { public: map() {} - iterator> find(const key &) {} - const_iterator>> find(const key &) const {} + iterator> find(const key &); + const_iterator>> find(const key &) const; }; template diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-bind.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-bind.cpp index 22b24d45fe63f..0d100ffa38b27 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-bind.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-bind.cpp @@ -46,7 +46,7 @@ struct D { operator bool() 
const { return true; } void MemberFunction(int x) {} - int MemberFunctionWithReturn(int x) {} + int MemberFunctionWithReturn(int x) { return 0; } static D *create(); }; @@ -342,7 +342,7 @@ void testCapturedSubexpressions() { struct E { void MemberFunction(int x) {} - int MemberFunctionWithReturn(int x) {} + int MemberFunctionWithReturn(int x) { return 0; } int operator()(int x, int y) const { return x + y; } void testMemberFunctions() { diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp index 1eb8ebe3d51e3..c9391e3339623 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp @@ -1,11 +1,11 @@ // RUN: %check_clang_tidy -std=c++20 %s modernize-avoid-c-arrays %t -int f1(int data[], int size) { - // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: do not declare C-style arrays, use 'std::span' instead +void f1(int data[], int size) { + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: do not declare C-style arrays, use 'std::span' instead int f4[] = {1, 2}; // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use 'std::array' instead } -int f2(int data[100]) { - // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: do not declare C-style arrays, use 'std::array' instead +void f2(int data[100]) { + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: do not declare C-style arrays, use 'std::array' instead } diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp index a0c79bb55a686..8e1890c234223 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp @@ -1,9 +1,13 @@ // RUN: 
%check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t -int not_main(int argc, char *argv[]) { - // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead +namespace X { +// Not main +int main(int argc, char *argv[]) { + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead int f4[] = {1, 2}; // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use 'std::array' instead + return 0; +} } int main(int argc, char *argv[]) { diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp index bd39f0fb4f1c8..58eced408733a 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp @@ -1,19 +1,23 @@ // RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t -int not_main(int argc, char *argv[], char *argw[]) { - // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead - // CHECK-MESSAGES: :[[@LINE-2]]:38: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead +namespace X { +// Not main. 
+int main(int argc, char *argv[], char *argw[]) { + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead + // CHECK-MESSAGES: :[[@LINE-2]]:34: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead int f4[] = {1, 2}; // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use 'std::array' instead + return 0; +} } int main(int argc, char *argv[], char *argw[]) { int f5[] = {1, 2}; // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use 'std::array' instead - auto not_main = [](int argc, char *argv[], char *argw[]) { - // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead - // CHECK-MESSAGES: :[[@LINE-2]]:46: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead + auto main = [](int argc, char *argv[], char *argw[]) { + // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead + // CHECK-MESSAGES: :[[@LINE-2]]:42: warning: do not declare C-style arrays, use 'std::array' or 'std::vector' instead int f6[] = {1, 2}; // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use 'std::array' instead }; diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp index df2a2c1af1f54..8d1d7378e5cff 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp @@ -170,6 +170,8 @@ const int *constArray() { // CHECK-FIXES: for (const int & I : ConstArr) // CHECK-FIXES-NEXT: if (Something) // CHECK-FIXES-NEXT: return &I; + + return nullptr; } struct HasArr { diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-emplace.cpp 
b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-emplace.cpp index 3f4a14cd9bb64..e6562cd18dbab 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-emplace.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-emplace.cpp @@ -49,7 +49,7 @@ class vector { template void emplace_back(Args &&... args){}; template - iterator emplace(const_iterator pos, Args &&...args){}; + iterator emplace(const_iterator pos, Args &&...args); ~vector(); }; @@ -69,7 +69,7 @@ class list { void push_back(T &&) {} template - iterator emplace(const_iterator pos, Args &&...args){}; + iterator emplace(const_iterator pos, Args &&...args); template void emplace_back(Args &&... args){}; template @@ -93,7 +93,7 @@ class deque { void push_front(T &&) {} template - iterator emplace(const_iterator pos, Args &&...args){}; + iterator emplace(const_iterator pos, Args &&...args); template void emplace_back(Args &&... args){}; template @@ -116,7 +116,7 @@ class forward_list { template void emplace_front(Args &&...args){}; template - iterator emplace_after(const_iterator pos, Args &&...args){}; + iterator emplace_after(const_iterator pos, Args &&...args); }; template @@ -131,7 +131,7 @@ class set { template void emplace(Args &&...args){}; template - iterator emplace_hint(const_iterator pos, Args &&...args){}; + iterator emplace_hint(const_iterator pos, Args &&...args); }; template @@ -146,7 +146,7 @@ class map { template void emplace(Args &&...args){}; template - iterator emplace_hint(const_iterator pos, Args &&...args){}; + iterator emplace_hint(const_iterator pos, Args &&...args); }; template @@ -161,7 +161,7 @@ class multiset { template void emplace(Args &&...args){}; template - iterator emplace_hint(const_iterator pos, Args &&...args){}; + iterator emplace_hint(const_iterator pos, Args &&...args); }; template @@ -176,7 +176,7 @@ class multimap { template void emplace(Args &&...args){}; template - iterator emplace_hint(const_iterator pos, Args &&...args){}; + 
iterator emplace_hint(const_iterator pos, Args &&...args); }; template @@ -191,7 +191,7 @@ class unordered_set { template void emplace(Args &&...args){}; template - iterator emplace_hint(const_iterator pos, Args &&...args){}; + iterator emplace_hint(const_iterator pos, Args &&...args); }; template @@ -206,7 +206,7 @@ class unordered_map { template void emplace(Args &&...args){}; template - iterator emplace_hint(const_iterator pos, Args &&...args){}; + iterator emplace_hint(const_iterator pos, Args &&...args); }; template @@ -221,7 +221,7 @@ class unordered_multiset { template void emplace(Args &&...args){}; template - iterator emplace_hint(const_iterator pos, Args &&...args){}; + iterator emplace_hint(const_iterator pos, Args &&...args); }; template @@ -236,7 +236,7 @@ class unordered_multimap { template void emplace(Args &&...args){}; template - iterator emplace_hint(const_iterator pos, Args &&...args){}; + iterator emplace_hint(const_iterator pos, Args &&...args); }; template diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-equals-default-copy.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-equals-default-copy.cpp index 4abb9c8555970..7f737148a7cd1 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-equals-default-copy.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-equals-default-copy.cpp @@ -1,6 +1,6 @@ // RUN: %check_clang_tidy %s modernize-use-equals-default %t -- \ // RUN: -config="{CheckOptions: {modernize-use-equals-default.IgnoreMacros: false}}" \ -// RUN: -- -fno-delayed-template-parsing -fexceptions +// RUN: -- -fno-delayed-template-parsing -fexceptions -Wno-error=return-type // Out of line definition. 
struct OL { diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp index 89d1aa48c46a3..bad8b7a8d7f08 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp @@ -203,13 +203,13 @@ struct InlineDefinitions : public Base { // CHECK-MESSAGES: :[[@LINE-2]]:16: warning: prefer using // CHECK-FIXES: {{^}} void j() const override - virtual MustUseResultObject k() {} // Has an implicit attribute. + virtual MustUseResultObject k(); // Has an implicit attribute. // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: prefer using - // CHECK-FIXES: {{^}} MustUseResultObject k() override {} + // CHECK-FIXES: {{^}} MustUseResultObject k() override; - virtual bool l() MUST_USE_RESULT UNUSED {} + virtual bool l() MUST_USE_RESULT UNUSED; // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: prefer using - // CHECK-FIXES: {{^}} bool l() override MUST_USE_RESULT UNUSED {} + // CHECK-FIXES: {{^}} bool l() override MUST_USE_RESULT UNUSED; virtual void r() & {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-format.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-format.cpp index 0a5a63eba2596..2af2e8949a814 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-format.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-format.cpp @@ -100,7 +100,7 @@ std::string StrFormat_field_width_and_precision() { return s1 + s2 + s3 + s4 + s5 + s6; } -std::string StrFormat_macros() { +void StrFormat_macros() { // The function call is replaced even though it comes from a macro. 
#define FORMAT absl::StrFormat auto s1 = FORMAT("Hello %d", 42); diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type.cpp index d9efc006b22ef..e1f36c52a7c01 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type.cpp @@ -106,9 +106,9 @@ extern "C" int d2(int arg); inline int d3(int arg) noexcept(true); // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: use a trailing return type for this function [modernize-use-trailing-return-type] // CHECK-FIXES: {{^}}inline auto d3(int arg) noexcept(true) -> int;{{$}} -inline int d4(int arg) try { } catch(...) { } +inline int d4(int arg) try { return 0; } catch(...) { return 0; } // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: use a trailing return type for this function [modernize-use-trailing-return-type] -// CHECK-FIXES: {{^}}inline auto d4(int arg) -> int try { } catch(...) { }{{$}} +// CHECK-FIXES: {{^}}inline auto d4(int arg) -> int try { return 0; } catch(...) 
{ return 0; }{{$}} int d5(int arg) throw(); // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: use a trailing return type for this function [modernize-use-trailing-return-type] // CHECK-FIXES: {{^}}auto d5(int arg) throw() -> int;{{$}} @@ -167,9 +167,9 @@ namespace N { } // CHECK-MESSAGES: :[[@LINE-2]]:9: warning: use a trailing return type for this function [modernize-use-trailing-return-type] // CHECK-FIXES: {{^}} auto e1() -> int;{{$}} -int N::e1() {} +int N::e1() { return 0; } // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: use a trailing return type for this function [modernize-use-trailing-return-type] -// CHECK-FIXES: {{^}}auto N::e1() -> int {}{{$}} +// CHECK-FIXES: {{^}}auto N::e1() -> int { return 0; }{{$}} // // Functions with unsupported return types @@ -260,14 +260,14 @@ struct B { B& operator=(const B&); // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: use a trailing return type for this function [modernize-use-trailing-return-type] // CHECK-FIXES: {{^}} auto operator=(const B&) -> B&;{{$}} - + double base1(int, bool b); // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: use a trailing return type for this function [modernize-use-trailing-return-type] // CHECK-FIXES: {{^}} auto base1(int, bool b) -> double;{{$}} - virtual double base2(int, bool b) {} + virtual double base2(int, bool b) { return 0; } // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: use a trailing return type for this function [modernize-use-trailing-return-type] -// CHECK-FIXES: {{^}} virtual auto base2(int, bool b) -> double {}{{$}} +// CHECK-FIXES: {{^}} virtual auto base2(int, bool b) -> double { return 0; }{{$}} virtual float base3() const = 0; // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use a trailing return type for this function [modernize-use-trailing-return-type] @@ -298,9 +298,9 @@ struct B { // CHECK-FIXES: {{^}} virtual auto base9() const noexcept -> const char * { return ""; }{{$}} }; -double B::base1(int, bool b) {} +double B::base1(int, bool b) { return 0; } // CHECK-MESSAGES: :[[@LINE-1]]:11: 
warning: use a trailing return type for this function [modernize-use-trailing-return-type] -// CHECK-FIXES: {{^}}auto B::base1(int, bool b) -> double {}{{$}} +// CHECK-FIXES: {{^}}auto B::base1(int, bool b) -> double { return 0; }{{$}} struct D : B { virtual double f1(int, bool b) final; @@ -311,9 +311,9 @@ struct D : B { // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: use a trailing return type for this function [modernize-use-trailing-return-type] // CHECK-FIXES: {{^}} virtual auto base2(int, bool b) -> double override;{{$}} - virtual float base3() const override final { } + virtual float base3() const override final { return 0; } // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use a trailing return type for this function [modernize-use-trailing-return-type] -// CHECK-FIXES: {{^}} virtual auto base3() const -> float override final { }{{$}} +// CHECK-FIXES: {{^}} virtual auto base3() const -> float override final { return 0; }{{$}} const char * base9() const noexcept override { return ""; } // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: use a trailing return type for this function [modernize-use-trailing-return-type] @@ -586,13 +586,13 @@ void c(int arg) { return; } struct D2 : B { D2(); virtual ~D2(); - + virtual auto f1(int, bool b) -> double final; virtual auto base2(int, bool b) -> double override; - virtual auto base3() const -> float override final { } + virtual auto base3() const -> float override final { return 0; } operator double(); }; auto l1 = [](int arg) {}; -auto l2 = [](int arg) -> double {}; +auto l2 = [](int arg) -> double { return 0; }; diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header-fixed.h b/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header-fixed.h index a40b2b2ece52e..1dcdd7a5ea4b4 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header-fixed.h +++ 
b/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header-fixed.h @@ -12,4 +12,4 @@ int f1(int n, ABC v1); // line 11 -int f2( int n, const ABC& v2); // line 15 +void f2( int n, const ABC& v2); // line 15 diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header.h b/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header.h index 94916755ddafe..d6f6e65ace79d 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header.h +++ b/clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header.h @@ -12,4 +12,4 @@ int f1(int n, ABC v1); // line 11 -int f2( int n, ABC v2); // line 15 +void f2( int n, ABC v2); // line 15 diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/inefficient-string-concatenation.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance/inefficient-string-concatenation.cpp index 1dbd56b322202..a1edf5fae2f9e 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/performance/inefficient-string-concatenation.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/performance/inefficient-string-concatenation.cpp @@ -6,15 +6,15 @@ class basic_string { public: basic_string() {} ~basic_string() {} - basic_string *operator+=(const basic_string &) {} - friend basic_string operator+(const basic_string &, const basic_string &) {} + basic_string *operator+=(const basic_string &); + friend basic_string operator+(const basic_string &, const basic_string &); }; typedef basic_string string; typedef basic_string wstring; } void f(std::string) {} -std::string g(std::string) {} +std::string g(std::string); int main() { std::string mystr1, mystr2; diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-header.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-header.cpp index 
2b45bb719dbc5..8461248982447 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-header.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-header.cpp @@ -14,7 +14,7 @@ int f1(int n, ABC v1, ABC v2) { // CHECK-FIXES: int f1(int n, const ABC& v1, const ABC& v2) { return v1.get(n) + v2.get(n); } -int f2(int n, ABC v2) { - // CHECK-MESSAGES: [[@LINE-1]]:19: warning: the parameter 'v2' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param] - // CHECK-FIXES: int f2(int n, const ABC& v2) { +void f2(int n, ABC v2) { + // CHECK-MESSAGES: [[@LINE-1]]:20: warning: the parameter 'v2' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param] + // CHECK-FIXES: void f2(int n, const ABC& v2) { } diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style1/header.h b/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style1/header.h index abbf7dfa48395..bbedc9b1df2dc 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style1/header.h +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style1/header.h @@ -4,4 +4,4 @@ void style_first_good(); void styleFirstBad(); -int thisIsMainLikeIgnored(int argc, const char *argv[]) {} +int thisIsMainLikeIgnored(int argc, const char *argv[]) { return 0; } diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style2/header.h b/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style2/header.h index 9d3e846a080b9..3b3b1e9508e8f 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style2/header.h +++ 
b/clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style2/header.h @@ -4,4 +4,4 @@ void STYLE_SECOND_GOOD(); void styleSecondBad(); -int thisIsMainLikeNotIgnored(int argc, const char *argv[]) {} +int thisIsMainLikeNotIgnored(int argc, const char *argv[]) { return 0; } diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type-macros.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type-macros.cpp index 5131011118f30..0a154c5d23d47 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type-macros.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type-macros.cpp @@ -6,16 +6,16 @@ // Regression tests involving macros #define CONCAT(a, b) a##b -CONCAT(cons, t) int p22(){} +CONCAT(cons, t) int p22(){ return 0; } // CHECK-MESSAGES: [[@LINE-1]]:1: warning: return type 'const int' is 'const'-qu // We warn, but we can't give a fix #define CONSTINT const int -CONSTINT p23() {} +CONSTINT p23() { return 0; } // CHECK-MESSAGES: [[@LINE-1]]:1: warning: return type 'const int' is 'const'-qu #define CONST const -CONST int p24() {} +CONST int p24() { return 0; } // CHECK-MESSAGES: [[@LINE-1]]:1: warning: return type 'const int' is 'const'-qu #define CREATE_FUNCTION() \ diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type.cpp index 76a3555663b18..d913ab4dee9ba 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy -std=c++14-or-later %s readability-const-return-type %t +// RUN: %check_clang_tidy -std=c++14-or-later %s readability-const-return-type %t -- -- -Wno-error=return-type // p# = positive test // n# = negative test diff --git 
a/clang-tools-extra/test/clang-tidy/checkers/readability/convert-member-functions-to-static.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/convert-member-functions-to-static.cpp index 5ec1f221b2207..a6b95bdb57e4d 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/convert-member-functions-to-static.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/convert-member-functions-to-static.cpp @@ -32,6 +32,7 @@ class A { // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'call_static_member' can be made static // CHECK-FIXES: {{^}} static int call_static_member() { already_static(); + return 0; } int read_static() { diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp index be5ba54513c67..1771836539d86 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp @@ -547,6 +547,7 @@ struct_type GlobalTypedefTestFunction(struct_type a_argument1) { // CHECK-FIXES: {{^}}struct_type_t GlobalTypedefTestFunction(struct_type_t a_argument1) { struct_type typedef_test_1; // CHECK-FIXES: {{^}} struct_type_t typedef_test_1; + return {}; } using my_struct_type = THIS___Structure; @@ -777,8 +778,8 @@ STATIC_MACRO void someFunc(ValueType a_v1, const ValueType& a_v2) {} // CHECK-FIXES: {{^}}STATIC_MACRO void someFunc(value_type_t a_v1, const value_type_t& a_v2) {} STATIC_MACRO void someFunc(const ValueType** p_a_v1, ValueType (*p_a_v2)()) {} // CHECK-FIXES: {{^}}STATIC_MACRO void someFunc(const value_type_t** p_a_v1, value_type_t (*p_a_v2)()) {} -STATIC_MACRO ValueType someFunc() {} -// CHECK-FIXES: {{^}}STATIC_MACRO value_type_t someFunc() {} +STATIC_MACRO ValueType someFunc() { return {}; } +// CHECK-FIXES: {{^}}STATIC_MACRO value_type_t someFunc() { return {}; } STATIC_MACRO void someFunc(MyFunPtr, const MyFunPtr****) 
{} // CHECK-FIXES: {{^}}STATIC_MACRO void someFunc(my_fun_ptr_t, const my_fun_ptr_t****) {} #undef STATIC_MACRO diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp index c4b7a77b92f0a..75f666e3e07e5 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp @@ -465,7 +465,7 @@ struct S { // CHECK-FIXES: S(bool a, bool b, bool c) : a(static_cast(a)), b(b), c(static_cast(c)) {} }; -bool f(S& s) { +void f(S& s) { functionTaking(s.a); // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: implicit conversion 'int' -> 'bool' // CHECK-FIXES: functionTaking(s.a != 0); diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp index 8c6fb123ac023..50433d5d12ea9 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp @@ -37,8 +37,8 @@ void operator delete[](void *x) throw(); void operator delete[](void * /*x*/) throw(); struct X { - X operator++(int) {} - X operator--(int) {} + void operator++(int) {} + void operator--(int) {} X(X&) = delete; X &operator=(X&) = default; @@ -86,22 +86,23 @@ void FDef2(int n, int) {} void FNoDef(int); class Z {}; +Z the_z; -Z &operator++(Z&) {} +Z &operator++(Z&) { return the_z; } // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function -// CHECK-FIXES: Z &operator++(Z& /*unused*/) {} +// CHECK-FIXES: Z &operator++(Z& /*unused*/) { return the_z; } -Z &operator++(Z&, int) {} +Z &operator++(Z&, int) { return the_z; } // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function -// CHECK-FIXES: Z &operator++(Z& /*unused*/, 
int) {} +// CHECK-FIXES: Z &operator++(Z& /*unused*/, int) { return the_z; } -Z &operator--(Z&) {} +Z &operator--(Z&) { return the_z; } // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function -// CHECK-FIXES: Z &operator--(Z& /*unused*/) {} +// CHECK-FIXES: Z &operator--(Z& /*unused*/) { return the_z; } -Z &operator--(Z&, int) {} +Z &operator--(Z&, int) { return the_z; } // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: all parameters should be named in a function -// CHECK-FIXES: Z &operator--(Z& /*unused*/, int) {} +// CHECK-FIXES: Z &operator--(Z& /*unused*/, int) { return the_z; } namespace testing { namespace internal { diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.c b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.c index c2e8bf68b4ad7..dbcc4cf6d1022 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.c +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.c @@ -20,7 +20,7 @@ static int f(void); static int f(void); // f // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: redundant 'f' declaration // CHECK-FIXES: {{^}}// f{{$}} -static int f(void) {} +static int f(void) { return 0; } inline void g(void) {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.cpp index be505f55b86b0..595eccf8854ba 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.cpp @@ -38,7 +38,7 @@ static int f(); static int f(); // f // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: redundant 'f' declaration // CHECK-FIXES: {{^}}// f{{$}} -static int f() {} +static int f() { return 0; } // Original check crashed for the code below. 
namespace std { diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/static-accessed-through-instance.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/static-accessed-through-instance.cpp index 202fe9be6d00c..a0d51dec7f32d 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/static-accessed-through-instance.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/static-accessed-through-instance.cpp @@ -264,7 +264,7 @@ struct Qptr { } }; -int func(Qptr qp) { +void func(Qptr qp) { qp->y = 10; qp->K = 10; // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: static member accessed through instance [readability-static-accessed-through-instance] diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument.cpp index edd3591517af3..27db92be21f20 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/suspicious-call-argument.cpp @@ -382,7 +382,7 @@ enum opcode { Foo, Bar }; static value *SimplifyRightShift( opcode Opcode, value *Op0, value *Op1, bool isExact, - const type1 &Q, unsigned MaxRecurse) {} + const type1 &Q, unsigned MaxRecurse) { return nullptr; } static value *SimplifyLShrInst(value *Op0, value *Op1, bool isExact, const type1 &Q, unsigned MaxRecurse) { if (value *V = SimplifyRightShift(Foo, Op0, Op1, isExact, Q, MaxRecurse)) diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/duplicate-fixes-of-alias-checkers.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/duplicate-fixes-of-alias-checkers.cpp index f67c20635064a..ff216298cfd60 100644 --- a/clang-tools-extra/test/clang-tidy/infrastructure/duplicate-fixes-of-alias-checkers.cpp +++ b/clang-tools-extra/test/clang-tidy/infrastructure/duplicate-fixes-of-alias-checkers.cpp @@ -31,7 +31,7 @@ class Foo { // CHECK-FIXES: _num2{}; }; -int 
should_use_emplace(std::vector &v) { +void should_use_emplace(std::vector &v) { v.push_back(Foo()); // CHECK-FIXES: v.emplace_back(); // CHECK-MESSAGES: warning: use emplace_back instead of push_back [hicpp-use-emplace,modernize-use-emplace] diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index db42fc5cc0da7..e1c61992512b5 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -144,6 +144,9 @@ Improvements to Clang's diagnostics - Fixed a bug where Clang's Analysis did not correctly model the destructor behavior of ``union`` members (#GH119415). - A statement attribute applied to a ``case`` label no longer suppresses 'bypassing variable initialization' diagnostics (#84072). +- The ``-Wunsafe-buffer-usage`` warning has been updated to warn + about unsafe libc function calls. Those new warnings are emitted + under the subgroup ``-Wunsafe-buffer-usage-in-libc-call``. Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index f305cbbce4c60..0f96bf0762ca4 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -5039,6 +5039,11 @@ class HLSLBufferDecl final : public NamedDecl, public DeclContext { SourceLocation KwLoc; /// IsCBuffer - Whether the buffer is a cbuffer (and not a tbuffer). 
bool IsCBuffer; + /// HasValidPackoffset - Whether the buffer has valid packoffset annotations + // on all declarations + bool HasValidPackoffset; + // LayoutStruct - Layout struct for the buffer + CXXRecordDecl *LayoutStruct; HLSLBufferDecl(DeclContext *DC, bool CBuffer, SourceLocation KwLoc, IdentifierInfo *ID, SourceLocation IDLoc, @@ -5059,6 +5064,10 @@ class HLSLBufferDecl final : public NamedDecl, public DeclContext { SourceLocation getRBraceLoc() const { return RBraceLoc; } void setRBraceLoc(SourceLocation L) { RBraceLoc = L; } bool isCBuffer() const { return IsCBuffer; } + void setHasValidPackoffset(bool PO) { HasValidPackoffset = PO; } + bool hasValidPackoffset() const { return HasValidPackoffset; } + const CXXRecordDecl *getLayoutStruct() const { return LayoutStruct; } + void addLayoutStruct(CXXRecordDecl *LS); // Implement isa/cast/dyncast/etc. static bool classof(const Decl *D) { return classofKind(D->getKind()); } diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 1d9743520654e..c3ff7ebd88516 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -6266,8 +6266,8 @@ class HLSLAttributedResourceType : public Type, public llvm::FoldingSetNode { LLVM_PREFERRED_TYPE(bool) uint8_t RawBuffer : 1; - Attributes(llvm::dxil::ResourceClass ResourceClass, bool IsROV, - bool RawBuffer) + Attributes(llvm::dxil::ResourceClass ResourceClass, bool IsROV = false, + bool RawBuffer = false) : ResourceClass(ResourceClass), IsROV(IsROV), RawBuffer(RawBuffer) {} Attributes() : Attributes(llvm::dxil::ResourceClass::UAV, false, false) {} diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 070cc792ca7db..db23afa6d6f0b 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -199,6 +199,10 @@ struct TransferrableTargetInfo { /// zero length bitfield, regardless of the zero length bitfield type. 
unsigned ZeroLengthBitfieldBoundary; + /// The largest container size which should be used for an over-sized + /// bitfield, in bits. + unsigned LargestOverSizedBitfieldContainer; + /// If non-zero, specifies a maximum alignment to truncate alignment /// specified in the aligned attribute of a static variable to this value. unsigned MaxAlignedAttribute; @@ -954,6 +958,10 @@ class TargetInfo : public TransferrableTargetInfo, return ZeroLengthBitfieldBoundary; } + unsigned getLargestOverSizedBitfieldContainer() const { + return LargestOverSizedBitfieldContainer; + } + /// Get the maximum alignment in bits for a static variable with /// aligned attribute. unsigned getMaxAlignedAttribute() const { return MaxAlignedAttribute; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 610207cf8b9a4..5a3be1690f335 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1747,6 +1747,10 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS, } } + // Suppress transparent contexts like export or HLSLBufferDecl context + if (Ctx->isTransparentContext()) + continue; + // Skip non-named contexts such as linkage specifications and ExportDecls. 
const NamedDecl *ND = dyn_cast(Ctx); if (!ND) @@ -5717,7 +5721,7 @@ HLSLBufferDecl::HLSLBufferDecl(DeclContext *DC, bool CBuffer, SourceLocation IDLoc, SourceLocation LBrace) : NamedDecl(Decl::Kind::HLSLBuffer, DC, IDLoc, DeclarationName(ID)), DeclContext(Decl::Kind::HLSLBuffer), LBraceLoc(LBrace), KwLoc(KwLoc), - IsCBuffer(CBuffer) {} + IsCBuffer(CBuffer), HasValidPackoffset(false), LayoutStruct(nullptr) {} HLSLBufferDecl *HLSLBufferDecl::Create(ASTContext &C, DeclContext *LexicalParent, bool CBuffer, @@ -5747,6 +5751,12 @@ HLSLBufferDecl *HLSLBufferDecl::CreateDeserialized(ASTContext &C, SourceLocation(), SourceLocation()); } +void HLSLBufferDecl::addLayoutStruct(CXXRecordDecl *LS) { + assert(LayoutStruct == nullptr && "layout struct has already been set"); + LayoutStruct = LS; + addDecl(LS); +} + //===----------------------------------------------------------------------===// // ImportDecl Implementation //===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp index 3e38ba0a43d98..b8600e6a344a4 100644 --- a/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/clang/lib/AST/RecordLayoutBuilder.cpp @@ -1469,15 +1469,18 @@ void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize, // sizeof(T')*8 <= n. 
QualType IntegralPODTypes[] = { - Context.UnsignedCharTy, Context.UnsignedShortTy, Context.UnsignedIntTy, - Context.UnsignedLongTy, Context.UnsignedLongLongTy + Context.UnsignedCharTy, Context.UnsignedShortTy, + Context.UnsignedIntTy, Context.UnsignedLongTy, + Context.UnsignedLongLongTy, Context.UnsignedInt128Ty, }; QualType Type; + uint64_t MaxSize = + Context.getTargetInfo().getLargestOverSizedBitfieldContainer(); for (const QualType &QT : IntegralPODTypes) { uint64_t Size = Context.getTypeSize(QT); - if (Size > FieldSize) + if (Size > FieldSize || Size > MaxSize) break; Type = QT; @@ -1520,6 +1523,7 @@ void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize, setSize(std::max(getSizeInBits(), getDataSizeInBits())); // Remember max struct/class alignment. + UnadjustedAlignment = std::max(UnadjustedAlignment, TypeAlign); UpdateAlignment(TypeAlign); } diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index c0bf4e686cf03..0699ec686e4e6 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -141,6 +141,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) { UseLeadingZeroLengthBitfield = true; UseExplicitBitFieldAlignment = true; ZeroLengthBitfieldBoundary = 0; + LargestOverSizedBitfieldContainer = 64; MaxAlignedAttribute = 0; HalfFormat = &llvm::APFloat::IEEEhalf(); FloatFormat = &llvm::APFloat::IEEEsingle(); diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index fad8d773bfc52..3633bab6e0df9 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -261,6 +261,10 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple, assert(UseBitFieldTypeAlignment && "bitfields affect type alignment"); UseZeroLengthBitfieldAlignment = true; + // AAPCS64 allows any "fundamental integer data type" to be used for + // over-sized bitfields, which includes 128-bit integers. 
+ LargestOverSizedBitfieldContainer = 128; + HasUnalignedAccess = true; // AArch64 targets default to using the ARM C++ ABI. diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 856d8b1b2948d..547220fb1f1e1 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -17,16 +17,22 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Type.h" #include "clang/Basic/TargetOptions.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/Alignment.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" using namespace clang; @@ -34,6 +40,9 @@ using namespace CodeGen; using namespace clang::hlsl; using namespace llvm; +static void createResourceInitFn(CodeGenModule &CGM, llvm::GlobalVariable *GV, + unsigned Slot, unsigned Space); + namespace { void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) { @@ -56,58 +65,17 @@ void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) { auto *DXILValMD = M.getOrInsertNamedMetadata(DXILValKey); DXILValMD->addOperand(Val); } -// cbuffer will be translated into global variable in special address space. -// If translate into C, -// cbuffer A { -// float a; -// float b; -// } -// float foo() { return a + b; } -// -// will be translated into -// -// struct A { -// float a; -// float b; -// } cbuffer_A __attribute__((address_space(4))); -// float foo() { return cbuffer_A.a + cbuffer_A.b; } -// -// layoutBuffer will create the struct A type. -// replaceBuffer will replace use of global variable a and b with cbuffer_A.a -// and cbuffer_A.b. 
-// -void layoutBuffer(CGHLSLRuntime::Buffer &Buf, const DataLayout &DL) { - if (Buf.Constants.empty()) - return; - - std::vector EltTys; - for (auto &Const : Buf.Constants) { - GlobalVariable *GV = Const.first; - Const.second = EltTys.size(); - llvm::Type *Ty = GV->getValueType(); - EltTys.emplace_back(Ty); - } - Buf.LayoutStruct = llvm::StructType::get(EltTys[0]->getContext(), EltTys); -} - -GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) { - // Create global variable for CB. - GlobalVariable *CBGV = new GlobalVariable( - Buf.LayoutStruct, /*isConstant*/ true, - GlobalValue::LinkageTypes::ExternalLinkage, nullptr, - llvm::formatv("{0}{1}", Buf.Name, Buf.IsCBuffer ? ".cb." : ".tb."), - GlobalValue::NotThreadLocal); - - return CBGV; -} } // namespace -llvm::Type *CGHLSLRuntime::convertHLSLSpecificType(const Type *T) { +llvm::Type * +CGHLSLRuntime::convertHLSLSpecificType(const Type *T, + SmallVector *Packoffsets) { assert(T->isHLSLSpecificType() && "Not an HLSL specific type!"); // Check if the target has a specific translation for this type first. - if (llvm::Type *TargetTy = CGM.getTargetCodeGenInfo().getHLSLType(CGM, T)) + if (llvm::Type *TargetTy = + CGM.getTargetCodeGenInfo().getHLSLType(CGM, T, Packoffsets)) return TargetTy; llvm_unreachable("Generic handling of HLSL types is not supported."); @@ -117,48 +85,174 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() { return CGM.getTarget().getTriple().getArch(); } -void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) { - if (D->getStorageClass() == SC_Static) { - // For static inside cbuffer, take as global static. - // Don't add to cbuffer. - CGM.EmitGlobal(D); - return; - } +// Returns true if the type is an HLSL resource class +static bool isResourceRecordType(const clang::Type *Ty) { + return HLSLAttributedResourceType::findHandleTypeOnResource(Ty) != nullptr; +} - auto *GV = cast(CGM.GetAddrOfGlobalVar(D)); - GV->setExternallyInitialized(true); - // Add debug info for constVal. 
- if (CGDebugInfo *DI = CGM.getModuleDebugInfo()) - if (CGM.getCodeGenOpts().getDebugInfo() >= - codegenoptions::DebugInfoKind::LimitedDebugInfo) - DI->EmitGlobalVariable(cast(GV), D); - - // FIXME: support packoffset. - // See https://github.com/llvm/llvm-project/issues/57914. - uint32_t Offset = 0; - bool HasUserOffset = false; - - unsigned LowerBound = HasUserOffset ? Offset : UINT_MAX; - CB.Constants.emplace_back(std::make_pair(GV, LowerBound)); +// Returns true if the type is an HLSL resource class or an array of them +static bool isResourceRecordTypeOrArrayOf(const clang::Type *Ty) { + while (const ConstantArrayType *CAT = dyn_cast(Ty)) + Ty = CAT->getArrayElementTypeNoTypeQual(); + return isResourceRecordType(Ty); } -void CGHLSLRuntime::addBufferDecls(const DeclContext *DC, Buffer &CB) { - for (Decl *it : DC->decls()) { - if (auto *ConstDecl = dyn_cast(it)) { - addConstant(ConstDecl, CB); - } else if (isa(it)) { +// Emits constant global variables for buffer constants declarations +// and creates metadata linking the constant globals with the buffer global. +void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, + llvm::GlobalVariable *BufGV) { + LLVMContext &Ctx = CGM.getLLVMContext(); + + // get the layout struct from constant buffer target type + llvm::Type *BufType = BufGV->getValueType(); + llvm::Type *BufLayoutType = + cast(BufType)->getTypeParameter(0); + llvm::StructType *LayoutStruct = cast( + cast(BufLayoutType)->getTypeParameter(0)); + + // Start metadata list associating the buffer global variable with its + // constatns + SmallVector BufGlobals; + BufGlobals.push_back(ValueAsMetadata::get(BufGV)); + + const auto *ElemIt = LayoutStruct->element_begin(); + for (Decl *D : BufDecl->decls()) { + if (isa(D)) // Nothing to do for this declaration. - } else if (isa(it)) { - // A function within an cbuffer is effectively a top-level function, - // as it only refers to globally scoped declarations. 
- CGM.EmitTopLevelDecl(it); + continue; + if (isa(D)) { + // A function within an cbuffer is effectively a top-level function. + CGM.EmitTopLevelDecl(D); + continue; } + VarDecl *VD = dyn_cast(D); + if (!VD) + continue; + + QualType VDTy = VD->getType(); + if (VDTy.getAddressSpace() != LangAS::hlsl_constant) { + if (VD->getStorageClass() == SC_Static || + VDTy.getAddressSpace() == LangAS::hlsl_groupshared || + isResourceRecordTypeOrArrayOf(VDTy.getTypePtr())) { + // Emit static and groupshared variables and resource classes inside + // cbuffer as regular globals + CGM.EmitGlobal(VD); + } else { + // Anything else that is not in the hlsl_constant address space must be + // an empty struct or a zero-sized array and can be ignored + assert(BufDecl->getASTContext().getTypeSize(VDTy) == 0 && + "constant buffer decl with non-zero sized type outside of " + "hlsl_constant address space"); + } + continue; + } + + assert(ElemIt != LayoutStruct->element_end() && + "number of elements in layout struct does not match"); + llvm::Type *LayoutType = *ElemIt++; + + // FIXME: handle resources inside user defined structs + // (llvm/wg-hlsl#175) + + // create global variable for the constant and to metadata list + GlobalVariable *ElemGV = + cast(CGM.GetAddrOfGlobalVar(VD, LayoutType)); + BufGlobals.push_back(ValueAsMetadata::get(ElemGV)); } + assert(ElemIt == LayoutStruct->element_end() && + "number of elements in layout struct does not match"); + + // add buffer metadata to the module + CGM.getModule() + .getOrInsertNamedMetadata("hlsl.cbs") + ->addOperand(MDNode::get(Ctx, BufGlobals)); } -void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *D) { - Buffers.emplace_back(Buffer(D)); - addBufferDecls(D, Buffers.back()); +// Creates resource handle type for the HLSL buffer declaration +static const clang::HLSLAttributedResourceType * +createBufferHandleType(const HLSLBufferDecl *BufDecl) { + ASTContext &AST = BufDecl->getASTContext(); + QualType QT = AST.getHLSLAttributedResourceType( + 
AST.HLSLResourceTy, + QualType(BufDecl->getLayoutStruct()->getTypeForDecl(), 0), + HLSLAttributedResourceType::Attributes(ResourceClass::CBuffer)); + return cast(QT.getTypePtr()); +} + +static void fillPackoffsetLayout(const HLSLBufferDecl *BufDecl, + SmallVector &Layout) { + assert(Layout.empty() && "expected empty vector for layout"); + assert(BufDecl->hasValidPackoffset()); + + for (Decl *D : BufDecl->decls()) { + if (isa(D) || isa(D)) { + continue; + } + VarDecl *VD = dyn_cast(D); + if (!VD || VD->getType().getAddressSpace() != LangAS::hlsl_constant) + continue; + assert(VD->hasAttr() && + "expected packoffset attribute on every declaration"); + size_t Offset = VD->getAttr()->getOffsetInBytes(); + Layout.push_back(Offset); + } +} + +// Codegen for HLSLBufferDecl +void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) { + + assert(BufDecl->isCBuffer() && "tbuffer codegen is not supported yet"); + + // create resource handle type for the buffer + const clang::HLSLAttributedResourceType *ResHandleTy = + createBufferHandleType(BufDecl); + + // empty constant buffer is ignored + if (ResHandleTy->getContainedType()->getAsCXXRecordDecl()->isEmpty()) + return; + + // create global variable for the constant buffer + SmallVector Layout; + if (BufDecl->hasValidPackoffset()) + fillPackoffsetLayout(BufDecl, Layout); + + llvm::TargetExtType *TargetTy = + cast(convertHLSLSpecificType( + ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr)); + llvm::GlobalVariable *BufGV = + new GlobalVariable(TargetTy, /*isConstant*/ true, + GlobalValue::LinkageTypes::ExternalLinkage, nullptr, + llvm::formatv("{0}{1}", BufDecl->getName(), + BufDecl->isCBuffer() ? 
".cb" : ".tb"), + GlobalValue::NotThreadLocal); + CGM.getModule().insertGlobalVariable(BufGV); + + // Add globals for constant buffer elements and create metadata nodes + emitBufferGlobalsAndMetadata(BufDecl, BufGV); + + // Resource initialization + const HLSLResourceBindingAttr *RBA = + BufDecl->getAttr(); + // FIXME: handle implicit binding if no binding attribute is found + // (llvm/llvm-project#110722) + if (RBA) + createResourceInitFn(CGM, BufGV, RBA->getSlotNumber(), + RBA->getSpaceNumber()); +} + +llvm::TargetExtType * +CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) { + const auto Entry = LayoutTypes.find(StructType); + if (Entry != LayoutTypes.end()) + return Entry->getSecond(); + return nullptr; +} + +void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType, + llvm::TargetExtType *LayoutTy) { + assert(getHLSLBufferLayoutType(StructType) == nullptr && + "layout type for this struct already exist"); + LayoutTypes[StructType] = LayoutTy; } void CGHLSLRuntime::finishCodeGen() { @@ -169,28 +263,8 @@ void CGHLSLRuntime::finishCodeGen() { addDxilValVersion(TargetOpts.DxilValidatorVersion, M); generateGlobalCtorDtorCalls(); - - const DataLayout &DL = M.getDataLayout(); - - for (auto &Buf : Buffers) { - layoutBuffer(Buf, DL); - GlobalVariable *GV = replaceBuffer(Buf); - M.insertGlobalVariable(GV); - llvm::hlsl::ResourceClass RC = Buf.IsCBuffer - ? llvm::hlsl::ResourceClass::CBuffer - : llvm::hlsl::ResourceClass::SRV; - llvm::hlsl::ResourceKind RK = Buf.IsCBuffer - ? 
llvm::hlsl::ResourceKind::CBuffer - : llvm::hlsl::ResourceKind::TBuffer; - addBufferResourceAnnotation(GV, RC, RK, /*IsROV=*/false, - llvm::hlsl::ElementType::Invalid, Buf.Binding); - } } -CGHLSLRuntime::Buffer::Buffer(const HLSLBufferDecl *D) - : Name(D->getName()), IsCBuffer(D->isCBuffer()), - Binding(D->getAttr()) {} - void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV, llvm::hlsl::ResourceClass RC, llvm::hlsl::ResourceKind RK, @@ -524,21 +598,15 @@ void CGHLSLRuntime::generateGlobalCtorDtorCalls() { } } -// Returns true if the type is an HLSL resource class -static bool isResourceRecordType(const clang::Type *Ty) { - return HLSLAttributedResourceType::findHandleTypeOnResource(Ty) != nullptr; -} - -static void createResourceInitFn(CodeGenModule &CGM, const VarDecl *VD, - llvm::GlobalVariable *GV, unsigned Slot, - unsigned Space) { +static void createResourceInitFn(CodeGenModule &CGM, llvm::GlobalVariable *GV, + unsigned Slot, unsigned Space) { LLVMContext &Ctx = CGM.getLLVMContext(); llvm::Type *Int1Ty = llvm::Type::getInt1Ty(Ctx); llvm::Function *InitResFunc = llvm::Function::Create( llvm::FunctionType::get(CGM.VoidTy, false), llvm::GlobalValue::InternalLinkage, - ("_init_resource_" + VD->getName()).str(), CGM.getModule()); + ("_init_resource_" + GV->getName()).str(), CGM.getModule()); InitResFunc->addFnAttr(llvm::Attribute::AlwaysInline); llvm::BasicBlock *EntryBB = @@ -547,20 +615,15 @@ static void createResourceInitFn(CodeGenModule &CGM, const VarDecl *VD, const DataLayout &DL = CGM.getModule().getDataLayout(); Builder.SetInsertPoint(EntryBB); - const HLSLAttributedResourceType *AttrResType = - HLSLAttributedResourceType::findHandleTypeOnResource( - VD->getType().getTypePtr()); - - // FIXME: Only simple declarations of resources are supported for now. - // Arrays of resources or resources in user defined classes are - // not implemented yet. 
- assert(AttrResType != nullptr && - "Resource class must have a handle of HLSLAttributedResourceType"); - - llvm::Type *TargetTy = - CGM.getTargetCodeGenInfo().getHLSLType(CGM, AttrResType); - assert(TargetTy != nullptr && - "Failed to convert resource handle to target type"); + // Make sure the global variable is resource handle (cbuffer) or + // resource class (=class where the first element is a resource handle). + llvm::Type *HandleTy = GV->getValueType(); + assert((HandleTy->isTargetExtTy() || + (HandleTy->isStructTy() && + HandleTy->getStructElementType(0)->isTargetExtTy())) && + "unexpected type of the global"); + if (!HandleTy->isTargetExtTy()) + HandleTy = HandleTy->getStructElementType(0); llvm::Value *Args[] = { llvm::ConstantInt::get(CGM.IntTy, Space), /* reg_space */ @@ -572,9 +635,9 @@ static void createResourceInitFn(CodeGenModule &CGM, const VarDecl *VD, llvm::ConstantInt::get(Int1Ty, false) /* non-uniform */ }; llvm::Value *CreateHandle = Builder.CreateIntrinsic( - /*ReturnType=*/TargetTy, + /*ReturnType=*/HandleTy, CGM.getHLSLRuntime().getCreateHandleFromBindingIntrinsic(), Args, nullptr, - Twine(VD->getName()).concat("_h")); + Twine(GV->getName()).concat("_h")); llvm::Value *HandleRef = Builder.CreateStructGEP(GV->getValueType(), GV, 0); Builder.CreateAlignedStore(CreateHandle, HandleRef, @@ -601,8 +664,7 @@ void CGHLSLRuntime::handleGlobalVarDefinition(const VarDecl *VD, // not implemented yet. 
return; - createResourceInitFn(CGM, VD, GV, RBA->getSlotNumber(), - RBA->getSpaceNumber()); + createResourceInitFn(CGM, GV, RBA->getSlotNumber(), RBA->getSpaceNumber()); } llvm::Instruction *CGHLSLRuntime::getConvergenceToken(BasicBlock &BB) { diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 8767a2ddceb96..a9da42324a038 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -15,6 +15,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsDirectX.h" @@ -46,20 +47,26 @@ } \ } +using ResourceClass = llvm::dxil::ResourceClass; + namespace llvm { class GlobalVariable; class Function; class StructType; +class Metadata; } // namespace llvm namespace clang { +class NamedDecl; class VarDecl; class ParmVarDecl; class InitListExpr; class HLSLBufferDecl; class HLSLResourceBindingAttr; class Type; +class RecordType; class DeclContext; +class HLSLPackOffsetAttr; class FunctionDecl; @@ -126,16 +133,6 @@ class CGHLSLRuntime { unsigned Space; BufferResBinding(HLSLResourceBindingAttr *Attr); }; - struct Buffer { - Buffer(const HLSLBufferDecl *D); - llvm::StringRef Name; - // IsCBuffer - Whether the buffer is a cbuffer (and not a tbuffer). - bool IsCBuffer; - BufferResBinding Binding; - // Global variable and offset for each constant. 
- std::vector> Constants; - llvm::StructType *LayoutStruct = nullptr; - }; protected: CodeGenModule &CGM; @@ -147,7 +144,9 @@ class CGHLSLRuntime { CGHLSLRuntime(CodeGenModule &CGM) : CGM(CGM) {} virtual ~CGHLSLRuntime() {} - llvm::Type *convertHLSLSpecificType(const Type *T); + llvm::Type * + convertHLSLSpecificType(const Type *T, + SmallVector *Packoffsets = nullptr); void annotateHLSLResource(const VarDecl *D, llvm::GlobalVariable *GV); void generateGlobalCtorDtorCalls(); @@ -163,6 +162,10 @@ class CGHLSLRuntime { llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB); + llvm::TargetExtType * + getHLSLBufferLayoutType(const RecordType *LayoutStructTy); + void addHLSLBufferLayoutType(const RecordType *LayoutStructTy, + llvm::TargetExtType *LayoutTy); void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E); private: @@ -171,10 +174,11 @@ class CGHLSLRuntime { llvm::hlsl::ResourceKind RK, bool IsROV, llvm::hlsl::ElementType ET, BufferResBinding &Binding); - void addConstant(VarDecl *D, Buffer &CB); - void addBufferDecls(const DeclContext *DC, Buffer &CB); + void emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, + llvm::GlobalVariable *BufGV); llvm::Triple::ArchType getArch(); - llvm::SmallVector Buffers; + + llvm::DenseMap LayoutTypes; }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index 868ec847b9634..05ab6671453f8 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -106,6 +106,7 @@ add_clang_library(clangCodeGen ConstantInitBuilder.cpp CoverageMappingGen.cpp ItaniumCXXABI.cpp + HLSLBufferLayoutBuilder.cpp LinkInModulesPass.cpp MacroPPCallbacks.cpp MicrosoftCXXABI.cpp diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp new file mode 100644 index 0000000000000..1ae00023ab2bc --- /dev/null +++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp @@ -0,0 +1,229 @@ +//===- 
HLSLBufferLayoutBuilder.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "HLSLBufferLayoutBuilder.h" +#include "CGHLSLRuntime.h" +#include "CodeGenModule.h" +#include "clang/AST/Type.h" + +//===----------------------------------------------------------------------===// +// Implementation of constant buffer layout common between DirectX and +// SPIR/SPIR-V. +//===----------------------------------------------------------------------===// + +using namespace clang; +using namespace clang::CodeGen; + +namespace { + +// Creates a new array type with the same dimentions but with the new +// element type. +static llvm::Type * +createArrayWithNewElementType(CodeGenModule &CGM, + const ConstantArrayType *ArrayType, + llvm::Type *NewElemType) { + const clang::Type *ArrayElemType = ArrayType->getArrayElementTypeNoTypeQual(); + if (ArrayElemType->isConstantArrayType()) + NewElemType = createArrayWithNewElementType( + CGM, cast(ArrayElemType), NewElemType); + return llvm::ArrayType::get(NewElemType, ArrayType->getSExtSize()); +} + +// Returns the size of a scalar or vector in bytes +static unsigned getScalarOrVectorSizeInBytes(llvm::Type *Ty) { + assert(Ty->isVectorTy() || Ty->isIntegerTy() || Ty->isFloatingPointTy()); + if (Ty->isVectorTy()) { + llvm::FixedVectorType *FVT = cast(Ty); + return FVT->getNumElements() * + (FVT->getElementType()->getScalarSizeInBits() / 8); + } + return Ty->getScalarSizeInBits() / 8; +} + +} // namespace + +namespace clang { +namespace CodeGen { + +// Creates a layout type for given struct with HLSL constant buffer layout +// taking into account Packoffsets, if provided. +// Previously created layout types are cached by CGHLSLRuntime. 
+// +// The function iterates over all fields of the StructType (including base +// classes) and calls layoutField to converts each field to its corresponding +// LLVM type and to calculate its HLSL constant buffer layout. Any embedded +// structs (or arrays of structs) are converted to target layout types as well. +llvm::TargetExtType *HLSLBufferLayoutBuilder::createLayoutType( + const RecordType *StructType, + const llvm::SmallVector *Packoffsets) { + + // check if we already have the layout type for this struct + if (llvm::TargetExtType *Ty = + CGM.getHLSLRuntime().getHLSLBufferLayoutType(StructType)) + return Ty; + + SmallVector Layout; + SmallVector LayoutElements; + unsigned Index = 0; // packoffset index + unsigned EndOffset = 0; + + // reserve first spot in the layout vector for buffer size + Layout.push_back(0); + + // iterate over all fields of the record, including fields on base classes + llvm::SmallVector RecordTypes; + RecordTypes.push_back(StructType); + while (RecordTypes.back()->getAsCXXRecordDecl()->getNumBases()) { + CXXRecordDecl *D = RecordTypes.back()->getAsCXXRecordDecl(); + assert(D->getNumBases() == 1 && + "HLSL doesn't support multiple inheritance"); + RecordTypes.push_back(D->bases_begin()->getType()->getAs()); + } + while (!RecordTypes.empty()) { + const RecordType *RT = RecordTypes.back(); + RecordTypes.pop_back(); + + for (const auto *FD : RT->getDecl()->fields()) { + assert(!Packoffsets || Index < Packoffsets->size() && + "number of elements in layout struct does not " + "match number of packoffset annotations"); + + if (!layoutField(FD, EndOffset, Layout, LayoutElements, + Packoffsets ? (*Packoffsets)[Index] : -1)) + return nullptr; + Index++; + } + } + + // set the size of the buffer + Layout[0] = EndOffset; + + // create the layout struct type; anonymous struct have empty name but + // non-empty qualified name + const CXXRecordDecl *Decl = StructType->getAsCXXRecordDecl(); + std::string Name = + Decl->getName().empty() ? 
"anon" : Decl->getQualifiedNameAsString(); + llvm::StructType *StructTy = + llvm::StructType::create(LayoutElements, Name, true); + + // create target layout type + llvm::TargetExtType *NewLayoutTy = llvm::TargetExtType::get( + CGM.getLLVMContext(), LayoutTypeName, {StructTy}, Layout); + if (NewLayoutTy) + CGM.getHLSLRuntime().addHLSLBufferLayoutType(StructType, NewLayoutTy); + return NewLayoutTy; +} + +// The function converts a single field of HLSL Buffer to its corresponding +// LLVM type and calculates it's layout. Any embedded structs (or +// arrays of structs) are converted to target layout types as well. +// The converted type is appended to the LayoutElements list, the element +// offset is added to the Layout list and the EndOffset updated to the offset +// just after the lay-ed out element (which is basically the size of the +// buffer). +// Returns true if the conversion was successful. +// The packoffset parameter contains the field's layout offset provided by the +// user or -1 if there was no packoffset (or register(cX)) annotation. +bool HLSLBufferLayoutBuilder::layoutField( + const FieldDecl *FD, unsigned &EndOffset, SmallVector &Layout, + SmallVector &LayoutElements, int Packoffset) { + + // Size of element; for arrays this is a size of a single element in the + // array. Total array size of calculated as (ArrayCount-1) * ArrayStride + + // ElemSize. + unsigned ElemSize = 0; + unsigned ElemOffset = 0; + unsigned ArrayCount = 1; + unsigned ArrayStride = 0; + + const unsigned BufferRowAlign = 16U; + unsigned NextRowOffset = llvm::alignTo(EndOffset, BufferRowAlign); + + llvm::Type *ElemLayoutTy = nullptr; + QualType FieldTy = FD->getType(); + + if (FieldTy->isConstantArrayType()) { + // Unwrap array to find the element type and get combined array size. 
+ QualType Ty = FieldTy; + while (Ty->isConstantArrayType()) { + const ConstantArrayType *ArrayTy = cast(Ty); + ArrayCount *= ArrayTy->getSExtSize(); + Ty = ArrayTy->getElementType(); + } + // For array of structures, create a new array with a layout type + // instead of the structure type. + if (Ty->isStructureType()) { + llvm::Type *NewTy = + cast(createLayoutType(Ty->getAsStructureType())); + if (!NewTy) + return false; + assert(isa(NewTy) && "expected target type"); + ElemSize = cast(NewTy)->getIntParameter(0); + ElemLayoutTy = createArrayWithNewElementType( + CGM, cast(FieldTy.getTypePtr()), NewTy); + } else { + // Array of vectors or scalars + ElemSize = + getScalarOrVectorSizeInBytes(CGM.getTypes().ConvertTypeForMem(Ty)); + ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy); + } + ArrayStride = llvm::alignTo(ElemSize, BufferRowAlign); + ElemOffset = (Packoffset != -1) ? Packoffset : NextRowOffset; + + } else if (FieldTy->isStructureType()) { + // Create a layout type for the structure + ElemLayoutTy = createLayoutType(FieldTy->getAsStructureType()); + if (!ElemLayoutTy) + return false; + assert(isa(ElemLayoutTy) && "expected target type"); + ElemSize = cast(ElemLayoutTy)->getIntParameter(0); + ElemOffset = (Packoffset != -1) ? 
Packoffset : NextRowOffset; + + } else { + // scalar or vector - find element size and alignment + unsigned Align = 0; + ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy); + if (ElemLayoutTy->isVectorTy()) { + // align vectors by sub element size + const llvm::FixedVectorType *FVT = + cast(ElemLayoutTy); + unsigned SubElemSize = FVT->getElementType()->getScalarSizeInBits() / 8; + ElemSize = FVT->getNumElements() * SubElemSize; + Align = SubElemSize; + } else { + assert(ElemLayoutTy->isIntegerTy() || ElemLayoutTy->isFloatingPointTy()); + ElemSize = ElemLayoutTy->getScalarSizeInBits() / 8; + Align = ElemSize; + } + + // calculate or get element offset for the vector or scalar + if (Packoffset != -1) { + ElemOffset = Packoffset; + } else { + ElemOffset = llvm::alignTo(EndOffset, Align); + // if the element does not fit, move it to the next row + if (ElemOffset + ElemSize > NextRowOffset) + ElemOffset = NextRowOffset; + } + } + + // Update end offset of the layout; do not update it if the EndOffset + // is already bigger than the new value (which may happen with unordered + // packoffset annotations) + unsigned NewEndOffset = + ElemOffset + (ArrayCount - 1) * ArrayStride + ElemSize; + EndOffset = std::max(EndOffset, NewEndOffset); + + // add the layout element and offset to the lists + Layout.push_back(ElemOffset); + LayoutElements.push_back(ElemLayoutTy); + return true; +} + +} // namespace CodeGen +} // namespace clang diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h new file mode 100644 index 0000000000000..57bb17c557b9c --- /dev/null +++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h @@ -0,0 +1,48 @@ +//===- HLSLBufferLayoutBuilder.h ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/DerivedTypes.h" + +namespace clang { +class RecordType; +class FieldDecl; + +namespace CodeGen { +class CodeGenModule; + +//===----------------------------------------------------------------------===// +// Implementation of constant buffer layout common between DirectX and +// SPIR/SPIR-V. +//===----------------------------------------------------------------------===// + +class HLSLBufferLayoutBuilder { +private: + CodeGenModule &CGM; + llvm::StringRef LayoutTypeName; + +public: + HLSLBufferLayoutBuilder(CodeGenModule &CGM, llvm::StringRef LayoutTypeName) + : CGM(CGM), LayoutTypeName(LayoutTypeName) {} + + // Returns LLVM target extension type with the name LayoutTypeName + // for given structure type and layout data. The first number in + // the Layout is the size followed by offsets for each struct element. 
+ llvm::TargetExtType * + createLayoutType(const RecordType *StructType, + const llvm::SmallVector *Packoffsets = nullptr); + +private: + bool layoutField(const clang::FieldDecl *FD, unsigned &EndOffset, + llvm::SmallVector &Layout, + llvm::SmallVector &LayoutElements, + int Packoffset); +}; + +} // namespace CodeGen +} // namespace clang diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 4a66683a3b91f..86057c14a549e 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -439,7 +439,9 @@ class TargetCodeGenInfo { } /// Return an LLVM type that corresponds to a HLSL type - virtual llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *T) const { + virtual llvm::Type * + getHLSLType(CodeGenModule &CGM, const Type *T, + const SmallVector *Packoffsets = nullptr) const { return nullptr; } diff --git a/clang/lib/CodeGen/Targets/DirectX.cpp b/clang/lib/CodeGen/Targets/DirectX.cpp index 7935f7ae37004..77091eb45f5cf 100644 --- a/clang/lib/CodeGen/Targets/DirectX.cpp +++ b/clang/lib/CodeGen/Targets/DirectX.cpp @@ -7,8 +7,13 @@ //===----------------------------------------------------------------------===// #include "ABIInfoImpl.h" +#include "CodeGenModule.h" +#include "HLSLBufferLayoutBuilder.h" #include "TargetInfo.h" +#include "clang/AST/Type.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" using namespace clang; using namespace clang::CodeGen; @@ -24,11 +29,14 @@ class DirectXTargetCodeGenInfo : public TargetCodeGenInfo { DirectXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} - llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *T) const override; + llvm::Type *getHLSLType( + CodeGenModule &CGM, const Type *T, + const SmallVector *Packoffsets = nullptr) const override; }; -llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(CodeGenModule &CGM, - const Type *Ty) const { +llvm::Type 
*DirectXTargetCodeGenInfo::getHLSLType( + CodeGenModule &CGM, const Type *Ty, + const SmallVector *Packoffsets) const { auto *ResType = dyn_cast(Ty); if (!ResType) return nullptr; @@ -56,9 +64,19 @@ llvm::Type *DirectXTargetCodeGenInfo::getHLSLType(CodeGenModule &CGM, return llvm::TargetExtType::get(Ctx, TypeName, {ElemType}, Ints); } - case llvm::dxil::ResourceClass::CBuffer: - llvm_unreachable("dx.CBuffer handles are not implemented yet"); - break; + case llvm::dxil::ResourceClass::CBuffer: { + QualType ContainedTy = ResType->getContainedType(); + if (ContainedTy.isNull() || !ContainedTy->isStructureType()) + return nullptr; + + llvm::Type *BufferLayoutTy = + HLSLBufferLayoutBuilder(CGM, "dx.Layout") + .createLayoutType(ContainedTy->getAsStructureType(), Packoffsets); + if (!BufferLayoutTy) + return nullptr; + + return llvm::TargetExtType::get(Ctx, "dx.CBuffer", {BufferLayoutTy}); + } case llvm::dxil::ResourceClass::Sampler: llvm_unreachable("dx.Sampler handles are not implemented yet"); break; diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index b81ed29a5159b..c94db31ae1a89 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -52,7 +52,9 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo { unsigned getOpenCLKernelCallingConv() const override; llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const override; - llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *Ty) const override; + llvm::Type *getHLSLType( + CodeGenModule &CGM, const Type *Ty, + const SmallVector *Packoffsets = nullptr) const override; llvm::Type *getSPIRVImageTypeFromHLSLResource( const HLSLAttributedResourceType::Attributes &attributes, llvm::Type *ElementType, llvm::LLVMContext &Ctx) const; @@ -367,8 +369,9 @@ llvm::Type *CommonSPIRTargetCodeGenInfo::getOpenCLType(CodeGenModule &CGM, return nullptr; } -llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType(CodeGenModule &CGM, - const Type 
*Ty) const { +llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType( + CodeGenModule &CGM, const Type *Ty, + const SmallVector *Packoffsets) const { auto *ResType = dyn_cast(Ty); if (!ResType) return nullptr; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 362df485a025c..d95763b22a819 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -14295,6 +14295,13 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { if (getLangOpts().OpenCL && Var->getType().getAddressSpace() == LangAS::opencl_local) return; + + // In HLSL, objects in the hlsl_constant address space are initialized + // externally, so don't synthesize an implicit initializer. + if (getLangOpts().HLSL && + Var->getType().getAddressSpace() == LangAS::hlsl_constant) + return; + // C++03 [dcl.init]p9: // If no initializer is specified for an object, and the // object is of (possibly cv-qualified) non-POD class type (or diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 20275ded8a561..d26d85d5861b1 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -176,9 +176,9 @@ Decl *SemaHLSL::ActOnStartBuffer(Scope *BufferScope, bool CBuffer, // https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules static unsigned calculateLegacyCbufferSize(const ASTContext &Context, QualType T) { - unsigned Size = 0; constexpr unsigned CBufferAlign = 16; if (const RecordType *RT = T->getAs()) { + unsigned Size = 0; const RecordDecl *RD = RT->getDecl(); for (const FieldDecl *Field : RD->fields()) { QualType Ty = Field->getType(); @@ -191,22 +191,28 @@ static unsigned calculateLegacyCbufferSize(const ASTContext &Context, Size = llvm::alignTo(Size, FieldAlign); Size += FieldSize; } - } else if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) { - if (unsigned ElementCount = AT->getSize().getZExtValue()) { - unsigned ElementSize = - calculateLegacyCbufferSize(Context, 
AT->getElementType()); - unsigned AlignedElementSize = llvm::alignTo(ElementSize, CBufferAlign); - Size = AlignedElementSize * (ElementCount - 1) + ElementSize; - } - } else if (const VectorType *VT = T->getAs()) { + return Size; + } + + if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) { + unsigned ElementCount = AT->getSize().getZExtValue(); + if (ElementCount == 0) + return 0; + + unsigned ElementSize = + calculateLegacyCbufferSize(Context, AT->getElementType()); + unsigned AlignedElementSize = llvm::alignTo(ElementSize, CBufferAlign); + return AlignedElementSize * (ElementCount - 1) + ElementSize; + } + + if (const VectorType *VT = T->getAs()) { unsigned ElementCount = VT->getNumElements(); unsigned ElementSize = calculateLegacyCbufferSize(Context, VT->getElementType()); - Size = ElementSize * ElementCount; - } else { - Size = Context.getTypeSize(T) / 8; + return ElementSize * ElementCount; } - return Size; + + return Context.getTypeSize(T) / 8; } // Validate packoffset: @@ -239,6 +245,7 @@ static void validatePackoffset(Sema &S, HLSLBufferDecl *BufDecl) { // Make sure there is no overlap in packoffset - sort PackOffsetVec by offset // and compare adjacent values. 
+ bool IsValid = true; ASTContext &Context = S.getASTContext(); std::sort(PackOffsetVec.begin(), PackOffsetVec.end(), [](const std::pair &LHS, @@ -257,8 +264,10 @@ static void validatePackoffset(Sema &S, HLSLBufferDecl *BufDecl) { VarDecl *NextVar = PackOffsetVec[i + 1].first; S.Diag(NextVar->getLocation(), diag::err_hlsl_packoffset_overlap) << NextVar << Var; + IsValid = false; } } + BufDecl->setHasValidPackoffset(IsValid); } // Returns true if the array has a zero size = if any of the dimensions is 0 @@ -500,7 +509,7 @@ void createHostLayoutStructForBuffer(Sema &S, HLSLBufferDecl *BufDecl) { } } LS->completeDefinition(); - BufDecl->addDecl(LS); + BufDecl->addLayoutStruct(LS); } // Handle end of cbuffer/tbuffer declaration diff --git a/clang/test/CodeGen/aapcs-align.cpp b/clang/test/CodeGen/aapcs-align.cpp index 4f393d9e6b7f3..c7bc5ba0bbfef 100644 --- a/clang/test/CodeGen/aapcs-align.cpp +++ b/clang/test/CodeGen/aapcs-align.cpp @@ -6,6 +6,11 @@ extern "C" { +// CHECK: @sizeof_OverSizedBitfield ={{.*}} global i32 8 +// CHECK: @alignof_OverSizedBitfield ={{.*}} global i32 8 +// CHECK: @sizeof_VeryOverSizedBitfield ={{.*}} global i32 16 +// CHECK: @alignof_VeryOverSizedBitfield ={{.*}} global i32 8 + // Base case, nothing interesting. struct S { int x, y; @@ -138,4 +143,42 @@ void g6() { // CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 undef]) // CHECK: declare void @f6(i32 noundef, [4 x i32]) // CHECK: declare void @f6m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [4 x i32]) + +// Over-sized bitfield, which results in a 64-bit container type, so 64-bit +// alignment. 
+struct OverSizedBitfield { + int x : 64; +}; + +unsigned sizeof_OverSizedBitfield = sizeof(OverSizedBitfield); +unsigned alignof_OverSizedBitfield = alignof(OverSizedBitfield); + +// CHECK: define{{.*}} void @g7 +// CHECK: call void @f7(i32 noundef 1, [1 x i64] [i64 42]) +// CHECK: declare void @f7(i32 noundef, [1 x i64]) +void f7(int a, OverSizedBitfield b); +void g7() { + OverSizedBitfield s = {42}; + f7(1, s); +} + +// There are no 128-bit fundamental data types defined by AAPCS32, so this gets +// a 64-bit container plus 64 bits of padding, giving it a size of 16 bytes and +// alignment of 8 bytes. +struct VeryOverSizedBitfield { + int x : 128; +}; + +unsigned sizeof_VeryOverSizedBitfield = sizeof(VeryOverSizedBitfield); +unsigned alignof_VeryOverSizedBitfield = alignof(VeryOverSizedBitfield); + +// CHECK: define{{.*}} void @g8 +// CHECK: call void @f8(i32 noundef 1, [2 x i64] [i64 42, i64 0]) +// CHECK: declare void @f8(i32 noundef, [2 x i64]) +void f8(int a, VeryOverSizedBitfield b); +void g8() { + VeryOverSizedBitfield s = {42}; + f8(1, s); +} + } diff --git a/clang/test/CodeGen/aapcs64-align.cpp b/clang/test/CodeGen/aapcs64-align.cpp index 7a8151022852e..e69faf231936c 100644 --- a/clang/test/CodeGen/aapcs64-align.cpp +++ b/clang/test/CodeGen/aapcs64-align.cpp @@ -5,6 +5,13 @@ extern "C" { +// CHECK: @sizeof_OverSizedBitfield ={{.*}} global i32 8 +// CHECK: @alignof_OverSizedBitfield ={{.*}} global i32 8 +// CHECK: @sizeof_VeryOverSizedBitfield ={{.*}} global i32 16 +// CHECK: @alignof_VeryOverSizedBitfield ={{.*}} global i32 16 +// CHECK: @sizeof_RidiculouslyOverSizedBitfield ={{.*}} global i32 32 +// CHECK: @alignof_RidiculouslyOverSizedBitfield ={{.*}} global i32 16 + // Base case, nothing interesting. struct S { long x, y; @@ -161,5 +168,62 @@ int test_bitint8(){ } // CHECK: ret i32 1 +// Over-sized bitfield, which results in a 64-bit container type, so 64-bit +// alignment. 
+struct OverSizedBitfield { + int x : 64; +}; + +unsigned sizeof_OverSizedBitfield = sizeof(OverSizedBitfield); +unsigned alignof_OverSizedBitfield = alignof(OverSizedBitfield); + +// CHECK: define{{.*}} void @g7 +// CHECK: call void @f7(i32 noundef 1, i64 42) +// CHECK: declare void @f7(i32 noundef, i64) +void f7(int a, OverSizedBitfield b); +void g7() { + OverSizedBitfield s = {42}; + f7(1, s); +} + +// AAPCS64 does have a 128-bit integer fundamental data type, so this gets a +// 128-bit container with 128-bit alignment. This is just within the limit of +// what can be passed directly. +struct VeryOverSizedBitfield { + int x : 128; +}; + +unsigned sizeof_VeryOverSizedBitfield = sizeof(VeryOverSizedBitfield); +unsigned alignof_VeryOverSizedBitfield = alignof(VeryOverSizedBitfield); + +// CHECK: define{{.*}} void @g8 +// CHECK: call void @f8(i32 noundef 1, i128 42) +// CHECK: declare void @f8(i32 noundef, i128) +void f8(int a, VeryOverSizedBitfield b); +void g8() { + VeryOverSizedBitfield s = {42}; + f8(1, s); +} + +// There are no bigger fundamental data types, so this gets a 128-bit container +// and 128 bits of padding, giving the struct a size of 32 bytes, and an +// alignment of 16 bytes. This is over the PCS size limit of 16 bytes, so it +// will be passed indirectly. 
+struct RidiculouslyOverSizedBitfield { + int x : 256; +}; + +unsigned sizeof_RidiculouslyOverSizedBitfield = sizeof(RidiculouslyOverSizedBitfield); +unsigned alignof_RidiculouslyOverSizedBitfield = alignof(RidiculouslyOverSizedBitfield); + +// CHECK: define{{.*}} void @g9 +// CHECK: call void @f9(i32 noundef 1, ptr noundef nonnull %agg.tmp) +// CHECK: declare void @f9(i32 noundef, ptr noundef) +void f9(int a, RidiculouslyOverSizedBitfield b); +void g9() { + RidiculouslyOverSizedBitfield s = {42}; + f9(1, s); +} + } diff --git a/clang/test/CodeGen/armv7k-abi.c b/clang/test/CodeGen/armv7k-abi.c index fd18dafa7d03f..872e6423a4a99 100644 --- a/clang/test/CodeGen/armv7k-abi.c +++ b/clang/test/CodeGen/armv7k-abi.c @@ -16,7 +16,7 @@ typedef struct { void simple_hfa(HFA h) {} // CHECK: define{{.*}} %struct.HFA @return_simple_hfa -HFA return_simple_hfa() {} +HFA return_simple_hfa() { return (HFA){0}; } typedef struct { double arr[4]; @@ -43,7 +43,7 @@ typedef struct { void big_struct_indirect(BigStruct b) {} // CHECK: define{{.*}} void @return_big_struct_indirect(ptr dead_on_unwind noalias writable sret -BigStruct return_big_struct_indirect() {} +BigStruct return_big_struct_indirect() { return (BigStruct){0}; } // Structs smaller than 16 bytes should be passed directly, and coerced to // either [N x i32] or [N x i64] depending on alignment requirements. 
@@ -58,7 +58,7 @@ typedef struct { void small_struct_direct(SmallStruct s) {} // CHECK: define{{.*}} [4 x i32] @return_small_struct_direct() -SmallStruct return_small_struct_direct() {} +SmallStruct return_small_struct_direct() { return (SmallStruct){0}; } typedef struct { float x; @@ -75,14 +75,14 @@ typedef struct { } PaddedSmallStruct; // CHECK: define{{.*}} i32 @return_padded_small_struct() -PaddedSmallStruct return_padded_small_struct() {} +PaddedSmallStruct return_padded_small_struct() { return (PaddedSmallStruct){0}; } typedef struct { char arr[7]; } OddlySizedStruct; // CHECK: define{{.*}} [2 x i32] @return_oddly_sized_struct() -OddlySizedStruct return_oddly_sized_struct() {} +OddlySizedStruct return_oddly_sized_struct() { return (OddlySizedStruct){0}; } // CHECK: define{{.*}} <4 x float> @test_va_arg_vec(ptr noundef %l) diff --git a/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp b/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp index e475f032f5ce3..b7aad6a5bcd21 100644 --- a/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp +++ b/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp @@ -248,8 +248,8 @@ struct S15 { }; // CHECK-LABEL: define dso_local void @_Z4fS15v -// CHECK: alloca %struct.S15, align 8 -// CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S15, align 8 +// CHECK: alloca %struct.S15, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S15, align 16 // CHECK: #dbg_declare(ptr [[TMP0]], [[S15_A:![0-9]+]], !DIExpression(DW_OP_LLVM_extract_bits_sext, 0, 32), // CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S15_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 16, DW_OP_LLVM_extract_bits_zext, 0, 32), // diff --git a/clang/test/CodeGenHLSL/cbuf.hlsl b/clang/test/CodeGenHLSL/cbuf.hlsl deleted file mode 100644 index 825e7b8161a60..0000000000000 --- a/clang/test/CodeGenHLSL/cbuf.hlsl +++ /dev/null @@ -1,33 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ 
-// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s - -// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s - -// CHECK: @a = external addrspace(2) externally_initialized global float, align 4 -// CHECK: @b = external addrspace(2) externally_initialized global double, align 8 -// CHECK: @c = external addrspace(2) externally_initialized global float, align 4 -// CHECK: @d = external addrspace(2) externally_initialized global double, align 8 - -// CHECK: @[[CB:.+]] = external constant { float, double } -cbuffer A : register(b0, space2) { - float a; - double b; -} - -// CHECK: @[[TB:.+]] = external constant { float, double } -tbuffer A : register(t2, space1) { - float c; - double d; -} - -float foo() { -// CHECK: load float, ptr addrspace(2) @a, align 4 -// CHECK: load double, ptr addrspace(2) @b, align 8 -// CHECK: load float, ptr addrspace(2) @c, align 4 -// CHECK: load double, ptr addrspace(2) @d, align 8 - return a + b + c*d; -} - -// CHECK: !hlsl.cbufs = !{![[CBMD:[0-9]+]]} -// CHECK: ![[CBMD]] = !{ptr @[[CB]], i32 13, i32 0, i1 false, i32 0, i32 2} diff --git a/clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl b/clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl deleted file mode 100644 index 13c401d428331..0000000000000 --- a/clang/test/CodeGenHLSL/cbuf_in_namespace.hlsl +++ /dev/null @@ -1,29 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s - -// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s - -// Make sure cbuffer inside namespace works. 
- -// CHECK: @_ZN2n02n11aE = external addrspace(2) externally_initialized global float, align 4 -// CHECK: @_ZN2n01bE = external addrspace(2) externally_initialized global float, align 4 - -// CHECK: @[[CB:.+]] = external constant { float } -// CHECK: @[[TB:.+]] = external constant { float } -namespace n0 { -namespace n1 { - cbuffer A { - float a; - } -} - tbuffer B { - float b; - } -} - -float foo() { -// CHECK: load float, ptr addrspace(2) @_ZN2n02n11aE, align 4 -// CHECK: load float, ptr addrspace(2) @_ZN2n01bE, align 4 - return n0::n1::a + n0::b; -} diff --git a/clang/test/CodeGenHLSL/cbuffer.hlsl b/clang/test/CodeGenHLSL/cbuffer.hlsl new file mode 100644 index 0000000000000..38093c6dfacd7 --- /dev/null +++ b/clang/test/CodeGenHLSL/cbuffer.hlsl @@ -0,0 +1,197 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute \ +// RUN: -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// CHECK: %__cblayout_CBScalars = type <{ float, double, half, i64, i32, i16, i32, i64 }> +// CHECK: %__cblayout_CBVectors = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16>, <3 x i64> }> +// CHECK: %__cblayout_CBArrays = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }> +// CHECK: %__cblayout_CBStructs = type <{ target("dx.Layout", %A, 8, 0), target("dx.Layout", %B, 14, 0, 8), +// CHECK-SAME: target("dx.Layout", %C, 24, 0, 16), [5 x target("dx.Layout", %A, 8, 0)], +// CHECK-SAME: target("dx.Layout", %__cblayout_D, 94, 0), half, <3 x i16> }> + +// CHECK: %A = type <{ <2 x float> }> +// CHECK: %B = type <{ <2 x float>, <3 x i16> }> +// CHECK: %C = type <{ i32, target("dx.Layout", %A, 8, 0) }> +// CHECK: %__cblayout_D = type <{ [2 x [3 x target("dx.Layout", %B, 14, 0, 8)]] }> + +// CHECK: %__cblayout_CBMix = type <{ [2 x target("dx.Layout", %Test, 8, 0, 4)], float, [3 x [2 x <2 x float>]], float, +// CHECK-SAME: 
target("dx.Layout", %anon, 4, 0), double, target("dx.Layout", %anon.0, 8, 0), float, <1 x double>, i16 }> + +// CHECK: %Test = type <{ float, float }> +// CHECK: %anon = type <{ float }> +// CHECK: %anon.0 = type <{ <2 x i32> }> + +cbuffer CBScalars : register(b1, space5) { + float a1; + double a2; + float16_t a3; + uint64_t a4; + int a5; + uint16_t a6; + bool a7; + int64_t a8; +} + +// CHECK: @CBScalars.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, +// CHECK-SAME: 56, 0, 8, 16, 24, 32, 36, 40, 48)) +// CHECK: @a1 = external addrspace(2) global float, align 4 +// CHECK: @a2 = external addrspace(2) global double, align 8 +// CHECK: @a3 = external addrspace(2) global half, align 2 +// CHECK: @a4 = external addrspace(2) global i64, align 8 +// CHECK: @a5 = external addrspace(2) global i32, align 4 +// CHECK: @a6 = external addrspace(2) global i16, align 2 +// CHECK: @a7 = external addrspace(2) global i32, align 4 +// CHECK: @a8 = external addrspace(2) global i64, align 8 + +cbuffer CBVectors { + float3 b1; + double3 b2; + float16_t2 b3; + uint64_t3 b4; + int4 b5; + uint16_t3 b6; + int64_t3 b7; + // FIXME: add a bool vectors after llvm-project/llvm#91639 is added +} + +// CHECK: @CBVectors.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, +// CHECK-SAME: 136, 0, 16, 40, 48, 80, 96, 112)) +// CHECK: @b1 = external addrspace(2) global <3 x float>, align 16 +// CHECK: @b2 = external addrspace(2) global <3 x double>, align 32 +// CHECK: @b3 = external addrspace(2) global <2 x half>, align 4 +// CHECK: @b4 = external addrspace(2) global <3 x i64>, align 32 +// CHECK: @b5 = external addrspace(2) global <4 x i32>, align 16 +// CHECK: @b6 = external addrspace(2) global <3 x i16>, align 8 +// CHECK: @b7 = external addrspace(2) global <3 x i64>, align 32 + +cbuffer CBArrays : register(b2) { + float c1[3]; + double3 c2[2]; + float16_t c3[2][2]; + uint64_t c4[3]; + int4 c5[2][3][4]; + uint16_t c6[1]; + 
int64_t c7[2]; + bool c8[4]; +} + +// CHECK: @CBArrays.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, +// CHECK-SAME: 708, 0, 48, 112, 176, 224, 608, 624, 656)) +// CHECK: @c1 = external addrspace(2) global [3 x float], align 4 +// CHECK: @c2 = external addrspace(2) global [2 x <3 x double>], align 32 +// CHECK: @c3 = external addrspace(2) global [2 x [2 x half]], align 2 +// CHECK: @c4 = external addrspace(2) global [3 x i64], align 8 +// CHECK: @c5 = external addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16 +// CHECK: @c6 = external addrspace(2) global [1 x i16], align 2 +// CHECK: @c7 = external addrspace(2) global [2 x i64], align 8 +// CHECK: @c8 = external addrspace(2) global [4 x i32], align 4 + +struct Empty {}; + +struct A { + float2 f1; +}; + +struct B : A { + uint16_t3 f2; +}; + +struct C { + int i; + A f3; +}; + +struct D { + B array_of_B[2][3]; + Empty es; +}; + +// CHECK: @CBStructs.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, +// CHECK-SAME: 246, 0, 16, 32, 64, 144, 238, 240)) +// CHECK: @a = external addrspace(2) global target("dx.Layout", %A, 8, 0), align 8 +// CHECK: @b = external addrspace(2) global target("dx.Layout", %B, 14, 0, 8), align 8 +// CHECK: @c = external addrspace(2) global target("dx.Layout", %C, 24, 0, 16), align 8 +// CHECK: @array_of_A = external addrspace(2) global [5 x target("dx.Layout", %A, 8, 0)], align 8 +// CHECK: @d = external addrspace(2) global target("dx.Layout", %__cblayout_D, 94, 0), align 8 +// CHECK: @e = external addrspace(2) global half, align 2 +// CHECK: @f = external addrspace(2) global <3 x i16>, align 8 + +cbuffer CBStructs { + A a; + B b; + C c; + A array_of_A[5]; + D d; + half e; + uint16_t3 f; +}; + +struct Test { + float a, b; +}; + +// CHECK: @CBMix.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, +// CHECK-SAME: 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) +// CHECK: @test = 
external addrspace(2) global [2 x target("dx.Layout", %Test, 8, 0, 4)], align 4 +// CHECK: @f1 = external addrspace(2) global float, align 4 +// CHECK: @f2 = external addrspace(2) global [3 x [2 x <2 x float>]], align 8 +// CHECK: @f3 = external addrspace(2) global float, align 4 +// CHECK: @f4 = external addrspace(2) global target("dx.Layout", %anon, 4, 0), align 4 +// CHECK: @f5 = external addrspace(2) global double, align 8 +// CHECK: @f6 = external addrspace(2) global target("dx.Layout", %anon.0, 8, 0), align 8 +// CHECK: @f7 = external addrspace(2) global float, align 4 +// CHECK: @f8 = external addrspace(2) global <1 x double>, align 8 +// CHECK: @f9 = external addrspace(2) global i16, align 2 + +cbuffer CBMix { + Test test[2]; + float f1; + float2 f2[3][2]; + float f3; + struct { float c; } f4; + double f5; + struct { int2 i; } f6; + float f7; + vector f8; + uint16_t f9; +}; + +// CHECK: define internal void @_init_resource_CBScalars.cb() +// CHECK-NEXT: entry: +// CHECK-NEXT: %[[HANDLE1:.*]] = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) +// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBScalarss_56_0_8_16_24_32_36_40_48tt(i32 5, i32 1, i32 1, i32 0, i1 false) +// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) %CBScalars.cb_h, ptr @CBScalars.cb, align 4 + +// CHECK: define internal void @_init_resource_CBArrays.cb() +// CHECK-NEXT: entry: +// CHECK-NEXT: %[[HANDLE2:.*]] = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) +// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBArrayss_708_0_48_112_176_224_608_624_656tt(i32 0, i32 2, i32 1, i32 0, i1 false) +// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CBArrays.cb_h, 
ptr @CBArrays.cb, align 4 + +RWBuffer Buf; + +[numthreads(4,1,1)] +void main() { + Buf[0] = a1 + b1.z + c1[2] + a.f1.y + f1; +} + +// CHECK: define internal void @_GLOBAL__sub_I_cbuffer.hlsl() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_init_resource_CBScalars.cb() +// CHECK-NEXT: call void @_init_resource_CBArrays.cb() + +// CHECK: !hlsl.cbs = !{![[CBSCALARS:[0-9]+]], ![[CBVECTORS:[0-9]+]], ![[CBARRAYS:[0-9]+]], ![[CBSTRUCTS:[0-9]+]], ![[CBMIX:[0-9]+]]} + +// CHECK: ![[CBSCALARS]] = !{ptr @CBScalars.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, +// CHECK-SAME: ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8} + +// CHECK: ![[CBVECTORS]] = !{ptr @CBVectors.cb, ptr addrspace(2) @b1, ptr addrspace(2) @b2, ptr addrspace(2) @b3, ptr addrspace(2) @b4, +// CHECK-SAME: ptr addrspace(2) @b5, ptr addrspace(2) @b6, ptr addrspace(2) @b7} + +// CHECK: ![[CBARRAYS]] = !{ptr @CBArrays.cb, ptr addrspace(2) @c1, ptr addrspace(2) @c2, ptr addrspace(2) @c3, ptr addrspace(2) @c4, +// CHECK-SAME: ptr addrspace(2) @c5, ptr addrspace(2) @c6, ptr addrspace(2) @c7, ptr addrspace(2) @c8} + +// CHECK: ![[CBSTRUCTS]] = !{ptr @CBStructs.cb, ptr addrspace(2) @a, ptr addrspace(2) @b, ptr addrspace(2) @c, ptr addrspace(2) @array_of_A, +// CHECK-SAME: ptr addrspace(2) @d, ptr addrspace(2) @e, ptr addrspace(2) @f} + +// CHECK: ![[CBMIX]] = !{ptr @CBMix.cb, ptr addrspace(2) @test, ptr addrspace(2) @f1, ptr addrspace(2) @f2, ptr addrspace(2) @f3, +// CHECK-SAME: ptr addrspace(2) @f4, ptr addrspace(2) @f5, ptr addrspace(2) @f6, ptr addrspace(2) @f7, ptr addrspace(2) @f8, ptr addrspace(2) @f9} diff --git a/clang/test/CodeGenHLSL/cbuffer_and_namespaces.hlsl b/clang/test/CodeGenHLSL/cbuffer_and_namespaces.hlsl new file mode 100644 index 0000000000000..393ca3825c638 --- /dev/null +++ b/clang/test/CodeGenHLSL/cbuffer_and_namespaces.hlsl @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ 
+// RUN: dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +// Make sure cbuffer inside namespace works. + +// CHECK: %"n0::n1::__cblayout_A" = type <{ float }> +// CHECK: %"n0::__cblayout_B" = type <{ float }> +// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Layout", %"n0::Foo", 4, 0) }> +// CHECK: %"n0::Foo" = type <{ float }> + +// CHECK: @A.cb = external constant target("dx.CBuffer", target("dx.Layout", %"n0::n1::__cblayout_A", 4, 0)) +// CHECK: @_ZN2n02n11aE = external addrspace(2) global float, align 4 + +// CHECK: @B.cb = external constant target("dx.CBuffer", target("dx.Layout", %"n0::__cblayout_B", 4, 0)) +// CHECK: @_ZN2n01aE = external addrspace(2) global float, align 4 + +// CHECK: @C.cb = external constant target("dx.CBuffer", target("dx.Layout", %"n0::n2::__cblayout_C", 20, 0, 16)) +// CHECK: @_ZN2n02n21aE = external addrspace(2) global float, align 4 +// CHECK: external addrspace(2) global target("dx.Layout", %"n0::Foo", 4, 0), align 4 + +namespace n0 { + struct Foo { + float f; + }; + + namespace n1 { + cbuffer A { + float a; + } + } + cbuffer B { + float a; + } + namespace n2 { + cbuffer C { + float a; + Foo b; + } + } +} + +float foo() { + // CHECK: load float, ptr addrspace(2) @_ZN2n02n11aE, align 4 + // CHECK: load float, ptr addrspace(2) @_ZN2n01aE, align 4 + // CHECK: load float, ptr addrspace(2) @_ZN2n02n21aE, align 4 + return n0::n1::a + n0::a + n0::n2::a; +} + +[numthreads(4,1,1)] +void main() {} + +// CHECK: !hlsl.cbs = !{![[A:[0-9]+]], ![[B:[0-9]+]], ![[C:[0-9]+]]} +// CHECK: [[A]] = !{ptr @A.cb, ptr addrspace(2) @_ZN2n02n11aE} +// CHECK: [[B]] = !{ptr @B.cb, ptr addrspace(2) @_ZN2n01aE} +// CHECK: [[C]] = !{ptr @C.cb, ptr addrspace(2) @_ZN2n02n21aE, ptr addrspace(2) @_ZN2n02n21bE} diff --git a/clang/test/CodeGenHLSL/cbuffer_with_packoffset.hlsl b/clang/test/CodeGenHLSL/cbuffer_with_packoffset.hlsl new file mode 100644 index 0000000000000..870593986a976 --- /dev/null +++ 
b/clang/test/CodeGenHLSL/cbuffer_with_packoffset.hlsl @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +// CHECK: %__cblayout_CB = type <{ float, double, <2 x i32> }> + +// CHECK: @CB.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88)) +// CHECK: @a = external addrspace(2) global float, align 4 +// CHECK: @b = external addrspace(2) global double, align 8 +// CHECK: @c = external addrspace(2) global <2 x i32>, align 8 + +cbuffer CB : register(b1, space3) { + float a : packoffset(c1.x); + double b : packoffset(c10.z); + int2 c : packoffset(c5.z); +} + +// CHECK: define internal void @_init_resource_CB.cb() +// CHECK-NEXT: entry: +// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88)) +// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_176_16_168_88tt(i32 3, i32 1, i32 1, i32 0, i1 false) + +float foo() { + // CHECK: load float, ptr addrspace(2) @a, align 4 + // CHECK: load double, ptr addrspace(2) @b, align 8 + return a + b; +} +// CHECK: define internal void @_GLOBAL__sub_I_cbuffer_with_packoffset.hlsl() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_init_resource_CB.cb() + +[numthreads(4,1,1)] +void main() { + foo(); +} + +// CHECK: !hlsl.cbs = !{![[CB:[0-9]+]]} +// CHECK: ![[CB]] = !{ptr @CB.cb, ptr addrspace(2) @a, ptr addrspace(2) @b, ptr addrspace(2) @c} diff --git a/clang/test/CodeGenHLSL/cbuffer_with_static_global_and_function.hlsl b/clang/test/CodeGenHLSL/cbuffer_with_static_global_and_function.hlsl new file mode 100644 index 0000000000000..99f40d8fc93d7 --- /dev/null +++ b/clang/test/CodeGenHLSL/cbuffer_with_static_global_and_function.hlsl @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm 
-disable-llvm-passes -o - | FileCheck %s + +// CHECK: %__cblayout_A = type <{ float }> + +// CHECK: @A.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_A, 4, 0)) +// CHECK: @a = external addrspace(2) global float, align 4 +// CHECK-DAG: @_ZL1b = internal global float 3.000000e+00, align 4 +// CHECK-NOT: @B.cb + +cbuffer A { + float a; + static float b = 3; + float foo() { return a + b; } +} + +cbuffer B { + // intentionally empty +} + +// CHECK: define {{.*}} float @_Z3foov() #0 { +// CHECK: load float, ptr addrspace(2) @a, align 4 + +extern float bar() { + return foo(); +} + +// CHECK: !hlsl.cbs = !{![[CB:[0-9]+]]} +// CHECK: ![[CB]] = !{ptr @A.cb, ptr addrspace(2) @a} diff --git a/clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl b/clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl deleted file mode 100644 index 25f51cce2017d..0000000000000 --- a/clang/test/CodeGenHLSL/static_global_and_function_in_cb.hlsl +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s - -// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan-library %s \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s - -cbuffer A { - // CHECK: @a = external addrspace(2) externally_initialized global float, align 4 - float a; - // CHECK: @_ZL1b = internal global float 3.000000e+00, align 4 - static float b = 3; - float foo() { return a + b; } -} -// CHECK: @[[CB:.+]] = external constant { float } - -// CHECK:define {{.*}} float @_Z3foov() -// CHECK:load float, ptr addrspace(2) @a, align 4 -// CHECK:load float, ptr @_ZL1b, align 4 - -float bar() { - return foo(); -} diff --git a/clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl b/clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl index 72027eda4571d..59bf87b554af3 100644 --- a/clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl +++ 
b/clang/test/CodeGenOpenCL/atomics-cas-remarks-gfx90a.cl @@ -36,7 +36,7 @@ typedef enum memory_scope { // GFX90A-CAS: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("agent-one-as") monotonic // GFX90A-CAS: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("one-as") monotonic // GFX90A-CAS: atomicrmw fadd ptr addrspace(1) {{.*}} syncscope("wavefront-one-as") monotonic -float atomic_cas(__global atomic_float *d, float a) { +void atomic_cas(__global atomic_float *d, float a) { float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_device); float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_all_svm_devices); diff --git a/clang/test/Index/comment-to-html-xml-conversion.cpp b/clang/test/Index/comment-to-html-xml-conversion.cpp index e0a7cff5a9a3d..941aa8a27b6bf 100644 --- a/clang/test/Index/comment-to-html-xml-conversion.cpp +++ b/clang/test/Index/comment-to-html-xml-conversion.cpp @@ -20,7 +20,6 @@ // RUN: FileCheck %s < %t/out.c-index-direct // RUN: FileCheck %s < %t/out.c-index-pch -// XFAIL: msan // XFAIL: valgrind #ifndef HEADER diff --git a/compiler-rt/test/hwasan/TestCases/libc_thread_freeres.c b/compiler-rt/test/hwasan/TestCases/libc_thread_freeres.c index e6d1731f30e37..e95f4c8c4fd43 100644 --- a/compiler-rt/test/hwasan/TestCases/libc_thread_freeres.c +++ b/compiler-rt/test/hwasan/TestCases/libc_thread_freeres.c @@ -11,6 +11,7 @@ void *ThreadFn(void *) { __hwasan_enable_allocator_tagging(); // This will trigger memory deallocation in __strerror_thread_freeres, // at a point when HwasanThread is already gone. 
+ return NULL; } int main() { diff --git a/compiler-rt/test/ubsan/TestCases/Misc/Posix/diag-stacktrace.cpp b/compiler-rt/test/ubsan/TestCases/Misc/Posix/diag-stacktrace.cpp index 8b7cb6ade35ac..296171848255f 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/Posix/diag-stacktrace.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/Posix/diag-stacktrace.cpp @@ -2,10 +2,10 @@ // UNSUPPORTED: target=thumb{{.*}} // UNSUPPORTED: android -// RUN: %clangxx -fsanitize=return %gmlt -O2 -fno-omit-frame-pointer -fasynchronous-unwind-tables %s -o %t +// RUN: %clangxx -Wno-error=return-type -fsanitize=return %gmlt -O2 -fno-omit-frame-pointer -fasynchronous-unwind-tables %s -o %t // RUN: %env_ubsan_opts=print_stacktrace=1:fast_unwind_on_fatal=0 not %run %t 2>&1 | FileCheck %s // RUN: %env_ubsan_opts=print_stacktrace=1:fast_unwind_on_fatal=1 not %run %t 2>&1 | FileCheck %s -// RUN: %clangxx -fsanitize=return %gmlt -O2 -fno-omit-frame-pointer -fno-exceptions -fno-asynchronous-unwind-tables %s -o %t +// RUN: %clangxx -Wno-error=return-type -fsanitize=return %gmlt -O2 -fno-omit-frame-pointer -fno-exceptions -fno-asynchronous-unwind-tables %s -o %t // RUN: %env_ubsan_opts=print_stacktrace=1:fast_unwind_on_fatal=0 not %run %t 2>&1 | FileCheck %s // RUN: %env_ubsan_opts=print_stacktrace=1:fast_unwind_on_fatal=1 not %run %t 2>&1 | FileCheck %s diff --git a/compiler-rt/test/ubsan/TestCases/Misc/missing_return.cpp b/compiler-rt/test/ubsan/TestCases/Misc/missing_return.cpp index 2ea76daf1fc16..6c1bc5525cd17 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/missing_return.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/missing_return.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsanitize=return %gmlt %s -O3 -o %t +// RUN: %clangxx -Wno-error=return-type -fsanitize=return %gmlt %s -O3 -o %t // RUN: not %run %t 2>&1 | FileCheck %s // RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-STACKTRACE // Error message does not exact what expected diff --git 
a/flang/CMakeLists.txt b/flang/CMakeLists.txt index ac8f784fd811e..4b703b456cae2 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -467,6 +467,17 @@ if (APPLE) endif() endif() +# Set up job pools for flang. Some of the flang sources take a lot of memory to +# compile, so allow users to limit the number of parallel flang jobs. This is +# useful for building flang alongside several other projects since you can use +# the maximum number of build jobs for the other projects while limiting the +# number of flang compile jobs. +set(FLANG_PARALLEL_COMPILE_JOBS CACHE STRING + "The maximum number of concurrent compilation jobs for Flang (Ninja only)") +if (FLANG_PARALLEL_COMPILE_JOBS) + set_property(GLOBAL APPEND PROPERTY JOB_POOLS flang_compile_job_pool=${FLANG_PARALLEL_COMPILE_JOBS}) +endif() + include(AddFlang) include(FlangCommon) diff --git a/flang/cmake/modules/AddFlang.cmake b/flang/cmake/modules/AddFlang.cmake index badbd4e7b964b..ca233103ccdbe 100644 --- a/flang/cmake/modules/AddFlang.cmake +++ b/flang/cmake/modules/AddFlang.cmake @@ -94,6 +94,9 @@ function(add_flang_library name) set_property(GLOBAL APPEND PROPERTY FLANG_LIBS ${name}) endif() set_property(GLOBAL APPEND PROPERTY FLANG_EXPORTS ${name}) + if (FLANG_PARALLEL_COMPILE_JOBS) + set_property(TARGET ${name} PROPERTY JOB_POOL_COMPILE flang_compile_job_pool) + endif() else() # Add empty "phony" target add_custom_target(${name}) diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 65732ce7f3224..caec6a913293f 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -335,6 +335,7 @@ struct IntrinsicLibrary { mlir::Value genMalloc(mlir::Type, llvm::ArrayRef); template mlir::Value genMask(mlir::Type, llvm::ArrayRef); + mlir::Value genMatchAllSync(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genMatmul(mlir::Type, llvm::ArrayRef); fir::ExtendedValue 
genMatmulTranspose(mlir::Type, llvm::ArrayRef); diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index e82446a2ba884..56dcfa88ad92d 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -231,6 +231,7 @@ inline bool NeedCUDAAlloc(const Symbol &sym) { (*details->cudaDataAttr() == common::CUDADataAttr::Device || *details->cudaDataAttr() == common::CUDADataAttr::Managed || *details->cudaDataAttr() == common::CUDADataAttr::Unified || + *details->cudaDataAttr() == common::CUDADataAttr::Shared || *details->cudaDataAttr() == common::CUDADataAttr::Pinned)) { return true; } diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index 70fa18ad65b9b..436f7a1154c7c 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "flang/Optimizer/Analysis/AliasAnalysis.h" +#include "flang/Optimizer/CodeGen/CGOps.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Dialect/FIRType.h" @@ -61,13 +62,17 @@ getOriginalDef(mlir::Value v, mlir::Type ty = defOp->getResultTypes()[0]; llvm::TypeSwitch(defOp) .Case([&](fir::ConvertOp op) { v = op.getValue(); }) - .Case([&](auto op) { - v = op.getMemref(); - auto varIf = llvm::cast(defOp); - attributes |= getAttrsFromVariable(varIf); - isCapturedInInternalProcedure |= - varIf.isCapturedInInternalProcedure(); - }) + .Case( + [&](auto op) { + v = op.getMemref(); + auto varIf = + llvm::dyn_cast(defOp); + if (varIf) { + attributes |= getAttrsFromVariable(varIf); + isCapturedInInternalProcedure |= + varIf.isCapturedInInternalProcedure(); + } + }) .Case([&](auto op) { if (fir::AliasAnalysis::isPointerReference(ty)) attributes.set(fir::AliasAnalysis::Attribute::Pointer); @@ -591,19 +596,21 
@@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, followBoxData = true; approximateSource = true; }) - .Case([&](auto op) { - if (followBoxData) { - v = op->getOperand(0); - defOp = v.getDefiningOp(); - } else - breakFromLoop = true; - }) + .Case( + [&](auto op) { + if (followBoxData) { + v = op->getOperand(0); + defOp = v.getDefiningOp(); + } else + breakFromLoop = true; + }) .Case([&](auto op) { // If load is inside target and it points to mapped item, // continue tracking. Operation *loadMemrefOp = op.getMemref().getDefiningOp(); bool isDeclareOp = llvm::isa_and_present(loadMemrefOp) || + llvm::isa_and_present(loadMemrefOp) || llvm::isa_and_present(loadMemrefOp); if (isDeclareOp && llvm::isa(loadMemrefOp->getParentOp())) { @@ -666,7 +673,8 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, global = llvm::cast(op).getSymbol(); breakFromLoop = true; }) - .Case([&](auto op) { + .Case([&](auto op) { bool isPrivateItem = false; if (omp::BlockArgOpenMPOpInterface argIface = dyn_cast(op->getParentOp())) { @@ -700,30 +708,33 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, return; } } - auto varIf = llvm::cast(defOp); - // While going through a declare operation collect - // the variable attributes from it. Right now, some - // of the attributes are duplicated, e.g. a TARGET dummy - // argument has the target attribute both on its declare - // operation and on the entry block argument. - // In case of host associated use, the declare operation - // is the only carrier of the variable attributes, - // so we have to collect them here. - attributes |= getAttrsFromVariable(varIf); - isCapturedInInternalProcedure |= - varIf.isCapturedInInternalProcedure(); - if (varIf.isHostAssoc()) { - // Do not track past such DeclareOp, because it does not - // currently provide any useful information. The host associated - // access will end up dereferencing the host association tuple, - // so we may as well stop right now. 
- v = defOp->getResult(0); - // TODO: if the host associated variable is a dummy argument - // of the host, I think, we can treat it as SourceKind::Argument - // for the purpose of alias analysis inside the internal procedure. - type = SourceKind::HostAssoc; - breakFromLoop = true; - return; + auto varIf = llvm::dyn_cast(defOp); + if (varIf) { + // While going through a declare operation collect + // the variable attributes from it. Right now, some + // of the attributes are duplicated, e.g. a TARGET dummy + // argument has the target attribute both on its declare + // operation and on the entry block argument. + // In case of host associated use, the declare operation + // is the only carrier of the variable attributes, + // so we have to collect them here. + attributes |= getAttrsFromVariable(varIf); + isCapturedInInternalProcedure |= + varIf.isCapturedInInternalProcedure(); + if (varIf.isHostAssoc()) { + // Do not track past such DeclareOp, because it does not + // currently provide any useful information. The host associated + // access will end up dereferencing the host association tuple, + // so we may as well stop right now. + v = defOp->getResult(0); + // TODO: if the host associated variable is a dummy argument + // of the host, I think, we can treat it as SourceKind::Argument + // for the purpose of alias analysis inside the internal + // procedure. + type = SourceKind::HostAssoc; + breakFromLoop = true; + return; + } } if (getLastInstantiationPoint) { // Fetch only the innermost instantiation point. 
diff --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt index 4d4ad882c27d3..3249f8a76ae3e 100644 --- a/flang/lib/Optimizer/Analysis/CMakeLists.txt +++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt @@ -6,12 +6,14 @@ add_flang_library(FIRAnalysis FIRDialect FIRSupport HLFIRDialect + FIRCodeGen LINK_LIBS FIRBuilder FIRDialect FIRSupport HLFIRDialect + FIRCodeGen MLIR_DEPS MLIRIR diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 93744fa58ebc0..754496921ca3a 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -469,6 +469,22 @@ static constexpr IntrinsicHandler handlers[]{ {"malloc", &I::genMalloc}, {"maskl", &I::genMask}, {"maskr", &I::genMask}, + {"match_all_syncjd", + &I::genMatchAllSync, + {{{"mask", asValue}, {"value", asValue}, {"pred", asAddr}}}, + /*isElemental=*/false}, + {"match_all_syncjf", + &I::genMatchAllSync, + {{{"mask", asValue}, {"value", asValue}, {"pred", asAddr}}}, + /*isElemental=*/false}, + {"match_all_syncjj", + &I::genMatchAllSync, + {{{"mask", asValue}, {"value", asValue}, {"pred", asAddr}}}, + /*isElemental=*/false}, + {"match_all_syncjx", + &I::genMatchAllSync, + {{{"mask", asValue}, {"value", asValue}, {"pred", asAddr}}}, + /*isElemental=*/false}, {"matmul", &I::genMatmul, {{{"matrix_a", asAddr}, {"matrix_b", asAddr}}}, @@ -6044,6 +6060,42 @@ mlir::Value IntrinsicLibrary::genMask(mlir::Type resultType, return result; } +mlir::Value +IntrinsicLibrary::genMatchAllSync(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 3); + bool is32 = args[1].getType().isInteger(32) || args[1].getType().isF32(); + + llvm::StringRef funcName = + is32 ? 
"llvm.nvvm.match.all.sync.i32p" : "llvm.nvvm.match.all.sync.i64p"; + mlir::MLIRContext *context = builder.getContext(); + mlir::Type i32Ty = builder.getI32Type(); + mlir::Type i64Ty = builder.getI64Type(); + mlir::Type i1Ty = builder.getI1Type(); + mlir::Type retTy = mlir::TupleType::get(context, {resultType, i1Ty}); + mlir::Type valTy = is32 ? i32Ty : i64Ty; + + mlir::FunctionType ftype = + mlir::FunctionType::get(context, {i32Ty, valTy}, {retTy}); + auto funcOp = builder.createFunction(loc, funcName, ftype); + llvm::SmallVector filteredArgs; + filteredArgs.push_back(args[0]); + if (args[1].getType().isF32() || args[1].getType().isF64()) + filteredArgs.push_back(builder.create(loc, valTy, args[1])); + else + filteredArgs.push_back(args[1]); + auto call = builder.create(loc, funcOp, filteredArgs); + auto zero = builder.getIntegerAttr(builder.getIndexType(), 0); + auto value = builder.create( + loc, resultType, call.getResult(0), builder.getArrayAttr(zero)); + auto one = builder.getIntegerAttr(builder.getIndexType(), 1); + auto pred = builder.create(loc, i1Ty, call.getResult(0), + builder.getArrayAttr(one)); + auto conv = builder.create(loc, resultType, pred); + builder.create(loc, conv, args[2]); + return value; +} + // MATMUL fir::ExtendedValue IntrinsicLibrary::genMatmul(mlir::Type resultType, diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index c76b7cde55bdd..439cc7a856236 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -292,6 +292,12 @@ struct AllocaOpConversion : public fir::FIROpConversion { rewriter.setInsertionPointAfter(size.getDefiningOp()); } + if (auto dataAttr = alloc->getAttrOfType( + cuf::getDataAttrName())) { + if (dataAttr.getValue() == cuf::DataAttribute::Shared) + allocaAs = 3; + } + // NOTE: we used to pass alloc->getAttrs() in the builder for non opaque // pointers! 
Only propagate pinned and bindc_name to help debugging, but // this should have no functional purpose (and passing the operand segment @@ -316,6 +322,7 @@ struct AllocaOpConversion : public fir::FIROpConversion { rewriter.replaceOpWithNewOp( alloc, ::getLlvmPtrType(alloc.getContext(), programAs), llvmAlloc); } + return mlir::success(); } }; diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index b05991a29a321..fa82f3916a57e 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -57,7 +57,8 @@ static llvm::LogicalResult checkCudaAttr(Op op) { if (op.getDataAttr() == cuf::DataAttribute::Device || op.getDataAttr() == cuf::DataAttribute::Managed || op.getDataAttr() == cuf::DataAttribute::Unified || - op.getDataAttr() == cuf::DataAttribute::Pinned) + op.getDataAttr() == cuf::DataAttribute::Pinned || + op.getDataAttr() == cuf::DataAttribute::Shared) return mlir::success(); return op.emitOpError() << "expect device, managed, pinned or unified cuda attribute"; diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index e473590a7d78f..c75c5c191ab51 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -562,4 +562,31 @@ attributes(device) integer(8) function clock64() end function end interface +interface match_all_sync + attributes(device) integer function match_all_syncjj(mask, val, pred) +!dir$ ignore_tkr(d) mask, (d) val, (d) pred + integer(4), value :: mask + integer(4), value :: val + integer(4) :: pred + end function + attributes(device) integer function match_all_syncjx(mask, val, pred) +!dir$ ignore_tkr(d) mask, (d) val, (d) pred + integer(4), value :: mask + integer(8), value :: val + integer(4) :: pred + end function + attributes(device) integer function match_all_syncjf(mask, val, pred) +!dir$ ignore_tkr(d) mask, (d) val, (d) pred + integer(4), value :: mask + real(4), value :: val + integer(4) :: pred + end function + 
attributes(device) integer function match_all_syncjd(mask, val, pred) +!dir$ ignore_tkr(d) mask, (d) val, (d) pred + integer(4), value :: mask + real(8), value :: val + integer(4) :: pred + end function +end interface + end module diff --git a/flang/test/Analysis/AliasAnalysis/fircg-as-sources.fir b/flang/test/Analysis/AliasAnalysis/fircg-as-sources.fir new file mode 100644 index 0000000000000..edb3b1dadb8cd --- /dev/null +++ b/flang/test/Analysis/AliasAnalysis/fircg-as-sources.fir @@ -0,0 +1,108 @@ +// Check aliasing with the address *in* (not *of*) a local (fir.alloca) pointer +// variable. +// +// Throughout this test, the ".fir" suffix on symbols indicates a version of the +// MLIR after convert-hlfir-to-fir. We would like alias analysis results to be +// the same in both versions. + +// RUN: fir-opt %s -split-input-file -o /dev/null --mlir-disable-threading \ +// RUN: -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' \ +// RUN: 2>&1 | FileCheck -match-full-lines %s + +// subroutine test(p1, arr, t_arr, alloc, t_alloc, t, v) +// real, pointer :: p1 +// real :: arr(:) +// real, target :: t_arr(:) +// real, allocatable :: alloc +// real, allocatable, target :: t_alloc +// real, target :: t +// real :: v +// real, pointer :: p0 +// end subroutine test + +// check when fircg.ext_rebox and fircg.ext_declare are in the path of tracing the source +// CHECK-LABEL: Testing : "_QPtest.fir" +// CHECK-DAG: p0.tgt.fir#0 <-> arr(1).fir#0: NoAlias +// CHECK-DAG: p0.tgt.fir#0 <-> t_arr(1).fir#0: MayAlias +// CHECK-DAG: p0.tgt.fir#0 <-> alloc.tgt.fir#0: NoAlias +// CHECK-DAG: p0.tgt.fir#0 <-> t_alloc.tgt.fir#0: MayAlias +// CHECK-DAG: alloc.fir#0 <-> alloc.tgt.fir#0: NoAlias + +func.func @_QPtest.fir(%arg0: !fir.ref>> {fir.bindc_name = "p1"}, %arg1: !fir.box> {fir.bindc_name = "arr"}, %arg2: !fir.box> {fir.bindc_name = "t_arr", fir.target}, %arg3: !fir.ref>> {fir.bindc_name = "alloc"}, %arg4: !fir.ref>> {fir.bindc_name = "t_alloc", fir.target}, %arg5: !fir.ref 
{fir.bindc_name = "t", fir.target}, %arg6: !fir.ref {fir.bindc_name = "v"}) { + %0 = fir.dummy_scope : !fir.dscope + %1 = fircg.ext_declare %arg3 dummy_scope %0 {test.ptr = "alloc.fir", fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestEalloc"} : (!fir.ref>>, !fir.dscope) -> !fir.ref>> + %2 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEarr"} : (!fir.box>, !fir.dscope) -> !fir.box> + %3 = fircg.ext_rebox %2 : (!fir.box>) -> !fir.box> + %4 = fir.alloca !fir.box> {bindc_name = "p0", uniq_name = "_QFtestEp0"} + %5 = fircg.ext_declare %4 {test.ptr = "p0.fir", fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestEp0"} : (!fir.ref>>) -> !fir.ref>> + %6 = fir.declare %arg0 dummy_scope %0 {test.ptr = "p1.fir", fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestEp1"} : (!fir.ref>>, !fir.dscope) -> !fir.ref>> + %7 = fir.declare %arg5 dummy_scope %0 {test.ptr = "t.fir", fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestEt"} : (!fir.ref, !fir.dscope) -> !fir.ref + %8 = fir.declare %arg4 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestEt_alloc"} : (!fir.ref>>, !fir.dscope) -> !fir.ref>> + %9 = fir.declare %arg2 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestEt_arr"} : (!fir.box>, !fir.dscope) -> !fir.box> + %10 = fircg.ext_rebox %9 : (!fir.box>) -> !fir.box> + %11 = fir.declare %arg6 dummy_scope %0 {test.ptr = "v.fir", uniq_name = "_QFtestEv"} : (!fir.ref, !fir.dscope) -> !fir.ref + %12 = fir.load %5 : !fir.ref>> + %13 = fir.box_addr %12 {test.ptr = "p0.tgt.fir"} : (!fir.box>) -> !fir.ptr + %14 = fir.load %6 : !fir.ref>> + %15 = fir.box_addr %14 {test.ptr = "p1.tgt.fir"} : (!fir.box>) -> !fir.ptr + %c1 = arith.constant 1 : index + %16 = fir.array_coor %3 %c1 {test.ptr="arr(1).fir"} : (!fir.box>, index) -> !fir.ref + %c1_0 = arith.constant 1 : index + %17 = fir.array_coor %10 %c1_0 {test.ptr="t_arr(1).fir"} : (!fir.box>, index) -> !fir.ref + %18 = fir.load %1 : !fir.ref>> + %19 = fir.box_addr %18 {test.ptr = "alloc.tgt.fir"} 
: (!fir.box>) -> !fir.heap + %20 = fir.load %8 : !fir.ref>> + %21 = fir.box_addr %20 {test.ptr = "t_alloc.tgt.fir"} : (!fir.box>) -> !fir.heap + return +} + +// ----- +// CHECK-LABEL: Testing : "_QFPtest3" + +// module pointers +// real, pointer :: p +// end module +// +// program main +// use pointers +// real, target :: var1 = 1, var2 =2 +// p => var1 +// +// call test3(p) +// +// contains +// subroutine test3(p1) +// real, pointer :: p1 +// p1 => var2 +// print *, p +// end subroutine +// end + +// check when there are fircg.ext_embox in the paths +// CHECK-DAG: p#0 <-> box.addr#0: NoAlias +// CHECK-DAG: box.addr#0 <-> func.region0#0: NoAlias +// CHECK-DAG: var2#0 <-> p#0: NoAlias +// CHECK-DAG: var2#0 <-> box.addr#0: MustAlias +// CHECK-DAG: var2#0 <-> func.region0#1: NoAlias +// CHECK-DAG: box.addr#0 <-> func.region0#1: NoAlias + +fir.global @_QMpointersEp : !fir.box> { + %0 = fir.zero_bits !fir.ptr + %1 = fircg.ext_embox %0 : (!fir.ptr) -> !fir.box> + fir.has_value %1 : !fir.box> +} + +fir.global internal @_QFEvar2 target : f32 { + %cst = arith.constant 2.000000e+00 : f32 + fir.has_value %cst : f32 +} + +func.func @_QFPtest3(%arg0: !fir.ref>> {fir.bindc_name = "p1"}, %arg1: !fir.ref) attributes {test.ptr = "func"} { + %3 = fir.load %arg0 {test.ptr = "arg0.load"}: !fir.ref>> + %4 = fir.address_of(@_QFEvar2) {test.ptr = "var2"} : !fir.ref + %5 = fir.address_of(@_QMpointersEp) {test.ptr = "p"} : !fir.ref>> + %6 = fircg.ext_embox %4 : (!fir.ref) -> !fir.box> + %13 = fir.box_addr %6 {test.ptr = "box.addr"} : (!fir.box>) -> !fir.ptr + return +} + diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 6a5524102c0ea..1210dae8608c8 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -112,6 +112,25 @@ end ! CHECK: fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath : (i32) -> i32 ! 
CHECK: fir.call @llvm.nvvm.barrier0.or(%c1{{.*}}) fastmath : (i32) -> i32 +attributes(device) subroutine testMatch() + integer :: a, ipred, mask, v32 + integer(8) :: v64 + real(4) :: r4 + real(8) :: r8 + a = match_all_sync(mask, v32, ipred) + a = match_all_sync(mask, v64, ipred) + a = match_all_sync(mask, r4, ipred) + a = match_all_sync(mask, r8, ipred) +end subroutine + +! CHECK-LABEL: func.func @_QPtestmatch() +! CHECK: fir.call @llvm.nvvm.match.all.sync.i32p +! CHECK: fir.call @llvm.nvvm.match.all.sync.i64p +! CHECK: fir.convert %{{.*}} : (f32) -> i32 +! CHECK: fir.call @llvm.nvvm.match.all.sync.i32p +! CHECK: fir.convert %{{.*}} : (f64) -> i64 +! CHECK: fir.call @llvm.nvvm.match.all.sync.i64p + ! CHECK: func.func private @llvm.nvvm.barrier0() ! CHECK: func.func private @llvm.nvvm.bar.warp.sync(i32) ! CHECK: func.func private @llvm.nvvm.membar.gl() @@ -120,3 +139,5 @@ end ! CHECK: func.func private @llvm.nvvm.barrier0.and(i32) -> i32 ! CHECK: func.func private @llvm.nvvm.barrier0.popc(i32) -> i32 ! CHECK: func.func private @llvm.nvvm.barrier0.or(i32) -> i32 +! CHECK: func.func private @llvm.nvvm.match.all.sync.i32p(i32, i32) -> tuple +! 
CHECK: func.func private @llvm.nvvm.match.all.sync.i64p(i32, i64) -> tuple diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt index a8935d464417c..35b8b3d318a9f 100644 --- a/libc/src/stdio/scanf_core/CMakeLists.txt +++ b/libc/src/stdio/scanf_core/CMakeLists.txt @@ -54,15 +54,26 @@ add_object_library( libc.src.__support.arg_list ) -add_object_library( +if(LIBC_TARGET_OS_IS_GPU) +add_header_library( + reader + HDRS + reader.h + DEPENDS + libc.src.__support.macros.attributes +) +elseif((TARGET libc.src.__support.File.file) OR (NOT LLVM_LIBC_FULL_BUILD)) +add_header_library( reader - SRCS - reader.cpp HDRS reader.h DEPENDS libc.src.__support.macros.attributes + libc.hdr.types.FILE + libc.src.__support.File.file + ${use_system_file} ) +endif() add_object_library( converter diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp deleted file mode 100644 index ec1f5c098dc7a..0000000000000 --- a/libc/src/stdio/scanf_core/reader.cpp +++ /dev/null @@ -1,29 +0,0 @@ -//===-- Reader definition for scanf -----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/stdio/scanf_core/reader.h" -#include "src/__support/macros/config.h" -#include - -namespace LIBC_NAMESPACE_DECL { -namespace scanf_core { - -void Reader::ungetc(char c) { - --cur_chars_read; - if (rb != nullptr && rb->buff_cur > 0) { - // While technically c should be written back to the buffer, in scanf we - // always write the character that was already there. Additionally, the - // buffer is most likely to contain a string that isn't part of a file, - // which may not be writable. 
- --(rb->buff_cur); - return; - } - stream_ungetc(static_cast(c), input_stream); -} -} // namespace scanf_core -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h index f984fd9378910..1f8ec9695a314 100644 --- a/libc/src/stdio/scanf_core/reader.h +++ b/libc/src/stdio/scanf_core/reader.h @@ -9,15 +9,73 @@ #ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H #define LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H +#include "hdr/types/FILE.h" + +#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE +#include "src/__support/File/file.h" +#endif + +#if defined(LIBC_TARGET_ARCH_IS_GPU) +#include "src/stdio/getc.h" +#include "src/stdio/ungetc.h" +#endif + #include "src/__support/macros/attributes.h" // For LIBC_INLINE #include "src/__support/macros/config.h" + #include namespace LIBC_NAMESPACE_DECL { namespace scanf_core { - -using StreamGetc = int (*)(void *); -using StreamUngetc = void (*)(int, void *); +// We use the name "reader_internal" over "internal" because +// "internal" causes name lookups in files that include the current header to be +// ambiguous i.e. `internal::foo` in those files, will try to lookup in +// `LIBC_NAMESPACE::scanf_core::internal` over `LIBC_NAMESPACE::internal` for +// e.g., `internal::ArgList` in `libc/src/stdio/scanf_core/scanf_main.h` +namespace reader_internal { + +#if defined(LIBC_TARGET_ARCH_IS_GPU) +// The GPU build provides FILE access through the host operating system's +// library. So here we simply use the public entrypoints like in the SYSTEM_FILE +// interface. Entrypoints should normally not call others, this is an exception. +// FIXME: We do not acquire any locks here, so this is not thread safe. 
+LIBC_INLINE int getc(void *f) { + return LIBC_NAMESPACE::getc(reinterpret_cast<::FILE *>(f)); +} + +LIBC_INLINE void ungetc(int c, void *f) { + LIBC_NAMESPACE::ungetc(c, reinterpret_cast<::FILE *>(f)); +} + +#elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE) + +LIBC_INLINE int getc(void *f) { + unsigned char c; + auto result = + reinterpret_cast(f)->read_unlocked(&c, 1); + size_t r = result.value; + if (result.has_error() || r != 1) + return '\0'; + + return c; +} + +LIBC_INLINE void ungetc(int c, void *f) { + reinterpret_cast(f)->ungetc_unlocked(c); +} + +#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE) + +// Since ungetc_unlocked isn't always available, we don't acquire the lock for +// system files. +LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); } + +LIBC_INLINE void ungetc(int c, void *f) { + ::ungetc(c, reinterpret_cast<::FILE *>(f)); +} +#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE + +} // namespace reader_internal // This is intended to be either a raw string or a buffer syncronized with the // file's internal buffer. @@ -29,24 +87,15 @@ struct ReadBuffer { class Reader { ReadBuffer *rb; - void *input_stream = nullptr; - - // TODO: Remove these unnecessary function pointers - StreamGetc stream_getc = nullptr; - StreamUngetc stream_ungetc = nullptr; - size_t cur_chars_read = 0; public: // TODO: Set buff_len with a proper constant LIBC_INLINE Reader(ReadBuffer *string_buffer) : rb(string_buffer) {} - LIBC_INLINE Reader(void *stream, StreamGetc stream_getc_in, - StreamUngetc stream_ungetc_in, - ReadBuffer *stream_buffer = nullptr) - : rb(stream_buffer), input_stream(stream), stream_getc(stream_getc_in), - stream_ungetc(stream_ungetc_in) {} + LIBC_INLINE Reader(void *stream, ReadBuffer *stream_buffer = nullptr) + : rb(stream_buffer), input_stream(stream) {} // This returns the next character from the input and advances it by one // character. 
When it hits the end of the string or file it returns '\0' to @@ -59,12 +108,23 @@ class Reader { return output; } // This should reset the buffer if applicable. - return static_cast(stream_getc(input_stream)); + return static_cast(reader_internal::getc(input_stream)); } // This moves the input back by one character, placing c into the buffer if // this is a file reader, else c is ignored. - void ungetc(char c); + LIBC_INLINE void ungetc(char c) { + --cur_chars_read; + if (rb != nullptr && rb->buff_cur > 0) { + // While technically c should be written back to the buffer, in scanf we + // always write the character that was already there. Additionally, the + // buffer is most likely to contain a string that isn't part of a file, + // which may not be writable. + --(rb->buff_cur); + return; + } + reader_internal::ungetc(static_cast(c), input_stream); + } LIBC_INLINE size_t chars_read() { return cur_chars_read; } }; diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.h b/libc/src/stdio/scanf_core/vfscanf_internal.h index 67126431fcded..4e20fa3b93091 100644 --- a/libc/src/stdio/scanf_core/vfscanf_internal.h +++ b/libc/src/stdio/scanf_core/vfscanf_internal.h @@ -18,8 +18,6 @@ #if defined(LIBC_TARGET_ARCH_IS_GPU) #include "src/stdio/ferror.h" -#include "src/stdio/getc.h" -#include "src/stdio/ungetc.h" #endif #include "hdr/types/FILE.h" @@ -38,14 +36,6 @@ LIBC_INLINE void flockfile(::FILE *) { return; } LIBC_INLINE void funlockfile(::FILE *) { return; } -LIBC_INLINE int getc(void *f) { - return LIBC_NAMESPACE::getc(reinterpret_cast<::FILE *>(f)); -} - -LIBC_INLINE void ungetc(int c, void *f) { - LIBC_NAMESPACE::ungetc(c, reinterpret_cast<::FILE *>(f)); -} - LIBC_INLINE int ferror_unlocked(::FILE *f) { return LIBC_NAMESPACE::ferror(f); } #elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE) @@ -58,21 +48,6 @@ LIBC_INLINE void funlockfile(FILE *f) { reinterpret_cast(f)->unlock(); } -LIBC_INLINE int getc(void *f) { - unsigned char c; - auto result = - 
reinterpret_cast(f)->read_unlocked(&c, 1); - size_t r = result.value; - if (result.has_error() || r != 1) - return '\0'; - - return c; -} - -LIBC_INLINE void ungetc(int c, void *f) { - reinterpret_cast(f)->ungetc_unlocked(c); -} - LIBC_INLINE int ferror_unlocked(FILE *f) { return reinterpret_cast(f)->error_unlocked(); } @@ -85,12 +60,6 @@ LIBC_INLINE void flockfile(::FILE *) { return; } LIBC_INLINE void funlockfile(::FILE *) { return; } -LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); } - -LIBC_INLINE void ungetc(int c, void *f) { - ::ungetc(c, reinterpret_cast<::FILE *>(f)); -} - LIBC_INLINE int ferror_unlocked(::FILE *f) { return ::ferror(f); } #endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE @@ -103,7 +72,7 @@ LIBC_INLINE int vfscanf_internal(::FILE *__restrict stream, const char *__restrict format, internal::ArgList &args) { internal::flockfile(stream); - scanf_core::Reader reader(stream, &internal::getc, internal::ungetc); + scanf_core::Reader reader(stream); int retval = scanf_core::scanf_main(&reader, format, args); if (retval == 0 && internal::ferror_unlocked(stream)) retval = EOF; diff --git a/libcxx/src/experimental/tzdb.cpp b/libcxx/src/experimental/tzdb.cpp index 1f18226636fd5..ac5c62bb81902 100644 --- a/libcxx/src/experimental/tzdb.cpp +++ b/libcxx/src/experimental/tzdb.cpp @@ -763,8 +763,9 @@ void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) { // - The file /etc/timezone. This text file contains the name of the time // zone. // - // On Linux systems it seems /etc/timezone is deprecated and being phased - // out. This file is used when /etc/localtime does not exist, or when it exists but is not a symlink. For more information and links see + // On Linux systems it seems /etc/timezone is deprecated and being phased out. + // This file is used when /etc/localtime does not exist, or when it exists but + // is not a symlink. 
For more information and links see // https://github.com/llvm/llvm-project/issues/105634 string __name = chrono::__current_zone_environment(); diff --git a/lldb/include/lldb/Core/Debugger.h b/lldb/include/lldb/Core/Debugger.h index 9c8a9623fe689..6ebc6147800e1 100644 --- a/lldb/include/lldb/Core/Debugger.h +++ b/lldb/include/lldb/Core/Debugger.h @@ -156,9 +156,9 @@ class Debugger : public std::enable_shared_from_this, void RestoreInputTerminalState(); - lldb::StreamSP GetAsyncOutputStream(); + lldb::StreamUP GetAsyncOutputStream(); - lldb::StreamSP GetAsyncErrorStream(); + lldb::StreamUP GetAsyncErrorStream(); CommandInterpreter &GetCommandInterpreter() { assert(m_command_interpreter_up.get()); diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index cda55ef06e549..c664d1398f74d 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -432,6 +432,7 @@ typedef std::unique_ptr StackFrameRecognizerManagerUP; typedef std::shared_ptr StopInfoSP; typedef std::shared_ptr StreamSP; +typedef std::unique_ptr StreamUP; typedef std::shared_ptr StreamFileSP; typedef std::shared_ptr LockableStreamFileSP; typedef std::shared_ptr diff --git a/lldb/source/Breakpoint/BreakpointOptions.cpp b/lldb/source/Breakpoint/BreakpointOptions.cpp index 09abcf5e081d2..242b5b30168c5 100644 --- a/lldb/source/Breakpoint/BreakpointOptions.cpp +++ b/lldb/source/Breakpoint/BreakpointOptions.cpp @@ -620,10 +620,8 @@ bool BreakpointOptions::BreakpointOptionsCallbackFunction( // Rig up the results secondary output stream to the debugger's, so the // output will come out synchronously if the debugger is set up that way. 
- StreamSP output_stream(debugger.GetAsyncOutputStream()); - StreamSP error_stream(debugger.GetAsyncErrorStream()); - result.SetImmediateOutputStream(output_stream); - result.SetImmediateErrorStream(error_stream); + result.SetImmediateOutputStream(debugger.GetAsyncOutputStream()); + result.SetImmediateErrorStream(debugger.GetAsyncErrorStream()); CommandInterpreterRunOptions options; options.SetStopOnContinue(true); diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index dd841cb5cb4cc..9510cf4d14467 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -815,10 +815,9 @@ a number follows 'f':" for (const std::string &line : lines) { Status error = AppendRegexSubstitution(line, check_only); if (error.Fail()) { - if (!GetDebugger().GetCommandInterpreter().GetBatchCommandMode()) { - StreamSP out_stream = GetDebugger().GetAsyncOutputStream(); - out_stream->Printf("error: %s\n", error.AsCString()); - } + if (!GetDebugger().GetCommandInterpreter().GetBatchCommandMode()) + GetDebugger().GetAsyncOutputStream()->Printf("error: %s\n", + error.AsCString()); } } } diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp index 507ef3fbe4759..32cb80b421fd6 100644 --- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp @@ -252,10 +252,8 @@ are no syntax errors may indicate that a function was declared but never called. // Rig up the results secondary output stream to the debugger's, so the // output will come out synchronously if the debugger is set up that // way. 
- StreamSP output_stream(debugger.GetAsyncOutputStream()); - StreamSP error_stream(debugger.GetAsyncErrorStream()); - result.SetImmediateOutputStream(output_stream); - result.SetImmediateErrorStream(error_stream); + result.SetImmediateOutputStream(debugger.GetAsyncOutputStream()); + result.SetImmediateErrorStream(debugger.GetAsyncErrorStream()); CommandInterpreterRunOptions options; options.SetStopOnContinue(true); diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index 242ef1c8a4596..585138535203d 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -257,7 +257,7 @@ Status Debugger::SetPropertyValue(const ExecutionContext *exe_ctx, std::list errors; StreamString feedback_stream; if (!target_sp->LoadScriptingResources(errors, feedback_stream)) { - lldb::StreamSP s = GetAsyncErrorStream(); + lldb::StreamUP s = GetAsyncErrorStream(); for (auto &error : errors) s->Printf("%s\n", error.AsCString()); if (feedback_stream.GetSize()) @@ -1328,13 +1328,13 @@ bool Debugger::PopIOHandler(const IOHandlerSP &pop_reader_sp) { return true; } -StreamSP Debugger::GetAsyncOutputStream() { - return std::make_shared(*this, +StreamUP Debugger::GetAsyncOutputStream() { + return std::make_unique(*this, StreamAsynchronousIO::STDOUT); } -StreamSP Debugger::GetAsyncErrorStream() { - return std::make_shared(*this, +StreamUP Debugger::GetAsyncErrorStream() { + return std::make_unique(*this, StreamAsynchronousIO::STDERR); } @@ -1577,8 +1577,7 @@ static void PrivateReportDiagnostic(Debugger &debugger, Severity severity, // diagnostic directly to the debugger's error stream. 
DiagnosticEventData event_data(severity, std::move(message), debugger_specific); - StreamSP stream = debugger.GetAsyncErrorStream(); - event_data.Dump(stream.get()); + event_data.Dump(debugger.GetAsyncErrorStream().get()); return; } EventSP event_sp = std::make_shared( @@ -1774,12 +1773,11 @@ void Debugger::HandleBreakpointEvent(const EventSP &event_sp) { if (num_new_locations > 0) { BreakpointSP breakpoint = Breakpoint::BreakpointEventData::GetBreakpointFromEvent(event_sp); - StreamSP output_sp(GetAsyncOutputStream()); - if (output_sp) { - output_sp->Printf("%d location%s added to breakpoint %d\n", + if (StreamUP output_up = GetAsyncOutputStream()) { + output_up->Printf("%d location%s added to breakpoint %d\n", num_new_locations, num_new_locations == 1 ? "" : "s", breakpoint->GetID()); - output_sp->Flush(); + output_up->Flush(); } } } @@ -1823,8 +1821,8 @@ void Debugger::HandleProcessEvent(const EventSP &event_sp) { ? EventDataStructuredData::GetProcessFromEvent(event_sp.get()) : Process::ProcessEventData::GetProcessFromEvent(event_sp.get()); - StreamSP output_stream_sp = GetAsyncOutputStream(); - StreamSP error_stream_sp = GetAsyncErrorStream(); + StreamUP output_stream_up = GetAsyncOutputStream(); + StreamUP error_stream_up = GetAsyncErrorStream(); const bool gui_enabled = IsForwardingEvents(); if (!gui_enabled) { @@ -1849,7 +1847,7 @@ void Debugger::HandleProcessEvent(const EventSP &event_sp) { if (got_state_changed && !state_is_stopped) { // This is a public stop which we are going to announce to the user, so // we should force the most relevant frame selection here. 
- Process::HandleProcessStateChangedEvent(event_sp, output_stream_sp.get(), + Process::HandleProcessStateChangedEvent(event_sp, output_stream_up.get(), SelectMostRelevantFrame, pop_process_io_handler); } @@ -1865,37 +1863,35 @@ void Debugger::HandleProcessEvent(const EventSP &event_sp) { if (plugin_sp) { auto structured_data_sp = EventDataStructuredData::GetObjectFromEvent(event_sp.get()); - if (output_stream_sp) { - StreamString content_stream; - Status error = - plugin_sp->GetDescription(structured_data_sp, content_stream); - if (error.Success()) { - if (!content_stream.GetString().empty()) { - // Add newline. - content_stream.PutChar('\n'); - content_stream.Flush(); - - // Print it. - output_stream_sp->PutCString(content_stream.GetString()); - } - } else { - error_stream_sp->Format("Failed to print structured " - "data with plugin {0}: {1}", - plugin_sp->GetPluginName(), error); + StreamString content_stream; + Status error = + plugin_sp->GetDescription(structured_data_sp, content_stream); + if (error.Success()) { + if (!content_stream.GetString().empty()) { + // Add newline. + content_stream.PutChar('\n'); + content_stream.Flush(); + + // Print it. 
+ output_stream_up->PutCString(content_stream.GetString()); } + } else { + error_stream_up->Format("Failed to print structured " + "data with plugin {0}: {1}", + plugin_sp->GetPluginName(), error); } } } // Now display any stopped state changes after any STDIO if (got_state_changed && state_is_stopped) { - Process::HandleProcessStateChangedEvent(event_sp, output_stream_sp.get(), + Process::HandleProcessStateChangedEvent(event_sp, output_stream_up.get(), SelectMostRelevantFrame, pop_process_io_handler); } - output_stream_sp->Flush(); - error_stream_sp->Flush(); + output_stream_up->Flush(); + error_stream_up->Flush(); if (pop_process_io_handler) process_sp->PopProcessIOHandler(); @@ -1995,22 +1991,18 @@ lldb::thread_result_t Debugger::DefaultEventHandler() { const char *data = static_cast( EventDataBytes::GetBytesFromEvent(event_sp.get())); if (data && data[0]) { - StreamSP error_sp(GetAsyncErrorStream()); - if (error_sp) { - error_sp->PutCString(data); - error_sp->Flush(); - } + StreamUP error_up = GetAsyncErrorStream(); + error_up->PutCString(data); + error_up->Flush(); } } else if (event_type & CommandInterpreter:: eBroadcastBitAsynchronousOutputData) { const char *data = static_cast( EventDataBytes::GetBytesFromEvent(event_sp.get())); if (data && data[0]) { - StreamSP output_sp(GetAsyncOutputStream()); - if (output_sp) { - output_sp->PutCString(data); - output_sp->Flush(); - } + StreamUP output_up = GetAsyncOutputStream(); + output_up->PutCString(data); + output_up->Flush(); } } } else if (broadcaster == &m_broadcaster) { @@ -2125,7 +2117,7 @@ void Debugger::HandleProgressEvent(const lldb::EventSP &event_sp) { if (!file_sp->GetIsInteractive() || !file_sp->GetIsTerminalWithColors()) return; - StreamSP output = GetAsyncOutputStream(); + StreamUP output = GetAsyncOutputStream(); // Print over previous line, if any. 
output->Printf("\r"); @@ -2175,8 +2167,7 @@ void Debugger::HandleDiagnosticEvent(const lldb::EventSP &event_sp) { if (!data) return; - StreamSP stream = GetAsyncErrorStream(); - data->Dump(stream.get()); + data->Dump(GetAsyncErrorStream().get()); } bool Debugger::HasIOHandlerThread() const { diff --git a/lldb/source/Core/DynamicLoader.cpp b/lldb/source/Core/DynamicLoader.cpp index 9c6ca1e5f910c..76c71d2a49a48 100644 --- a/lldb/source/Core/DynamicLoader.cpp +++ b/lldb/source/Core/DynamicLoader.cpp @@ -328,7 +328,7 @@ ModuleSP DynamicLoader::LoadBinaryWithUUIDAndAddress( } } else { if (force_symbol_search) { - lldb::StreamSP s = target.GetDebugger().GetAsyncErrorStream(); + lldb::StreamUP s = target.GetDebugger().GetAsyncErrorStream(); s->Printf("Unable to find file"); if (!name.empty()) s->Printf(" %s", name.str().c_str()); diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp index 1d4cda6c046b7..60724f3900ae7 100644 --- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp +++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp @@ -738,7 +738,7 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::LoadImageUsingMemoryModule( } if (IsKernel() && m_uuid.IsValid()) { - lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream(); + lldb::StreamUP s = target.GetDebugger().GetAsyncOutputStream(); s->Printf("Kernel UUID: %s\n", m_uuid.GetAsString().c_str()); s->Printf("Load Address: 0x%" PRIx64 "\n", m_load_address); @@ -830,7 +830,7 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::LoadImageUsingMemoryModule( } if (IsKernel() && !m_module_sp) { - lldb::StreamSP s = target.GetDebugger().GetAsyncErrorStream(); + lldb::StreamUP s = target.GetDebugger().GetAsyncErrorStream(); s->Printf("WARNING: Unable to locate kernel binary on the debugger " "system.\n"); if (kernel_search_error.Fail() && 
kernel_search_error.AsCString("") && @@ -974,7 +974,7 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::LoadImageUsingMemoryModule( bool is_loaded = IsLoaded(); if (is_loaded && m_module_sp && IsKernel()) { - lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream(); + lldb::StreamUP s = target.GetDebugger().GetAsyncOutputStream(); ObjectFile *kernel_object_file = m_module_sp->GetObjectFile(); if (kernel_object_file) { addr_t file_address = diff --git a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp index 3bf0a46de57af..a23ba3ad5c545 100644 --- a/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp +++ b/lldb/source/Plugins/DynamicLoader/FreeBSD-Kernel/DynamicLoaderFreeBSDKernel.cpp @@ -327,7 +327,7 @@ bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingMemoryModule( Target &target = process->GetTarget(); if (IsKernel() && m_uuid.IsValid()) { - lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream(); + lldb::StreamUP s = target.GetDebugger().GetAsyncOutputStream(); s->Printf("Kernel UUID: %s\n", m_uuid.GetAsString().c_str()); s->Printf("Load Address: 0x%" PRIx64 "\n", m_load_address); } @@ -355,9 +355,9 @@ bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingMemoryModule( if (!m_module_sp) m_module_sp = target.GetOrCreateModule(module_spec, true); if (IsKernel() && !m_module_sp) { - lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream(); - s->Printf("WARNING: Unable to locate kernel binary on the debugger " - "system.\n"); + target.GetDebugger().GetAsyncOutputStream()->Printf( + "WARNING: Unable to locate kernel binary on the debugger " + "system.\n"); } } @@ -464,7 +464,7 @@ bool DynamicLoaderFreeBSDKernel::KModImageInfo::LoadImageUsingMemoryModule( } if (IsLoaded() && m_module_sp && IsKernel()) { - lldb::StreamSP s = target.GetDebugger().GetAsyncOutputStream(); + lldb::StreamUP s = 
target.GetDebugger().GetAsyncOutputStream(); ObjectFile *kernel_object_file = m_module_sp->GetObjectFile(); if (kernel_object_file) { addr_t file_address = diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp index 9b2907c680996..406e1d45dc39a 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp @@ -321,20 +321,10 @@ Status ProcessKDP::DoConnectRemote(llvm::StringRef remote_url) { SetID(1); GetThreadList(); SetPrivateState(eStateStopped); - StreamSP async_strm_sp(target.GetDebugger().GetAsyncOutputStream()); - if (async_strm_sp) { - const char *cstr; - if ((cstr = m_comm.GetKernelVersion()) != NULL) { - async_strm_sp->Printf("Version: %s\n", cstr); - async_strm_sp->Flush(); - } - // if ((cstr = m_comm.GetImagePath ()) != NULL) - // { - // async_strm_sp->Printf ("Image Path: - // %s\n", cstr); - // async_strm_sp->Flush(); - // } - } + const char *cstr; + if ((cstr = m_comm.GetKernelVersion()) != NULL) + target.GetDebugger().GetAsyncOutputStream()->Printf("Version: %s\n", + cstr); } else { return Status::FromErrorString("KDP_REATTACH failed"); } diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index f36595145e035..8a8c0f92fbbc2 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -5495,8 +5495,7 @@ class CommandObjectProcessGDBRemoteSpeedTest : public CommandObjectParsed { if (process) { StreamSP output_stream_sp = result.GetImmediateOutputStream(); if (!output_stream_sp) - output_stream_sp = - StreamSP(m_interpreter.GetDebugger().GetAsyncOutputStream()); + output_stream_sp = m_interpreter.GetDebugger().GetAsyncOutputStream(); result.SetImmediateOutputStream(output_stream_sp); const uint32_t num_packets = diff --git 
a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 0041c8f2b2db2..6db582096155f 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -2743,10 +2743,9 @@ Status Process::LaunchPrivate(ProcessLaunchInfo &launch_info, StateType &state, // Now that we know the process type, update its signal responses from the // ones stored in the Target: - if (m_unix_signals_sp) { - StreamSP warning_strm = GetTarget().GetDebugger().GetAsyncErrorStream(); - GetTarget().UpdateSignalsFromDummy(m_unix_signals_sp, warning_strm); - } + if (m_unix_signals_sp) + GetTarget().UpdateSignalsFromDummy( + m_unix_signals_sp, GetTarget().GetDebugger().GetAsyncErrorStream()); DynamicLoader *dyld = GetDynamicLoader(); if (dyld) @@ -3131,10 +3130,9 @@ void Process::CompleteAttach() { } // Now that we know the process type, update its signal responses from the // ones stored in the Target: - if (m_unix_signals_sp) { - StreamSP warning_strm = GetTarget().GetDebugger().GetAsyncErrorStream(); - GetTarget().UpdateSignalsFromDummy(m_unix_signals_sp, warning_strm); - } + if (m_unix_signals_sp) + GetTarget().UpdateSignalsFromDummy( + m_unix_signals_sp, GetTarget().GetDebugger().GetAsyncErrorStream()); // We have completed the attach, now it is time to find the dynamic loader // plug-in diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index 092d78d87a2b1..1c9ecbfe70c3c 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -1016,11 +1016,9 @@ class StopInfoWatchpoint : public StopInfo { wp_sp->CaptureWatchedValue(exe_ctx); Debugger &debugger = exe_ctx.GetTargetRef().GetDebugger(); - StreamSP output_sp = debugger.GetAsyncOutputStream(); - if (wp_sp->DumpSnapshots(output_sp.get())) { - output_sp->EOL(); - output_sp->Flush(); - } + StreamUP output_up = debugger.GetAsyncOutputStream(); + if (wp_sp->DumpSnapshots(output_up.get())) + output_up->EOL(); } } else { diff --git 
a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h index 174ee115a1501..cc30b977ae421 100644 --- a/llvm/include/llvm/CodeGen/RDFRegisters.h +++ b/llvm/include/llvm/CodeGen/RDFRegisters.h @@ -111,7 +111,7 @@ struct RegisterRef { } static constexpr bool isRegId(unsigned Id) { - return Register::isPhysicalRegister(Id); + return Register(Id).isPhysical(); } static constexpr bool isUnitId(unsigned Id) { return Register(Id).isVirtual(); diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h index 03e462872d3c2..6c02ffef89363 100644 --- a/llvm/include/llvm/CodeGen/Register.h +++ b/llvm/include/llvm/CodeGen/Register.h @@ -48,12 +48,6 @@ class Register { return Register(FI + MCRegister::FirstStackSlot); } - /// Return true if the specified register number is in - /// the physical register namespace. - static constexpr bool isPhysicalRegister(unsigned Reg) { - return MCRegister::isPhysicalRegister(Reg); - } - /// Convert a 0-based index to a virtual register number. /// This is the inverse operation of VirtReg2IndexFunctor below. static Register index2VirtReg(unsigned Index) { @@ -67,7 +61,9 @@ class Register { /// Return true if the specified register number is in the physical register /// namespace. - constexpr bool isPhysical() const { return isPhysicalRegister(Reg); } + constexpr bool isPhysical() const { + return MCRegister::isPhysicalRegister(Reg); + } /// Convert a virtual register number to a 0-based index. The first virtual /// register in a function will get the index 0. 
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h index 9bdf940fc77b7..4385df518a111 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h @@ -23,57 +23,54 @@ namespace llvm::sandboxir { +class LegalityResult; + +struct Action { + unsigned Idx = 0; + const LegalityResult *LegalityRes = nullptr; + SmallVector Bndl; + SmallVector UserBndl; + unsigned Depth; + SmallVector Operands; + Value *Vec = nullptr; + Action(const LegalityResult *LR, ArrayRef B, ArrayRef UB, + unsigned Depth) + : LegalityRes(LR), Bndl(B), UserBndl(UB), Depth(Depth) {} +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const; + friend raw_ostream &operator<<(raw_ostream &OS, const Action &A) { + A.print(OS); + return OS; + } +#endif // NDEBUG +}; + /// Maps the original instructions to the vectorized instrs and the reverse. /// For now an original instr can only map to a single vector. class InstrMaps { /// A map from the original values that got combined into vectors, to the - /// vector value(s). - DenseMap OrigToVectorMap; - /// A map from the vector value to a map of the original value to its lane. + /// vectorization Action. + DenseMap OrigToVectorMap; + /// A map from the vec Action to a map of the original value to its lane. /// Please note that for constant vectors, there may multiple original values /// with the same lane, as they may be coming from vectorizing different /// original values. - DenseMap> VectorToOrigLaneMap; - Context &Ctx; + DenseMap> VectorToOrigLaneMap; std::optional EraseInstrCB; -private: - void notifyEraseInstr(Value *V) { - // We don't know if V is an original or a vector value. - auto It = OrigToVectorMap.find(V); - if (It != OrigToVectorMap.end()) { - // V is an original value. - // Remove it from VectorToOrigLaneMap. 
- Value *Vec = It->second; - VectorToOrigLaneMap[Vec].erase(V); - // Now erase V from OrigToVectorMap. - OrigToVectorMap.erase(It); - } else { - // V is a vector value. - // Go over the original values it came from and remove them from - // OrigToVectorMap. - for (auto [Orig, Lane] : VectorToOrigLaneMap[V]) - OrigToVectorMap.erase(Orig); - // Now erase V from VectorToOrigLaneMap. - VectorToOrigLaneMap.erase(V); - } - } - public: - InstrMaps(Context &Ctx) : Ctx(Ctx) { - EraseInstrCB = Ctx.registerEraseInstrCallback( - [this](Instruction *I) { notifyEraseInstr(I); }); - } - ~InstrMaps() { Ctx.unregisterEraseInstrCallback(*EraseInstrCB); } + InstrMaps() = default; + ~InstrMaps() = default; /// \Returns the vector value that we got from vectorizing \p Orig, or /// nullptr if not found. - Value *getVectorForOrig(Value *Orig) const { + Action *getVectorForOrig(Value *Orig) const { auto It = OrigToVectorMap.find(Orig); return It != OrigToVectorMap.end() ? It->second : nullptr; } /// \Returns the lane of \p Orig before it got vectorized into \p Vec, or /// nullopt if not found. - std::optional getOrigLane(Value *Vec, Value *Orig) const { + std::optional getOrigLane(Action *Vec, Value *Orig) const { auto It1 = VectorToOrigLaneMap.find(Vec); if (It1 == VectorToOrigLaneMap.end()) return std::nullopt; @@ -84,7 +81,7 @@ class InstrMaps { return It2->second; } /// Update the map to reflect that \p Origs got vectorized into \p Vec. 
- void registerVector(ArrayRef Origs, Value *Vec) { + void registerVector(ArrayRef Origs, Action *Vec) { auto &OrigToLaneMap = VectorToOrigLaneMap[Vec]; unsigned Lane = 0; for (Value *Orig : Origs) { diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index 132b12a7b4e6c..bc2942f87adcf 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -17,6 +17,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h" namespace llvm::sandboxir { @@ -206,22 +207,22 @@ class Widen final : public LegalityResult { class DiamondReuse final : public LegalityResult { friend class LegalityAnalysis; - Value *Vec; - DiamondReuse(Value *Vec) + Action *Vec; + DiamondReuse(Action *Vec) : LegalityResult(LegalityResultID::DiamondReuse), Vec(Vec) {} public: static bool classof(const LegalityResult *From) { return From->getSubclassID() == LegalityResultID::DiamondReuse; } - Value *getVector() const { return Vec; } + Action *getVector() const { return Vec; } }; class DiamondReuseWithShuffle final : public LegalityResult { friend class LegalityAnalysis; - Value *Vec; + Action *Vec; ShuffleMask Mask; - DiamondReuseWithShuffle(Value *Vec, const ShuffleMask &Mask) + DiamondReuseWithShuffle(Action *Vec, const ShuffleMask &Mask) : LegalityResult(LegalityResultID::DiamondReuseWithShuffle), Vec(Vec), Mask(Mask) {} @@ -229,7 +230,7 @@ class DiamondReuseWithShuffle final : public LegalityResult { static bool classof(const LegalityResult *From) { return From->getSubclassID() == LegalityResultID::DiamondReuseWithShuffle; } - Value *getVector() const { return Vec; } + Action *getVector() const { return Vec; } const ShuffleMask 
&getMask() const { return Mask; } }; @@ -250,18 +251,18 @@ class CollectDescr { /// Describes how to get a value element. If the value is a vector then it /// also provides the index to extract it from. class ExtractElementDescr { - Value *V; + PointerUnion V = nullptr; /// The index in `V` that the value can be extracted from. - /// This is nullopt if we need to use `V` as a whole. - std::optional ExtractIdx; + int ExtractIdx = 0; public: - ExtractElementDescr(Value *V, int ExtractIdx) + ExtractElementDescr(Action *V, int ExtractIdx) : V(V), ExtractIdx(ExtractIdx) {} - ExtractElementDescr(Value *V) : V(V), ExtractIdx(std::nullopt) {} - Value *getValue() const { return V; } - bool needsExtract() const { return ExtractIdx.has_value(); } - int getExtractIdx() const { return *ExtractIdx; } + ExtractElementDescr(Value *V) : V(V) {} + Action *getValue() const { return cast(V); } + Value *getScalar() const { return cast(V); } + bool needsExtract() const { return isa(V); } + int getExtractIdx() const { return ExtractIdx; } }; using DescrVecT = SmallVector; @@ -272,11 +273,11 @@ class CollectDescr { : Descrs(std::move(Descrs)) {} /// If all elements come from a single vector input, then return that vector /// and also the shuffle mask required to get them in order. 
- std::optional> getSingleInput() const { + std::optional> getSingleInput() const { const auto &Descr0 = *Descrs.begin(); - Value *V0 = Descr0.getValue(); if (!Descr0.needsExtract()) return std::nullopt; + auto *V0 = Descr0.getValue(); ShuffleMask::IndicesVecT MaskIndices; MaskIndices.push_back(Descr0.getExtractIdx()); for (const auto &Descr : drop_begin(Descrs)) { diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h index daf6499213d48..b28e9948d6f55 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h @@ -58,9 +58,33 @@ class BottomUpVec final : public RegionPass { /// function helps collect these instructions (along with the pointer operands /// for loads/stores) so that they can be cleaned up later. void collectPotentiallyDeadInstrs(ArrayRef Bndl); - /// Recursively try to vectorize \p Bndl and its operands. - Value *vectorizeRec(ArrayRef Bndl, ArrayRef UserBndl, - unsigned Depth); + + /// Helper class describing how(if) to vectorize the code. + class ActionsVector { + private: + SmallVector, 16> Actions; + + public: + auto begin() const { return Actions.begin(); } + auto end() const { return Actions.end(); } + void push_back(std::unique_ptr &&ActPtr) { + ActPtr->Idx = Actions.size(); + Actions.push_back(std::move(ActPtr)); + } + void clear() { Actions.clear(); } +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const; +#endif // NDEBUG + }; + ActionsVector Actions; + /// Recursively try to vectorize \p Bndl and its operands. This populates the + /// `Actions` vector. + Action *vectorizeRec(ArrayRef Bndl, ArrayRef UserBndl, + unsigned Depth); + /// Generate vector instructions based on `Actions` and return the last vector + /// created. 
+ Value *emitVectors(); /// Entry point for vectorization starting from \p Seeds. bool tryVectorize(ArrayRef Seeds); diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index cab70c5c01a45..a1d91de3bb788 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -677,7 +677,7 @@ class AccessAnalysis { const DenseMap &Strides, DenseMap &DepSetId, Loop *TheLoop, unsigned &RunningDepId, - unsigned ASId, bool ShouldCheckStride, bool Assume); + unsigned ASId, bool Assume); /// Check whether we can check the pointers at runtime for /// non-intersection. @@ -685,8 +685,9 @@ class AccessAnalysis { /// Returns true if we need no check or if we do and we can generate them /// (i.e. the pointers have computable bounds). bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, - Loop *TheLoop, const DenseMap &Strides, - Value *&UncomputablePtr, bool ShouldCheckWrap = false); + Loop *TheLoop, + const DenseMap &Strides, + Value *&UncomputablePtr); /// Goes over all memory accesses, checks whether a RT check is needed /// and builds sets of dependent accesses. 
@@ -1115,13 +1116,11 @@ findForkedPointer(PredicatedScalarEvolution &PSE, return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}}; } -bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, - MemAccessInfo Access, Type *AccessTy, - const DenseMap &StridesMap, - DenseMap &DepSetId, - Loop *TheLoop, unsigned &RunningDepId, - unsigned ASId, bool ShouldCheckWrap, - bool Assume) { +bool AccessAnalysis::createCheckForAccess( + RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy, + const DenseMap &StridesMap, + DenseMap &DepSetId, Loop *TheLoop, + unsigned &RunningDepId, unsigned ASId, bool Assume) { Value *Ptr = Access.getPointer(); SmallVector> TranslatedPtrs = @@ -1152,8 +1151,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. - if (ShouldCheckWrap && - !isNoWrap(PSE, AR, TranslatedPtrs.size() == 1 ? Ptr : nullptr, AccessTy, + if (!isNoWrap(PSE, AR, TranslatedPtrs.size() == 1 ? Ptr : nullptr, AccessTy, TheLoop, Assume)) { return false; } @@ -1182,10 +1180,10 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, return true; } -bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, - ScalarEvolution *SE, Loop *TheLoop, - const DenseMap &StridesMap, - Value *&UncomputablePtr, bool ShouldCheckWrap) { +bool AccessAnalysis::canCheckPtrAtRT( + RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, + const DenseMap &StridesMap, + Value *&UncomputablePtr) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. 
bool CanDoRT = true; @@ -1245,7 +1243,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, for (const auto &AccessTy : Accesses[Access]) { if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, - ShouldCheckWrap, false)) { + false)) { LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Access.getPointer() << '\n'); Retries.emplace_back(Access, AccessTy); @@ -1275,7 +1273,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, for (const auto &[Access, AccessTy] : Retries) { if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, - ShouldCheckWrap, /*Assume=*/true)) { + /*Assume=*/true)) { CanDoAliasSetRT = false; UncomputablePtr = Access.getPointer(); break; @@ -1431,8 +1429,8 @@ void AccessAnalysis::processMemAccesses() { typedef SmallVector ValueVector; ValueVector TempObjects; - UnderlyingObjects[Ptr] = {}; SmallVector &UOs = UnderlyingObjects[Ptr]; + UOs = {}; ::getUnderlyingObjects(Ptr, UOs, LI); LLVM_DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); @@ -2643,9 +2641,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. 
Value *UncomputablePtr = nullptr; - bool CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop, - SymbolicStrides, UncomputablePtr, false); + bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT( + *PtrRtChecking, PSE->getSE(), TheLoop, SymbolicStrides, UncomputablePtr); if (!CanDoRTIfNeeded) { const auto *I = dyn_cast_or_null(UncomputablePtr); recordAnalysis("CantIdentifyArrayBounds", I) @@ -2676,7 +2673,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, auto *SE = PSE->getSE(); UncomputablePtr = nullptr; CanDoRTIfNeeded = Accesses.canCheckPtrAtRT( - *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true); + *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr); // Check that we found the bounds for the pointer. if (!CanDoRTIfNeeded) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index d87649c4e6567..0f11423a84930 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -525,7 +525,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, // Don't consider SP to be clobbered by register masks. 
for (auto It : RegVars) { unsigned int Reg = It.first; - if (Reg != SP && Register::isPhysicalRegister(Reg) && + if (Reg != SP && Register(Reg).isPhysical() && MO.clobbersPhysReg(Reg)) RegsToClobber.push_back(Reg); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index ddf0275ddfe6a..cf3673058c8e7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -564,7 +564,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP, TFI->getDwarfFrameBase(*Asm->MF); switch (FrameBase.Kind) { case TargetFrameLowering::DwarfFrameBase::Register: { - if (Register::isPhysicalRegister(FrameBase.Location.Reg)) { + if (Register(FrameBase.Location.Reg).isPhysical()) { MachineLocation Location(FrameBase.Location.Reg); addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); } diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp index 1a9bc694ed0fd..a7c8c3fc8a25a 100644 --- a/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -216,7 +216,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, report_fatal_error("Use not jointly dominated by defs."); } - if (Register::isPhysicalRegister(PhysReg)) { + if (Register(PhysReg).isPhysical()) { const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); bool IsLiveIn = MBB->isLiveIn(PhysReg); for (MCRegAliasIterator Alias(PhysReg, TRI, false); !IsLiveIn && Alias.isValid(); ++Alias) diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 0da7535031a7d..1cc1b2cbd81b9 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -3966,8 +3966,7 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { // Find already scheduled copies with a single physreg dependence and move // them just above the scheduled instruction. 
for (SDep &Dep : Deps) { - if (Dep.getKind() != SDep::Data || - !Register::isPhysicalRegister(Dep.getReg())) + if (Dep.getKind() != SDep::Data || !Register(Dep.getReg()).isPhysical()) continue; SUnit *DepSU = Dep.getSUnit(); if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 14128dafbe4ee..2809056bfeba2 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -708,7 +708,7 @@ void RegAllocFastImpl::reloadAtBegin(MachineBasicBlock &MBB) { /// not used by a virtreg. Kill the physreg, marking it free. This may add /// implicit kills to MO->getParent() and invalidate MO. bool RegAllocFastImpl::usePhysReg(MachineInstr &MI, MCPhysReg Reg) { - assert(Register::isPhysicalRegister(Reg) && "expected physreg"); + assert(Register(Reg).isPhysical() && "expected physreg"); bool displacedAny = displacePhysReg(MI, Reg); setPhysRegState(Reg, regPreAssigned); markRegUsedInInstr(Reg); @@ -1289,7 +1289,7 @@ void RegAllocFastImpl::dumpState() const { assert(VirtReg.isVirtual() && "Bad map key"); MCPhysReg PhysReg = LR.PhysReg; if (PhysReg != 0) { - assert(Register::isPhysicalRegister(PhysReg) && "mapped to physreg"); + assert(Register(PhysReg).isPhysical() && "mapped to physreg"); for (MCRegUnit Unit : TRI->regunits(PhysReg)) { assert(RegUnitStates[Unit] == VirtReg && "inverse map valid"); } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index fd4641ec6f124..288b9d9553b1d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -501,8 +501,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, F.isClobberKind()) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { - unsigned Reg = cast(Node->getOperand(i))->getReg(); - if (Register::isPhysicalRegister(Reg)) + Register Reg = cast(Node->getOperand(i))->getReg(); + if (Reg.isPhysical()) CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } else diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 133ac6b1327dd..a76498fcab8f2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -10125,9 +10125,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, auto DetectWriteToReservedRegister = [&]() { const MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - for (unsigned Reg : OpInfo.AssignedRegs.Regs) { - if (Register::isPhysicalRegister(Reg) && - TRI.isInlineAsmReadOnlyReg(MF, Reg)) { + for (Register Reg : OpInfo.AssignedRegs.Regs) { + if (Reg.isPhysical() && TRI.isInlineAsmReadOnlyReg(MF, Reg)) { const char *RegName = TRI.getName(Reg); emitInlineAsmError(Call, "write to reserved register '" + Twine(RegName) + "'"); @@ -11389,7 +11388,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); - assert(!Register::isPhysicalRegister(Reg) && "Is a physreg"); + assert(!Register(Reg).isPhysical() && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If this is an InlineAsm we have to match the registers required, not the diff --git a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp index f3a9fb188f51d..5d7d6a1141ba0 100644 --- a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp +++ b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp @@ -145,14 +145,16 @@ bool XCOFFWriter::initSectionHeaders(uint64_t &CurrentOffset) { uint64_t CurrentEndTDataAddr = 
0; for (uint16_t I = 0, E = InitSections.size(); I < E; ++I) { // Assign indices for sections. - if (InitSections[I].SectionName.size() && - !SectionIndexMap[InitSections[I].SectionName]) { - // The section index starts from 1. - SectionIndexMap[InitSections[I].SectionName] = I + 1; - if ((I + 1) > MaxSectionIndex) { - ErrHandler("exceeded the maximum permitted section index of " + - Twine(MaxSectionIndex)); - return false; + if (InitSections[I].SectionName.size()) { + int16_t &SectionIndex = SectionIndexMap[InitSections[I].SectionName]; + if (!SectionIndex) { + // The section index starts from 1. + SectionIndex = I + 1; + if ((I + 1) > MaxSectionIndex) { + ErrHandler("exceeded the maximum permitted section index of " + + Twine(MaxSectionIndex)); + return false; + } } } @@ -779,19 +781,19 @@ bool XCOFFWriter::writeSymbols() { W.write(YamlSym.Value); } if (YamlSym.SectionName) { - if (!SectionIndexMap.count(*YamlSym.SectionName)) { + auto It = SectionIndexMap.find(*YamlSym.SectionName); + if (It == SectionIndexMap.end()) { ErrHandler("the SectionName " + *YamlSym.SectionName + " specified in the symbol does not exist"); return false; } - if (YamlSym.SectionIndex && - SectionIndexMap[*YamlSym.SectionName] != *YamlSym.SectionIndex) { + if (YamlSym.SectionIndex && It->second != *YamlSym.SectionIndex) { ErrHandler("the SectionName " + *YamlSym.SectionName + " and the SectionIndex (" + Twine(*YamlSym.SectionIndex) + ") refer to different sections"); return false; } - W.write(SectionIndexMap[*YamlSym.SectionName]); + W.write(It->second); } else { W.write(YamlSym.SectionIndex.value_or(0)); } diff --git a/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index 174438c1863dd..c636719d86ca0 100644 --- a/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -155,11 +155,11 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, LiveIntervals &LIs = 
G.getMetadata().LIS; - if (Register::isPhysicalRegister(Rd) || Register::isPhysicalRegister(Ra)) { - LLVM_DEBUG(dbgs() << "Rd is a physical reg:" - << Register::isPhysicalRegister(Rd) << '\n'); - LLVM_DEBUG(dbgs() << "Ra is a physical reg:" - << Register::isPhysicalRegister(Ra) << '\n'); + if (Register(Rd).isPhysical() || Register(Ra).isPhysical()) { + LLVM_DEBUG(dbgs() << "Rd is a physical reg:" << Register(Rd).isPhysical() + << '\n'); + LLVM_DEBUG(dbgs() << "Ra is a physical reg:" << Register(Ra).isPhysical() + << '\n'); return false; } diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 839b7e81f8998..9a021925a6bd1 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1108,7 +1108,7 @@ ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, if (!SubIdx) return MIB.addReg(Reg, State); - if (Register::isPhysicalRegister(Reg)) + if (Register(Reg).isPhysical()) return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); return MIB.addReg(Reg, State, SubIdx); } diff --git a/llvm/lib/Target/ARM/ARMLatencyMutations.cpp b/llvm/lib/Target/ARM/ARMLatencyMutations.cpp index 30e7ede68d787..601b3fa19978d 100644 --- a/llvm/lib/Target/ARM/ARMLatencyMutations.cpp +++ b/llvm/lib/Target/ARM/ARMLatencyMutations.cpp @@ -802,7 +802,7 @@ signed M85Overrides::modifyMixedWidthFP(const MachineInstr *SrcMI, OP.getSubReg() == ARM::ssub_1) return 1; } - } else if (Register::isPhysicalRegister(RegID)) { + } else if (Register(RegID).isPhysical()) { // Note that when the producer is narrower, not all of the producers // may be present in the scheduling graph; somewhere earlier in the // compiler, an implicit def/use of the aliased full register gets diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 3b157006d9224..df182613d1661 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ 
b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -223,8 +223,8 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI, return true; } -static bool isEvenReg(unsigned Reg) { - assert(Register::isPhysicalRegister(Reg)); +static bool isEvenReg(Register Reg) { + assert(Reg.isPhysical()); if (Hexagon::IntRegsRegClass.contains(Reg)) return (Reg - Hexagon::R0) % 2 == 0; if (Hexagon::HvxVRRegClass.contains(Reg)) @@ -546,7 +546,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1, // is even. bool IsI1LowReg = (I2DestReg - I1DestReg) == 1; bool IsI2LowReg = (I1DestReg - I2DestReg) == 1; - unsigned FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg; + Register FirstRegIndex = IsI1LowReg ? I1DestReg : I2DestReg; if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex)) continue; diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index ee01ebc4daa26..3bb7175bbf8b9 100644 --- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -275,7 +275,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, return false; } - unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. + Register cmpReg1, cmpOp2; cmpReg1 = MI.getOperand(1).getReg(); if (secondReg) { @@ -290,7 +290,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // at machine code level, we don't need this, but if we decide // to move new value jump prior to RA, we would be needing this. 
MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!Register::isPhysicalRegister(cmpOp2)) { + if (!cmpOp2.isPhysical()) { MachineInstr *def = MRI.getVRegDef(cmpOp2); if (def->getOpcode() == TargetOpcode::COPY) return false; @@ -480,7 +480,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { bool foundJump = false; bool foundCompare = false; bool invertPredicate = false; - unsigned predReg = 0; // predicate reg of the jump. + Register predReg; // predicate reg of the jump. unsigned cmpReg1 = 0; int cmpOp2 = 0; MachineBasicBlock::iterator jmpPos; @@ -516,7 +516,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { jmpPos = MII; jmpInstr = &MI; predReg = MI.getOperand(0).getReg(); - afterRA = Register::isPhysicalRegister(predReg); + afterRA = predReg.isPhysical(); // If ifconverter had not messed up with the kill flags of the // operands, the following check on the kill flag would suffice. diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp index fafdad08909dd..76177901f658a 100644 --- a/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -44,8 +44,8 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { const MachineOperand &Src = MI->getOperand(1); RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg()); RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg()); - assert(Register::isPhysicalRegister(DstR.Reg)); - assert(Register::isPhysicalRegister(SrcR.Reg)); + assert(Register(DstR.Reg).isPhysical()); + assert(Register(SrcR.Reg).isPhysical()); const TargetRegisterInfo &TRI = DFG.getTRI(); if (TRI.getMinimalPhysRegClass(DstR.Reg) != TRI.getMinimalPhysRegClass(SrcR.Reg)) diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp index 62fb72ba4fd5e..5375d4484a7ab 100644 --- a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp +++ b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp @@ -83,8 +83,7 
@@ M68kRegisterInfo::getMatchingMegaReg(unsigned Reg, const TargetRegisterClass * M68kRegisterInfo::getMaximalPhysRegClass(unsigned reg, MVT VT) const { - assert(Register::isPhysicalRegister(reg) && - "reg must be a physical register"); + assert(Register(reg).isPhysical() && "reg must be a physical register"); // Pick the most sub register class of the right type that contains // this physreg. diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp index 06ae8e1296e51..e40c85abc8b5d 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -134,7 +134,7 @@ static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI, } // Try to use SLLI_UW for Val when it is uint32 but not int32. - if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) && + if (isUInt<32>(Val) && !isInt<32>(Val) && STI.hasFeature(RISCV::FeatureStdExtZba)) { // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with // SLLI_UW. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp index cd84e68aed140..0469fbf15b251 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp @@ -28,17 +28,17 @@ const SortRegion *SortRegionInfo::getRegionFor(const MachineBasicBlock *MBB) { // WE->contains(ML->getHeader()), but not ML->contains(WE->getHeader()). 
if ((ML && !WE) || (ML && WE && WE->contains(ML->getHeader()))) { // If the smallest region containing MBB is a loop - if (LoopMap.count(ML)) - return LoopMap[ML].get(); - LoopMap[ML] = std::make_unique>(ML); - return LoopMap[ML].get(); + auto [It, Inserted] = LoopMap.try_emplace(ML); + if (Inserted) + It->second = std::make_unique>(ML); + return It->second.get(); } else { // If the smallest region containing MBB is an exception - if (ExceptionMap.count(WE)) - return ExceptionMap[WE].get(); - ExceptionMap[WE] = - std::make_unique>(WE); - return ExceptionMap[WE].get(); + auto [It, Inserted] = ExceptionMap.try_emplace(WE); + if (Inserted) + It->second = + std::make_unique>(WE); + return It->second.get(); } } diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index f2707afe195c4..56b7b8bfe1f66 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1393,9 +1393,12 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { CallerORE.emit(OR); // Now update the entry count: - if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) { - uint64_t CallSiteCount = CallSiteToProfCountMap[User]; - CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount); + if (CalleeEntryCountV) { + if (auto It = CallSiteToProfCountMap.find(User); + It != CallSiteToProfCountMap.end()) { + uint64_t CallSiteCount = It->second; + CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount); + } } AnyInline = true; diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp index 2d9a3d1f8a110..78b9c7d06e183 100644 --- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -178,11 +178,9 @@ struct AssumeBuilderState { if (tryToPreserveWithoutAddingAssume(RK)) return; MapKey Key{RK.WasOn, RK.AttrKind}; - auto Lookup = AssumedKnowledgeMap.find(Key); - if 
(Lookup == AssumedKnowledgeMap.end()) { - AssumedKnowledgeMap[Key] = RK.ArgValue; + auto [Lookup, Inserted] = AssumedKnowledgeMap.try_emplace(Key, RK.ArgValue); + if (Inserted) return; - } assert(((Lookup->second == 0 && RK.ArgValue == 0) || (Lookup->second != 0 && RK.ArgValue != 0)) && "inconsistent argument value"); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp index 4df4829a04c41..37f1ec450f2eb 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp @@ -8,10 +8,23 @@ #include "llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" namespace llvm::sandboxir { #ifndef NDEBUG +void Action::print(raw_ostream &OS) const { + OS << Idx << ". " << *LegalityRes << " Depth:" << Depth << "\n"; + OS.indent(2) << "Bndl:\n"; + for (Value *V : Bndl) + OS.indent(4) << *V << "\n"; + OS.indent(2) << "UserBndl:\n"; + for (Value *V : UserBndl) + OS.indent(4) << *V << "\n"; +} + +void Action::dump() const { print(dbgs()); } + void InstrMaps::dump() const { print(dbgs()); dbgs() << "\n"; diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index d57732090dcd6..14438181f2602 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -156,12 +156,7 @@ Value *BottomUpVec::createVectorInstr(ArrayRef Bndl, // TODO: Propagate debug info. 
}; - auto *VecI = CreateVectorInstr(Bndl, Operands); - if (VecI != nullptr) { - Change = true; - IMaps->registerVector(Bndl, VecI); - } - return VecI; + return CreateVectorInstr(Bndl, Operands); } void BottomUpVec::tryEraseDeadInstrs() { @@ -266,135 +261,196 @@ void BottomUpVec::collectPotentiallyDeadInstrs(ArrayRef Bndl) { } } -Value *BottomUpVec::vectorizeRec(ArrayRef Bndl, - ArrayRef UserBndl, unsigned Depth) { - Value *NewVec = nullptr; - auto *UserBB = !UserBndl.empty() - ? cast(UserBndl.front())->getParent() - : cast(Bndl[0])->getParent(); +Action *BottomUpVec::vectorizeRec(ArrayRef Bndl, + ArrayRef UserBndl, unsigned Depth) { const auto &LegalityRes = Legality->canVectorize(Bndl); + auto ActionPtr = + std::make_unique(&LegalityRes, Bndl, UserBndl, Depth); + SmallVector Operands; switch (LegalityRes.getSubclassID()) { case LegalityResultID::Widen: { auto *I = cast(Bndl[0]); - SmallVector VecOperands; switch (I->getOpcode()) { case Instruction::Opcode::Load: - // Don't recurse towards the pointer operand. - VecOperands.push_back(cast(I)->getPointerOperand()); break; case Instruction::Opcode::Store: { // Don't recurse towards the pointer operand. - auto *VecOp = vectorizeRec(getOperand(Bndl, 0), Bndl, Depth + 1); - VecOperands.push_back(VecOp); - VecOperands.push_back(cast(I)->getPointerOperand()); + Action *OpA = vectorizeRec(getOperand(Bndl, 0), Bndl, Depth + 1); + Operands.push_back(OpA); break; } default: // Visit all operands. for (auto OpIdx : seq(I->getNumOperands())) { - auto *VecOp = vectorizeRec(getOperand(Bndl, OpIdx), Bndl, Depth + 1); - VecOperands.push_back(VecOp); + Action *OpA = vectorizeRec(getOperand(Bndl, OpIdx), Bndl, Depth + 1); + Operands.push_back(OpA); } break; } - NewVec = createVectorInstr(Bndl, VecOperands); - - // Collect any potentially dead scalar instructions, including the original - // scalars and pointer operands of loads/stores. 
- if (NewVec != nullptr) - collectPotentiallyDeadInstrs(Bndl); + // Update the maps to mark Bndl as "vectorized". + IMaps->registerVector(Bndl, ActionPtr.get()); break; } - case LegalityResultID::DiamondReuse: { - NewVec = cast(LegalityRes).getVector(); + case LegalityResultID::DiamondReuse: + case LegalityResultID::DiamondReuseWithShuffle: + case LegalityResultID::DiamondReuseMultiInput: + case LegalityResultID::Pack: break; } - case LegalityResultID::DiamondReuseWithShuffle: { - auto *VecOp = cast(LegalityRes).getVector(); - const ShuffleMask &Mask = - cast(LegalityRes).getMask(); - NewVec = createShuffle(VecOp, Mask, UserBB); - assert(NewVec->getType() == VecOp->getType() && - "Expected same type! Bad mask ?"); - break; + // Create actions in post-order. + ActionPtr->Operands = std::move(Operands); + auto *Action = ActionPtr.get(); + Actions.push_back(std::move(ActionPtr)); + return Action; +} + +#ifndef NDEBUG +void BottomUpVec::ActionsVector::print(raw_ostream &OS) const { + for (auto [Idx, Action] : enumerate(Actions)) { + Action->print(OS); + OS << "\n"; } - case LegalityResultID::DiamondReuseMultiInput: { - const auto &Descr = - cast(LegalityRes).getCollectDescr(); - Type *ResTy = VecUtils::getWideType(Bndl[0]->getType(), Bndl.size()); +} +void BottomUpVec::ActionsVector::dump() const { print(dbgs()); } +#endif // NDEBUG + +Value *BottomUpVec::emitVectors() { + Value *NewVec = nullptr; + for (const auto &ActionPtr : Actions) { + ArrayRef Bndl = ActionPtr->Bndl; + ArrayRef UserBndl = ActionPtr->UserBndl; + const LegalityResult &LegalityRes = *ActionPtr->LegalityRes; + unsigned Depth = ActionPtr->Depth; + auto *UserBB = !UserBndl.empty() + ? cast(UserBndl.front())->getParent() + : cast(Bndl[0])->getParent(); - // TODO: Try to get WhereIt without creating a vector. 
- SmallVector DescrInstrs; - for (const auto &ElmDescr : Descr.getDescrs()) { - if (auto *I = dyn_cast(ElmDescr.getValue())) - DescrInstrs.push_back(I); + switch (LegalityRes.getSubclassID()) { + case LegalityResultID::Widen: { + auto *I = cast(Bndl[0]); + SmallVector VecOperands; + switch (I->getOpcode()) { + case Instruction::Opcode::Load: + VecOperands.push_back(cast(I)->getPointerOperand()); + break; + case Instruction::Opcode::Store: { + VecOperands.push_back(ActionPtr->Operands[0]->Vec); + VecOperands.push_back(cast(I)->getPointerOperand()); + break; + } + default: + // Visit all operands. + for (Action *OpA : ActionPtr->Operands) { + auto *VecOp = OpA->Vec; + VecOperands.push_back(VecOp); + } + break; + } + NewVec = createVectorInstr(ActionPtr->Bndl, VecOperands); + // Collect any potentially dead scalar instructions, including the + // original scalars and pointer operands of loads/stores. + if (NewVec != nullptr) + collectPotentiallyDeadInstrs(Bndl); + break; + } + case LegalityResultID::DiamondReuse: { + NewVec = cast(LegalityRes).getVector()->Vec; + break; + } + case LegalityResultID::DiamondReuseWithShuffle: { + auto *VecOp = cast(LegalityRes).getVector()->Vec; + const ShuffleMask &Mask = + cast(LegalityRes).getMask(); + NewVec = createShuffle(VecOp, Mask, UserBB); + assert(NewVec->getType() == VecOp->getType() && + "Expected same type! 
Bad mask ?"); + break; } - BasicBlock::iterator WhereIt = - getInsertPointAfterInstrs(DescrInstrs, UserBB); + case LegalityResultID::DiamondReuseMultiInput: { + const auto &Descr = + cast(LegalityRes).getCollectDescr(); + Type *ResTy = VecUtils::getWideType(Bndl[0]->getType(), Bndl.size()); - Value *LastV = PoisonValue::get(ResTy); - unsigned Lane = 0; - for (const auto &ElmDescr : Descr.getDescrs()) { - Value *VecOp = ElmDescr.getValue(); - Context &Ctx = VecOp->getContext(); - Value *ValueToInsert; - if (ElmDescr.needsExtract()) { - ConstantInt *IdxC = - ConstantInt::get(Type::getInt32Ty(Ctx), ElmDescr.getExtractIdx()); - ValueToInsert = ExtractElementInst::create(VecOp, IdxC, WhereIt, - VecOp->getContext(), "VExt"); - } else { - ValueToInsert = VecOp; + // TODO: Try to get WhereIt without creating a vector. + SmallVector DescrInstrs; + for (const auto &ElmDescr : Descr.getDescrs()) { + auto *V = ElmDescr.needsExtract() ? ElmDescr.getValue()->Vec + : ElmDescr.getScalar(); + if (auto *I = dyn_cast(V)) + DescrInstrs.push_back(I); } - auto NumLanesToInsert = VecUtils::getNumLanes(ValueToInsert); - if (NumLanesToInsert == 1) { - // If we are inserting a scalar element then we need a single insert. 
- // %VIns = insert %DstVec, %SrcScalar, Lane - ConstantInt *LaneC = ConstantInt::get(Type::getInt32Ty(Ctx), Lane); - LastV = InsertElementInst::create(LastV, ValueToInsert, LaneC, WhereIt, - Ctx, "VIns"); - } else { - // If we are inserting a vector element then we need to extract and - // insert each vector element one by one with a chain of extracts and - // inserts, for example: - // %VExt0 = extract %SrcVec, 0 - // %VIns0 = insert %DstVec, %Vect0, Lane + 0 - // %VExt1 = extract %SrcVec, 1 - // %VIns1 = insert %VIns0, %Vect0, Lane + 1 - for (unsigned LnCnt = 0; LnCnt != NumLanesToInsert; ++LnCnt) { - auto *ExtrIdxC = ConstantInt::get(Type::getInt32Ty(Ctx), LnCnt); - auto *ExtrI = ExtractElementInst::create(ValueToInsert, ExtrIdxC, - WhereIt, Ctx, "VExt"); - unsigned InsLane = Lane + LnCnt; - auto *InsLaneC = ConstantInt::get(Type::getInt32Ty(Ctx), InsLane); - LastV = InsertElementInst::create(LastV, ExtrI, InsLaneC, WhereIt, - Ctx, "VIns"); + BasicBlock::iterator WhereIt = + getInsertPointAfterInstrs(DescrInstrs, UserBB); + + Value *LastV = PoisonValue::get(ResTy); + Context &Ctx = LastV->getContext(); + unsigned Lane = 0; + for (const auto &ElmDescr : Descr.getDescrs()) { + Value *VecOp = nullptr; + Value *ValueToInsert; + if (ElmDescr.needsExtract()) { + VecOp = ElmDescr.getValue()->Vec; + ConstantInt *IdxC = + ConstantInt::get(Type::getInt32Ty(Ctx), ElmDescr.getExtractIdx()); + ValueToInsert = ExtractElementInst::create( + VecOp, IdxC, WhereIt, VecOp->getContext(), "VExt"); + } else { + ValueToInsert = ElmDescr.getScalar(); + } + auto NumLanesToInsert = VecUtils::getNumLanes(ValueToInsert); + if (NumLanesToInsert == 1) { + // If we are inserting a scalar element then we need a single insert. 
+ // %VIns = insert %DstVec, %SrcScalar, Lane + ConstantInt *LaneC = ConstantInt::get(Type::getInt32Ty(Ctx), Lane); + LastV = InsertElementInst::create(LastV, ValueToInsert, LaneC, + WhereIt, Ctx, "VIns"); + } else { + // If we are inserting a vector element then we need to extract and + // insert each vector element one by one with a chain of extracts and + // inserts, for example: + // %VExt0 = extract %SrcVec, 0 + // %VIns0 = insert %DstVec, %Vect0, Lane + 0 + // %VExt1 = extract %SrcVec, 1 + // %VIns1 = insert %VIns0, %Vect0, Lane + 1 + for (unsigned LnCnt = 0; LnCnt != NumLanesToInsert; ++LnCnt) { + auto *ExtrIdxC = ConstantInt::get(Type::getInt32Ty(Ctx), LnCnt); + auto *ExtrI = ExtractElementInst::create(ValueToInsert, ExtrIdxC, + WhereIt, Ctx, "VExt"); + unsigned InsLane = Lane + LnCnt; + auto *InsLaneC = ConstantInt::get(Type::getInt32Ty(Ctx), InsLane); + LastV = InsertElementInst::create(LastV, ExtrI, InsLaneC, WhereIt, + Ctx, "VIns"); + } } + Lane += NumLanesToInsert; } - Lane += NumLanesToInsert; + NewVec = LastV; + break; + } + case LegalityResultID::Pack: { + // If we can't vectorize the seeds then just return. + if (Depth == 0) + return nullptr; + NewVec = createPack(Bndl, UserBB); + break; + } + } + if (NewVec != nullptr) { + Change = true; + ActionPtr->Vec = NewVec; } - NewVec = LastV; - break; - } - case LegalityResultID::Pack: { - // If we can't vectorize the seeds then just return. - if (Depth == 0) - return nullptr; - NewVec = createPack(Bndl, UserBB); - break; - } - } #ifndef NDEBUG - if (AlwaysVerify) { - // This helps find broken IR by constantly verifying the function. Note that - // this is very expensive and should only be used for debugging. - Instruction *I0 = isa(Bndl[0]) - ? cast(Bndl[0]) - : cast(UserBndl[0]); - assert(!Utils::verifyFunction(I0->getParent()->getParent(), dbgs()) && - "Broken function!"); + if (AlwaysVerify) { + // This helps find broken IR by constantly verifying the function. 
Note + // that this is very expensive and should only be used for debugging. + Instruction *I0 = isa(Bndl[0]) + ? cast(Bndl[0]) + : cast(UserBndl[0]); + assert(!Utils::verifyFunction(I0->getParent()->getParent(), dbgs()) && + "Broken function!"); + } +#endif // NDEBUG } -#endif return NewVec; } @@ -402,7 +458,9 @@ bool BottomUpVec::tryVectorize(ArrayRef Bndl) { Change = false; DeadInstrCandidates.clear(); Legality->clear(); + Actions.clear(); vectorizeRec(Bndl, {}, /*Depth=*/0); + emitVectors(); tryEraseDeadInstrs(); return Change; } @@ -411,7 +469,7 @@ bool BottomUpVec::runOnRegion(Region &Rgn, const Analyses &A) { const auto &SeedSlice = Rgn.getAux(); assert(SeedSlice.size() >= 2 && "Bad slice!"); Function &F = *SeedSlice[0]->getParent()->getParent(); - IMaps = std::make_unique(F.getContext()); + IMaps = std::make_unique(); Legality = std::make_unique( A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(), F.getContext(), *IMaps); diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 746742e14d080..cdb8853f7503c 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -3036,8 +3036,6 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { Worklist.pushValue(NSV0B); Worklist.pushValue(NSV1A); Worklist.pushValue(NSV1B); - for (auto *S : Shuffles) - Worklist.add(S); return true; } diff --git a/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll b/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll index 5234d8f107271..d4f7f82a8cff1 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll @@ -15,6 +15,12 @@ define void @int_and_pointer_predicate(ptr %v, i32 %N) { ; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GRP1:0x[0-9a-f]+]]: +; CHECK-NEXT: (Low: %v High: (2 + 
%v)) +; CHECK-NEXT: Member: %v +; CHECK-NEXT: Group [[GRP2:0x[0-9a-f]+]]: +; CHECK-NEXT: (Low: %v High: (6 + (4 * (trunc i32 %N to i16)) + %v)) +; CHECK-NEXT: Member: {%v,+,4}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -57,36 +63,36 @@ define void @int_and_multiple_pointer_predicates(ptr %v, ptr %w, i32 %N) { ; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Check 0: -; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]): +; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]): ; CHECK-NEXT: ptr %v -; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]): +; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]): ; CHECK-NEXT: ptr %w ; CHECK-NEXT: Check 1: -; CHECK-NEXT: Comparing group ([[GRP1]]): +; CHECK-NEXT: Comparing group ([[GRP3]]): ; CHECK-NEXT: ptr %v -; CHECK-NEXT: Against group ([[GRP3:0x[0-9a-f]+]]): +; CHECK-NEXT: Against group ([[GRP5:0x[0-9a-f]+]]): ; CHECK-NEXT: %gep.w = getelementptr { i16, i16 }, ptr %w, i16 %iv.i16 ; CHECK-NEXT: Check 2: -; CHECK-NEXT: Comparing group ([[GRP4:0x[0-9a-f]+]]): +; CHECK-NEXT: Comparing group ([[GRP6:0x[0-9a-f]+]]): ; CHECK-NEXT: %gep.v = getelementptr { i16, i16 }, ptr %v, i16 %iv.i16 -; CHECK-NEXT: Against group ([[GRP2]]): +; CHECK-NEXT: Against group ([[GRP4]]): ; CHECK-NEXT: ptr %w ; CHECK-NEXT: Check 3: -; CHECK-NEXT: Comparing group ([[GRP4]]): +; CHECK-NEXT: Comparing group ([[GRP6]]): ; CHECK-NEXT: %gep.v = getelementptr { i16, i16 }, ptr %v, i16 %iv.i16 -; CHECK-NEXT: Against group ([[GRP3]]): +; CHECK-NEXT: Against group ([[GRP5]]): ; CHECK-NEXT: %gep.w = getelementptr { i16, i16 }, ptr %w, i16 %iv.i16 ; CHECK-NEXT: Grouped accesses: -; CHECK-NEXT: Group [[GRP1]]: +; CHECK-NEXT: Group [[GRP3]]: ; CHECK-NEXT: (Low: %v High: (2 + %v)) ; CHECK-NEXT: Member: %v -; CHECK-NEXT: Group [[GRP4]]: +; CHECK-NEXT: Group [[GRP6]]: ; CHECK-NEXT: (Low: %v High: (6 + (4 * (trunc i32 %N to i16)) + %v)) ; CHECK-NEXT: Member: 
{%v,+,4}<%loop> -; CHECK-NEXT: Group [[GRP2]]: +; CHECK-NEXT: Group [[GRP4]]: ; CHECK-NEXT: (Low: %w High: (2 + %w)) ; CHECK-NEXT: Member: %w -; CHECK-NEXT: Group [[GRP3]]: +; CHECK-NEXT: Group [[GRP5]]: ; CHECK-NEXT: (Low: %w High: (6 + (4 * (trunc i32 %N to i16)) + %w)) ; CHECK-NEXT: Member: {%w,+,4}<%loop> ; CHECK-EMPTY: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll index 38b7389ae9083..021447d53f943 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll @@ -163,7 +163,7 @@ exit: define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_1(ptr %a, ptr %b, ptr %c, i64 %offset, i64 %n) { ; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_1' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: cannot check memory dependencies at runtime +; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: @@ -204,7 +204,7 @@ exit: define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_2(ptr %a, ptr %b, ptr %c, i64 %offset, i64 %n) { ; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_2' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: cannot check memory dependencies at runtime +; CHECK-NEXT: Report: cannot identify array bounds ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll index 
26c571b9cb63a..a15253a901488 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll @@ -72,27 +72,27 @@ define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_st ; CHECK-NEXT: Comparing group ([[GRP4:0x[0-9a-f]+]]): ; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv ; CHECK-NEXT: Against group ([[GRP5:0x[0-9a-f]+]]): -; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset +; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2 ; CHECK-NEXT: Check 1: ; CHECK-NEXT: Comparing group ([[GRP4]]): ; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv ; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]): -; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2 +; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset ; CHECK-NEXT: Check 2: ; CHECK-NEXT: Comparing group ([[GRP5]]): -; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset -; CHECK-NEXT: Against group ([[GRP6]]): ; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2 +; CHECK-NEXT: Against group ([[GRP6]]): +; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group [[GRP4]]: ; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a)) ; CHECK-NEXT: Member: {%a,+,4}<%loop> ; CHECK-NEXT: Group [[GRP5]]: -; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a)) -; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop> -; CHECK-NEXT: Group [[GRP6]]: ; CHECK-NEXT: (Low: %b High: (-1 + (5 * %n) + %b)) ; CHECK-NEXT: Member: {%b,+,5}<%loop> +; CHECK-NEXT: Group [[GRP6]]: +; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a)) +; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable 
stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -265,27 +265,27 @@ define void @dependency_check_and_runtime_checks_needed_gepb_may_wrap(ptr %a, pt ; CHECK-NEXT: Comparing group ([[GRP13:0x[0-9a-f]+]]): ; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv ; CHECK-NEXT: Against group ([[GRP14:0x[0-9a-f]+]]): -; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset +; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2 ; CHECK-NEXT: Check 1: ; CHECK-NEXT: Comparing group ([[GRP13]]): ; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv ; CHECK-NEXT: Against group ([[GRP15:0x[0-9a-f]+]]): -; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2 +; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset ; CHECK-NEXT: Check 2: ; CHECK-NEXT: Comparing group ([[GRP14]]): -; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset -; CHECK-NEXT: Against group ([[GRP15]]): ; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2 +; CHECK-NEXT: Against group ([[GRP15]]): +; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group [[GRP13]]: ; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a)) ; CHECK-NEXT: Member: {%a,+,4}<%loop> ; CHECK-NEXT: Group [[GRP14]]: -; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a)) -; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop> -; CHECK-NEXT: Group [[GRP15]]: ; CHECK-NEXT: (Low: %b High: (-4 + (8 * %n) + %b)) ; CHECK-NEXT: Member: {%b,+,8}<%loop> +; CHECK-NEXT: Group [[GRP15]]: +; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a)) +; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll index b27937862b261..cce6f829d05af 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll @@ -11,20 +11,21 @@ define void @geps_may_wrap(ptr %a, ptr %b, i64 %N) { ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Check 0: ; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]): -; CHECK-NEXT: %gep.iv = getelementptr i32, ptr %a, i64 %iv -; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]): ; CHECK-NEXT: ptr %b +; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]): +; CHECK-NEXT: %gep.iv = getelementptr i32, ptr %a, i64 %iv ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group [[GRP1]]: -; CHECK-NEXT: (Low: %a High: (16 + (12 * (trunc i128 ((zext i64 %N to i128) /u 3) to i16)) + %a)) -; CHECK-NEXT: Member: {%a,+,12}<%loop> -; CHECK-NEXT: Group [[GRP2]]: ; CHECK-NEXT: (Low: %b High: (4 + %b)) ; CHECK-NEXT: Member: %b +; CHECK-NEXT: Group [[GRP2]]: +; CHECK-NEXT: (Low: %a High: (16 + (12 * (trunc i128 ((zext i64 %N to i128) /u 3) to i16)) + %a)) +; CHECK-NEXT: Member: {%a,+,12}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: ; CHECK-NEXT: {0,+,3}<%loop> Added Flags: +; CHECK-NEXT: {%a,+,12}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 754b86ab2fb87..cf4fc143fe8c3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -1123,15 +1123,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; DEFAULT: vector.scevcheck: -; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4 ; DEFAULT-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) ; DEFAULT-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 ; DEFAULT-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 ; DEFAULT-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]] -; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] -; DEFAULT-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]] +; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]] +; DEFAULT-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]] ; DEFAULT-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] -; DEFAULT-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; DEFAULT-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 4 ; DEFAULT-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) ; DEFAULT-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; DEFAULT-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 @@ -1139,12 +1138,13 @@ define void 
@test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] ; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]] ; DEFAULT-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]] +; DEFAULT-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8 ; DEFAULT-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) ; DEFAULT-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0 ; DEFAULT-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1 ; DEFAULT-NEXT: [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT6]] -; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT6]] -; DEFAULT-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[DST]] +; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]] +; DEFAULT-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]] ; DEFAULT-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]] ; DEFAULT-NEXT: [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]] ; DEFAULT-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]] @@ -1337,15 +1337,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; PRED: vector.scevcheck: -; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4 ; PRED-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) ; PRED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 ; PRED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 ; PRED-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]] -; PRED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] -; PRED-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]] +; PRED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]] +; 
PRED-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]] ; PRED-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] -; PRED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; PRED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 4 ; PRED-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) ; PRED-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; PRED-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 @@ -1353,12 +1352,13 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] ; PRED-NEXT: [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]] ; PRED-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]] +; PRED-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8 ; PRED-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]]) ; PRED-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0 ; PRED-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1 ; PRED-NEXT: [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT6]] -; PRED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT6]] -; PRED-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[DST]] +; PRED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]] +; PRED-NEXT: [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]] ; PRED-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]] ; PRED-NEXT: [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]] ; PRED-NEXT: [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 4ea248254f2c6..f7b8758084056 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -500,18 +500,47 @@ define void 
@double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: entry: ; STRIDED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; STRIDED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; STRIDED-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 24, i64 [[TMP1]]) +; STRIDED-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 80, i64 [[TMP1]]) ; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]] ; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; STRIDED: vector.scevcheck: +; STRIDED-NEXT: [[TMP24:%.*]] = shl i64 [[STRIDE:%.*]], 2 +; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], -4 +; STRIDED-NEXT: [[TMP26:%.*]] = icmp slt i64 [[TMP24]], 0 +; STRIDED-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 [[TMP24]] +; STRIDED-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP27]], i64 1023) +; STRIDED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; STRIDED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; STRIDED-NEXT: [[TMP28:%.*]] = sub i64 0, [[MUL_RESULT]] +; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[MUL_RESULT]] +; STRIDED-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP28]] +; STRIDED-NEXT: [[TMP31:%.*]] = icmp ult ptr [[TMP29]], [[P2]] +; STRIDED-NEXT: [[TMP32:%.*]] = icmp ugt ptr [[TMP30]], [[P2]] +; STRIDED-NEXT: [[TMP33:%.*]] = select i1 [[TMP26]], i1 [[TMP32]], i1 [[TMP31]] +; STRIDED-NEXT: [[TMP13:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW]] +; STRIDED-NEXT: [[TMP34:%.*]] = icmp slt i64 [[TMP24]], 0 +; STRIDED-NEXT: [[TMP15:%.*]] = select i1 [[TMP34]], i64 [[TMP25]], i64 [[TMP24]] +; STRIDED-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP15]], i64 1023) +; STRIDED-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 +; STRIDED-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 +; STRIDED-NEXT: [[TMP16:%.*]] = sub i64 0, 
[[MUL_RESULT2]] +; STRIDED-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT2]] +; STRIDED-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP16]] +; STRIDED-NEXT: [[TMP37:%.*]] = icmp ult ptr [[TMP35]], [[P]] +; STRIDED-NEXT: [[TMP38:%.*]] = icmp ugt ptr [[TMP36]], [[P]] +; STRIDED-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i1 [[TMP38]], i1 [[TMP37]] +; STRIDED-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[MUL_OVERFLOW3]] +; STRIDED-NEXT: [[TMP23:%.*]] = or i1 [[TMP13]], [[TMP40]] +; STRIDED-NEXT: br i1 [[TMP23]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK1:%.*]] ; STRIDED: vector.memcheck: -; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE:%.*]], 4092 -; STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP3]] +; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[STRIDE]], 4092 +; STRIDED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP3]] ; STRIDED-NEXT: [[TMP4:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]] ; STRIDED-NEXT: [[UMIN:%.*]] = select i1 [[TMP4]], ptr [[P2]], ptr [[SCEVGEP]] ; STRIDED-NEXT: [[TMP5:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]] ; STRIDED-NEXT: [[UMAX:%.*]] = select i1 [[TMP5]], ptr [[P2]], ptr [[SCEVGEP]] ; STRIDED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4 -; STRIDED-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]] +; STRIDED-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP3]] ; STRIDED-NEXT: [[TMP6:%.*]] = icmp ult ptr [[P]], [[SCEVGEP2]] ; STRIDED-NEXT: [[UMIN3:%.*]] = select i1 [[TMP6]], ptr [[P]], ptr [[SCEVGEP2]] ; STRIDED-NEXT: [[TMP7:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP2]] @@ -554,7 +583,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; STRIDED: scalar.ph: -; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, 
[[VECTOR_MEMCHECK]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ] ; STRIDED-NEXT: br label [[LOOP:%.*]] ; STRIDED: loop: ; STRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index cf66264486095..b885d85a96800 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -114,8 +114,18 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 18 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG]], i64 16 +; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[ARG1]]) +; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]] +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP20:%.*]] = shl i64 
[[ARG1]], 4 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 16 @@ -167,7 +177,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -266,19 +276,26 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 52 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 64 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; CHECK: [[VECTOR_SCEVCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[N]], 3 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 24 ; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = sub i64 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp ult ptr [[TMP32]], [[A]] +; CHECK-NEXT: [[TMP44:%.*]] = or i1 [[TMP41]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = 
getelementptr i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) +; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 +; CHECK-NEXT: [[TMP55:%.*]] = sub i64 0, [[MUL_RESULT2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 28 +; CHECK-NEXT: [[TMP57:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8 ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 @@ -286,7 +303,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]] ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]] -; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 20 +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 12 ; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1 @@ -302,7 +319,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]] ; CHECK-NEXT: [[TMP18:%.*]] = or i1 
[[TMP17]], [[MUL_OVERFLOW12]] -; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 12 +; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 20 ; CHECK-NEXT: [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1 @@ -310,7 +327,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]] ; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]] -; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 24 ; CHECK-NEXT: [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1 @@ -318,7 +335,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]] ; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]] -; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 28 ; CHECK-NEXT: [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1 @@ -326,37 +343,47 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, 
ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]] ; CHECK-NEXT: [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]] +; CHECK-NEXT: [[SCEVGEP31:%.*]] = getelementptr i8, ptr [[B]], i64 4 +; CHECK-NEXT: [[MUL29:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]]) +; CHECK-NEXT: [[MUL_RESULT30:%.*]] = extractvalue { i64, i1 } [[MUL29]], 0 +; CHECK-NEXT: [[MUL_OVERFLOW31:%.*]] = extractvalue { i64, i1 } [[MUL29]], 1 +; CHECK-NEXT: [[TMP67:%.*]] = sub i64 0, [[MUL_RESULT30]] +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SCEVGEP31]], i64 [[MUL_RESULT30]] +; CHECK-NEXT: [[TMP69:%.*]] = icmp ult ptr [[TMP68]], [[SCEVGEP31]] +; CHECK-NEXT: [[TMP70:%.*]] = or i1 [[TMP69]], [[MUL_OVERFLOW31]] ; CHECK-NEXT: [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]]) ; CHECK-NEXT: [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1 ; CHECK-NEXT: [[TMP31:%.*]] = sub i64 0, [[MUL_RESULT26]] -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT26]] -; CHECK-NEXT: [[TMP33:%.*]] = icmp ult ptr [[TMP32]], [[A]] +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[B]], i64 [[MUL_RESULT26]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp ult ptr [[TMP71]], [[B]] ; CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]] +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP44]], [[TMP57]] ; CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP6]], [[TMP10]] ; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]] ; CHECK-NEXT: [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]] ; CHECK-NEXT: [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]] ; CHECK-NEXT: [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]] ; CHECK-NEXT: [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]] -; CHECK-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP34]] -; CHECK-NEXT: br i1 [[TMP41]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: 
[[TMP72:%.*]] = or i1 [[TMP40]], [[TMP70]] +; CHECK-NEXT: [[TMP73:%.*]] = or i1 [[TMP72]], [[TMP34]] +; CHECK-NEXT: br i1 [[TMP73]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP42:%.*]] = lshr i64 [[N]], 3 ; CHECK-NEXT: [[TMP43:%.*]] = shl i64 [[TMP42]], 5 -; CHECK-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], 32 -; CHECK-NEXT: [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP44]] ; CHECK-NEXT: [[TMP45:%.*]] = add nuw nsw i64 [[TMP43]], 4 ; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP45]] +; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[TMP43]], 32 +; CHECK-NEXT: [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP53]] ; CHECK-NEXT: [[TMP46:%.*]] = shl i64 [[TMP42]], 4 ; CHECK-NEXT: [[TMP47:%.*]] = add nuw nsw i64 [[TMP46]], 8 ; CHECK-NEXT: [[SCEVGEP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP47]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[BOUND031:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]] +; CHECK-NEXT: [[BOUND2:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND2]] ; CHECK-NEXT: [[BOUND132:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]] -; CHECK-NEXT: [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND031]], [[BOUND132]] +; CHECK-NEXT: [[BOUND133:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]] +; CHECK-NEXT: [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND132]], [[BOUND133]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT33]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -378,7 +405,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; 
CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]] ; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> @@ -390,7 +417,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -431,7 +458,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: store i32 0, ptr [[GEP_A_7]], align 4 ; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[EXIT]]: ; 
CHECK-NEXT: ret void ; @@ -498,7 +525,7 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -512,7 +539,7 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -620,7 +647,7 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -634,7 +661,7 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], 
label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -670,10 +697,12 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" } ; CHECK: [[META6]] = !{[[META7:![0-9]+]]} ; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} ; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]} +; CHECK: [[META9]] = !{[[META10:![0-9]+]]} +; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]} ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} -; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]} ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]} ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]} +; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]], [[META2]]} +; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll index c076c0e849fa9..fc5795708c7d8 100644 --- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll @@ -352,3 +352,37 @@ define void @diamondWithConstantVector(ptr %ptr) { store i32 %orB1, ptr %gepB1 ret void } + +; Check that we don't get def-after-use errors due to wrong placement +; of new vector instructions. 
+define void @vecInstrsPlacement(ptr %ptr0) { +; CHECK-LABEL: define void @vecInstrsPlacement( +; CHECK-SAME: ptr [[PTR0:%.*]]) { +; CHECK-NEXT: [[VECL2:%.*]] = load <2 x double>, ptr [[PTR0]], align 8 +; CHECK-NEXT: [[VECL:%.*]] = load <2 x double>, ptr [[PTR0]], align 8 +; CHECK-NEXT: [[VEC2:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]] +; CHECK-NEXT: [[VEC:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]] +; CHECK-NEXT: [[VEC5:%.*]] = fadd <2 x double> [[VEC]], [[VEC2]] +; CHECK-NEXT: store <2 x double> [[VEC5]], ptr [[PTR0]], align 8 +; CHECK-NEXT: ret void +; + %ptr1 = getelementptr inbounds double, ptr %ptr0, i64 1 + %ldA_0 = load double, ptr %ptr0 + %ldA_1 = load double, ptr %ptr1 + + %ldB_0 = load double, ptr %ptr0 + %ldB_1 = load double, ptr %ptr1 + + %mul0 = fmul double %ldA_0, %ldB_0 + %mul1 = fmul double %ldA_1, %ldB_1 + + %mul2 = fmul double %ldA_0, %ldB_0 + %mul3 = fmul double %ldA_1, %ldB_1 + + %add0 = fadd double %mul0, %mul2 + %add1 = fadd double %mul1, %mul3 + + store double %add0, ptr %ptr0 + store double %add1, ptr %ptr1 + ret void +} diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll index 7741d8c64c8fc..5b9177ba4b3bf 100644 --- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll +++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll @@ -7,17 +7,17 @@ define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) { ; CHECK-LABEL: define void @check_dag_scheduler_update( ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) { ; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P]], align 4 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32 +; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 34 ; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33 ; CHECK-NEXT: [[I4:%.*]] = load i32, ptr 
[[ARRAYIDX11]], align 4 -; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 34 +; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 32 ; CHECK-NEXT: [[I6:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4 ; CHECK-NEXT: [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0 -; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I2]], i32 1 +; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I6]], i32 1 ; CHECK-NEXT: [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[I4]], i32 2 -; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I6]], i32 3 -; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4 +; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I2]], i32 3 ; CHECK-NEXT: [[VEC:%.*]] = add nsw <4 x i32> [[PACK3]], [[VECL]] ; CHECK-NEXT: store <4 x i32> [[VEC]], ptr [[P1]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp index 5b033f0edcb02..c8fee1c24dbcb 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp @@ -53,37 +53,41 @@ define void @foo(i8 %v0, i8 %v1, i8 %v2, i8 %v3, <2 x i8> %vec) { auto *VAdd0 = cast(&*It++); [[maybe_unused]] auto *Ret = cast(&*It++); - sandboxir::InstrMaps IMaps(Ctx); - // Check with empty IMaps. - EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr); - EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr); - EXPECT_FALSE(IMaps.getOrigLane(Add0, Add0)); - // Check with 1 match. 
- IMaps.registerVector({Add0, Add1}, VAdd0); - EXPECT_EQ(IMaps.getVectorForOrig(Add0), VAdd0); - EXPECT_EQ(IMaps.getVectorForOrig(Add1), VAdd0); - EXPECT_FALSE(IMaps.getOrigLane(VAdd0, VAdd0)); // Bad Orig value - EXPECT_FALSE(IMaps.getOrigLane(Add0, Add0)); // Bad Vector value - EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add0), 0U); - EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add1), 1U); - // Check when the same vector maps to different original values (which is - // common for vector constants). - IMaps.registerVector({Add2, Add3}, VAdd0); - EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add2), 0U); - EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add3), 1U); - // Check when we register for a second time. + sandboxir::InstrMaps IMaps; + { + // Check with empty IMaps. + sandboxir::Action A(nullptr, {Add0}, {}, 0); + EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr); + EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr); + EXPECT_FALSE(IMaps.getOrigLane(&A, Add0)); + } + { + // Check with 1 match. + sandboxir::Action A(nullptr, {Add0, Add1}, {}, 0); + sandboxir::Action OtherA(nullptr, {}, {}, 0); + IMaps.registerVector({Add0, Add1}, &A); + EXPECT_EQ(IMaps.getVectorForOrig(Add0), &A); + EXPECT_EQ(IMaps.getVectorForOrig(Add1), &A); + EXPECT_FALSE(IMaps.getOrigLane(&A, VAdd0)); // Bad Orig value + EXPECT_FALSE(IMaps.getOrigLane(&OtherA, Add0)); // Bad Vector value + EXPECT_EQ(*IMaps.getOrigLane(&A, Add0), 0U); + EXPECT_EQ(*IMaps.getOrigLane(&A, Add1), 1U); + } + { + // Check when the same vector maps to different original values (which is + // common for vector constants). + sandboxir::Action A(nullptr, {Add2, Add3}, {}, 0); + IMaps.registerVector({Add2, Add3}, &A); + EXPECT_EQ(*IMaps.getOrigLane(&A, Add2), 0U); + EXPECT_EQ(*IMaps.getOrigLane(&A, Add3), 1U); + } + { + // Check when we register for a second time. 
+ sandboxir::Action A(nullptr, {Add2, Add3}, {}, 0); #ifndef NDEBUG - EXPECT_DEATH(IMaps.registerVector({Add1, Add0}, VAdd0), ".*exists.*"); + EXPECT_DEATH(IMaps.registerVector({Add1, Add0}, &A), ".*exists.*"); #endif // NDEBUG - // Check callbacks: erase original instr. - Add0->eraseFromParent(); - EXPECT_FALSE(IMaps.getOrigLane(VAdd0, Add0)); - EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add1), 1U); - EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr); - // Check callbacks: erase vector instr. - VAdd0->eraseFromParent(); - EXPECT_FALSE(IMaps.getOrigLane(VAdd0, Add1)); - EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr); + } } TEST_F(InstrMapsTest, VectorLanes) { @@ -91,7 +95,6 @@ TEST_F(InstrMapsTest, VectorLanes) { define void @foo(<2 x i8> %v0, <2 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3) { %vadd0 = add <2 x i8> %v0, %v1 %vadd1 = add <2 x i8> %v0, %v1 - %vadd2 = add <4 x i8> %v2, %v3 ret void } )IR"); @@ -103,12 +106,14 @@ define void @foo(<2 x i8> %v0, <2 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3) { auto *VAdd0 = cast(&*It++); auto *VAdd1 = cast(&*It++); - auto *VAdd2 = cast(&*It++); - sandboxir::InstrMaps IMaps(Ctx); + sandboxir::InstrMaps IMaps; - // Check that the vector lanes are calculated correctly. - IMaps.registerVector({VAdd0, VAdd1}, VAdd2); - EXPECT_EQ(*IMaps.getOrigLane(VAdd2, VAdd0), 0U); - EXPECT_EQ(*IMaps.getOrigLane(VAdd2, VAdd1), 2U); + { + // Check that the vector lanes are calculated correctly. 
+ sandboxir::Action A(nullptr, {VAdd0, VAdd1}, {}, 0); + IMaps.registerVector({VAdd0, VAdd1}, &A); + EXPECT_EQ(*IMaps.getOrigLane(&A, VAdd0), 0U); + EXPECT_EQ(*IMaps.getOrigLane(&A, VAdd1), 2U); + } } diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 15f8166b705fc..99519d17d0e8e 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -133,7 +133,7 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float auto *Sel0 = cast(&*It++); auto *Sel1 = cast(&*It++); - llvm::sandboxir::InstrMaps IMaps(Ctx); + llvm::sandboxir::InstrMaps IMaps; sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps); const auto &Result = Legality.canVectorize({St0, St1}, /*SkipScheduling=*/true); @@ -285,7 +285,7 @@ define void @foo(ptr %ptr) { auto *St0 = cast(&*It++); auto *St1 = cast(&*It++); - llvm::sandboxir::InstrMaps IMaps(Ctx); + llvm::sandboxir::InstrMaps IMaps; sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps); { // Can vectorize St0,St1. @@ -321,7 +321,7 @@ define void @foo() { }; sandboxir::Context Ctx(C); - llvm::sandboxir::InstrMaps IMaps(Ctx); + llvm::sandboxir::InstrMaps IMaps; sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps); EXPECT_TRUE( Matches(Legality.createLegalityResult(), "Widen")); @@ -368,32 +368,34 @@ define void @foo(ptr %ptr) { sandboxir::CollectDescr::DescrVecT Descrs; using EEDescr = sandboxir::CollectDescr::ExtractElementDescr; - + SmallVector Bndl({VLd}); + SmallVector UB; + sandboxir::Action VLdA(nullptr, Bndl, UB, 0); { // Check single input, no shuffle. 
- Descrs.push_back(EEDescr(VLd, 0)); - Descrs.push_back(EEDescr(VLd, 1)); + Descrs.push_back(EEDescr(&VLdA, 0)); + Descrs.push_back(EEDescr(&VLdA, 1)); sandboxir::CollectDescr CD(std::move(Descrs)); EXPECT_TRUE(CD.getSingleInput()); - EXPECT_EQ(CD.getSingleInput()->first, VLd); + EXPECT_EQ(CD.getSingleInput()->first, &VLdA); EXPECT_THAT(CD.getSingleInput()->second, testing::ElementsAre(0, 1)); EXPECT_TRUE(CD.hasVectorInputs()); } { // Check single input, shuffle. - Descrs.push_back(EEDescr(VLd, 1)); - Descrs.push_back(EEDescr(VLd, 0)); + Descrs.push_back(EEDescr(&VLdA, 1)); + Descrs.push_back(EEDescr(&VLdA, 0)); sandboxir::CollectDescr CD(std::move(Descrs)); EXPECT_TRUE(CD.getSingleInput()); - EXPECT_EQ(CD.getSingleInput()->first, VLd); + EXPECT_EQ(CD.getSingleInput()->first, &VLdA); EXPECT_THAT(CD.getSingleInput()->second, testing::ElementsAre(1, 0)); EXPECT_TRUE(CD.hasVectorInputs()); } { // Check multiple inputs. Descrs.push_back(EEDescr(Ld0)); - Descrs.push_back(EEDescr(VLd, 0)); - Descrs.push_back(EEDescr(VLd, 1)); + Descrs.push_back(EEDescr(&VLdA, 0)); + Descrs.push_back(EEDescr(&VLdA, 1)); sandboxir::CollectDescr CD(std::move(Descrs)); EXPECT_FALSE(CD.getSingleInput()); EXPECT_TRUE(CD.hasVectorInputs()); diff --git a/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn index ff4f558ca2fcf..fdd631bc40398 100644 --- a/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn @@ -93,6 +93,7 @@ static_library("CodeGen") { "CodeGenTypes.cpp", "ConstantInitBuilder.cpp", "CoverageMappingGen.cpp", + "HLSLBufferLayoutBuilder.cpp", "ItaniumCXXABI.cpp", "LinkInModulesPass.cpp", "MacroPPCallbacks.cpp", diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 05385ba491525..8f9a7e388ebc7 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ 
-11136,6 +11136,7 @@ td_library( "include/mlir/Dialect/Linalg/IR/LinalgEnums.td", "include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td", "include/mlir/Dialect/Linalg/IR/LinalgOps.td", + "include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td", ], includes = ["include"], deps = [