diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 62844f7e6a2fa..a433a66e0b7a6 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2275,9 +2275,13 @@ are listed below. .. option:: -fwhole-program-vtables + In LTO mode: Enable whole-program vtable optimizations, such as single-implementation devirtualization and virtual constant propagation, for classes with - :doc:`hidden LTO visibility `. Requires ``-flto``. + :doc:`hidden LTO visibility `. + In non-LTO mode: + Enables speculative devirtualization only without other features. + Doesn't require ``-flto`` or visibility. .. option:: -f[no]split-lto-unit @@ -5170,7 +5174,7 @@ Execute ``clang-cl /?`` to see a list of supported options: -fstandalone-debug Emit full debug info for all types used by the program -fstrict-aliasing Enable optimizations based on strict aliasing rules -fsyntax-only Run the preprocessor, parser and semantic analysis stages - -fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto + -fwhole-program-vtables Enables whole-program vtable optimization. -gcodeview-ghash Emit type record hashes in a .debug$H section -gcodeview Generate CodeView debug information -gline-directives-only Emit debug line info directives only diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7e0a3cf5591ce..f6963aadfbc69 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -902,6 +902,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // non-integrated assemblers don't recognize .cgprofile section. PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; + PTO.WholeProgramDevirt = CodeGenOpts.WholeProgramVTables; LoopAnalysisManager LAM; FunctionAnalysisManager FAM; diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index 2897ccdf88660..cfb78d623c7ec 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1359,7 +1359,8 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, // Emit type metadata on vtables with LTO or IR instrumentation. // In IR instrumentation, the type metadata is used to find out vtable // definitions (for type profiling) among all global variables. - if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr()) + if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() && + !getCodeGenOpts().WholeProgramVTables) return; CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType()); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 8556bcadf0915..cc337ad334f65 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7847,8 +7847,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, IsDeviceOffloadAction ? D.getLTOMode() : D.getOffloadLTOMode(); auto OtherIsUsingLTO = OtherLTOMode != LTOK_None; - if ((!IsUsingLTO && !OtherIsUsingLTO) || - (IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full))) + if (!IsUsingLTO && !OtherIsUsingLTO && !UnifiedLTO) { + if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) + if (!A->getOption().matches(options::OPT_O0)) + CmdArgs.push_back("-fwhole-program-vtables"); + } else if ((!IsUsingLTO && !OtherIsUsingLTO) || + (IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full))) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" << ((IsPS4 && !UnifiedLTO) ? "-flto=full" : "-flto"); diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp index 1cb2fed8db3e6..faf0950cae936 100644 --- a/clang/test/CodeGenCXX/type-metadata.cpp +++ b/clang/test/CodeGenCXX/type-metadata.cpp @@ -14,6 +14,9 @@ // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s +// Test for the whole-program-vtables feature in nonlto mode: +// RUN: %clang_cc1 -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO %s + // Tests for cfi + whole-program-vtables: // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM-HIDDEN --check-prefix=ITANIUM-COMMON-MD --check-prefix=TC-ITANIUM --check-prefix=ITANIUM-NO-RV-MD %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TC-MS %s @@ -178,6 +181,7 @@ void af(A *a) { // TT-ITANIUM-HIDDEN: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-ITANIUM-DEFAULT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"?AUA@@") + // TT-ITANIUM-DEFAULT-NOLTO: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") // TC-ITANIUM: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-ITANIUM-RV: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A") // TC-MS: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -212,6 +216,7 @@ void df1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@@") + // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@") @@ -224,6 +229,7 @@ void dg1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUB@@") + // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTS1B") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTS1B") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUB@@") @@ -236,6 +242,7 @@ void dh1(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]]) + // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]]) // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 16, metadata ![[DTYPE]]) // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE]]) // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE:[0-9]+]]) @@ -297,6 +304,7 @@ void f(D *d) { // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@test2@@") + // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE") // TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTSN5test21DE") // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTSN5test21DE") // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@test2@@") diff --git a/clang/test/Driver/whole-program-vtables.c b/clang/test/Driver/whole-program-vtables.c index 7f7c45e77f6f5..e0538b584f456 100644 --- a/clang/test/Driver/whole-program-vtables.c +++ b/clang/test/Driver/whole-program-vtables.c @@ -1,15 +1,11 @@ -// RUN: not %clang -target x86_64-unknown-linux -fwhole-program-vtables -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s -// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -### -- %s 2>&1 | FileCheck --check-prefix=NO-LTO %s -// NO-LTO: invalid argument '-fwhole-program-vtables' only allowed with '-flto' +// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -O1 -### %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s +// RUN: %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -O1 -### -- %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s +// WPD-NO-LTO: "-fwhole-program-vtables" // RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO %s // RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO %s // LTO: "-fwhole-program-vtables" -/// -funified-lto does not imply -flto, so we still get an error that fwhole-program-vtables has no effect without -flto -// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -funified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s -// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -fno-unified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s - // RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -fno-whole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s // RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -fno-whole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s // LTO-DISABLE-NOT: "-fwhole-program-vtables" diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 51ccaa53447d7..ee08b11ce2c09 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -98,6 +98,12 @@ class PipelineTuningOptions { // analyses after various module->function or cgscc->function adaptors in the // default pipelines. bool EagerlyInvalidateAnalyses; + + /// Tuning option to enable/disable whole program devirtualization. + /// Its default value is false. + /// This is controlled by the `-whole-program-vtables` flag. + /// Used only in non-LTO mode. + bool WholeProgramDevirt; }; /// This class provides access to building LLVM's passes. diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 7a03405b4f462..fff27fae162a0 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -226,11 +226,15 @@ struct WholeProgramDevirtPass : public PassInfoMixin { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; bool UseCommandLine = false; + const bool InLTOMode; WholeProgramDevirtPass() - : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {} + : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true), + InLTOMode(true) {} WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ExportSummary(ExportSummary), ImportSummary(ImportSummary) { + const ModuleSummaryIndex *ImportSummary, + bool InLTOMode = true) + : ExportSummary(ExportSummary), ImportSummary(ImportSummary), + InLTOMode(InLTOMode) { assert(!(ExportSummary && ImportSummary)); } LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index a99146d5eaa34..4b10c63fd4e02 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -321,6 +321,7 @@ PipelineTuningOptions::PipelineTuningOptions() { MergeFunctions = EnableMergeFunctions; InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; + WholeProgramDevirt = false; } namespace llvm { @@ -1629,6 +1630,23 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (!LTOPreLink) MPM.addPass(RelLookupTableConverterPass()); + if (PTO.WholeProgramDevirt && LTOPhase == ThinOrFullLTOPhase::None) { + MPM.addPass(WholeProgramDevirtPass(/*ExportSummary*/ nullptr, + /*ImportSummary*/ nullptr, + /*InLTOMode=*/false)); + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, + lowertypetests::DropTestKind::Assume)); + if (EnableModuleInliner) { + MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level), + UseInlineAdvisor, + ThinOrFullLTOPhase::None)); + } else { + MPM.addPass(ModuleInlinerWrapperPass( + getInlineParamsFromOptLevel(Level), + /* MandatoryFirst */ true, + InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner})); + } + } return MPM; } diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 30e1dc7167a39..0fae02a3e89e1 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -24,7 +24,8 @@ // returns 0, or a single vtable's function returns 1, replace each virtual // call with a comparison of the vptr against that vtable's address. // -// This pass is intended to be used during the regular and thin LTO pipelines: +// This pass is intended to be used during the regular/thinLTO and non-LTO +// pipelines: // // During regular LTO, the pass determines the best optimization for each // virtual call and applies the resolutions directly to virtual calls that are @@ -48,6 +49,13 @@ // is supported. // - Import phase: (same as with hybrid case above). // +// In non-LTO mode: +// - The pass apply speculative devirtualization without requiring any type of +// visibility. +// - Skips other features like virtual constant propagation, uniform return +// value +// optimization, unique return value optimization, branch funnels to minimize +// the drawbacks of wrong speculation. //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -60,7 +68,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" @@ -211,6 +221,14 @@ static cl::opt DevirtCheckMode( clEnumValN(WPDCheckMode::Fallback, "fallback", "Fallback to indirect when incorrect"))); +// This pass runs mainly in lto mode, it can run in nonlto mode for limited +// features. For testing, provide a way to tell that we are running in nonlto +// mode. +static cl::opt + TestNoLTOMode("wholeprogramdevirt-nolto", cl::Hidden, + cl::desc("Run whole program devirt outside LTO mode."), + cl::init(false)); + namespace { struct PatternList { std::vector Patterns; @@ -794,10 +812,28 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, return FAM.getResult(F); }; if (UseCommandLine) { + if (TestNoLTOMode) + // we are outside LTO mode. enable speculative devirtualization: + DevirtCheckMode = WPDCheckMode::Fallback; if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } + std::optional Index; + // Force Fallback mode as it's safe in case it's non-LTO mode where + // we don't have hidden visibility. + if (!InLTOMode) { + DevirtCheckMode = WPDCheckMode::Fallback; + // In non-LTO mode, we don't have an ExportSummary, so we + // build the ExportSummary from the module. + assert(!ExportSummary && + "ExportSummary is expected to be empty in non-LTO mode"); + if (DevirtCheckMode == WPDCheckMode::Fallback && !ExportSummary) { + ProfileSummaryInfo PSI(M); + Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI)); + ExportSummary = Index.has_value() ? &Index.value() : nullptr; + } + } if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary, ImportSummary) .run()) @@ -1091,10 +1127,12 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; - // We cannot perform whole program devirtualization analysis on a vtable - // with public LTO visibility. - if (TM.Bits->GV->getVCallVisibility() == - GlobalObject::VCallVisibilityPublic) + // If speculative devirtualization is NOT enabled, it's not safe to perform + // whole program devirtualization + // analysis on a vtable with public LTO visibility. + if (DevirtCheckMode != WPDCheckMode::Fallback && + TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) return false; Function *Fn = nullptr; @@ -1112,6 +1150,11 @@ bool DevirtModule::tryFindVirtualCallTargets( // calls to pure virtuals are UB. if (Fn->getName() == "__cxa_pure_virtual") continue; + // In Most cases empty functions will be overridden by the + // implementation of the derived class, so we can skip them. + if (DevirtCheckMode == WPDCheckMode::Fallback && + Fn->getReturnType()->isVoidTy() && Fn->getInstructionCount() <= 1) + continue; // We can disregard unreachable functions as possible call targets, as // unreachable functions shouldn't be called. @@ -1333,10 +1376,11 @@ bool DevirtModule::trySingleImplDevirt( if (!IsExported) return false; - // If the only implementation has local linkage, we must promote to external - // to make it visible to thin LTO objects. We can only get here during the - // ThinLTO export phase. - if (TheFn->hasLocalLinkage()) { + // In case of non-speculative devirtualization, If the only implementation has + // local linkage, we must promote to external + // to make it visible to thin LTO objects. We can only get here during the + // ThinLTO export phase. + if (DevirtCheckMode != WPDCheckMode::Fallback && TheFn->hasLocalLinkage()) { std::string NewName = (TheFn->getName() + ".llvm.merged").str(); // Since we are renaming the function, any comdats with the same name must @@ -2315,6 +2359,11 @@ bool DevirtModule::run() { Function *TypeTestFunc = Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test); + // If we are applying speculative devirtualization, we can work on the public + // type test intrinsics. + if (!TypeTestFunc && DevirtCheckMode == WPDCheckMode::Fallback) + TypeTestFunc = + Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); Function *TypeCheckedLoadFunc = Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load); Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists( @@ -2437,8 +2486,12 @@ bool DevirtModule::run() { .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos, S.first.ByteOffset, ExportSummary)) { - - if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { + bool SingleImplDevirt = + trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res); + // In Speculative devirt mode, we skip virtual constant propagation + // and branch funneling to minimize the drawback if we got wrong + // speculation during devirtualization. + if (!SingleImplDevirt && DevirtCheckMode != WPDCheckMode::Fallback) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-nolto.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-nolto.ll new file mode 100644 index 0000000000000..82d0be85ffc71 --- /dev/null +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-nolto.ll @@ -0,0 +1,130 @@ +; -stats requires asserts +; REQUIRES: asserts + +; Check that we can still devirtualize outside LTO mode +; Check that we skip devirtualization for empty functions outside LTO mode + +; RUN: opt -S -passes=wholeprogramdevirt -wholeprogramdevirt-nolto -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:13:0: devirtualized vf +; CHECK-NOT: devirtualized + +@vt1 = constant [1 x ptr] [ptr @vf], !type !8 +@vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12 + +define i1 @vf(ptr %this) #0 !dbg !7 { + ret i1 true +} + +; This should NOT be devietualized because during non-lto empty functions +; are skipped. +define void @vf_empty(ptr %this) !dbg !11 { + ret void +} + +; CHECK: define void @call +define void @call(ptr %obj) #1 !dbg !5 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !6 + ret void +} + + +; CHECK: define void @call1 +define void @call1(ptr %obj) #1 !dbg !9 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable, align 8 + ; CHECK: call i1 %fptr + %1 = call i1 %fptr(ptr %obj), !dbg !10 + ret void +} +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) +], align 4, !type !15 + +; CHECK: define void @call2 +define void @call2(ptr %obj) #1 !dbg !13 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !14 + ret void +} + +@_ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [ + i32 0, ; offset to top + i32 0, ; rtti + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; vf_emptyunc offset +] }, align 4, !type !18 + +; CHECK: define void @call3 +define void @call3(ptr %obj) #1 !dbg !16 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8) + ; CHECK: if.true.direct_targ: + ; CHECK: call i1 @vf( + ; CHECK: if.false.orig_indirect: + ; CHECK: call i1 %fptr( + call i1 %fptr(ptr %obj), !dbg !17 + ret void +} + + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "devirt-single.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!"clang version 4.0.0 (trunk 278098)"} +!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!6 = !DILocation(line: 30, column: 32, scope: !5) +!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!8 = !{i32 0, !"typeid"} + +!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!10 = !DILocation(line: 35, column: 32, scope: !9) +!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!12 = !{i32 0, !"typeid1"} + +!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!14 = !DILocation(line: 41, column: 32, scope: !13) +!15 = !{i32 0, !"typeid2"} + +!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!17 = !DILocation(line: 51, column: 32, scope: !16) +!18 = !{i32 0, !"typeid3"} + + + +; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets +; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll index d8f5c912e9a50..f2cf45bb21808 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll @@ -11,6 +11,9 @@ ; Check wildcard ; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt -wholeprogramdevirt-skip=vf?i1 %s 2>&1 | FileCheck %s --check-prefix=SKIP +; Check that no stats are reported when we enable devirtualization out of LTO mode. +; RUN: opt -S -passes=wholeprogramdevirt -wholeprogramdevirt-nolto -stats %s 2>&1 | FileCheck %s --check-prefix=CHECK-WPD-NOLTO + target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" @@ -225,3 +228,7 @@ declare ptr @llvm.load.relative.i32(ptr, i32) ; CHECK: 2 wholeprogramdevirt - Number of unique return value optimizations ; CHECK: 2 wholeprogramdevirt - Number of virtual constant propagations ; CHECK: 2 wholeprogramdevirt - Number of 1 bit virtual constant propagations + +; CHECK-WPD-NOLTO-NOT: 0 wholeprogramdevirt - Number of unique return value optimizations +; CHECK-WPD-NOLTO-NOT: 0 wholeprogramdevirt - Number of virtual constant propagations +; CHECK-WPD-NOLTO-NOT: 0 wholeprogramdevirt - Number of 1 bit virtual constant propagations