llvm · hassnaaHamdi · Jun 20, 2025 · Jul 12, 2025 · teresajohnson · Jul 14, 2025
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
@@ -2275,9 +2275,13 @@ are listed below.
 
 .. option:: -fwhole-program-vtables
 
+   In LTO mode:
    Enable whole-program vtable optimizations, such as single-implementation
    devirtualization and virtual constant propagation, for classes with
-   :doc:`hidden LTO visibility <LTOVisibility>`. Requires ``-flto``.
+   :doc:`hidden LTO visibility <LTOVisibility>`.
+   In non-LTO mode:
+   Enables speculative devirtualization only without other features.
+   Doesn't require ``-flto`` or visibility.
 
 .. option:: -f[no]split-lto-unit
 
@@ -5170,7 +5174,7 @@ Execute ``clang-cl /?`` to see a list of supported options:
       -fstandalone-debug      Emit full debug info for all types used by the program
       -fstrict-aliasing	      Enable optimizations based on strict aliasing rules
       -fsyntax-only           Run the preprocessor, parser and semantic analysis stages
-      -fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto
+      -fwhole-program-vtables Enables whole-program vtable optimization.
       -gcodeview-ghash        Emit type record hashes in a .debug$H section
       -gcodeview              Generate CodeView debug information
       -gline-directives-only  Emit debug line info directives only

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
@@ -902,6 +902,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   // non-integrated assemblers don't recognize .cgprofile section.
   PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
   PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO;
+  PTO.WholeProgramDevirt = CodeGenOpts.WholeProgramVTables;
 
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;

diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
@@ -1359,7 +1359,8 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
   // Emit type metadata on vtables with LTO or IR instrumentation.
   // In IR instrumentation, the type metadata is used to find out vtable
   // definitions (for type profiling) among all global variables.
-  if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr())
+  if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() &&
+      !getCodeGenOpts().WholeProgramVTables)
     return;
 
   CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType());

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7847,8 +7847,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
         IsDeviceOffloadAction ? D.getLTOMode() : D.getOffloadLTOMode();
     auto OtherIsUsingLTO = OtherLTOMode != LTOK_None;
 
-    if ((!IsUsingLTO && !OtherIsUsingLTO) ||
-        (IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full)))
+    if (!IsUsingLTO && !OtherIsUsingLTO && !UnifiedLTO) {
+      if (const Arg *A = Args.getLastArg(options::OPT_O_Group))
+        if (!A->getOption().matches(options::OPT_O0))
+          CmdArgs.push_back("-fwhole-program-vtables");
+    } else if ((!IsUsingLTO && !OtherIsUsingLTO) ||
+               (IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full)))
       D.Diag(diag::err_drv_argument_only_allowed_with)
           << "-fwhole-program-vtables"
           << ((IsPS4 && !UnifiedLTO) ? "-flto=full" : "-flto");

diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp
@@ -14,6 +14,9 @@
 // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s
 // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s
 
+// Test for the whole-program-vtables feature in nonlto mode:
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO  %s
+
 // Tests for cfi + whole-program-vtables:
 // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM-HIDDEN --check-prefix=ITANIUM-COMMON-MD --check-prefix=TC-ITANIUM --check-prefix=ITANIUM-NO-RV-MD %s
 // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TC-MS %s
@@ -178,6 +181,7 @@ void af(A *a) {
   // TT-ITANIUM-HIDDEN: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
   // TT-ITANIUM-DEFAULT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
   // TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"?AUA@@")
+  // TT-ITANIUM-DEFAULT-NOLTO: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
   // TC-ITANIUM:    [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A")
   // TC-ITANIUM-RV: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A")
   // TC-MS: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@")
@@ -212,6 +216,7 @@ void df1(D *d) {
   // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
   // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
   // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@@")
+  // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
   // TC-ITANIUM:    {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]])
   // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]])
   // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@")
@@ -224,6 +229,7 @@ void dg1(D *d) {
   // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B")
   // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B")
   // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUB@@")
+  // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B")
   // TC-ITANIUM:    {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTS1B")
   // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTS1B")
   // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUB@@")
@@ -236,6 +242,7 @@ void dh1(D *d) {
   // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]])
   // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]])
   // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
+  // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]])
   // TC-ITANIUM:    {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 16, metadata ![[DTYPE]])
   // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE]])
   // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE:[0-9]+]])
@@ -297,6 +304,7 @@ void f(D *d) {
   // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
   // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
   // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@test2@@")
+  // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
   // TC-ITANIUM:    {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTSN5test21DE")
   // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTSN5test21DE")
   // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@test2@@")

diff --git a/clang/test/Driver/whole-program-vtables.c b/clang/test/Driver/whole-program-vtables.c
@@ -1,15 +1,11 @@
-// RUN: not %clang -target x86_64-unknown-linux -fwhole-program-vtables -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
-// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -### -- %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
-// NO-LTO: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
+// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -O1 -### %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s
+// RUN: %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -O1 -### -- %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s
+// WPD-NO-LTO: "-fwhole-program-vtables"
 
 // RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO %s
 // RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO %s
 // LTO: "-fwhole-program-vtables"
 
-/// -funified-lto does not imply -flto, so we still get an error that fwhole-program-vtables has no effect without -flto
-// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -funified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
-// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -fno-unified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
-
 // RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -fno-whole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s
 // RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -fno-whole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s
 // LTO-DISABLE-NOT: "-fwhole-program-vtables"
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
@@ -98,6 +98,12 @@ class PipelineTuningOptions {
   // analyses after various module->function or cgscc->function adaptors in the
   // default pipelines.
   bool EagerlyInvalidateAnalyses;
+
+  /// Tuning option to enable/disable whole program devirtualization.
+  /// Its default value is false.
+  /// This is controlled by the `-whole-program-vtables` flag.
+  /// Used only in non-LTO mode.
+  bool WholeProgramDevirt;
 };
 
 /// This class provides access to building LLVM's passes.

diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -226,11 +226,15 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
   ModuleSummaryIndex *ExportSummary;
   const ModuleSummaryIndex *ImportSummary;
   bool UseCommandLine = false;
+  const bool InLTOMode;
   WholeProgramDevirtPass()
-      : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
+      : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true),
+        InLTOMode(true) {}
   WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
-                         const ModuleSummaryIndex *ImportSummary)
-      : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+                         const ModuleSummaryIndex *ImportSummary,
+                         bool InLTOMode = true)
+      : ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+        InLTOMode(InLTOMode) {
     assert(!(ExportSummary && ImportSummary));
   }
   LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &);

diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -321,6 +321,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
   MergeFunctions = EnableMergeFunctions;
   InlinerThreshold = -1;
   EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
+  WholeProgramDevirt = false;
 }
 
 namespace llvm {
@@ -1629,6 +1630,23 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   if (!LTOPreLink)
     MPM.addPass(RelLookupTableConverterPass());
 
+  if (PTO.WholeProgramDevirt && LTOPhase == ThinOrFullLTOPhase::None) {
+    MPM.addPass(WholeProgramDevirtPass(/*ExportSummary*/ nullptr,
+                                       /*ImportSummary*/ nullptr,
+                                       /*InLTOMode=*/false));
+    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
+                                   lowertypetests::DropTestKind::Assume));
+    if (EnableModuleInliner) {
+      MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
+                                    UseInlineAdvisor,
+                                    ThinOrFullLTOPhase::None));
+    } else {
+      MPM.addPass(ModuleInlinerWrapperPass(
+          getInlineParamsFromOptLevel(Level),
+          /* MandatoryFirst */ true,
+          InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner}));
+    }
+  }
   return MPM;
 }
 

diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,7 +24,8 @@
 //   returns 0, or a single vtable's function returns 1, replace each virtual
 //   call with a comparison of the vptr against that vtable's address.
 //
-// This pass is intended to be used during the regular and thin LTO pipelines:
+// This pass is intended to be used during the regular/thinLTO and non-LTO
+// pipelines:
 //
 // During regular LTO, the pass determines the best optimization for each
 // virtual call and applies the resolutions directly to virtual calls that are
@@ -48,6 +49,13 @@
 //   is supported.
 // - Import phase: (same as with hybrid case above).
 //
+// In non-LTO mode:
+// - The pass apply speculative devirtualization without requiring any type of
+// visibility.
+// - Skips other features like virtual constant propagation, uniform return
+// value
+//   optimization, unique return value optimization, branch funnels to minimize
+//   the drawbacks of wrong speculation.
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -60,7 +68,9 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
@@ -211,6 +221,14 @@ static cl::opt<WPDCheckMode> DevirtCheckMode(
                clEnumValN(WPDCheckMode::Fallback, "fallback",
                           "Fallback to indirect when incorrect")));
 
+// This pass runs mainly in lto mode, it can run in nonlto mode for limited
+// features. For testing, provide a way to tell that we are running in nonlto
+// mode.
+static cl::opt<bool>
+    TestNoLTOMode("wholeprogramdevirt-nolto", cl::Hidden,
+                  cl::desc("Run whole program devirt outside LTO mode."),
+                  cl::init(false));
+
 namespace {
 struct PatternList {
   std::vector<GlobPattern> Patterns;
@@ -794,10 +812,28 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
     return FAM.getResult<DominatorTreeAnalysis>(F);
   };
   if (UseCommandLine) {
+    if (TestNoLTOMode)
+      // we are outside LTO mode. enable speculative devirtualization:
+      DevirtCheckMode = WPDCheckMode::Fallback;
     if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
       return PreservedAnalyses::all();
     return PreservedAnalyses::none();
   }
+  std::optional<ModuleSummaryIndex> Index;
+  // Force Fallback mode as it's safe in case it's non-LTO mode where
+  // we don't have hidden visibility.
+  if (!InLTOMode) {
+    DevirtCheckMode = WPDCheckMode::Fallback;
+    // In non-LTO mode, we don't have an ExportSummary, so we
+    // build the ExportSummary from the module.
+    assert(!ExportSummary &&
+           "ExportSummary is expected to be empty in non-LTO mode");
+    if (DevirtCheckMode == WPDCheckMode::Fallback && !ExportSummary) {
+      ProfileSummaryInfo PSI(M);
+      Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI));
+      ExportSummary = Index.has_value() ? &Index.value() : nullptr;
+    }
+  }
   if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
                     ImportSummary)
            .run())
@@ -1091,10 +1127,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
     if (!TM.Bits->GV->isConstant())
       return false;
 
-    // We cannot perform whole program devirtualization analysis on a vtable
-    // with public LTO visibility.
-    if (TM.Bits->GV->getVCallVisibility() ==
-        GlobalObject::VCallVisibilityPublic)
+    // If speculative devirtualization is NOT enabled, it's not safe to perform
+    // whole program devirtualization
+    //  analysis on a vtable with public LTO visibility.
+    if (DevirtCheckMode != WPDCheckMode::Fallback &&
+        TM.Bits->GV->getVCallVisibility() ==
+            GlobalObject::VCallVisibilityPublic)
       return false;
 
     Function *Fn = nullptr;
@@ -1112,6 +1150,11 @@ bool DevirtModule::tryFindVirtualCallTargets(
     // calls to pure virtuals are UB.
     if (Fn->getName() == "__cxa_pure_virtual")
       continue;
+    // In Most cases empty functions will be overridden by the
+    // implementation of the derived class, so we can skip them.
+    if (DevirtCheckMode == WPDCheckMode::Fallback &&
+        Fn->getReturnType()->isVoidTy() && Fn->getInstructionCount() <= 1)
+      continue;
 
     // We can disregard unreachable functions as possible call targets, as
     // unreachable functions shouldn't be called.
@@ -1333,10 +1376,11 @@ bool DevirtModule::trySingleImplDevirt(
   if (!IsExported)
     return false;
 
-  // If the only implementation has local linkage, we must promote to external
-  // to make it visible to thin LTO objects. We can only get here during the
-  // ThinLTO export phase.
-  if (TheFn->hasLocalLinkage()) {
+  // In case of non-speculative devirtualization, If the only implementation has
+  // local linkage, we must promote to external
+  //  to make it visible to thin LTO objects. We can only get here during the
+  //  ThinLTO export phase.
+  if (DevirtCheckMode != WPDCheckMode::Fallback && TheFn->hasLocalLinkage()) {
     std::string NewName = (TheFn->getName() + ".llvm.merged").str();
 
     // Since we are renaming the function, any comdats with the same name must
@@ -2315,6 +2359,11 @@ bool DevirtModule::run() {
 
   Function *TypeTestFunc =
       Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
+  // If we are applying speculative devirtualization, we can work on the public
+  // type test intrinsics.
+  if (!TypeTestFunc && DevirtCheckMode == WPDCheckMode::Fallback)
+    TypeTestFunc =
+        Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
   Function *TypeCheckedLoadFunc =
       Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
   Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
@@ -2437,8 +2486,12 @@ bool DevirtModule::run() {
                  .WPDRes[S.first.ByteOffset];
     if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
                                   S.first.ByteOffset, ExportSummary)) {
-
-      if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
+      bool SingleImplDevirt =
+          trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
+      // In Speculative devirt mode, we skip virtual constant propagation
+      // and branch funneling to minimize the drawback if we got wrong
+      // speculation during devirtualization.
+      if (!SingleImplDevirt && DevirtCheckMode != WPDCheckMode::Fallback) {
         DidVirtualConstProp |=
             tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);