-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[WPD]: Apply speculative WPD in non-lto mode. #145031
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1359,7 +1359,8 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, | |
// Emit type metadata on vtables with LTO or IR instrumentation. | ||
// In IR instrumentation, the type metadata is used to find out vtable | ||
// definitions (for type profiling) among all global variables. | ||
if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr()) | ||
if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() && | ||
!getCodeGenOpts().WholeProgramVTables) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this change needed? |
||
return; | ||
|
||
CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7847,8 +7847,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, | |
IsDeviceOffloadAction ? D.getLTOMode() : D.getOffloadLTOMode(); | ||
auto OtherIsUsingLTO = OtherLTOMode != LTOK_None; | ||
|
||
if ((!IsUsingLTO && !OtherIsUsingLTO) || | ||
(IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full))) | ||
if (!IsUsingLTO && !OtherIsUsingLTO && !UnifiedLTO) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm having trouble following all of the conditions guarding this new if block and the existing one just below. There should probably be one set of guards around the warning, and another around whether |
||
if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) | ||
if (!A->getOption().matches(options::OPT_O0)) | ||
CmdArgs.push_back("-fwhole-program-vtables"); | ||
} else if ((!IsUsingLTO && !OtherIsUsingLTO) || | ||
(IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full))) | ||
D.Diag(diag::err_drv_argument_only_allowed_with) | ||
<< "-fwhole-program-vtables" | ||
<< ((IsPS4 && !UnifiedLTO) ? "-flto=full" : "-flto"); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,11 @@ | ||
// RUN: not %clang -target x86_64-unknown-linux -fwhole-program-vtables -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s | ||
// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -### -- %s 2>&1 | FileCheck --check-prefix=NO-LTO %s | ||
// NO-LTO: invalid argument '-fwhole-program-vtables' only allowed with '-flto' | ||
// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -O1 -### %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s | ||
// RUN: %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -O1 -### -- %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s | ||
// WPD-NO-LTO: "-fwhole-program-vtables" | ||
|
||
// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO %s | ||
// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO %s | ||
// LTO: "-fwhole-program-vtables" | ||
|
||
/// -funified-lto does not imply -flto, so we still get an error that fwhole-program-vtables has no effect without -flto | ||
// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -funified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s | ||
// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -fno-unified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s | ||
|
||
// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -fno-whole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s | ||
// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -fno-whole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s | ||
// LTO-DISABLE-NOT: "-fwhole-program-vtables" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -321,6 +321,7 @@ PipelineTuningOptions::PipelineTuningOptions() { | |
MergeFunctions = EnableMergeFunctions; | ||
InlinerThreshold = -1; | ||
EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; | ||
WholeProgramDevirt = false; | ||
} | ||
|
||
namespace llvm { | ||
|
@@ -1629,6 +1630,23 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, | |
if (!LTOPreLink) | ||
MPM.addPass(RelLookupTableConverterPass()); | ||
|
||
if (PTO.WholeProgramDevirt && LTOPhase == ThinOrFullLTOPhase::None) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this need to be guarded by whether whole program vtables was enabled? We don't for LTO. What happens if it is simply always invoked for non-LTO? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi Teresa, thanks for looking at the patch. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At least WPD and LTT should not do anything if there is no type metadata (the inliner invocation is a separate story, more comments about that below). |
||
MPM.addPass(WholeProgramDevirtPass(/*ExportSummary*/ nullptr, | ||
/*ImportSummary*/ nullptr, | ||
/*InLTOMode=*/false)); | ||
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, | ||
lowertypetests::DropTestKind::Assume)); | ||
if (EnableModuleInliner) { | ||
MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think you want to reinvoke the inliner. Better to invoke WPD from the module simplifier right before the inliner is invoked there. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that the passes that eliminate the unused GVs are depending on the inliner where constructors get inlined so it becomes clear which GVs are used and which are not. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess in LTO mode we effectively get 2 rounds of inlining too: one during the pre-LTO compile, which must be what is inlining the constructors, and then one in the LTO backends after WPD. This is unfortunately a bigger change, adding another full round of inlining. But I guess ok because this won't be on by default. |
||
UseInlineAdvisor, | ||
ThinOrFullLTOPhase::None)); | ||
} else { | ||
MPM.addPass(ModuleInlinerWrapperPass( | ||
getInlineParamsFromOptLevel(Level), | ||
/* MandatoryFirst */ true, | ||
InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner})); | ||
} | ||
} | ||
return MPM; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,8 @@ | |
// returns 0, or a single vtable's function returns 1, replace each virtual | ||
// call with a comparison of the vptr against that vtable's address. | ||
// | ||
// This pass is intended to be used during the regular and thin LTO pipelines: | ||
// This pass is intended to be used during the regular/thinLTO and non-LTO | ||
// pipelines: | ||
// | ||
// During regular LTO, the pass determines the best optimization for each | ||
// virtual call and applies the resolutions directly to virtual calls that are | ||
|
@@ -48,6 +49,13 @@ | |
// is supported. | ||
// - Import phase: (same as with hybrid case above). | ||
// | ||
// In non-LTO mode: | ||
// - The pass apply speculative devirtualization without requiring any type of | ||
// visibility. | ||
// - Skips other features like virtual constant propagation, uniform return | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's more fundamental in that I don't think some of these can be applied safely without whole program and in fact without some fullLTO. There is vtable rewriting that happens. |
||
// value | ||
// optimization, unique return value optimization, branch funnels to minimize | ||
// the drawbacks of wrong speculation. | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "llvm/Transforms/IPO/WholeProgramDevirt.h" | ||
|
@@ -60,7 +68,9 @@ | |
#include "llvm/ADT/Statistic.h" | ||
#include "llvm/Analysis/AssumptionCache.h" | ||
#include "llvm/Analysis/BasicAliasAnalysis.h" | ||
#include "llvm/Analysis/ModuleSummaryAnalysis.h" | ||
#include "llvm/Analysis/OptimizationRemarkEmitter.h" | ||
#include "llvm/Analysis/ProfileSummaryInfo.h" | ||
#include "llvm/Analysis/TypeMetadataUtils.h" | ||
#include "llvm/Bitcode/BitcodeReader.h" | ||
#include "llvm/Bitcode/BitcodeWriter.h" | ||
|
@@ -211,6 +221,14 @@ static cl::opt<WPDCheckMode> DevirtCheckMode( | |
clEnumValN(WPDCheckMode::Fallback, "fallback", | ||
"Fallback to indirect when incorrect"))); | ||
|
||
// This pass runs mainly in lto mode, it can run in nonlto mode for limited | ||
// features. For testing, provide a way to tell that we are running in nonlto | ||
// mode. | ||
static cl::opt<bool> | ||
TestNoLTOMode("wholeprogramdevirt-nolto", cl::Hidden, | ||
cl::desc("Run whole program devirt outside LTO mode."), | ||
cl::init(false)); | ||
|
||
namespace { | ||
struct PatternList { | ||
std::vector<GlobPattern> Patterns; | ||
|
@@ -794,10 +812,28 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, | |
return FAM.getResult<DominatorTreeAnalysis>(F); | ||
}; | ||
if (UseCommandLine) { | ||
if (TestNoLTOMode) | ||
// we are outside LTO mode. enable speculative devirtualization: | ||
DevirtCheckMode = WPDCheckMode::Fallback; | ||
if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree)) | ||
return PreservedAnalyses::all(); | ||
return PreservedAnalyses::none(); | ||
} | ||
std::optional<ModuleSummaryIndex> Index; | ||
// Force Fallback mode as it's safe in case it's non-LTO mode where | ||
// we don't have hidden visibility. | ||
if (!InLTOMode) { | ||
DevirtCheckMode = WPDCheckMode::Fallback; | ||
// In non-LTO mode, we don't have an ExportSummary, so we | ||
// build the ExportSummary from the module. | ||
assert(!ExportSummary && | ||
"ExportSummary is expected to be empty in non-LTO mode"); | ||
if (DevirtCheckMode == WPDCheckMode::Fallback && !ExportSummary) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this if check needed? Both conditions should always be true (set or asserted above) |
||
ProfileSummaryInfo PSI(M); | ||
Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI)); | ||
ExportSummary = Index.has_value() ? &Index.value() : nullptr; | ||
} | ||
} | ||
if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary, | ||
ImportSummary) | ||
.run()) | ||
|
@@ -1091,10 +1127,12 @@ bool DevirtModule::tryFindVirtualCallTargets( | |
if (!TM.Bits->GV->isConstant()) | ||
return false; | ||
|
||
// We cannot perform whole program devirtualization analysis on a vtable | ||
// with public LTO visibility. | ||
if (TM.Bits->GV->getVCallVisibility() == | ||
GlobalObject::VCallVisibilityPublic) | ||
// If speculative devirtualization is NOT enabled, it's not safe to perform | ||
// whole program devirtualization | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: fix line wrap |
||
// analysis on a vtable with public LTO visibility. | ||
if (DevirtCheckMode != WPDCheckMode::Fallback && | ||
TM.Bits->GV->getVCallVisibility() == | ||
GlobalObject::VCallVisibilityPublic) | ||
return false; | ||
|
||
Function *Fn = nullptr; | ||
|
@@ -1112,6 +1150,11 @@ bool DevirtModule::tryFindVirtualCallTargets( | |
// calls to pure virtuals are UB. | ||
if (Fn->getName() == "__cxa_pure_virtual") | ||
continue; | ||
// In Most cases empty functions will be overridden by the | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: uncapitalize Most |
||
// implementation of the derived class, so we can skip them. | ||
if (DevirtCheckMode == WPDCheckMode::Fallback && | ||
Fn->getReturnType()->isVoidTy() && Fn->getInstructionCount() <= 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a performance heuristic? Perhaps these checks should be explicitly checking for |
||
continue; | ||
|
||
// We can disregard unreachable functions as possible call targets, as | ||
// unreachable functions shouldn't be called. | ||
|
@@ -1333,10 +1376,11 @@ bool DevirtModule::trySingleImplDevirt( | |
if (!IsExported) | ||
return false; | ||
|
||
// If the only implementation has local linkage, we must promote to external | ||
// to make it visible to thin LTO objects. We can only get here during the | ||
// ThinLTO export phase. | ||
if (TheFn->hasLocalLinkage()) { | ||
// In case of non-speculative devirtualization, If the only implementation has | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this going to break LTO if it uses Fallback mode for extra safety? Here too I think nits: fix capitalization and spacing in this comment block. |
||
// local linkage, we must promote to external | ||
// to make it visible to thin LTO objects. We can only get here during the | ||
// ThinLTO export phase. | ||
if (DevirtCheckMode != WPDCheckMode::Fallback && TheFn->hasLocalLinkage()) { | ||
std::string NewName = (TheFn->getName() + ".llvm.merged").str(); | ||
|
||
// Since we are renaming the function, any comdats with the same name must | ||
|
@@ -2315,6 +2359,11 @@ bool DevirtModule::run() { | |
|
||
Function *TypeTestFunc = | ||
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test); | ||
// If we are applying speculative devirtualization, we can work on the public | ||
// type test intrinsics. | ||
if (!TypeTestFunc && DevirtCheckMode == WPDCheckMode::Fallback) | ||
TypeTestFunc = | ||
Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); | ||
Function *TypeCheckedLoadFunc = | ||
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load); | ||
Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists( | ||
|
@@ -2437,8 +2486,12 @@ bool DevirtModule::run() { | |
.WPDRes[S.first.ByteOffset]; | ||
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos, | ||
S.first.ByteOffset, ExportSummary)) { | ||
|
||
if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { | ||
bool SingleImplDevirt = | ||
trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res); | ||
// In Speculative devirt mode, we skip virtual constant propagation | ||
// and branch funneling to minimize the drawback if we got wrong | ||
// speculation during devirtualization. | ||
if (!SingleImplDevirt && DevirtCheckMode != WPDCheckMode::Fallback) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here too, check |
||
DidVirtualConstProp |= | ||
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thinking more about this, the option name doesn't really make sense in this case. Perhaps there should be a separate option controlling the non-LTO behavior. Let me think more about this part...