From f856c40a687821d0419ff9ed9dccbabe0a62ee01 Mon Sep 17 00:00:00 2001 From: Aaron Eline Date: Fri, 16 Jul 2021 12:06:52 -0400 Subject: [PATCH 1/9] Cleanup of root cause code --- clang/include/clang/3C/3CInteractiveData.h | 12 ++- clang/include/clang/3C/ConstraintVariables.h | 1 + clang/include/clang/3C/Constraints.h | 4 + clang/lib/3C/3CInteractiveData.cpp | 14 +-- clang/lib/3C/ProgramInfo.cpp | 107 +++++++++++-------- 5 files changed, 83 insertions(+), 55 deletions(-) diff --git a/clang/include/clang/3C/3CInteractiveData.h b/clang/include/clang/3C/3CInteractiveData.h index 2c72045fc575..868bdcfbb611 100644 --- a/clang/include/clang/3C/3CInteractiveData.h +++ b/clang/include/clang/3C/3CInteractiveData.h @@ -65,15 +65,19 @@ class ConstraintsInfo { // \ / // s // Here: s -> {p, q} and r -> {q} - std::map RCMap; + // IE: Maps a constraint variables to the set of root causes of wildness + std::map RootCauses; // This is source map: Map of Constraint var (which are directly // assigned WILD) and the set of constraint vars which are WILD because of // the above constraint. // For the above case, this contains: p -> {s}, q -> {r, s} - std::map SrcWMap; + // IE: Maps a root cause to the set of variables it constrains + std::map ConstrainedBy; - std::map PtrRCMap; - std::map> PtrSrcWMap; + // PTR versions of the above maps + // TODO understand this better + std::map PtrRootCauses; + std::map> PtrConstrainedBy; // Get score for each of the ConstraintKeys, which are wild. // For the above example, the score of s would be 0.5, similarly diff --git a/clang/include/clang/3C/ConstraintVariables.h b/clang/include/clang/3C/ConstraintVariables.h index d0ae7f678925..d701ce5d58d0 100644 --- a/clang/include/clang/3C/ConstraintVariables.h +++ b/clang/include/clang/3C/ConstraintVariables.h @@ -168,6 +168,7 @@ class ConstraintVariable { std::string getRewritableOriginalTy() const; std::string getName() const { return Name; } + // TODO is the word `valid` doing any real work here? or can it be dropped? void setValidDecl() { IsForDecl = true; } bool isForValidDecl() const { return IsForDecl; } diff --git a/clang/include/clang/3C/Constraints.h b/clang/include/clang/3C/Constraints.h index 278878736ac1..2245b8175ce7 100644 --- a/clang/include/clang/3C/Constraints.h +++ b/clang/include/clang/3C/Constraints.h @@ -129,7 +129,11 @@ class VarAtom : public Atom { return false; } + // TODO this should be renamed to something more informative, like "id" uint32_t getLoc() const { return Loc; } + + + std::string getName() const { return Name; } VarKind getVarKind() const { return KindV; } diff --git a/clang/lib/3C/3CInteractiveData.cpp b/clang/lib/3C/3CInteractiveData.cpp index bc6d417f513a..f454a213af3d 100644 --- a/clang/lib/3C/3CInteractiveData.cpp +++ b/clang/lib/3C/3CInteractiveData.cpp @@ -19,14 +19,14 @@ void ConstraintsInfo::clear() { AllWildAtoms.clear(); TotalNonDirectWildAtoms.clear(); ValidSourceFiles.clear(); - RCMap.clear(); - SrcWMap.clear(); + RootCauses.clear(); + ConstrainedBy.clear(); } -CVars &ConstraintsInfo::getRCVars(ConstraintKey Ckey) { return RCMap[Ckey]; } +CVars &ConstraintsInfo::getRCVars(ConstraintKey Ckey) { return RootCauses[Ckey]; } CVars &ConstraintsInfo::getSrcCVars(ConstraintKey Ckey) { - return SrcWMap[Ckey]; + return ConstrainedBy[Ckey]; } CVars ConstraintsInfo::getWildAffectedCKeys(const CVars &DWKeys) { @@ -50,7 +50,7 @@ float ConstraintsInfo::getPtrAffectedScore( const std::set CVs) { float TS = 0.0; for (auto *CV : CVs) - TS += (1.0 / PtrRCMap[CV].size()); + TS += (1.0 / PtrRootCauses[CV].size()); return TS; } @@ -127,12 +127,12 @@ void ConstraintsInfo::printConstraintStats(llvm::raw_ostream &O, O << "\"AtomsAffected\":" << AtomsAffected.size() << ", "; O << "\"AtomsScore\":" << getAtomAffectedScore(AtomsAffected) << ", "; - std::set PtrsAffected = PtrSrcWMap[Cause]; + std::set PtrsAffected = PtrConstrainedBy[Cause]; O << "\"PtrsAffected\":" << PtrsAffected.size() << ","; O << "\"PtrsScore\":" << getPtrAffectedScore(PtrsAffected); O << "}"; } int ConstraintsInfo::getNumPtrsAffected(ConstraintKey CK) { - return PtrSrcWMap[CK].size(); + return PtrConstrainedBy[CK].size(); } diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 90c4c692f4fc..1700212ed476 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -961,79 +961,98 @@ FVConstraint *ProgramInfo::getStaticFuncConstraint(std::string FuncName, bool ProgramInfo::computeInterimConstraintState( const std::set &FilePaths) { - // Get all the valid vars of interest i.e., all the Vars that are present - // in one of the files being compiled. - CAtoms ValidVarsVec; - std::set AllValidVars; + // We need to compute two sets + // 1) The set of _all_ vars that refer to a Decl + std::set DeclVars; + // 2) The set of all DeclVars vars _in_ this file, which we call _relevant_ + std::set RelevantVars; + + // Compute the above two sets CVarSet Visited; - CAtoms Tmp; for (const auto &I : Variables) { std::string FileName = I.first.getFileName(); ConstraintVariable *C = I.second; if (C->isForValidDecl()) { - Tmp.clear(); + CAtoms Tmp; getVarsFromConstraint(C, Tmp, Visited); - AllValidVars.insert(Tmp.begin(), Tmp.end()); + DeclVars.insert(Tmp.begin(), Tmp.end()); if (canWrite(FileName)) - ValidVarsVec.insert(ValidVarsVec.begin(), Tmp.begin(), Tmp.end()); + RelevantVars.insert(Tmp.begin(), Tmp.end()); } } - // Make that into set, for efficiency. - std::set ValidVarsS; - ValidVarsS.insert(ValidVarsVec.begin(), ValidVarsVec.end()); auto GetLocOrZero = [](const Atom *Val) { if (const auto *VA = dyn_cast(Val)) return VA->getLoc(); return (ConstraintKey)0; }; - CVars ValidVarsKey; - std::transform(ValidVarsS.begin(), ValidVarsS.end(), - std::inserter(ValidVarsKey, ValidVarsKey.end()), GetLocOrZero); - CVars AllValidVarsKey; - std::transform(AllValidVars.begin(), AllValidVars.end(), - std::inserter(AllValidVarsKey, AllValidVarsKey.end()), + + //Map the above two sets into equivalent sets of keys + CVars RelevantVarsKey; + CVars DeclVarsKey; + + std::transform(RelevantVars.begin(), RelevantVars.end(), + std::inserter(RelevantVarsKey, RelevantVarsKey.end()), GetLocOrZero); + std::transform(DeclVars.begin(), DeclVars.end(), + std::inserter(DeclVarsKey, DeclVarsKey.end()), GetLocOrZero); CState.clear(); + std::set DirectWildVarAtoms; CS.getChkCG().getSuccessors(CS.getWild(), DirectWildVarAtoms); - CVars TmpCGrp; - CVars OnlyIndirect; - for (auto *A : DirectWildVarAtoms) { - auto *VA = dyn_cast(A); - if (VA == nullptr) + auto IsDeclVar = [&](VarAtom *VA) { + return DeclVars.find(VA) != DeclVars.end(); + }; + + auto IsRelevantVar = [&](VarAtom *VA) { + return RelevantVarsKey.find(VA->getLoc()) != RelevantVarsKey.end(); + }; + + auto IsDirectlyWild = [&](VarAtom *VA) { + return DirectWildVarAtoms.find(VA) != DirectWildVarAtoms.end(); + }; + + + // TODO this search loop needs optimizing + for (auto *WildAtom : DirectWildVarAtoms) { + auto *WildVarAtom = dyn_cast(WildAtom); + if (WildVarAtom == nullptr) continue; - TmpCGrp.clear(); - OnlyIndirect.clear(); + CVars ConstrainedByThis; + CVars IndirectConstraints; + auto BFSVisitor = [&](Atom *SearchAtom) { auto *SearchVA = dyn_cast(SearchAtom); - if (SearchVA && AllValidVars.find(SearchVA) != AllValidVars.end()) { - CState.RCMap[SearchVA->getLoc()].insert(VA->getLoc()); - if (ValidVarsKey.find(SearchVA->getLoc()) != ValidVarsKey.end()) - TmpCGrp.insert(SearchVA->getLoc()); - if (DirectWildVarAtoms.find(SearchVA) == DirectWildVarAtoms.end()) { - OnlyIndirect.insert(SearchVA->getLoc()); - } + if (SearchVA && IsDeclVar(SearchVA)) { + CState.RootCauses[SearchVA->getLoc()].insert(WildVarAtom->getLoc()); + + if (IsRelevantVar(SearchVA)) + ConstrainedByThis.insert(SearchVA->getLoc()); + + if (!IsDirectlyWild(SearchVA)) + IndirectConstraints.insert(SearchVA->getLoc()); + } + }; - CS.getChkCG().visitBreadthFirst(VA, BFSVisitor); + CS.getChkCG().visitBreadthFirst(WildVarAtom, BFSVisitor); - CState.TotalNonDirectWildAtoms.insert(OnlyIndirect.begin(), - OnlyIndirect.end()); + CState.TotalNonDirectWildAtoms.insert(IndirectConstraints.begin(), + IndirectConstraints.end()); // Should we consider only pointers which with in the source files or // external pointers that affected pointers within the source files. - CState.AllWildAtoms.insert(VA->getLoc()); - CVars &CGrp = CState.SrcWMap[VA->getLoc()]; - CGrp.insert(TmpCGrp.begin(), TmpCGrp.end()); + CState.AllWildAtoms.insert(WildVarAtom->getLoc()); + CVars &CGrp = CState.ConstrainedBy[WildVarAtom->getLoc()]; + CGrp.insert(ConstrainedByThis.begin(), ConstrainedByThis.end()); } - findIntersection(CState.AllWildAtoms, ValidVarsKey, CState.InSrcWildAtoms); - findIntersection(CState.TotalNonDirectWildAtoms, ValidVarsKey, + findIntersection(CState.AllWildAtoms, RelevantVarsKey, CState.InSrcWildAtoms); + findIntersection(CState.TotalNonDirectWildAtoms, RelevantVarsKey, CState.InSrcNonDirectWildAtoms); // The ConstraintVariable for a variable normally appears in Variables for the @@ -1123,17 +1142,17 @@ void ProgramInfo::computePtrLevelStats() { insertCVAtoms(I.second, AtomPtrMap); // Populate maps with per-pointer root cause information - for (auto Entry : CState.RCMap) { - assert("RCMap entry is not mapped to a pointer!" && + for (auto Entry : CState.RootCauses) { + assert("RootCauses entry is not mapped to a pointer!" && AtomPtrMap.find(Entry.first) != AtomPtrMap.end()); ConstraintVariable *CV = AtomPtrMap[Entry.first]; for (auto RC : Entry.second) - CState.PtrRCMap[CV].insert(RC); + CState.PtrRootCauses[CV].insert(RC); } - for (auto Entry : CState.SrcWMap) { + for (auto Entry : CState.ConstrainedBy) { for (auto Key : Entry.second) { assert(AtomPtrMap.find(Key) != AtomPtrMap.end()); - CState.PtrSrcWMap[Entry.first].insert(AtomPtrMap[Key]); + CState.PtrConstrainedBy[Entry.first].insert(AtomPtrMap[Key]); } } } From 2a8207062971f9b2008b637cd399508fa3ea6ee1 Mon Sep 17 00:00:00 2001 From: Aaron Eline Date: Fri, 16 Jul 2021 16:02:21 -0400 Subject: [PATCH 2/9] Root Cause code moved from LLVM's BFS implementation to our own, allowing for memo-ization --- clang/include/clang/3C/3CInteractiveData.h | 8 + clang/include/clang/3C/ProgramInfo.h | 5 + clang/lib/3C/ProgramInfo.cpp | 169 ++++++++++++++------- 3 files changed, 131 insertions(+), 51 deletions(-) diff --git a/clang/include/clang/3C/3CInteractiveData.h b/clang/include/clang/3C/3CInteractiveData.h index 868bdcfbb611..9b7c22c9178f 100644 --- a/clang/include/clang/3C/3CInteractiveData.h +++ b/clang/include/clang/3C/3CInteractiveData.h @@ -52,6 +52,14 @@ class ConstraintsInfo { std::set ValidSourceFiles; std::map AtomSourceMap; + void addRootCause(VarAtom *Var, VarAtom *RootCause) { + RootCauses[Var->getLoc()].insert(RootCause->getLoc()); + } + + CVars& getConstrainedBy(VarAtom *Var) { + return ConstrainedBy[Var->getLoc()]; + } + private: // Root cause map: This is the map of a Constraint var and a set of // Constraint vars (that are directly assigned WILD) which are the reason diff --git a/clang/include/clang/3C/ProgramInfo.h b/clang/include/clang/3C/ProgramInfo.h index 5e691c08bc77..fbff00c90ebc 100644 --- a/clang/include/clang/3C/ProgramInfo.h +++ b/clang/include/clang/3C/ProgramInfo.h @@ -100,6 +100,11 @@ class ProgramInfo : public ProgramVariableAdder { FVConstraint *getStaticFuncConstraint(std::string FuncName, std::string FileName) const; + + void doRootCauseAnalysis(std::set&, CVars&, std::set&, + ConstraintsGraph&); + + // Called when we are done adding constraints and visiting ASTs. // Links information about global symbols together and adds // constraints where appropriate. diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 1700212ed476..6ff373083fb8 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -954,6 +954,123 @@ FVConstraint *ProgramInfo::getStaticFuncConstraint(std::string FuncName, return nullptr; } +class RCAFactory { + friend class RootCauseAnalysis; +private: + std::set &DeclVars; + CVars &RelevantVarsKeys; + std::set &DirectWildVarAtoms; + ConstraintsGraph &CG; + ConstraintsInfo &CState; + +public: + RCAFactory(std::set &DVs, CVars &RVs, std::set &DWVs, + ConstraintsGraph &CG, ConstraintsInfo &CState) + : DeclVars(DVs), RelevantVarsKeys(RVs), DirectWildVarAtoms(DWVs), + CG(CG), CState(CState) {} + + void analyzeRootCause(VarAtom*); + +}; + +class RootCauseAnalysis { + friend class RCAFactory; +private: + RCAFactory *F; + VarAtom *WildAtom; + CVars ConstrainedByThis; + CVars Indirect; + CVars Seen; + + bool isDeclVar(VarAtom *VA) { + return F->DeclVars.find(VA) != F->DeclVars.end(); + } + + bool isRelevantVar(VarAtom *VA) { + return F->RelevantVarsKeys.find(VA->getLoc()) != F->RelevantVarsKeys.end(); + } + + bool isDirectlyWild(VarAtom *VA) { + return F->DirectWildVarAtoms.find(VA) != F->DirectWildVarAtoms.end(); + } + + bool alreadySeen(VarAtom *VA) { + return Seen.find(VA->getLoc()) != Seen.end(); + } + + void markSeen(VarAtom *VA) { + Seen.insert(VA->getLoc()); + } + +public: + RootCauseAnalysis(RCAFactory *F, VarAtom *WA) : F(F), WildAtom(WA) { + traverse(WA); + } + + void traverse(Atom *ReachableNode) { + auto *ReachableVar = dyn_cast(ReachableNode); + if (ReachableVar == nullptr || alreadySeen(ReachableVar)) + return; + markSeen(ReachableVar); + if (isDeclVar(ReachableVar)) { + F->CState.addRootCause(ReachableVar, WildAtom); + + if (isRelevantVar(ReachableVar)) + ConstrainedByThis.insert(ReachableVar->getLoc()); + if (!isDirectlyWild(ReachableVar)) + Indirect.insert(ReachableVar->getLoc()); + } + std::set Neighbors; + F->CG.getNeighbors(ReachableVar, Neighbors, true); + for (auto *Neighbor : Neighbors) + traverse(Neighbor); + } + +}; + + +void RCAFactory::analyzeRootCause(VarAtom *DirectWild) { + RootCauseAnalysis RCA(this, DirectWild); + CState.AllWildAtoms.insert(DirectWild->getLoc()); + CVars &CGrp = CState.getConstrainedBy(DirectWild); + CGrp.insert(RCA.ConstrainedByThis.begin(), RCA.ConstrainedByThis.end()); +} + +void ProgramInfo::doRootCauseAnalysis(std::set &DeclVars, + CVars &RelevantVarsKey, + std::set &DirectWildVarAtoms, + ConstraintsGraph &CG) { + + // Quick Helper Functions + auto IsDeclVar = [&](VarAtom *VA) { + return DeclVars.find(VA) != DeclVars.end(); + }; + + auto IsRelevantVar = [&](VarAtom *VA) { + return RelevantVarsKey.find(VA->getLoc()) != RelevantVarsKey.end(); + }; + + auto IsDirectlyWild = [&](VarAtom *VA) { + return DirectWildVarAtoms.find(VA) != DirectWildVarAtoms.end(); + }; + + RCAFactory RCAF(DeclVars, RelevantVarsKey, DirectWildVarAtoms, CG, CState); + + // TODO this search loop needs optimizing + for (auto *WildAtom : DirectWildVarAtoms) { + auto *WildVarAtom = dyn_cast(WildAtom); + // TODO flip this conditional to make the control flow simpler + if (WildVarAtom == nullptr) + continue; + RCAF.analyzeRootCause(WildVarAtom); + } + + findIntersection(CState.AllWildAtoms, RelevantVarsKey, CState.InSrcWildAtoms); + findIntersection(CState.TotalNonDirectWildAtoms, RelevantVarsKey, + CState.InSrcNonDirectWildAtoms); + +} + // From the given constraint graph, this method computes the interim constraint // state that contains constraint vars which are directly assigned WILD and // other constraint vars that have been determined to be WILD because they @@ -1003,57 +1120,7 @@ bool ProgramInfo::computeInterimConstraintState( std::set DirectWildVarAtoms; CS.getChkCG().getSuccessors(CS.getWild(), DirectWildVarAtoms); - auto IsDeclVar = [&](VarAtom *VA) { - return DeclVars.find(VA) != DeclVars.end(); - }; - - auto IsRelevantVar = [&](VarAtom *VA) { - return RelevantVarsKey.find(VA->getLoc()) != RelevantVarsKey.end(); - }; - - auto IsDirectlyWild = [&](VarAtom *VA) { - return DirectWildVarAtoms.find(VA) != DirectWildVarAtoms.end(); - }; - - - // TODO this search loop needs optimizing - for (auto *WildAtom : DirectWildVarAtoms) { - auto *WildVarAtom = dyn_cast(WildAtom); - if (WildVarAtom == nullptr) - continue; - - CVars ConstrainedByThis; - CVars IndirectConstraints; - - - auto BFSVisitor = [&](Atom *SearchAtom) { - auto *SearchVA = dyn_cast(SearchAtom); - - if (SearchVA && IsDeclVar(SearchVA)) { - CState.RootCauses[SearchVA->getLoc()].insert(WildVarAtom->getLoc()); - - if (IsRelevantVar(SearchVA)) - ConstrainedByThis.insert(SearchVA->getLoc()); - - if (!IsDirectlyWild(SearchVA)) - IndirectConstraints.insert(SearchVA->getLoc()); - - } - - }; - CS.getChkCG().visitBreadthFirst(WildVarAtom, BFSVisitor); - - CState.TotalNonDirectWildAtoms.insert(IndirectConstraints.begin(), - IndirectConstraints.end()); - // Should we consider only pointers which with in the source files or - // external pointers that affected pointers within the source files. - CState.AllWildAtoms.insert(WildVarAtom->getLoc()); - CVars &CGrp = CState.ConstrainedBy[WildVarAtom->getLoc()]; - CGrp.insert(ConstrainedByThis.begin(), ConstrainedByThis.end()); - } - findIntersection(CState.AllWildAtoms, RelevantVarsKey, CState.InSrcWildAtoms); - findIntersection(CState.TotalNonDirectWildAtoms, RelevantVarsKey, - CState.InSrcNonDirectWildAtoms); + doRootCauseAnalysis(DeclVars, RelevantVarsKey, DirectWildVarAtoms, CS.getChkCG()); // The ConstraintVariable for a variable normally appears in Variables for the // definition, but it may also be reused directly in ExprConstraintVars for a From f3810f0b4ed84879f87c979de4c294b2c642588b Mon Sep 17 00:00:00 2001 From: Aaron Eline Date: Fri, 16 Jul 2021 17:00:02 -0400 Subject: [PATCH 3/9] WIP on memo --- clang/include/clang/3C/3CInteractiveData.h | 12 ++++++-- clang/lib/3C/ProgramInfo.cpp | 33 ++++++++++++++++++---- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/3C/3CInteractiveData.h b/clang/include/clang/3C/3CInteractiveData.h index 9b7c22c9178f..43ca178a5478 100644 --- a/clang/include/clang/3C/3CInteractiveData.h +++ b/clang/include/clang/3C/3CInteractiveData.h @@ -53,11 +53,19 @@ class ConstraintsInfo { std::map AtomSourceMap; void addRootCause(VarAtom *Var, VarAtom *RootCause) { - RootCauses[Var->getLoc()].insert(RootCause->getLoc()); + addRootCause(Var->getLoc(), RootCause->getLoc()); + } + + void addRootCause(ConstraintKey Var, ConstraintKey RootCause) { + RootCauses[Var].insert(RootCause); } CVars& getConstrainedBy(VarAtom *Var) { - return ConstrainedBy[Var->getLoc()]; + return getConstrainedBy(Var->getLoc()); + } + + CVars& getConstrainedBy(ConstraintKey Var) { + return ConstrainedBy[Var]; } private: diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 6ff373083fb8..4aa5bdc33a5b 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -962,6 +962,8 @@ class RCAFactory { std::set &DirectWildVarAtoms; ConstraintsGraph &CG; ConstraintsInfo &CState; + // This functions as the memo-pad + std::map> ReachableBy; public: RCAFactory(std::set &DVs, CVars &RVs, std::set &DWVs, @@ -971,6 +973,21 @@ class RCAFactory { void analyzeRootCause(VarAtom*); + void markReachable(VarAtom* FromV, VarAtom *ToV) { + auto From = FromV->getLoc(), To = ToV->getLoc(); + + ReachableBy[From].insert(To); + // Check if To has reachable nodes, if so add them + if (ReachableBy.count(To) != 0) + ReachableBy[From].insert(ReachableBy[To].begin(), ReachableBy[To].end()); + } + + + void forAllReachable(VarAtom *From, std::function &F) { + for (auto K : ReachableBy[From->getLoc()]) + F(K); + } + }; class RootCauseAnalysis { @@ -1007,9 +1024,9 @@ class RootCauseAnalysis { traverse(WA); } - void traverse(Atom *ReachableNode) { - auto *ReachableVar = dyn_cast(ReachableNode); - if (ReachableVar == nullptr || alreadySeen(ReachableVar)) + // TODO memoize this + void traverse(VarAtom *ReachableVar) { + if (alreadySeen(ReachableVar)) return; markSeen(ReachableVar); if (isDeclVar(ReachableVar)) { @@ -1022,8 +1039,14 @@ class RootCauseAnalysis { } std::set Neighbors; F->CG.getNeighbors(ReachableVar, Neighbors, true); - for (auto *Neighbor : Neighbors) - traverse(Neighbor); + for (auto *Neighbor : Neighbors) { + auto* VarNeighbor = dyn_cast(Neighbor); + if (VarNeighbor == nullptr) + continue; + traverse(VarNeighbor); + // Mark our neighbor (and all transitively reachable nodes) as reachable + F->markReachable(ReachableVar, VarNeighbor); + } } }; From a35ecaeba67837ad0a467aa0138d822f4317bdca Mon Sep 17 00:00:00 2001 From: Aaron Eline Date: Mon, 19 Jul 2021 13:03:40 -0400 Subject: [PATCH 4/9] First draft of memoizing RCA. Passes unit tests. Needs cleaning/performance evaluation on a full project. --- clang/lib/3C/ProgramInfo.cpp | 65 ++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 4aa5bdc33a5b..574c6498301a 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -955,6 +955,7 @@ FVConstraint *ProgramInfo::getStaticFuncConstraint(std::string FuncName, } class RCAFactory { + // TODO clean up access control friend class RootCauseAnalysis; private: std::set &DeclVars; @@ -982,10 +983,27 @@ class RCAFactory { ReachableBy[From].insert(ReachableBy[To].begin(), ReachableBy[To].end()); } + bool memoized(VarAtom *VA) { + return ReachableBy.count(VA->getLoc()) != 0; + } - void forAllReachable(VarAtom *From, std::function &F) { - for (auto K : ReachableBy[From->getLoc()]) - F(K); + std::set& getReachable(VarAtom *VA) { + assert(memoized(VA)); + return ReachableBy[VA->getLoc()]; + } + + bool isDeclVar(VarAtom *VA) { + return DeclVars.find(VA) != DeclVars.end(); + } + + bool isDeclVar(ConstraintKey Key) { + auto IsThisKey = [&](auto A) { + auto VA = dyn_cast(A); + return VA && VA->getLoc() == Key; + }; + + // NB: This might need to be optimized + return llvm::find_if(DeclVars, IsThisKey) != DeclVars.end(); } }; @@ -999,12 +1017,12 @@ class RootCauseAnalysis { CVars Indirect; CVars Seen; - bool isDeclVar(VarAtom *VA) { - return F->DeclVars.find(VA) != F->DeclVars.end(); + bool isRelevantVar(VarAtom *VA) { + return isRelevantVar(VA->getLoc()); } - bool isRelevantVar(VarAtom *VA) { - return F->RelevantVarsKeys.find(VA->getLoc()) != F->RelevantVarsKeys.end(); + bool isRelevantVar(ConstraintKey VA) { + return F->RelevantVarsKeys.find(VA) != F->RelevantVarsKeys.end(); } bool isDirectlyWild(VarAtom *VA) { @@ -1029,7 +1047,7 @@ class RootCauseAnalysis { if (alreadySeen(ReachableVar)) return; markSeen(ReachableVar); - if (isDeclVar(ReachableVar)) { + if (F->isDeclVar(ReachableVar)) { F->CState.addRootCause(ReachableVar, WildAtom); if (isRelevantVar(ReachableVar)) @@ -1037,6 +1055,24 @@ class RootCauseAnalysis { if (!isDirectlyWild(ReachableVar)) Indirect.insert(ReachableVar->getLoc()); } + + if (F->memoized(ReachableVar)) + traverseMemoizedNode(ReachableVar); + else + traverseNewNode(ReachableVar); + } + + void traverseMemoizedNode(VarAtom *VA) { + for (ConstraintKey K : F->getReachable(VA)) { + if (F->isDeclVar(K)) { + F->CState.addRootCause(K, WildAtom->getLoc()); + if (isRelevantVar(K)) + ConstrainedByThis.insert(K); + } + } + } + + void traverseNewNode(VarAtom *ReachableVar) { std::set Neighbors; F->CG.getNeighbors(ReachableVar, Neighbors, true); for (auto *Neighbor : Neighbors) { @@ -1064,19 +1100,6 @@ void ProgramInfo::doRootCauseAnalysis(std::set &DeclVars, std::set &DirectWildVarAtoms, ConstraintsGraph &CG) { - // Quick Helper Functions - auto IsDeclVar = [&](VarAtom *VA) { - return DeclVars.find(VA) != DeclVars.end(); - }; - - auto IsRelevantVar = [&](VarAtom *VA) { - return RelevantVarsKey.find(VA->getLoc()) != RelevantVarsKey.end(); - }; - - auto IsDirectlyWild = [&](VarAtom *VA) { - return DirectWildVarAtoms.find(VA) != DirectWildVarAtoms.end(); - }; - RCAFactory RCAF(DeclVars, RelevantVarsKey, DirectWildVarAtoms, CG, CState); // TODO this search loop needs optimizing From 3f93efbb0ead104d3c9ea550130f81e541eebd13 Mon Sep 17 00:00:00 2001 From: Aaron Eline Date: Mon, 19 Jul 2021 14:38:53 -0400 Subject: [PATCH 5/9] Code Cleanup --- clang/include/clang/3C/3CInteractiveData.h | 4 - clang/lib/3C/ProgramInfo.cpp | 103 ++++++++++++++------- 2 files changed, 69 insertions(+), 38 deletions(-) diff --git a/clang/include/clang/3C/3CInteractiveData.h b/clang/include/clang/3C/3CInteractiveData.h index 43ca178a5478..77283baf9cce 100644 --- a/clang/include/clang/3C/3CInteractiveData.h +++ b/clang/include/clang/3C/3CInteractiveData.h @@ -52,10 +52,6 @@ class ConstraintsInfo { std::set ValidSourceFiles; std::map AtomSourceMap; - void addRootCause(VarAtom *Var, VarAtom *RootCause) { - addRootCause(Var->getLoc(), RootCause->getLoc()); - } - void addRootCause(ConstraintKey Var, ConstraintKey RootCause) { RootCauses[Var].insert(RootCause); } diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 574c6498301a..3bdc814b7f0e 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -954,15 +954,21 @@ FVConstraint *ProgramInfo::getStaticFuncConstraint(std::string FuncName, return nullptr; } +// Factory context for root cause analysis +// This class tracks global root cause analysis information class RCAFactory { - // TODO clean up access control - friend class RootCauseAnalysis; private: + // Set of vars that map to a decl std::set &DeclVars; + // Set of vars in this file CVars &RelevantVarsKeys; + // Set of vars that are directly wild std::set &DirectWildVarAtoms; + ConstraintsGraph &CG; ConstraintsInfo &CState; + + // Map a key (K) to the set of keys reachable by K // This functions as the memo-pad std::map> ReachableBy; @@ -974,6 +980,8 @@ class RCAFactory { void analyzeRootCause(VarAtom*); + // Mark ToV as being reachable from FromV + // Check nodes reachable from ToV, and add them as well void markReachable(VarAtom* FromV, VarAtom *ToV) { auto From = FromV->getLoc(), To = ToV->getLoc(); @@ -983,12 +991,13 @@ class RCAFactory { ReachableBy[From].insert(ReachableBy[To].begin(), ReachableBy[To].end()); } + // Check if a given VarAtom has had reachability data logged yet bool memoized(VarAtom *VA) { return ReachableBy.count(VA->getLoc()) != 0; } std::set& getReachable(VarAtom *VA) { - assert(memoized(VA)); + assert("Should only be called on memoized values" && memoized(VA)); return ReachableBy[VA->getLoc()]; } @@ -1006,28 +1015,50 @@ class RCAFactory { return llvm::find_if(DeclVars, IsThisKey) != DeclVars.end(); } + bool isRelevantVar(VarAtom *VA) { + return isRelevantVar(VA->getLoc()); + } + + bool isRelevantVar(ConstraintKey Key) { + return RelevantVarsKeys.find(Key) != RelevantVarsKeys.end(); + } + + bool isDirectlyWild(VarAtom *VA) { + return DirectWildVarAtoms.find(VA) != DirectWildVarAtoms.end(); + } + + void addRootCause(VarAtom *Target, VarAtom *Cause) { + CState.addRootCause(Target->getLoc(), Cause->getLoc()); + } + + void addRootCause(ConstraintKey Target, VarAtom *Cause) { + CState.addRootCause(Target, Cause->getLoc()); + } + + std::set getNeighbors(VarAtom *Node) { + std::set Neighbors; + CG.getNeighbors(Node, Neighbors, true); + return Neighbors; + } + }; +// This class performs the root cause analysis on a single wild atom +// It searches through the Constraint Graph and finds every atom constrained +// by the target wild atom. class RootCauseAnalysis { - friend class RCAFactory; private: + // Factory Context RCAFactory *F; + // The target of the search VarAtom *WildAtom; + // Set of variables constrained by the target CVars ConstrainedByThis; + // Set of variables indirect constraints CVars Indirect; + // Set of vars we've seen in this search (prevents cycles) CVars Seen; - bool isRelevantVar(VarAtom *VA) { - return isRelevantVar(VA->getLoc()); - } - - bool isRelevantVar(ConstraintKey VA) { - return F->RelevantVarsKeys.find(VA) != F->RelevantVarsKeys.end(); - } - - bool isDirectlyWild(VarAtom *VA) { - return F->DirectWildVarAtoms.find(VA) != F->DirectWildVarAtoms.end(); - } bool alreadySeen(VarAtom *VA) { return Seen.find(VA->getLoc()) != Seen.end(); @@ -1039,20 +1070,24 @@ class RootCauseAnalysis { public: RootCauseAnalysis(RCAFactory *F, VarAtom *WA) : F(F), WildAtom(WA) { + // Begin traversal out from the root cause of wildness traverse(WA); } - // TODO memoize this + CVars& getConstrainedBy(void) { + return ConstrainedByThis; + } + void traverse(VarAtom *ReachableVar) { if (alreadySeen(ReachableVar)) return; markSeen(ReachableVar); if (F->isDeclVar(ReachableVar)) { - F->CState.addRootCause(ReachableVar, WildAtom); + F->addRootCause(ReachableVar, WildAtom); - if (isRelevantVar(ReachableVar)) + if (F->isRelevantVar(ReachableVar)) ConstrainedByThis.insert(ReachableVar->getLoc()); - if (!isDirectlyWild(ReachableVar)) + if (!F->isDirectlyWild(ReachableVar)) Indirect.insert(ReachableVar->getLoc()); } @@ -1062,19 +1097,20 @@ class RootCauseAnalysis { traverseNewNode(ReachableVar); } + +private: void traverseMemoizedNode(VarAtom *VA) { for (ConstraintKey K : F->getReachable(VA)) { if (F->isDeclVar(K)) { - F->CState.addRootCause(K, WildAtom->getLoc()); - if (isRelevantVar(K)) + F->addRootCause(K, WildAtom); + if (F->isRelevantVar(K)) ConstrainedByThis.insert(K); } } } void traverseNewNode(VarAtom *ReachableVar) { - std::set Neighbors; - F->CG.getNeighbors(ReachableVar, Neighbors, true); + std::set Neighbors = F->getNeighbors(ReachableVar); for (auto *Neighbor : Neighbors) { auto* VarNeighbor = dyn_cast(Neighbor); if (VarNeighbor == nullptr) @@ -1089,10 +1125,14 @@ class RootCauseAnalysis { void RCAFactory::analyzeRootCause(VarAtom *DirectWild) { - RootCauseAnalysis RCA(this, DirectWild); CState.AllWildAtoms.insert(DirectWild->getLoc()); - CVars &CGrp = CState.getConstrainedBy(DirectWild); - CGrp.insert(RCA.ConstrainedByThis.begin(), RCA.ConstrainedByThis.end()); + + // Perform root cause analysis + RootCauseAnalysis RCA(this, DirectWild); + CVars &TotalConstrainedBy = CState.getConstrainedBy(DirectWild); + // Add all the new constraints we found into our total set + CVars &NewConstraints = RCA.getConstrainedBy(); + TotalConstrainedBy.insert(NewConstraints.begin(), NewConstraints.end()); } void ProgramInfo::doRootCauseAnalysis(std::set &DeclVars, @@ -1102,14 +1142,9 @@ void ProgramInfo::doRootCauseAnalysis(std::set &DeclVars, RCAFactory RCAF(DeclVars, RelevantVarsKey, DirectWildVarAtoms, CG, CState); - // TODO this search loop needs optimizing - for (auto *WildAtom : DirectWildVarAtoms) { - auto *WildVarAtom = dyn_cast(WildAtom); - // TODO flip this conditional to make the control flow simpler - if (WildVarAtom == nullptr) - continue; - RCAF.analyzeRootCause(WildVarAtom); - } + for (auto *WildAtom : DirectWildVarAtoms) + if (auto *WildVarAtom = dyn_cast(WildAtom)) + RCAF.analyzeRootCause(WildVarAtom); findIntersection(CState.AllWildAtoms, RelevantVarsKey, CState.InSrcWildAtoms); findIntersection(CState.TotalNonDirectWildAtoms, RelevantVarsKey, From 65d3ce0a263caa052a39c55edb0dbde44d272895 Mon Sep 17 00:00:00 2001 From: Aaron Eline Date: Tue, 20 Jul 2021 10:14:44 -0400 Subject: [PATCH 6/9] Optimization --- clang/include/clang/3C/ProgramInfo.h | 2 +- clang/lib/3C/ProgramInfo.cpp | 26 ++++++++++++-------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/3C/ProgramInfo.h b/clang/include/clang/3C/ProgramInfo.h index fbff00c90ebc..5728407a3ec0 100644 --- a/clang/include/clang/3C/ProgramInfo.h +++ b/clang/include/clang/3C/ProgramInfo.h @@ -101,7 +101,7 @@ class ProgramInfo : public ProgramVariableAdder { std::string FileName) const; - void doRootCauseAnalysis(std::set&, CVars&, std::set&, + void doRootCauseAnalysis(CVars&, CVars&, std::set&, ConstraintsGraph&); diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 3bdc814b7f0e..b99e39cb13b5 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -959,7 +959,7 @@ FVConstraint *ProgramInfo::getStaticFuncConstraint(std::string FuncName, class RCAFactory { private: // Set of vars that map to a decl - std::set &DeclVars; + CVars &DeclVars; // Set of vars in this file CVars &RelevantVarsKeys; // Set of vars that are directly wild @@ -972,12 +972,15 @@ class RCAFactory { // This functions as the memo-pad std::map> ReachableBy; + public: - RCAFactory(std::set &DVs, CVars &RVs, std::set &DWVs, + RCAFactory(CVars &DVs, CVars &RVs, std::set &DWVs, ConstraintsGraph &CG, ConstraintsInfo &CState) : DeclVars(DVs), RelevantVarsKeys(RVs), DirectWildVarAtoms(DWVs), CG(CG), CState(CState) {} + + void analyzeRootCause(VarAtom*); // Mark ToV as being reachable from FromV @@ -1002,18 +1005,12 @@ class RCAFactory { } bool isDeclVar(VarAtom *VA) { - return DeclVars.find(VA) != DeclVars.end(); + return DeclVars.find(VA->getLoc()) != DeclVars.end(); } bool isDeclVar(ConstraintKey Key) { - auto IsThisKey = [&](auto A) { - auto VA = dyn_cast(A); - return VA && VA->getLoc() == Key; - }; - - // NB: This might need to be optimized - return llvm::find_if(DeclVars, IsThisKey) != DeclVars.end(); - } + return DeclVars.find(Key) != DeclVars.end(); + }; bool isRelevantVar(VarAtom *VA) { return isRelevantVar(VA->getLoc()); @@ -1135,12 +1132,12 @@ void RCAFactory::analyzeRootCause(VarAtom *DirectWild) { TotalConstrainedBy.insert(NewConstraints.begin(), NewConstraints.end()); } -void ProgramInfo::doRootCauseAnalysis(std::set &DeclVars, +void ProgramInfo::doRootCauseAnalysis(CVars &DeclVarsKey, CVars &RelevantVarsKey, std::set &DirectWildVarAtoms, ConstraintsGraph &CG) { - RCAFactory RCAF(DeclVars, RelevantVarsKey, DirectWildVarAtoms, CG, CState); + RCAFactory RCAF(DeclVarsKey, RelevantVarsKey, DirectWildVarAtoms, CG, CState); for (auto *WildAtom : DirectWildVarAtoms) if (auto *WildVarAtom = dyn_cast(WildAtom)) @@ -1195,13 +1192,14 @@ bool ProgramInfo::computeInterimConstraintState( std::transform(DeclVars.begin(), DeclVars.end(), std::inserter(DeclVarsKey, DeclVarsKey.end()), GetLocOrZero); + DeclVarsKey.erase(0); // Remove all failed conversions CState.clear(); std::set DirectWildVarAtoms; CS.getChkCG().getSuccessors(CS.getWild(), DirectWildVarAtoms); - doRootCauseAnalysis(DeclVars, RelevantVarsKey, DirectWildVarAtoms, CS.getChkCG()); + doRootCauseAnalysis(DeclVarsKey, RelevantVarsKey, DirectWildVarAtoms, CS.getChkCG()); // The ConstraintVariable for a variable normally appears in Variables for the // definition, but it may also be reused directly in ExprConstraintVars for a From 19edeb4a550d64fa99a20601b240309ad35a5557 Mon Sep 17 00:00:00 2001 From: Aaron Eline Date: Fri, 23 Jul 2021 12:48:27 -0400 Subject: [PATCH 7/9] 20% Speed improvement by changing data structure --- clang/include/clang/3C/ProgramInfo.h | 7 +++++-- clang/lib/3C/ProgramInfo.cpp | 26 ++++++++++++++++---------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/3C/ProgramInfo.h b/clang/include/clang/3C/ProgramInfo.h index 5728407a3ec0..e7ee9edf7edc 100644 --- a/clang/include/clang/3C/ProgramInfo.h +++ b/clang/include/clang/3C/ProgramInfo.h @@ -101,8 +101,11 @@ class ProgramInfo : public ProgramVariableAdder { std::string FileName) const; - void doRootCauseAnalysis(CVars&, CVars&, std::set&, - ConstraintsGraph&); + void doRootCauseAnalysis(llvm::DenseSet> &DeclVars, + CVars &RelevantVars, + std::set &DirectWild, + ConstraintsGraph &CG); // Called when we are done adding constraints and visiting ASTs. diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index b99e39cb13b5..07d19f62f885 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -14,6 +14,7 @@ #include "clang/3C/MappingVisitor.h" #include "clang/3C/Utils.h" #include "llvm/Support/JSON.h" +#include #include using namespace clang; @@ -954,12 +955,15 @@ FVConstraint *ProgramInfo::getStaticFuncConstraint(std::string FuncName, return nullptr; } +typedef llvm::DenseSet> ConstraintKeySet; + // Factory context for root cause analysis // This class tracks global root cause analysis information class RCAFactory { private: + //TODO explain the difference in types // Set of vars that map to a decl - CVars &DeclVars; + ConstraintKeySet &DeclVars; // Set of vars in this file CVars &RelevantVarsKeys; // Set of vars that are directly wild @@ -968,13 +972,14 @@ class RCAFactory { ConstraintsGraph &CG; ConstraintsInfo &CState; + // Map a key (K) to the set of keys reachable by K // This functions as the memo-pad - std::map> ReachableBy; + std::map ReachableBy; public: - RCAFactory(CVars &DVs, CVars &RVs, std::set &DWVs, + RCAFactory(ConstraintKeySet &DVs, CVars &RVs, std::set &DWVs, ConstraintsGraph &CG, ConstraintsInfo &CState) : DeclVars(DVs), RelevantVarsKeys(RVs), DirectWildVarAtoms(DWVs), CG(CG), CState(CState) {} @@ -999,7 +1004,7 @@ class RCAFactory { return ReachableBy.count(VA->getLoc()) != 0; } - std::set& getReachable(VarAtom *VA) { + ConstraintKeySet& getReachable(VarAtom *VA) { assert("Should only be called on memoized values" && memoized(VA)); return ReachableBy[VA->getLoc()]; } @@ -1132,7 +1137,7 @@ void RCAFactory::analyzeRootCause(VarAtom *DirectWild) { TotalConstrainedBy.insert(NewConstraints.begin(), NewConstraints.end()); } -void ProgramInfo::doRootCauseAnalysis(CVars &DeclVarsKey, +void ProgramInfo::doRootCauseAnalysis(ConstraintKeySet &DeclVarsKey, CVars &RelevantVarsKey, std::set &DirectWildVarAtoms, ConstraintsGraph &CG) { @@ -1185,14 +1190,15 @@ bool ProgramInfo::computeInterimConstraintState( //Map the above two sets into equivalent sets of keys CVars RelevantVarsKey; - CVars DeclVarsKey; + ConstraintKeySet DeclVarsKey; std::transform(RelevantVars.begin(), RelevantVars.end(), std::inserter(RelevantVarsKey, RelevantVarsKey.end()), GetLocOrZero); - std::transform(DeclVars.begin(), DeclVars.end(), - std::inserter(DeclVarsKey, DeclVarsKey.end()), - GetLocOrZero); - DeclVarsKey.erase(0); // Remove all failed conversions + + for (const auto* A : DeclVars) + if (const auto &VA = dyn_cast(A)) + DeclVarsKey.insert(VA->getLoc()); + CState.clear(); From 1a6a54fa2c4188cbad8d4030a518dac2aa00a159 Mon Sep 17 00:00:00 2001 From: Aaron Eline Date: Fri, 23 Jul 2021 16:54:01 -0400 Subject: [PATCH 8/9] WIP on further datastructure changes --- clang/include/clang/3C/Constraints.h | 9 +++++ clang/include/clang/3C/ProgramInfo.h | 9 ++--- clang/lib/3C/ProgramInfo.cpp | 50 ++++++++++++---------------- 3 files changed, 33 insertions(+), 35 deletions(-) diff --git a/clang/include/clang/3C/Constraints.h b/clang/include/clang/3C/Constraints.h index 2245b8175ce7..f15f3fa93c41 100644 --- a/clang/include/clang/3C/Constraints.h +++ b/clang/include/clang/3C/Constraints.h @@ -142,10 +142,19 @@ class VarAtom : public Atom { return Constraints; } + void setForDecl(void) { + IsForDecl = true; + } + + bool isForDecl(void) const { + return IsForDecl; + } + private: uint32_t Loc; std::string Name; const VarKind KindV; + bool IsForDecl = false; // The constraint expressions where this variable is mentioned on the // LHS of an equality. std::set> Constraints; diff --git a/clang/include/clang/3C/ProgramInfo.h b/clang/include/clang/3C/ProgramInfo.h index e7ee9edf7edc..3633207f72ef 100644 --- a/clang/include/clang/3C/ProgramInfo.h +++ b/clang/include/clang/3C/ProgramInfo.h @@ -100,12 +100,9 @@ class ProgramInfo : public ProgramVariableAdder { FVConstraint *getStaticFuncConstraint(std::string FuncName, std::string FileName) const; - - void doRootCauseAnalysis(llvm::DenseSet> &DeclVars, - CVars &RelevantVars, - std::set &DirectWild, - ConstraintsGraph &CG); + void doRootCauseAnalysis(CVars &RelevantVarsKey, + std::set &DirectWildVarAtoms, + ConstraintsGraph &CG); // Called when we are done adding constraints and visiting ASTs. diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 07d19f62f885..938e5de82e1b 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -15,6 +15,7 @@ #include "clang/3C/Utils.h" #include "llvm/Support/JSON.h" #include +#include #include using namespace clang; @@ -955,6 +956,8 @@ FVConstraint *ProgramInfo::getStaticFuncConstraint(std::string FuncName, return nullptr; } + +typedef llvm::SmallPtrSet VarAtomSet; typedef llvm::DenseSet> ConstraintKeySet; // Factory context for root cause analysis @@ -963,7 +966,6 @@ class RCAFactory { private: //TODO explain the difference in types // Set of vars that map to a decl - ConstraintKeySet &DeclVars; // Set of vars in this file CVars &RelevantVarsKeys; // Set of vars that are directly wild @@ -975,13 +977,13 @@ class RCAFactory { // Map a key (K) to the set of keys reachable by K // This functions as the memo-pad - std::map ReachableBy; + llvm::DenseMap> ReachableBy; public: - RCAFactory(ConstraintKeySet &DVs, CVars &RVs, std::set &DWVs, + RCAFactory(CVars &RVs, std::set &DWVs, ConstraintsGraph &CG, ConstraintsInfo &CState) - : DeclVars(DVs), RelevantVarsKeys(RVs), DirectWildVarAtoms(DWVs), + : RelevantVarsKeys(RVs), DirectWildVarAtoms(DWVs), CG(CG), CState(CState) {} @@ -990,10 +992,11 @@ class RCAFactory { // Mark ToV as being reachable from FromV // Check nodes reachable from ToV, and add them as well + // TODO there are gains to be made by optimizing this function void markReachable(VarAtom* FromV, VarAtom *ToV) { auto From = FromV->getLoc(), To = ToV->getLoc(); - ReachableBy[From].insert(To); + ReachableBy[From].insert(ToV); // Check if To has reachable nodes, if so add them if (ReachableBy.count(To) != 0) ReachableBy[From].insert(ReachableBy[To].begin(), ReachableBy[To].end()); @@ -1004,18 +1007,11 @@ class RCAFactory { return ReachableBy.count(VA->getLoc()) != 0; } - ConstraintKeySet& getReachable(VarAtom *VA) { + VarAtomSet& getReachable(VarAtom *VA) { assert("Should only be called on memoized values" && memoized(VA)); return ReachableBy[VA->getLoc()]; } - bool isDeclVar(VarAtom *VA) { - return DeclVars.find(VA->getLoc()) != DeclVars.end(); - } - - bool isDeclVar(ConstraintKey Key) { - return DeclVars.find(Key) != DeclVars.end(); - }; bool isRelevantVar(VarAtom *VA) { return isRelevantVar(VA->getLoc()); @@ -1084,7 +1080,7 @@ class RootCauseAnalysis { if (alreadySeen(ReachableVar)) return; markSeen(ReachableVar); - if (F->isDeclVar(ReachableVar)) { + if (ReachableVar->isForDecl()) { F->addRootCause(ReachableVar, WildAtom); if (F->isRelevantVar(ReachableVar)) @@ -1102,11 +1098,11 @@ class RootCauseAnalysis { private: void traverseMemoizedNode(VarAtom *VA) { - for (ConstraintKey K : F->getReachable(VA)) { - if (F->isDeclVar(K)) { + for (VarAtom *K : F->getReachable(VA)) { + if (K->isForDecl()) { F->addRootCause(K, WildAtom); if (F->isRelevantVar(K)) - ConstrainedByThis.insert(K); + ConstrainedByThis.insert(K->getLoc()); } } } @@ -1137,12 +1133,11 @@ void RCAFactory::analyzeRootCause(VarAtom *DirectWild) { TotalConstrainedBy.insert(NewConstraints.begin(), NewConstraints.end()); } -void ProgramInfo::doRootCauseAnalysis(ConstraintKeySet &DeclVarsKey, - CVars &RelevantVarsKey, +void ProgramInfo::doRootCauseAnalysis(CVars &RelevantVarsKey, std::set &DirectWildVarAtoms, ConstraintsGraph &CG) { - RCAFactory RCAF(DeclVarsKey, RelevantVarsKey, DirectWildVarAtoms, CG, CState); + RCAFactory RCAF(RelevantVarsKey, DirectWildVarAtoms, CG, CState); for (auto *WildAtom : DirectWildVarAtoms) if (auto *WildVarAtom = dyn_cast(WildAtom)) @@ -1162,8 +1157,6 @@ bool ProgramInfo::computeInterimConstraintState( const std::set &FilePaths) { // We need to compute two sets - // 1) The set of _all_ vars that refer to a Decl - std::set DeclVars; // 2) The set of all DeclVars vars _in_ this file, which we call _relevant_ std::set RelevantVars; @@ -1175,7 +1168,11 @@ bool ProgramInfo::computeInterimConstraintState( if (C->isForValidDecl()) { CAtoms Tmp; getVarsFromConstraint(C, Tmp, Visited); - DeclVars.insert(Tmp.begin(), Tmp.end()); + // TODO setting this flag should likely being done earlier, + // during construction. + for (auto *A : Tmp) + if (auto *VA = dyn_cast(A)) + VA->setForDecl(); if (canWrite(FileName)) RelevantVars.insert(Tmp.begin(), Tmp.end()); } @@ -1190,22 +1187,17 @@ bool ProgramInfo::computeInterimConstraintState( //Map the above two sets into equivalent sets of keys CVars RelevantVarsKey; - ConstraintKeySet DeclVarsKey; std::transform(RelevantVars.begin(), RelevantVars.end(), std::inserter(RelevantVarsKey, RelevantVarsKey.end()), GetLocOrZero); - for (const auto* A : DeclVars) - if (const auto &VA = dyn_cast(A)) - DeclVarsKey.insert(VA->getLoc()); - CState.clear(); std::set DirectWildVarAtoms; CS.getChkCG().getSuccessors(CS.getWild(), DirectWildVarAtoms); - doRootCauseAnalysis(DeclVarsKey, RelevantVarsKey, DirectWildVarAtoms, CS.getChkCG()); + doRootCauseAnalysis(RelevantVarsKey, DirectWildVarAtoms, CS.getChkCG()); // The ConstraintVariable for a variable normally appears in Variables for the // definition, but it may also be reused directly in ExprConstraintVars for a From c514b68693f521d305547dc998ef8124bb6d2373 Mon Sep 17 00:00:00 2001 From: Aaron Eline Date: Mon, 2 Aug 2021 13:38:53 -0400 Subject: [PATCH 9/9] Some code cleanup --- clang/lib/3C/ProgramInfo.cpp | 52 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 938e5de82e1b..3fd9de39a67a 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -964,9 +964,7 @@ typedef llvm::DenseSet> ConstraintKe // This class tracks global root cause analysis information class RCAFactory { private: - //TODO explain the difference in types - // Set of vars that map to a decl - // Set of vars in this file + // Set of vars that map to a decl & are in a writable file CVars &RelevantVarsKeys; // Set of vars that are directly wild std::set &DirectWildVarAtoms; @@ -977,6 +975,7 @@ class RCAFactory { // Map a key (K) to the set of keys reachable by K // This functions as the memo-pad + // We use the more efficient LLVM set types here llvm::DenseMap> ReachableBy; @@ -1045,6 +1044,18 @@ class RCAFactory { // It searches through the Constraint Graph and finds every atom constrained // by the target wild atom. class RootCauseAnalysis { +public: + + RootCauseAnalysis(RCAFactory *F, VarAtom *WA) : F(F), WildAtom(WA) { + // Begin traversal out from the root cause of wildness + traverse(WA); + } + + // The set of all relevant variables constrained by the target + CVars& getConstrainedBy(void) { + return ConstrainedByThis; + } + private: // Factory Context RCAFactory *F; @@ -1052,10 +1063,10 @@ class RootCauseAnalysis { VarAtom *WildAtom; // Set of variables constrained by the target CVars ConstrainedByThis; - // Set of variables indirect constraints + // Set of variables indirectly constrained CVars Indirect; // Set of vars we've seen in this search (prevents cycles) - CVars Seen; + ConstraintKeySet Seen; bool alreadySeen(VarAtom *VA) { @@ -1066,15 +1077,6 @@ class RootCauseAnalysis { Seen.insert(VA->getLoc()); } -public: - RootCauseAnalysis(RCAFactory *F, VarAtom *WA) : F(F), WildAtom(WA) { - // Begin traversal out from the root cause of wildness - traverse(WA); - } - - CVars& getConstrainedBy(void) { - return ConstrainedByThis; - } void traverse(VarAtom *ReachableVar) { if (alreadySeen(ReachableVar)) @@ -1110,12 +1112,11 @@ class RootCauseAnalysis { void traverseNewNode(VarAtom *ReachableVar) { std::set Neighbors = F->getNeighbors(ReachableVar); for (auto *Neighbor : Neighbors) { - auto* VarNeighbor = dyn_cast(Neighbor); - if (VarNeighbor == nullptr) - continue; - traverse(VarNeighbor); - // Mark our neighbor (and all transitively reachable nodes) as reachable - F->markReachable(ReachableVar, VarNeighbor); + if (auto *VarNeighbor = dyn_cast(Neighbor)) { + traverse(VarNeighbor); + // Mark our neighbor (and all transitively reachable nodes) as reachable + F->markReachable(ReachableVar, VarNeighbor); + } } } @@ -1139,6 +1140,7 @@ void ProgramInfo::doRootCauseAnalysis(CVars &RelevantVarsKey, RCAFactory RCAF(RelevantVarsKey, DirectWildVarAtoms, CG, CState); + // Analyze the root causes for every directly wild atom for (auto *WildAtom : DirectWildVarAtoms) if (auto *WildVarAtom = dyn_cast(WildAtom)) RCAF.analyzeRootCause(WildVarAtom); @@ -1156,11 +1158,12 @@ void ProgramInfo::doRootCauseAnalysis(CVars &RelevantVarsKey, bool ProgramInfo::computeInterimConstraintState( const std::set &FilePaths) { - // We need to compute two sets - // 2) The set of all DeclVars vars _in_ this file, which we call _relevant_ + // The set of all DeclVars vars in a writable file, which we call _relevant_ std::set RelevantVars; // Compute the above two sets + + CVarSet Visited; for (const auto &I : Variables) { std::string FileName = I.first.getFileName(); @@ -1185,13 +1188,10 @@ bool ProgramInfo::computeInterimConstraintState( return (ConstraintKey)0; }; - //Map the above two sets into equivalent sets of keys + //Map the above set into equivalent set of keys CVars RelevantVarsKey; - std::transform(RelevantVars.begin(), RelevantVars.end(), std::inserter(RelevantVarsKey, RelevantVarsKey.end()), GetLocOrZero); - - CState.clear(); std::set DirectWildVarAtoms;