Skip to content

Commit d69a75f

Browse files
sys-igcigcbot
authored andcommitted
[Autobackout][FunctionalRegression]Revert of change: a2dbe99: Replace Atomic Fence with GenISA_source_value
Replace Atomic Fence with GenISA_source_value
1 parent cf2dc92 commit d69a75f

File tree

3 files changed

+5
-156
lines changed

3 files changed

+5
-156
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8575,8 +8575,6 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst *inst) {
85758575
case GenISAIntrinsic::GenISA_source_value: {
85768576
m_encoder->Copy(m_currShader->GetNULL(), GetSymbol(inst->getOperand(0)));
85778577
m_encoder->Push();
8578-
m_encoder->Fence(false, false, false, false, false, false, false, true);
8579-
m_encoder->Push();
85808578
break;
85818579
}
85828580
case GenISAIntrinsic::GenISA_movcr: {

IGC/Compiler/Optimizer/SynchronizationObjectCoalescing.cpp

Lines changed: 5 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ SPDX-License-Identifier: MIT
1717
#include "Compiler/IGCPassSupport.h"
1818
#include "SynchronizationObjectCoalescing.hpp"
1919
#include "visa_igc_common_header.h"
20-
#include "llvm/IR/IRBuilder.h"
21-
#include "llvm/Analysis/CFG.h"
2220
#include <utility>
2321
#include <map>
2422

@@ -287,9 +285,6 @@ class SynchronizationObjectCoalescing : public llvm::FunctionPass {
287285
static_cast<SynchronizationCaseMask>(WriteSyncRead | WriteSyncWrite | AtomicSyncRead | AtomicSyncWrite |
288286
WriteSyncAtomic | ReadSyncAtomic | ReadSyncWrite | AtomicSyncAtomic);
289287

290-
////////////////////////////////////////////////////////////////////////
291-
void CreateSourceValueInst(std::vector<llvm::Instruction *> &pAtomicInstToBeSourced, llvm::Instruction *pFenceInst);
292-
293288
////////////////////////////////////////////////////////////////////////
294289
void EraseRedundantInst(llvm::Instruction *pInst);
295290

@@ -332,7 +327,6 @@ class SynchronizationObjectCoalescing : public llvm::FunctionPass {
332327

333328
////////////////////////////////////////////////////////////////////////
334329
bool IsRequiredForAtomicOperationsOrdering(const llvm::Instruction *pSourceInst,
335-
std::vector<llvm::Instruction *> &pAtomicInstToBeSourced,
336330
bool onlyGlobalAtomics = false) const;
337331

338332
////////////////////////////////////////////////////////////////////////
@@ -446,7 +440,6 @@ class SynchronizationObjectCoalescing : public llvm::FunctionPass {
446440
std::vector<llvm::Instruction *> m_LscMemoryFences;
447441
std::vector<llvm::Instruction *> m_UntypedMemoryFences;
448442
std::vector<llvm::Instruction *> m_ThreadGroupBarriers;
449-
std::unordered_set<llvm::Instruction *> m_SourcedAtomicInstructions;
450443

451444
// this variable holds a mapping from a basic block to its memory instructions ordered by their occurrences in it
452445
// (the initial index of line of this basic block - the number of instructions preceding an instruction it its basic
@@ -545,107 +538,6 @@ bool SynchronizationObjectCoalescing::ProcessFunction() {
545538
return FindRedundancies();
546539
}
547540

548-
// Referenced from MemoryModelPass
549-
inline PHINode *FindDominatingPhi(DominatorTree &DT, Instruction *def, BasicBlock *postDominator) {
550-
IGC_ASSERT(def->getParent() != postDominator);
551-
IGC_ASSERT(!DT.dominates(def, postDominator));
552-
SmallPtrSet<PHINode *, 8> seen;
553-
SmallVector<User *, 8> worklist(def->users());
554-
while (!worklist.empty()) {
555-
PHINode *phi = dyn_cast<PHINode>(worklist.pop_back_val());
556-
if (phi == nullptr || seen.count(phi) > 0) {
557-
continue;
558-
}
559-
if (phi->getParent() == postDominator || DT.dominates(phi, postDominator)) {
560-
return phi;
561-
}
562-
seen.insert(phi);
563-
}
564-
return nullptr;
565-
}
566-
567-
////////////////////////////////////////////////////////////////////////
568-
/// @brief Fence Instruction responsible for only ordering of atomic Instructions
569-
/// can be replaced with Source Value Intrinsic which will still maintain
570-
/// the order of Instructions
571-
void SynchronizationObjectCoalescing::CreateSourceValueInst(std::vector<llvm::Instruction *> &pAtomicInstToBeSourced,
572-
llvm::Instruction *pFenceInst) {
573-
IGC_ASSERT(pAtomicInstToBeSourced.size() > 0);
574-
// reversing the list to source the atomic instructions in the order
575-
reverse(pAtomicInstToBeSourced.begin(), pAtomicInstToBeSourced.end());
576-
Function *funcPtr = GenISAIntrinsic::getDeclaration(pFenceInst->getModule(), GenISAIntrinsic::GenISA_source_value);
577-
BasicBlock *fenceBB = pFenceInst->getParent();
578-
579-
Function *F = pAtomicInstToBeSourced[0]->getFunction();
580-
DominatorTree DT(*F);
581-
PostDominatorTree PDT(*F);
582-
LoopInfo LI(DT);
583-
584-
for (llvm::Instruction *atomicInst : pAtomicInstToBeSourced) {
585-
// Making sure that the Fence Inst is potentially reachable from the atomic Instruction.
586-
if (!isPotentiallyReachable(atomicInst, pFenceInst, nullptr, &DT, &LI)) {
587-
continue;
588-
}
589-
590-
BasicBlock *atomicBB = atomicInst->getParent();
591-
BasicBlock *fenceDominator = fenceBB;
592-
Instruction *insertPoint = atomicBB->getTerminator();
593-
Value *sourceVal = cast<GenIntrinsicInst>(atomicInst);
594-
595-
// TODO: Determining Insert point can be improved which can postpone the source value intrinsic as long as possible.
596-
// Similar analysis is done in FindOptimalInsertPoints() in ApplyCacheControls.cpp
597-
598-
// Check if fence Instruction BB post dominates atomic Instruction BB
599-
// Else find the BB that is a predecessor of fence BB and post dominates atomic BB.
600-
// If we don't find one, then the insert point is near the terminator of atomic BB
601-
while (fenceDominator && fenceDominator != atomicBB) {
602-
if (PDT.dominates(fenceDominator, atomicBB)) {
603-
// If fence instruction is in same BB, then use fence as insert point
604-
// Else use the terminator of fenceDominator as insert point
605-
insertPoint = fenceBB == fenceDominator ? pFenceInst : fenceDominator->getTerminator();
606-
// It's possible that the atomic instruction does not dominate
607-
// the post-dominator, find a PHI user of the atomic instruction
608-
// that dominates the post-dominator.
609-
if (!DT.dominates(atomicBB, fenceDominator)) {
610-
PHINode *phi = FindDominatingPhi(DT, atomicInst, fenceDominator);
611-
if (phi) {
612-
sourceVal = phi;
613-
} else {
614-
// Fallback to inserting the source value in the basic
615-
// block with the atomic instruction.
616-
insertPoint = atomicBB->getTerminator();
617-
}
618-
}
619-
break;
620-
}
621-
fenceDominator = fenceDominator->getSinglePredecessor();
622-
}
623-
// If Fence is present in same BB as atomic, then insert at Fence Instruction
624-
if (fenceBB == atomicBB) {
625-
insertPoint = pFenceInst;
626-
}
627-
628-
IRBuilder<> builder(insertPoint);
629-
Type *sourceValType = sourceVal->getType();
630-
631-
// Source value intrinsic accepts only i32.
632-
if (sourceValType->isIntegerTy()) {
633-
sourceVal = builder.CreateZExtOrTrunc(sourceVal, builder.getInt32Ty());
634-
} else if (sourceValType->isFloatingPointTy()) {
635-
if (sourceValType->isFloatTy()) {
636-
sourceVal = builder.CreateBitCast(sourceVal, builder.getInt32Ty());
637-
} else {
638-
sourceVal = builder.CreateFPToUI(sourceVal, builder.getInt32Ty());
639-
}
640-
} else {
641-
IGC_ASSERT_MESSAGE(0, "Unexpected type");
642-
}
643-
644-
builder.CreateCall(funcPtr, {sourceVal});
645-
m_SourcedAtomicInstructions.insert(atomicInst);
646-
}
647-
}
648-
649541
////////////////////////////////////////////////////////////////////////
650542
void SynchronizationObjectCoalescing::EraseRedundantInst(llvm::Instruction *pInst) {
651543
bool isFence = IsFenceOperation(pInst);
@@ -848,13 +740,7 @@ bool SynchronizationObjectCoalescing::FindRedundancies() {
848740
}
849741
SynchronizationCaseMask referenceSyncCaseMask = GetStrictSynchronizationMask(pInst);
850742
bool isObligatory = (syncCaseMask & referenceSyncCaseMask) != 0;
851-
852-
std::vector<llvm::Instruction *> atomicInstToBeSourced;
853-
if (!isObligatory) {
854-
isObligatory =
855-
IsRequiredForAtomicOperationsOrdering(pInst, atomicInstToBeSourced, true /*onlyGlobalAtomics*/);
856-
}
857-
743+
isObligatory |= IsRequiredForAtomicOperationsOrdering(pInst, true /*onlyGlobalAtomics*/);
858744
bool verifyUnsynchronizedInstructions = IsFenceOperation(pInst);
859745
verifyUnsynchronizedInstructions &= (!isObligatory || syncCaseMask == ReadSyncWrite);
860746

@@ -881,9 +767,6 @@ bool SynchronizationObjectCoalescing::FindRedundancies() {
881767
#if _DEBUG
882768
RegisterRedundancyExplanation(pInst, ExplanationEntry::GlobalMemoryRedundancy);
883769
#endif // _DEBUG
884-
if (IGC_IS_FLAG_ENABLED(ReplaceAtomicFenceWithSourceValue) && atomicInstToBeSourced.size() > 0) {
885-
CreateSourceValueInst(atomicInstToBeSourced, const_cast<Instruction *>(pInst));
886-
}
887770
EraseRedundantGlobalScope(pInst);
888771
isModified = true;
889772
SetLocalMemoryInstructionMask();
@@ -948,12 +831,7 @@ bool SynchronizationObjectCoalescing::FindRedundancies() {
948831
GetSynchronizationMaskForAllResources(localForwardMemoryInstructionMask, localBackwardMemoryInstructionMask);
949832
SynchronizationCaseMask referenceSyncCaseMask = GetStrictSynchronizationMask(pInst);
950833
bool isObligatory = (syncCaseMask & referenceSyncCaseMask) != 0;
951-
952-
std::vector<llvm::Instruction *> atomicInstToBeSourced;
953-
if (!isObligatory) {
954-
isObligatory = IsRequiredForAtomicOperationsOrdering(pInst, atomicInstToBeSourced);
955-
}
956-
834+
isObligatory |= IsRequiredForAtomicOperationsOrdering(pInst);
957835
bool verifyUnsynchronizedInstructions = IsFenceOperation(pInst);
958836
verifyUnsynchronizedInstructions &= (!isObligatory || syncCaseMask == ReadSyncWrite);
959837

@@ -969,9 +847,6 @@ bool SynchronizationObjectCoalescing::FindRedundancies() {
969847
#if _DEBUG
970848
RegisterRedundancyExplanation(pInst, ExplanationEntry::StrictRedundancy);
971849
#endif // _DEBUG
972-
if (IGC_IS_FLAG_ENABLED(ReplaceAtomicFenceWithSourceValue) && atomicInstToBeSourced.size() > 0) {
973-
CreateSourceValueInst(atomicInstToBeSourced, const_cast<Instruction *>(pInst));
974-
}
975850
EraseRedundantInst(pInst);
976851
isModified = true;
977852
}
@@ -1856,9 +1731,8 @@ SynchronizationObjectCoalescing::GetUnsynchronizedForwardInstructionMask(const l
18561731
/// operations present before the fence (in program order)
18571732
/// @param pSourceInst the source synchronization instruction
18581733
/// @param onlyGlobalAtomics check only TGM and UGM atomic operations
1859-
bool SynchronizationObjectCoalescing::IsRequiredForAtomicOperationsOrdering(
1860-
const llvm::Instruction *pSourceInst, std::vector<llvm::Instruction *> &pAtomicInstToBeSourced,
1861-
bool onlyGlobalAtomics /*= false*/) const {
1734+
bool SynchronizationObjectCoalescing::IsRequiredForAtomicOperationsOrdering(const llvm::Instruction *pSourceInst,
1735+
bool onlyGlobalAtomics /*= false*/) const {
18621736
if (!IsFenceOperation(pSourceInst)) {
18631737
// Not a fence, nothing to check
18641738
return false;
@@ -1908,10 +1782,6 @@ bool SynchronizationObjectCoalescing::IsRequiredForAtomicOperationsOrdering(
19081782
{
19091783
isPotentiallyUnsynchronizedAtomic = false;
19101784
// Lambda that checks if a fence operation synchronizes the atomic operation.
1911-
// This can be improved to detect the users of atomic instruction and end the search for fences once we find the
1912-
// user. This user is essentially same as Source Value Intrinsic, however it can be reordered in visa affecting
1913-
// the execution order of atomic instructions. If we can find a way to treat this user as a special instruction
1914-
// and avoid reordering, we can skip creating new source value instruction.
19151785
std::function<bool(const llvm::Instruction *)> IsBoundaryInst = [this, &atomicPointerMemoryInstructionMask,
19161786
&isPotentiallyUnsynchronizedAtomic,
19171787
pSourceInst](const llvm::Instruction *pInst) {
@@ -1970,22 +1840,7 @@ bool SynchronizationObjectCoalescing::IsRequiredForAtomicOperationsOrdering(
19701840
if (!substituteFenceFound) {
19711841
// Found an atomic operation that requires the source fence
19721842
// instruction for correct memory ordering.
1973-
1974-
// If ReplaceAtomicFenceWithSourceValue is true, we can replace this fence with GenISA_source_value.
1975-
// This will source the atomic instruction and still maintains the order of atomic instructions.
1976-
// Else return true marking the fence instruction as Obligatory.
1977-
1978-
if (IGC_IS_FLAG_ENABLED(ReplaceAtomicFenceWithSourceValue)) {
1979-
// If a previous fence was replaced with source value intrinsic, GetVisibleMemoryInstructions will add the
1980-
// same atomic instruction again for the next fence resulting in multiple source value intrinsics but we need
1981-
// it to be sourced only once. Hence we check if it was already sourced previously. Continues to check all
1982-
// valid atomic Instructions to be sourced.
1983-
if (m_SourcedAtomicInstructions.find(const_cast<Instruction *>(pInst)) == m_SourcedAtomicInstructions.end()) {
1984-
pAtomicInstToBeSourced.push_back(const_cast<Instruction *>(pInst));
1985-
}
1986-
} else {
1987-
return true;
1988-
}
1843+
return true;
19891844
}
19901845
}
19911846
}
@@ -2147,7 +2002,6 @@ void SynchronizationObjectCoalescing::InvalidateMembers() {
21472002
m_OrderedFenceInstructionsInBasicBlockCache.clear();
21482003
m_OrderedBarrierInstructionsInBasicBlockCache.clear();
21492004
m_BasicBlockMemoryInstructionMaskCache.clear();
2150-
m_SourcedAtomicInstructions.clear();
21512005
#if _DEBUG
21522006
m_ExplanationEntries.clear();
21532007
#endif // _DEBUG

IGC/common/igc_flags.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -433,9 +433,6 @@ DECLARE_IGC_REGKEY(
433433
"The mask is casted to IGC::SyncInstMask and informs which synchronization objects should not be coalesced. Note "
434434
"that synchronization objects classified in multiple types are not disabled if any bit describing them is off.",
435435
true)
436-
DECLARE_IGC_REGKEY(bool, ReplaceAtomicFenceWithSourceValue, true,
437-
"Fences are required to maintain the order of atomic memory instructions. This flag will replace the fence with "
438-
"GenISA_source_value intrinsic which sources the result of atomic operation and still maintains the order.", true)
439436
DECLARE_IGC_REGKEY(bool, UnrollLoopForCodeSizeOnly, false,
440437
"Only unroll the loop if it can reduce program size/register pressure. Ignore all other threshold "
441438
"setting but still enable EnablePromoteLoopUnrollwithAlloca due to high likelyhood to reduce size.",

0 commit comments

Comments
 (0)