Skip to content

Commit a7599dc

Browse files
committed
[LV] Initial support for stores in early exit loops
Adds some basic support for a simple early exit loop with a store. This is vectorized such that when the next vector iteration would exit, we bail out to the scalar loop to handle the exit.
1 parent 5fe91f1 commit a7599dc

File tree

10 files changed

+599
-58
lines changed

10 files changed

+599
-58
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,13 @@ class LoopVectorizationLegality {
407407
return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
408408
}
409409

410+
/// Returns true if the early exit condition must be copied into the vector preheader, i.e. this is an early exit loop containing a store.
411+
bool isConditionCopyRequired() const { return RequiresEarlyExitConditionCopy; }
412+
413+
/// Returns the load feeding the uncountable early exit condition, if one was
414+
/// recorded (only done for early exit loops containing a store).
415+
std::optional<LoadInst *> getEarlyExitLoad() const { return EarlyExitLoad; }
416+
410417
/// Return true if there is store-load forwarding dependencies.
411418
bool isSafeForAnyStoreLoadForwardDistances() const {
412419
return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
@@ -654,6 +661,16 @@ class LoopVectorizationLegality {
654661
/// Keep track of the loop edge to an uncountable exit, comprising a pair
655662
/// of (Exiting, Exit) blocks, if there is exactly one early exit.
656663
std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
664+
665+
/// Indicates that we will need to copy the early exit condition into
666+
/// the vector preheader, as we will need to mask some operations in
667+
/// the loop (e.g. stores).
668+
bool RequiresEarlyExitConditionCopy = false;
669+
670+
/// The load used to determine an uncountable early-exit condition. This is
671+
/// only used to allow further analysis in canVectorizeMemory if we found
672+
/// what looks like a valid early exit loop with store beforehand.
673+
std::optional<LoadInst *> EarlyExitLoad;
657674
};
658675

659676
} // namespace llvm

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 117 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
1818
#include "llvm/Analysis/Loads.h"
1919
#include "llvm/Analysis/LoopInfo.h"
20+
#include "llvm/Analysis/MustExecute.h"
2021
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
2122
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
2223
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1209,6 +1210,36 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
12091210
});
12101211
}
12111212

1213+
// FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
1214+
// since we're (potentially) doing the load out of its normal order
1215+
// in the loop and that may throw off dependency checking.
1216+
// A forward dependency should be fine, but a backwards dep may not
1217+
// be even if LAA thinks it is due to performing the load for the
1218+
// vector iteration i+1 in vector iteration i.
1219+
if (isConditionCopyRequired()) {
1220+
assert(EarlyExitLoad.has_value() && "EE Store without condition load.");
1221+
1222+
if (LAI->canVectorizeMemory()) {
1223+
const MemoryDepChecker &DepChecker = LAI->getDepChecker();
1224+
const auto *Deps = DepChecker.getDependences();
1225+
1226+
for (const MemoryDepChecker::Dependence &Dep : *Deps) {
1227+
if (Dep.getDestination(DepChecker) == EarlyExitLoad ||
1228+
Dep.getSource(DepChecker) == EarlyExitLoad) {
1229+
// TODO: Refine the wording of this remark; the restriction currently only
1230+
// applies when a store is present in the early exit loop.
1231+
reportVectorizationFailure(
1232+
"No dependencies allowed for early exit condition load",
1233+
"Early exit condition loads may not have a dependence with another"
1234+
" memory operation.",
1235+
"CantVectorizeStoreToLoopInvariantAddress", ORE,
1236+
TheLoop);
1237+
return false;
1238+
}
1239+
}
1240+
}
1241+
}
1242+
12121243
if (!LAI->canVectorizeMemory())
12131244
return canVectorizeIndirectUnsafeDependences();
12141245

@@ -1627,6 +1658,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
16271658
// Keep a record of all the exiting blocks.
16281659
SmallVector<const SCEVPredicate *, 4> Predicates;
16291660
std::optional<std::pair<BasicBlock *, BasicBlock *>> SingleUncountableEdge;
1661+
std::optional<LoadInst *> EELoad;
16301662
for (BasicBlock *BB : ExitingBlocks) {
16311663
const SCEV *EC =
16321664
PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
@@ -1656,6 +1688,21 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
16561688
return false;
16571689
}
16581690

1691+
// For loops with stores.
1692+
// Record load for analysis by isDereferenceableAndAlignedInLoop
1693+
// and later by dependence analysis.
1694+
if (BranchInst *Br = dyn_cast<BranchInst>(BB->getTerminator())) {
1695+
// FIXME: Handle exit conditions with multiple users, more complex exit
1696+
// conditions than br(icmp(load, loop_inv)).
1697+
ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
1698+
if (Cmp && Cmp->hasOneUse() &&
1699+
TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
1700+
LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
1701+
if (Load && Load->hasOneUse() && TheLoop->contains(Load))
1702+
EELoad = Load;
1703+
}
1704+
}
1705+
16591706
SingleUncountableEdge = {BB, ExitBlock};
16601707
} else
16611708
CountableExitingBlocks.push_back(BB);
@@ -1708,16 +1755,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
17081755
}
17091756
};
17101757

1758+
bool HasStore = false;
17111759
for (auto *BB : TheLoop->blocks())
17121760
for (auto &I : *BB) {
1761+
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
1762+
HasStore = true;
1763+
if (SI->isSimple())
1764+
continue;
1765+
1766+
reportVectorizationFailure(
1767+
"Complex writes to memory unsupported in early exit loops",
1768+
"Cannot vectorize early exit loop with complex writes to memory",
1769+
"WritesInEarlyExitLoop", ORE, TheLoop);
1770+
return false;
1771+
}
1772+
17131773
if (I.mayWriteToMemory()) {
17141774
// We don't support writes to memory other than simple stores.
17151775
reportVectorizationFailure(
1716-
"Writes to memory unsupported in early exit loops",
1717-
"Cannot vectorize early exit loop with writes to memory",
1776+
"Complex writes to memory unsupported in early exit loops",
1777+
"Cannot vectorize early exit loop with complex writes to memory",
17181778
"WritesInEarlyExitLoop", ORE, TheLoop);
17191779
return false;
1720-
} else if (!IsSafeOperation(&I)) {
1780+
}
1781+
1782+
if (!IsSafeOperation(&I)) {
17211783
reportVectorizationFailure("Early exit loop contains operations that "
17221784
"cannot be speculatively executed",
17231785
"UnsafeOperationsEarlyExitLoop", ORE,
@@ -1732,13 +1794,53 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
17321794

17331795
// TODO: Handle loops that may fault.
17341796
Predicates.clear();
1735-
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
1736-
&Predicates)) {
1797+
1798+
if (HasStore && EELoad.has_value()) {
1799+
LoadInst *LI = *EELoad;
1800+
if (isDereferenceableAndAlignedInLoop(LI, TheLoop, *PSE.getSE(), *DT, AC,
1801+
&Predicates)) {
1802+
ICFLoopSafetyInfo SafetyInfo;
1803+
SafetyInfo.computeLoopSafetyInfo(TheLoop);
1804+
// FIXME: We may have multiple levels of conditional loads, so will
1805+
// need to improve on outright rejection at some point.
1806+
// The early exit condition load must execute on every iteration; if it is
// not guaranteed to, report the failure and reject the loop.
if (!SafetyInfo.isGuaranteedToExecute(*LI, DT, TheLoop)) {
1807+
LLVM_DEBUG(
1808+
dbgs() << "Early exit condition load not guaranteed to execute.\n");
1809+
reportVectorizationFailure(
1810+
"Early exit condition load not guaranteed to execute",
1811+
"Cannot vectorize early exit loop when condition load is not "
1812+
"guaranteed to execute",
1813+
"EarlyExitLoadNotGuaranteed", ORE, TheLoop);
// Reject the loop: without this return the failure is only reported and
// legality analysis would continue as if the loop were vectorizable.
return false;
1814+
}
1815+
} else {
1816+
LLVM_DEBUG(dbgs() << "Early exit condition load potentially unsafe.\n");
1817+
reportVectorizationFailure("Uncounted loop condition not known safe",
1818+
"Cannot vectorize early exit loop with "
1819+
"possibly unsafe condition load",
1820+
"PotentiallyFaultingEarlyExitLoop", ORE,
1821+
TheLoop);
1822+
return false;
1823+
}
1824+
} else if (HasStore) {
1825+
LLVM_DEBUG(dbgs() << "Found early exit store but no condition load.\n");
17371826
reportVectorizationFailure(
1738-
"Loop may fault",
1739-
"Cannot vectorize potentially faulting early exit loop",
1740-
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1827+
"Early exit loop with store but no condition load",
1828+
"Cannot vectorize early exit loop with store but no condition load",
1829+
"NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
17411830
return false;
1831+
} else {
1832+
// Read-only loop.
1833+
// FIXME: as with the loops with stores, only the loads contributing to
1834+
// the loop condition need to be guaranteed dereferenceable and
1835+
// aligned.
1836+
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
1837+
&Predicates)) {
1838+
reportVectorizationFailure(
1839+
"Loop may fault",
1840+
"Cannot vectorize potentially faulting early exit loop",
1841+
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1842+
return false;
1843+
}
17421844
}
17431845

17441846
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
@@ -1751,6 +1853,11 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
17511853
"backedge taken count: "
17521854
<< *SymbolicMaxBTC << '\n');
17531855
UncountableEdge = SingleUncountableEdge;
1856+
if (HasStore) {
1857+
RequiresEarlyExitConditionCopy = true;
1858+
EarlyExitLoad = EELoad;
1859+
}
1860+
17541861
return true;
17551862
}
17561863

@@ -1823,6 +1930,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
18231930
} else {
18241931
if (!isVectorizableEarlyExitLoop()) {
18251932
UncountableEdge = std::nullopt;
1933+
EarlyExitLoad = std::nullopt;
1934+
RequiresEarlyExitConditionCopy = false;
18261935
if (DoExtraAnalysis)
18271936
Result = false;
18281937
else

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9246,6 +9246,15 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
92469246
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
92479247
*Plan, CM.getMinimalBitwidths());
92489248
VPlanTransforms::optimize(*Plan);
9249+
9250+
// See if we can convert an early exit vplan to bail out to a scalar
9251+
// loop if state-changing operations (like stores) are present and
9252+
// an exit will be taken in the next vector iteration.
9253+
// If not, discard the plan.
9254+
if (Legal->isConditionCopyRequired() && !HasScalarVF &&
9255+
!VPlanTransforms::runPass(VPlanTransforms::tryEarlyExitConversion,
9256+
*Plan))
9257+
break;
92499258
// TODO: try to put it close to addActiveLaneMask().
92509259
// Discard the plan if it is not EVL-compatible
92519260
if (CM.foldTailWithEVL() && !HasScalarVF &&
@@ -9570,6 +9579,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95709579
},
95719580
Range);
95729581
auto Plan = std::make_unique<VPlan>(OrigLoop);
9582+
9583+
// FIXME: Better place to put this? Or maybe an enum for how to handle
9584+
// early exits?
9585+
if (Legal->hasUncountableEarlyExit())
9586+
Plan->setEarlyExitContinuesInScalarLoop(Legal->isConditionCopyRequired());
9587+
95739588
// Build hierarchical CFG.
95749589
// TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
95759590
// creation.
@@ -9876,6 +9891,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
98769891

98779892
// Create new empty VPlan
98789893
auto Plan = std::make_unique<VPlan>(OrigLoop);
9894+
9895+
// FIXME: Better place to put this? Or maybe an enum for how to handle
9896+
// early exits?
9897+
if (Legal->hasUncountableEarlyExit())
9898+
Plan->setEarlyExitContinuesInScalarLoop(Legal->isConditionCopyRequired());
9899+
98799900
// Build hierarchical CFG
98809901
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
98819902
HCFGBuilder.buildPlainCFG();

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3522,6 +3522,13 @@ class VPlan {
35223522
/// VPlan is destroyed.
35233523
SmallVector<VPBlockBase *> CreatedBlocks;
35243524

3525+
/// Indicates that the vector loop of an early exit loop bails out before the
3526+
/// vector iteration that would take the exit, so the scalar loop must perform the remaining iterations.
3527+
/// FIXME: Is this the right place? We mainly want to make sure that we
3528+
/// know about this for transforming the plan to copy&move the exit
3529+
/// condition, but maybe it doesn't need to be in the plan itself.
3530+
bool EarlyExitContinuesInScalarLoop = false;
3531+
35253532
/// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
35263533
/// wrapping the original header of the scalar loop.
35273534
VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
@@ -3825,6 +3832,16 @@ class VPlan {
38253832
return ExitBlocks.size() > 1 || ExitBlocks[0]->getNumPredecessors() > 1;
38263833
}
38273834

3835+
/// Returns true if all exit paths should reach the scalar loop.
3836+
bool shouldEarlyExitContinueInScalarLoop() const {
3837+
return EarlyExitContinuesInScalarLoop;
3838+
}
3839+
3840+
/// Sets whether early exit vectorization must always continue in the scalar loop.
3841+
void setEarlyExitContinuesInScalarLoop(bool Continues) {
3842+
EarlyExitContinuesInScalarLoop = Continues;
3843+
}
3844+
38283845
/// Returns true if the scalar tail may execute after the vector loop. Note
38293846
/// that this relies on unneeded branches to the scalar tail loop being
38303847
/// removed.

0 commit comments

Comments
 (0)