|
19 | 19 | //
|
20 | 20 | //===----------------------------------------------------------------------===//
|
21 | 21 |
|
| 22 | +#include "AMDGPUUnifyDivergentExitNodes.h" |
22 | 23 | #include "AMDGPU.h"
|
23 | 24 | #include "SIDefines.h"
|
24 | 25 | #include "llvm/ADT/ArrayRef.h"
|
@@ -53,40 +54,48 @@ using namespace llvm;
|
53 | 54 |
|
54 | 55 | namespace {
|
55 | 56 |
|
56 |
| -class AMDGPUUnifyDivergentExitNodes : public FunctionPass { |
| 57 | +class AMDGPUUnifyDivergentExitNodesImpl { |
57 | 58 | private:
|
58 | 59 | const TargetTransformInfo *TTI = nullptr;
|
59 | 60 |
|
60 | 61 | public:
|
61 |
| - static char ID; // Pass identification, replacement for typeid |
62 |
| - |
63 |
| - AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) { |
64 |
| - initializeAMDGPUUnifyDivergentExitNodesPass(*PassRegistry::getPassRegistry()); |
65 |
| - } |
| 62 | + AMDGPUUnifyDivergentExitNodesImpl() = delete; |
| 63 | + AMDGPUUnifyDivergentExitNodesImpl(const TargetTransformInfo *TTI) |
| 64 | + : TTI(TTI) {} |
66 | 65 |
|
67 | 66 | // We can preserve non-critical-edgeness when we unify function exit nodes
|
68 |
| - void getAnalysisUsage(AnalysisUsage &AU) const override; |
69 | 67 | BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU,
|
70 | 68 | ArrayRef<BasicBlock *> ReturningBlocks,
|
71 | 69 | StringRef Name);
|
72 |
| - bool runOnFunction(Function &F) override; |
| 70 | + bool run(Function &F, DominatorTree &DT, const PostDominatorTree &PDT, |
| 71 | + const UniformityInfo &UA); |
73 | 72 | };
|
74 | 73 |
|
| 74 | +class AMDGPUUnifyDivergentExitNodes : public FunctionPass { |
| 75 | +public: |
| 76 | + static char ID; |
| 77 | + AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) { |
| 78 | + initializeAMDGPUUnifyDivergentExitNodesPass( |
| 79 | + *PassRegistry::getPassRegistry()); |
| 80 | + } |
| 81 | + void getAnalysisUsage(AnalysisUsage &AU) const override; |
| 82 | + bool runOnFunction(Function &F) override; |
| 83 | +}; |
75 | 84 | } // end anonymous namespace
|
76 | 85 |
|
// Storage for the legacy pass's static ID member.
77 | 86 | char AMDGPUUnifyDivergentExitNodes::ID = 0;
|
78 | 87 |
|
// Expose the ID through the llvm namespace as a reference to the member above.
79 | 88 | char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
|
80 | 89 |
|
// Legacy pass registration. The dependencies listed are the dominator tree,
// post-dominator tree and uniformity wrapper passes.
// NOTE(review): runOnFunction() also queries TargetTransformInfoWrapperPass,
// which is not listed as a dependency here -- confirm whether it should be.
81 | 90 | INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
|
82 |
| - "Unify divergent function exit nodes", false, false) |
| 91 | + "Unify divergent function exit nodes", false, false) |
83 | 92 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
84 | 93 | INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
|
85 | 94 | INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
|
86 | 95 | INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
|
87 | 96 | "Unify divergent function exit nodes", false, false)
|
88 | 97 |
|
89 |
| -void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ |
| 98 | +void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const { |
90 | 99 | if (RequireAndPreserveDomTree)
|
91 | 100 | AU.addRequired<DominatorTreeWrapperPass>();
|
92 | 101 |
|
@@ -132,7 +141,7 @@ static bool isUniformlyReached(const UniformityInfo &UA, BasicBlock &BB) {
|
132 | 141 | return true;
|
133 | 142 | }
|
134 | 143 |
|
135 |
| -BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet( |
| 144 | +BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet( |
136 | 145 | Function &F, DomTreeUpdater &DTU, ArrayRef<BasicBlock *> ReturningBlocks,
|
137 | 146 | StringRef Name) {
|
138 | 147 | // Otherwise, we need to insert a new basic block into the function, add a PHI
|
@@ -180,21 +189,14 @@ BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet(
|
180 | 189 | return NewRetBlock;
|
181 | 190 | }
|
182 | 191 |
|
183 |
| -bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { |
184 |
| - DominatorTree *DT = nullptr; |
185 |
| - if (RequireAndPreserveDomTree) |
186 |
| - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
187 |
| - |
188 |
| - auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); |
| 192 | +bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree &DT, |
| 193 | + const PostDominatorTree &PDT, |
| 194 | + const UniformityInfo &UA) { |
189 | 195 | if (PDT.root_size() == 0 ||
|
190 | 196 | (PDT.root_size() == 1 &&
|
191 | 197 | !isa<BranchInst>(PDT.getRoot()->getTerminator())))
|
192 | 198 | return false;
|
193 | 199 |
|
194 |
| - UniformityInfo &UA = |
195 |
| - getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); |
196 |
| - TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
197 |
| - |
198 | 200 | // Loop over all of the blocks in a function, tracking all of the blocks that
|
199 | 201 | // return.
|
200 | 202 | SmallVector<BasicBlock *, 4> ReturningBlocks;
|
@@ -327,3 +329,30 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
|
327 | 329 | unifyReturnBlockSet(F, DTU, ReturningBlocks, "UnifiedReturnBlock");
|
328 | 330 | return true;
|
329 | 331 | }
|
| 332 | + |
| 333 | +bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { |
| 334 | + DominatorTree *DT = nullptr; |
| 335 | + if (RequireAndPreserveDomTree) |
| 336 | + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
| 337 | + const auto &PDT = |
| 338 | + getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); |
| 339 | + const auto &UA = getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); |
| 340 | + const auto *TranformInfo = |
| 341 | + &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
| 342 | + return AMDGPUUnifyDivergentExitNodesImpl(TranformInfo).run(F, *DT, PDT, UA); |
| 343 | +} |
| 344 | + |
| 345 | +PreservedAnalyses |
| 346 | +AMDGPUUnifyDivergentExitNodesPass::run(Function &F, |
| 347 | + FunctionAnalysisManager &AM) { |
| 348 | + DominatorTree *DT = nullptr; |
| 349 | + if (RequireAndPreserveDomTree) |
| 350 | + DT = &AM.getResult<DominatorTreeAnalysis>(F); |
| 351 | + |
| 352 | + const auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); |
| 353 | + const auto &UA = AM.getResult<UniformityInfoAnalysis>(F); |
| 354 | + const auto *TransformInfo = &AM.getResult<TargetIRAnalysis>(F); |
| 355 | + return AMDGPUUnifyDivergentExitNodesImpl(TransformInfo).run(F, *DT, PDT, UA) |
| 356 | + ? PreservedAnalyses::none() |
| 357 | + : PreservedAnalyses::all(); |
| 358 | +} |
0 commit comments