-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Use VPlan operand order for VPBlendRecipes. #139475
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
bbe4f92
ceba41a
aaf50ee
c885321
017511b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8538,36 +8538,30 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate( | |
return nullptr; | ||
} | ||
|
||
VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, | ||
ArrayRef<VPValue *> Operands) { | ||
unsigned NumIncoming = Phi->getNumIncomingValues(); | ||
VPBlendRecipe *VPRecipeBuilder::tryToBlend(VPWidenPHIRecipe *PhiR) { | ||
unsigned NumIncoming = PhiR->getNumIncoming(); | ||
|
||
// We know that all PHIs in non-header blocks are converted into selects, so | ||
// we don't have to worry about the insertion order and we can just use the | ||
// builder. At this point we generate the predication tree. There may be | ||
// duplications since this is a simple recursive scan, but future | ||
// optimizations will clean it up. | ||
|
||
// Map incoming IR BasicBlocks to incoming VPValues, for lookup below. | ||
// TODO: Add operands and masks in order from the VPlan predecessors. | ||
DenseMap<BasicBlock *, VPValue *> VPIncomingValues; | ||
for (const auto &[Idx, Pred] : enumerate(predecessors(Phi->getParent()))) | ||
VPIncomingValues[Pred] = Operands[Idx]; | ||
|
||
SmallVector<VPValue *, 2> OperandsWithMask; | ||
for (unsigned In = 0; In < NumIncoming; In++) { | ||
BasicBlock *Pred = Phi->getIncomingBlock(In); | ||
OperandsWithMask.push_back(VPIncomingValues.lookup(Pred)); | ||
VPValue *EdgeMask = getEdgeMask(Pred, Phi->getParent()); | ||
OperandsWithMask.push_back(PhiR->getIncomingValue(In)); | ||
const VPBasicBlock *Pred = PhiR->getIncomingBlock(In); | ||
VPValue *EdgeMask = getEdgeMask(Pred, PhiR->getParent()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. getEdgeMask() works with a given VPBB instead of BB? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, in preparation for VPPredicator, I sunk accesses of |
||
if (!EdgeMask) { | ||
assert(In == 0 && "Both null and non-null edge masks found"); | ||
assert(all_equal(Operands) && | ||
assert(all_equal(PhiR->operands()) && | ||
"Distinct incoming values with one having a full mask"); | ||
break; | ||
} | ||
OperandsWithMask.push_back(EdgeMask); | ||
} | ||
return new VPBlendRecipe(Phi, OperandsWithMask); | ||
return new VPBlendRecipe(cast<PHINode>(PhiR->getUnderlyingInstr()), | ||
OperandsWithMask); | ||
} | ||
|
||
VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, | ||
|
@@ -8954,15 +8948,21 @@ bool VPRecipeBuilder::getScaledReductions( | |
return false; | ||
} | ||
|
||
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( | ||
Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range) { | ||
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, | ||
VFRange &Range) { | ||
// First, check for specific widening recipes that deal with inductions, Phi | ||
// nodes, calls and memory operations. | ||
VPRecipeBase *Recipe; | ||
if (auto *Phi = dyn_cast<PHINode>(Instr)) { | ||
if (Phi->getParent() != OrigLoop->getHeader()) | ||
return tryToBlend(Phi, Operands); | ||
|
||
Instruction *Instr = R->getUnderlyingInstr(); | ||
SmallVector<VPValue *, 4> Operands(R->operands()); | ||
if (auto *PhiR = dyn_cast<VPWidenPHIRecipe>(R)) { | ||
VPBasicBlock *Parent = PhiR->getParent(); | ||
VPRegionBlock *LoopRegionOf = Parent->getEnclosingLoopRegion(); | ||
// Handle phis in non-header blocks. | ||
if (!LoopRegionOf || LoopRegionOf->getEntry() != Parent) | ||
return tryToBlend(PhiR); | ||
|
||
auto *Phi = cast<PHINode>(R->getUnderlyingInstr()); | ||
assert(Operands.size() == 2 && "Must have 2 operands for header phis"); | ||
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range))) | ||
return Recipe; | ||
|
@@ -9527,11 +9527,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range, | |
continue; | ||
} | ||
|
||
SmallVector<VPValue *, 4> Operands(R.operands()); | ||
VPRecipeBase *Recipe = | ||
RecipeBuilder.tryToCreateWidenRecipe(Instr, Operands, Range); | ||
if (!Recipe) | ||
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range); | ||
if (!Recipe) { | ||
SmallVector<VPValue *, 4> Operands(R.operands()); | ||
Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range); | ||
} | ||
|
||
RecipeBuilder.setRecipe(Instr, Recipe); | ||
if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) && isa<TruncInst>(Instr)) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ define void @test_blend_feeding_replicated_store_1(i64 %N, ptr noalias %src, ptr | |
; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i1> zeroinitializer, <16 x i1> zeroinitializer | ||
; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true) | ||
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP7]], [[TMP8]] | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP7]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> zeroinitializer | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> zeroinitializer | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this change ok - from TMP7 (=TMP6?0:0, i.e., =0) to TMP6 (which may include non-zeroes)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is due to the second condition being constant There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, ok, only some entries of PREDPHI are used/live - when !TMP6, and then they must be set to null; other entries may be set arbitrarily as in undef. Setting all to null seems a simpler solution? Such trivial "branch on false" cases deserve folding before predicating - blend is redundant and best avoided. |
||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP9]], i32 0 | ||
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] | ||
; CHECK: [[PRED_STORE_IF]]: | ||
|
@@ -219,7 +219,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i | |
; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], splat (i1 true) | ||
; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP4]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer | ||
; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], [[TMP3]] | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> splat (i8 1) | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> splat (i8 1), <16 x i8> zeroinitializer | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Select operands exchange order and condition changes from TMP6=(!TMP3)?TMP5:0 to TMP3? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So entries of TMP3 that are true continue to set PREDPHI to i8 1, but entries of TMP3 that are false originally set PREDPHI to either 0 or 1 according to TMP5 (which is !%c.0 if followed correctly), but now set it to 0 regardless of TMP5? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, but we only use i8 1 when TMP3 is true and i8 0 otherwise. When TMP3 is false, PREDPHI is not used when %c.0 is true. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, ok, if TMP3 is false only some entries of PREDPHI are used/live - and then these must be set to 0; other entries may be set arbitrarily as in undef. Setting all to 0 seems a simpler solution. Branches on invariant conditions (as in %c.0) could be optimized by loop unswitching or retained in the vectorized loop as being uniform. |
||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0 | ||
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] | ||
; CHECK: [[PRED_STORE_IF]]: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -354,7 +354,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { | |
; TFCOMMON-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP8]]) | ||
; TFCOMMON-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer | ||
; TFCOMMON-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP10]]) | ||
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]] | ||
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP9]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TMP10 seems to be the complement of TMP8 where it matters - for lanes that are active, given that TMP7 = !TMP6. |
||
; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] | ||
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) | ||
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] | ||
|
@@ -397,8 +397,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { | |
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer | ||
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP19]]) | ||
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP20]]) | ||
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i64> [[TMP17]], <vscale x 2 x i64> [[TMP21]] | ||
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP16]], <vscale x 2 x i64> [[TMP18]], <vscale x 2 x i64> [[TMP22]] | ||
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP19]], <vscale x 2 x i64> [[TMP21]], <vscale x 2 x i64> [[TMP17]] | ||
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP20]], <vscale x 2 x i64> [[TMP22]], <vscale x 2 x i64> [[TMP18]] | ||
Comment on lines
+400
to
+401
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Swap operands based on complementary TMP13 = !TMP11 and TMP14 = !TMP12. |
||
; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] | ||
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() | ||
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,8 +46,8 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 | |
; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], splat (i1 true) | ||
; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP20]], <vscale x 8 x i1> zeroinitializer | ||
; CHECK-NEXT: [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]] | ||
; CHECK-NEXT: [[EXT:%.*]] = extractelement <vscale x 8 x i1> [[TMP19]], i32 0 | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[EXT]], i64 [[INDEX]], i64 poison | ||
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i1> [[TMP21]], i32 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TMP21 should complement TMP19 (at least its first element/bit) to retain semantics of flipping PREDPHI's operands. I.e., TMP13 ? TMP20 : 0 should complement TMP17 ? TMP18 : 0. Gets harder to follow. |
||
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]] | ||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] | ||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0 | ||
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr [[TMP25]], i32 2, <vscale x 8 x i1> [[TMP22]]) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,9 +20,9 @@ define void @test(ptr %p, i64 %a, i8 %b) { | |
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] | ||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ] | ||
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9) | ||
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <16 x i32> [[VEC_IND]], splat (i32 2) | ||
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2) | ||
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP3]], <16 x i32> [[TMP2]] | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]] | ||
Comment on lines
+23
to
+25
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Flipping select operands along with complementing its condition - the relevant part of which is masked via TMP5. |
||
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8) | ||
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8> | ||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,10 +33,10 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { | |
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] | ||
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META3]] | ||
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] | ||
; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 20) | ||
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 19) | ||
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4) | ||
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5) | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> splat (i32 3) | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> splat (i32 3), <4 x i32> [[TMP10]] | ||
Comment on lines
+36
to
+39
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Trivial flipping. |
||
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> splat (i32 9) | ||
; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META0]], !noalias [[META3]] | ||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
|
@@ -141,16 +141,14 @@ define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { | |
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12]] | ||
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] | ||
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 19) | ||
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) | ||
; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]] | ||
; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4) | ||
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5) | ||
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 6), <4 x i32> splat (i32 11) | ||
; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 3), <4 x i32> splat (i32 9) | ||
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]] | ||
; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 7), <4 x i32> splat (i32 18) | ||
; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI4]] | ||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 3), <4 x i32> [[TMP12]] | ||
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> splat (i32 9) | ||
; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 7), <4 x i32> [[TMP13]] | ||
; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI4]], <4 x i32> splat (i32 18) | ||
; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META9]], !noalias [[META12]] | ||
; CHECK-NEXT: store <4 x i32> [[PREDPHI5]], ptr [[TMP6]], align 4, !alias.scope [[META12]] | ||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The operands of VPWidenPHIRecipe are set according to the order of the original predecessors of the underlying Phi, so it is unclear why this change affects the tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
They were set according to the order of incoming values/blocks in the original phi, which in some cases can be different to the order of predecessors in LLVM IR, which is causing the test changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to set the operands according to the order of predecessors instead? At least temporarily, or as an initial step, this would separate the reordering and its associated test effects into a separate patch, leaving this one an NFC.
PlainCFGBuilder::createVPInstructionsForVPBB()
sets the operands of VPWidenPHIRecipe according to the order of VPBB's predecessors, which should be consistent with that of the underlying BB. It is the code below which traverses the operands of the original Phi according to their order. Would something like the following change here switch to predecessor order?