Skip to content

Commit d97992e

Browse files
committed
[GISel] Combine compare of bitfield extracts or'd together.
Equivalent of the previous DAG patch for GISel. The shifts are BFXs in GISel, so the canonical form of the entire expression is different than in the DAG. The mask is not at the root of the expression, it remains on the leaves instead. See #136727
1 parent a3ee9d6 commit d97992e

File tree

5 files changed

+483
-139
lines changed

5 files changed

+483
-139
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,8 @@ class CombinerHelper {
641641
/// KnownBits information.
642642
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const;
643643

644+
/// Try to rewrite a G_ICMP eq/ne against zero whose LHS is a G_OR tree of
/// G_UBFX / constant-mask G_AND of one common register into a compare of a
/// single masked value of that register.
/// \returns true if \p MI was replaced.
bool combineMergedBFXCompare(MachineInstr &MI) const;
645+
644646
/// \returns true if (and (or x, c1), c2) can be replaced with (and x, c2)
645647
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const;
646648

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1085,6 +1085,14 @@ def double_icmp_zero_or_combine: GICombineRule<
10851085
(G_ICMP $root, $p, $ordst, 0))
10861086
>;
10871087

1088+
// Transform (((G_AND X, Mask) | (G_UBFX X, ...) | ...) == 0) (or != 0)
1089+
// into a compare of an extract/mask of X.
// The pattern below only anchors on a G_ICMP against 0; the helper performs
// the remaining matching and the rewrite itself.
1090+
def icmp_merged_bfx_combine: GICombineRule<
1091+
(defs root:$root),
1092+
(combine (G_ICMP $dst, $p, $src, 0):$root,
1093+
[{ return Helper.combineMergedBFXCompare(*${root}); }])
1094+
>;
1095+
10881096
def and_or_disjoint_mask : GICombineRule<
10891097
(defs root:$root, build_fn_matchinfo:$info),
10901098
(match (wip_match_opcode G_AND):$root,
@@ -2052,7 +2060,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
20522060
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
20532061
simplify_neg_minmax, combine_concat_vector,
20542062
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
2055-
combine_use_vector_truncate, merge_combines, overflow_combines]>;
2063+
combine_use_vector_truncate, merge_combines, overflow_combines,
2064+
icmp_merged_bfx_combine]>;
20562065

20572066
// A combine group used to for prelegalizer combiners at -O0. The combines in
20582067
// this group have been selected based on experiments to balance code size and

llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,92 @@ bool CombinerHelper::matchCanonicalizeFCmp(const MachineInstr &MI,
140140

141141
return false;
142142
}
143+
144+
/// Fold an equality compare against zero of several bitfields of one register
/// that were OR'd together into a compare of a single masked value.
///
/// The LHS of the compare must be a tree of G_OR whose leaves are either
/// `G_UBFX Src, Lsb, Width` or `G_AND Src, Mask` (Mask being a low-bit mask),
/// all reading the same register Src and using constant operands. When the
/// union of the tested bits is itself a low-bit mask, MI is replaced by
///   G_ICMP pred, (G_AND Src, UnionMask), 0
///
/// The RHS of the compare is expected to be zero (asserted).
///
/// \param MI the G_ICMP to combine; it is erased on success.
/// \returns true if MI was rewritten, false if the pattern did not match.
bool CombinerHelper::combineMergedBFXCompare(MachineInstr &MI) const {
  const GICmp *Cmp = cast<GICmp>(&MI);

  // Only (in)equality with zero can be answered by testing a union of bits.
  ICmpInst::Predicate CC = Cmp->getCond();
  if (CC != CmpInst::ICMP_EQ && CC != CmpInst::ICMP_NE)
    return false;

  Register CmpLHS = Cmp->getLHSReg();
  Register CmpRHS = Cmp->getRHSReg();

  LLT OpTy = MRI.getType(CmpLHS);
  if (!OpTy.isScalar() || OpTy.isPointer())
    return false;

  assert(isZeroOrZeroSplat(CmpRHS, /*AllowUndefs=*/false));

  // Every leaf must read the same register. The first leaf seen defines it,
  // later leaves are checked against it.
  Register Src;
  const auto IsSrc = [&](Register R) {
    if (!Src) {
      Src = R;
      return true;
    }

    return Src == R;
  };

  MachineInstr *CmpLHSDef = MRI.getVRegDef(CmpLHS);
  if (CmpLHSDef->getOpcode() != TargetOpcode::G_OR)
    return false;

  const unsigned BitWidth = OpTy.getScalarSizeInBits();

  // Union of all bits of Src that take part in the compare.
  APInt PartsMask(BitWidth, 0);
  SmallVector<MachineInstr *> Worklist = {CmpLHSDef};
  while (!Worklist.empty()) {
    MachineInstr *Cur = Worklist.pop_back_val();

    // Intermediate values must not be needed elsewhere. Debug uses are
    // ignored so that debug info cannot change the generated code.
    Register Dst = Cur->getOperand(0).getReg();
    if (!MRI.hasOneNonDBGUse(Dst) && Dst != Src)
      return false;

    if (Cur->getOpcode() == TargetOpcode::G_OR) {
      Worklist.push_back(MRI.getVRegDef(Cur->getOperand(1).getReg()));
      Worklist.push_back(MRI.getVRegDef(Cur->getOperand(2).getReg()));
      continue;
    }

    if (Cur->getOpcode() == TargetOpcode::G_UBFX) {
      // G_UBFX operands are (dst, src, lsb, width), in that order.
      Register Op = Cur->getOperand(1).getReg();
      Register Off = Cur->getOperand(2).getReg();
      Register Width = Cur->getOperand(3).getReg();

      auto OffCst = getIConstantVRegVal(Off, MRI);
      auto WidthCst = getIConstantVRegVal(Width, MRI);
      if (!WidthCst || !OffCst || !IsSrc(Op))
        return false;

      // Reject ranges that run past the end of the value. The comparison is
      // done on the APInts so that huge constants can neither trip
      // getZExtValue() nor wrap the Start + Width addition.
      if (OffCst->ugt(BitWidth))
        return false;
      const unsigned Start = OffCst->getZExtValue();
      if (WidthCst->ugt(BitWidth - Start))
        return false;
      const unsigned End = Start + WidthCst->getZExtValue();

      PartsMask.setBits(Start, End);
      continue;
    }

    if (Cur->getOpcode() == TargetOpcode::G_AND) {
      Register LHS = Cur->getOperand(1).getReg();
      Register RHS = Cur->getOperand(2).getReg();

      // Only a constant low-bit mask on the RHS is accepted.
      auto MaskCst = getIConstantVRegVal(RHS, MRI);
      if (!MaskCst || !MaskCst->isMask() || !IsSrc(LHS))
        return false;

      PartsMask |= *MaskCst;
      continue;
    }

    // Any other instruction in the tree defeats the combine.
    return false;
  }

  // The merged bits must form one contiguous run starting at bit 0.
  if (!PartsMask.isMask() || !Src)
    return false;

  assert(OpTy == MRI.getType(Src) && "Ignored a type casting operation?");
  auto MaskedSrc =
      Builder.buildAnd(OpTy, Src, Builder.buildConstant(OpTy, PartsMask));
  Builder.buildICmp(CC, Cmp->getReg(0), MaskedSrc, CmpRHS, Cmp->getFlags());
  MI.eraseFromParent();
  return true;
}

0 commit comments

Comments
 (0)