Skip to content

Commit 20853a7

Browse files
committed
[InstCombine] Simplify cttz/ctlz + icmp eq/ne into mask check
Checking whether a number has a certain number of trailing / leading zeros means checking whether it is of the form XXXX1000 / 0001XXXX, which can be done with an and+icmp. Related to https://bugs.llvm.org/show_bug.cgi?id=28668. As a next step, this can be extended to non-equality predicates. Differential Revision: https://reviews.llvm.org/D55745 llvm-svn: 349530
1 parent 59ee2c5 commit 20853a7

File tree

3 files changed

+68
-37
lines changed

3 files changed

+68
-37
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2765,6 +2765,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
27652765

27662766
// Handle icmp {eq|ne} <intrinsic>, Constant.
27672767
Type *Ty = II->getType();
2768+
unsigned BitWidth = C.getBitWidth();
27682769
switch (II->getIntrinsicID()) {
27692770
case Intrinsic::bswap:
27702771
Worklist.Add(II);
@@ -2773,21 +2774,39 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
27732774
return &Cmp;
27742775

27752776
case Intrinsic::ctlz:
2776-
case Intrinsic::cttz:
2777+
case Intrinsic::cttz: {
27772778
// ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
2778-
if (C == C.getBitWidth()) {
2779+
if (C == BitWidth) {
27792780
Worklist.Add(II);
27802781
Cmp.setOperand(0, II->getArgOperand(0));
27812782
Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
27822783
return &Cmp;
27832784
}
2785+
2786+
// cttz(A) == C -> (A & Mask1) == Mask2, where Mask2 only has bit C set
2787+
// and Mask1 has bits 0..C set (the low C+1 bits). Similar for ctlz, but for high bits.
2788+
// Limit to one use to ensure we don't increase instruction count.
2789+
unsigned Num = C.getLimitedValue(BitWidth);
2790+
if (Num != BitWidth && II->hasOneUse()) {
2791+
bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
2792+
APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
2793+
: APInt::getHighBitsSet(BitWidth, Num + 1);
2794+
APInt Mask2 = IsTrailing
2795+
? APInt::getOneBitSet(BitWidth, Num)
2796+
: APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
2797+
Cmp.setOperand(0, Builder.CreateAnd(II->getArgOperand(0), Mask1));
2798+
Cmp.setOperand(1, ConstantInt::get(Ty, Mask2));
2799+
Worklist.Add(II);
2800+
return &Cmp;
2801+
}
27842802
break;
2803+
}
27852804

27862805
case Intrinsic::ctpop: {
27872806
// popcount(A) == 0 -> A == 0 and likewise for !=
27882807
// popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
27892808
bool IsZero = C.isNullValue();
2790-
if (IsZero || C == C.getBitWidth()) {
2809+
if (IsZero || C == BitWidth) {
27912810
Worklist.Add(II);
27922811
Cmp.setOperand(0, II->getArgOperand(0));
27932812
auto *NewOp =

llvm/test/Transforms/InstCombine/cmp-intrinsic.ll

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,7 @@ define i1 @ctlz_eq_bitwidth_i32(i32 %x) {
5454

5555
define i1 @ctlz_eq_zero_i32(i32 %x) {
5656
; CHECK-LABEL: @ctlz_eq_zero_i32(
57-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
58-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LZ]], 0
57+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
5958
; CHECK-NEXT: ret i1 [[CMP]]
6059
;
6160
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -65,8 +64,7 @@ define i1 @ctlz_eq_zero_i32(i32 %x) {
6564

6665
define <2 x i1> @ctlz_ne_zero_v2i32(<2 x i32> %a) {
6766
; CHECK-LABEL: @ctlz_ne_zero_v2i32(
68-
; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[A:%.*]], i1 false)
69-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], zeroinitializer
67+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 -1, i32 -1>
7068
; CHECK-NEXT: ret <2 x i1> [[CMP]]
7169
;
7270
%x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
@@ -76,8 +74,7 @@ define <2 x i1> @ctlz_ne_zero_v2i32(<2 x i32> %a) {
7674

7775
define i1 @ctlz_eq_bw_minus_1_i32(i32 %x) {
7876
; CHECK-LABEL: @ctlz_eq_bw_minus_1_i32(
79-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
80-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LZ]], 31
77+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 1
8178
; CHECK-NEXT: ret i1 [[CMP]]
8279
;
8380
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -87,8 +84,7 @@ define i1 @ctlz_eq_bw_minus_1_i32(i32 %x) {
8784

8885
define <2 x i1> @ctlz_ne_bw_minus_1_v2i32(<2 x i32> %a) {
8986
; CHECK-LABEL: @ctlz_ne_bw_minus_1_v2i32(
90-
; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[A:%.*]], i1 false)
91-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], <i32 31, i32 31>
87+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], <i32 1, i32 1>
9288
; CHECK-NEXT: ret <2 x i1> [[CMP]]
9389
;
9490
%x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
@@ -98,8 +94,8 @@ define <2 x i1> @ctlz_ne_bw_minus_1_v2i32(<2 x i32> %a) {
9894

9995
define i1 @ctlz_eq_other_i32(i32 %x) {
10096
; CHECK-LABEL: @ctlz_eq_other_i32(
101-
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
102-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LZ]], 24
97+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -128
98+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 128
10399
; CHECK-NEXT: ret i1 [[CMP]]
104100
;
105101
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
@@ -109,15 +105,28 @@ define i1 @ctlz_eq_other_i32(i32 %x) {
109105

110106
define <2 x i1> @ctlz_ne_other_v2i32(<2 x i32> %a) {
111107
; CHECK-LABEL: @ctlz_ne_other_v2i32(
112-
; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[A:%.*]], i1 false)
113-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], <i32 24, i32 24>
108+
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -128, i32 -128>
109+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 128, i32 128>
114110
; CHECK-NEXT: ret <2 x i1> [[CMP]]
115111
;
116112
%x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
117113
%cmp = icmp ne <2 x i32> %x, <i32 24, i32 24>
118114
ret <2 x i1> %cmp
119115
}
120116

117+
define i1 @ctlz_eq_other_i32_multiuse(i32 %x, i32* %p) {
118+
; CHECK-LABEL: @ctlz_eq_other_i32_multiuse(
119+
; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
120+
; CHECK-NEXT: store i32 [[LZ]], i32* [[P:%.*]], align 4
121+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LZ]], 24
122+
; CHECK-NEXT: ret i1 [[CMP]]
123+
;
124+
%lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
125+
store i32 %lz, i32* %p
126+
%cmp = icmp eq i32 %lz, 24
127+
ret i1 %cmp
128+
}
129+
121130
define <2 x i1> @ctlz_ne_bitwidth_v2i32(<2 x i32> %a) {
122131
; CHECK-LABEL: @ctlz_ne_bitwidth_v2i32(
123132
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer
@@ -150,8 +159,8 @@ define <2 x i1> @cttz_eq_bitwidth_v2i32(<2 x i32> %a) {
150159

151160
define i1 @cttz_eq_zero_i33(i33 %x) {
152161
; CHECK-LABEL: @cttz_eq_zero_i33(
153-
; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
154-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TZ]], 0
162+
; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 1
163+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i33 [[TMP1]], 0
155164
; CHECK-NEXT: ret i1 [[CMP]]
156165
;
157166
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -161,8 +170,8 @@ define i1 @cttz_eq_zero_i33(i33 %x) {
161170

162171
define <2 x i1> @cttz_ne_zero_v2i32(<2 x i32> %a) {
163172
; CHECK-LABEL: @cttz_ne_zero_v2i32(
164-
; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[A:%.*]], i1 false)
165-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], zeroinitializer
173+
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
174+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
166175
; CHECK-NEXT: ret <2 x i1> [[CMP]]
167176
;
168177
%x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
@@ -172,8 +181,7 @@ define <2 x i1> @cttz_ne_zero_v2i32(<2 x i32> %a) {
172181

173182
define i1 @cttz_eq_bw_minus_1_i33(i33 %x) {
174183
; CHECK-LABEL: @cttz_eq_bw_minus_1_i33(
175-
; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
176-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TZ]], 32
184+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[X:%.*]], -4294967296
177185
; CHECK-NEXT: ret i1 [[CMP]]
178186
;
179187
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -183,8 +191,7 @@ define i1 @cttz_eq_bw_minus_1_i33(i33 %x) {
183191

184192
define <2 x i1> @cttz_ne_bw_minus_1_v2i32(<2 x i32> %a) {
185193
; CHECK-LABEL: @cttz_ne_bw_minus_1_v2i32(
186-
; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[A:%.*]], i1 false)
187-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], <i32 31, i32 31>
194+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648>
188195
; CHECK-NEXT: ret <2 x i1> [[CMP]]
189196
;
190197
%x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
@@ -194,8 +201,8 @@ define <2 x i1> @cttz_ne_bw_minus_1_v2i32(<2 x i32> %a) {
194201

195202
define i1 @cttz_eq_other_i33(i33 %x) {
196203
; CHECK-LABEL: @cttz_eq_other_i33(
197-
; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
198-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TZ]], 4
204+
; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 31
205+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 16
199206
; CHECK-NEXT: ret i1 [[CMP]]
200207
;
201208
%tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
@@ -205,15 +212,28 @@ define i1 @cttz_eq_other_i33(i33 %x) {
205212

206213
define <2 x i1> @cttz_ne_other_v2i32(<2 x i32> %a) {
207214
; CHECK-LABEL: @cttz_ne_other_v2i32(
208-
; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[A:%.*]], i1 false)
209-
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], <i32 4, i32 4>
215+
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 31, i32 31>
216+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 16, i32 16>
210217
; CHECK-NEXT: ret <2 x i1> [[CMP]]
211218
;
212219
%x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
213220
%cmp = icmp ne <2 x i32> %x, <i32 4, i32 4>
214221
ret <2 x i1> %cmp
215222
}
216223

224+
define i1 @cttz_eq_other_i33_multiuse(i33 %x, i33* %p) {
225+
; CHECK-LABEL: @cttz_eq_other_i33_multiuse(
226+
; CHECK-NEXT: [[LZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
227+
; CHECK-NEXT: store i33 [[LZ]], i33* [[P:%.*]], align 4
228+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[LZ]], 4
229+
; CHECK-NEXT: ret i1 [[CMP]]
230+
;
231+
%lz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
232+
store i33 %lz, i33* %p
233+
%cmp = icmp eq i33 %lz, 4
234+
ret i1 %cmp
235+
}
236+
217237
define i1 @ctpop_eq_zero_i11(i11 %x) {
218238
; CHECK-LABEL: @ctpop_eq_zero_i11(
219239
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i11 [[X:%.*]], 0

llvm/test/Transforms/InstCombine/intrinsics.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -354,13 +354,9 @@ define i1 @cttz_knownbits3(i32 %arg) {
354354
ret i1 %res
355355
}
356356

357-
; TODO: The icmp is unnecessary given the known bits of the input.
358357
define <2 x i1> @cttz_knownbits3_vec(<2 x i32> %arg) {
359358
; CHECK-LABEL: @cttz_knownbits3_vec(
360-
; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], <i32 4, i32 4>
361-
; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
362-
; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3>
363-
; CHECK-NEXT: ret <2 x i1> [[RES]]
359+
; CHECK-NEXT: ret <2 x i1> zeroinitializer
364360
;
365361
%or = or <2 x i32> %arg, <i32 4, i32 4>
366362
%cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
@@ -450,13 +446,9 @@ define i1 @ctlz_knownbits3(i8 %arg) {
450446
ret i1 %res
451447
}
452448

453-
; TODO: The icmp is unnecessary given the known bits of the input.
454449
define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) {
455450
; CHECK-LABEL: @ctlz_knownbits3_vec(
456-
; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], <i8 32, i8 32>
457-
; CHECK-NEXT: [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true)
458-
; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[CNT]], <i8 3, i8 3>
459-
; CHECK-NEXT: ret <2 x i1> [[RES]]
451+
; CHECK-NEXT: ret <2 x i1> zeroinitializer
460452
;
461453
%or = or <2 x i8> %arg, <i8 32, i8 32>
462454
%cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone

0 commit comments

Comments
 (0)