Skip to content

Commit dec770c

Browse files
author
himadhith
committed
NFC test cases lockdown for vector compare equal
1 parent 809e290 commit dec770c

File tree

2 files changed

+382
-0
lines changed

2 files changed

+382
-0
lines changed
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64
3+
// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64LE
4+
// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_32
5+
6+
// POWERPC_64-LABEL: define dso_local signext i32 @test_Greater_than(
7+
// POWERPC_64-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
8+
// POWERPC_64-NEXT: [[ENTRY:.*:]]
9+
// POWERPC_64-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
10+
// POWERPC_64-NEXT: [[RESULT:%.*]] = alloca i16, align 2
11+
// POWERPC_64-NEXT: [[I:%.*]] = alloca i32, align 4
12+
// POWERPC_64-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
13+
// POWERPC_64-NEXT: store i16 0, ptr [[RESULT]], align 2
14+
// POWERPC_64-NEXT: store i32 0, ptr [[I]], align 4
15+
// POWERPC_64-NEXT: br label %[[FOR_COND:.*]]
16+
// POWERPC_64: [[FOR_COND]]:
17+
// POWERPC_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
18+
// POWERPC_64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
19+
// POWERPC_64-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
20+
// POWERPC_64: [[FOR_BODY]]:
21+
// POWERPC_64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
22+
// POWERPC_64-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
23+
// POWERPC_64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
24+
// POWERPC_64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
25+
// POWERPC_64-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
26+
// POWERPC_64-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
27+
// POWERPC_64-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
28+
// POWERPC_64-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
29+
// POWERPC_64: [[IF_THEN]]:
30+
// POWERPC_64-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
31+
// POWERPC_64-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
32+
// POWERPC_64-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
33+
// POWERPC_64-NEXT: br label %[[IF_END]]
34+
// POWERPC_64: [[IF_END]]:
35+
// POWERPC_64-NEXT: br label %[[FOR_INC:.*]]
36+
// POWERPC_64: [[FOR_INC]]:
37+
// POWERPC_64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
38+
// POWERPC_64-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
39+
// POWERPC_64-NEXT: store i32 [[INC3]], ptr [[I]], align 4
40+
// POWERPC_64-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
41+
// POWERPC_64: [[FOR_END]]:
42+
// POWERPC_64-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
43+
// POWERPC_64-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
44+
// POWERPC_64-NEXT: ret i32 [[CONV4]]
45+
//
46+
// POWERPC_64LE-LABEL: define dso_local signext i32 @test_Greater_than(
47+
// POWERPC_64LE-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
48+
// POWERPC_64LE-NEXT: [[ENTRY:.*:]]
49+
// POWERPC_64LE-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
50+
// POWERPC_64LE-NEXT: [[RESULT:%.*]] = alloca i16, align 2
51+
// POWERPC_64LE-NEXT: [[I:%.*]] = alloca i32, align 4
52+
// POWERPC_64LE-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
53+
// POWERPC_64LE-NEXT: store i16 0, ptr [[RESULT]], align 2
54+
// POWERPC_64LE-NEXT: store i32 0, ptr [[I]], align 4
55+
// POWERPC_64LE-NEXT: br label %[[FOR_COND:.*]]
56+
// POWERPC_64LE: [[FOR_COND]]:
57+
// POWERPC_64LE-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
58+
// POWERPC_64LE-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
59+
// POWERPC_64LE-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
60+
// POWERPC_64LE: [[FOR_BODY]]:
61+
// POWERPC_64LE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
62+
// POWERPC_64LE-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
63+
// POWERPC_64LE-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
64+
// POWERPC_64LE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
65+
// POWERPC_64LE-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
66+
// POWERPC_64LE-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
67+
// POWERPC_64LE-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
68+
// POWERPC_64LE-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
69+
// POWERPC_64LE: [[IF_THEN]]:
70+
// POWERPC_64LE-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
71+
// POWERPC_64LE-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
72+
// POWERPC_64LE-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
73+
// POWERPC_64LE-NEXT: br label %[[IF_END]]
74+
// POWERPC_64LE: [[IF_END]]:
75+
// POWERPC_64LE-NEXT: br label %[[FOR_INC:.*]]
76+
// POWERPC_64LE: [[FOR_INC]]:
77+
// POWERPC_64LE-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
78+
// POWERPC_64LE-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
79+
// POWERPC_64LE-NEXT: store i32 [[INC3]], ptr [[I]], align 4
80+
// POWERPC_64LE-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
81+
// POWERPC_64LE: [[FOR_END]]:
82+
// POWERPC_64LE-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
83+
// POWERPC_64LE-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
84+
// POWERPC_64LE-NEXT: ret i32 [[CONV4]]
85+
//
86+
// POWERPC_32-LABEL: define dso_local i32 @test_Greater_than(
87+
// POWERPC_32-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
88+
// POWERPC_32-NEXT: [[ENTRY:.*:]]
89+
// POWERPC_32-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 4
90+
// POWERPC_32-NEXT: [[RESULT:%.*]] = alloca i16, align 2
91+
// POWERPC_32-NEXT: [[I:%.*]] = alloca i32, align 4
92+
// POWERPC_32-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 4
93+
// POWERPC_32-NEXT: store i16 0, ptr [[RESULT]], align 2
94+
// POWERPC_32-NEXT: store i32 0, ptr [[I]], align 4
95+
// POWERPC_32-NEXT: br label %[[FOR_COND:.*]]
96+
// POWERPC_32: [[FOR_COND]]:
97+
// POWERPC_32-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
98+
// POWERPC_32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
99+
// POWERPC_32-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
100+
// POWERPC_32: [[FOR_BODY]]:
101+
// POWERPC_32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 4
102+
// POWERPC_32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
103+
// POWERPC_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 [[TMP2]]
104+
// POWERPC_32-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
105+
// POWERPC_32-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
106+
// POWERPC_32-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
107+
// POWERPC_32-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
108+
// POWERPC_32: [[IF_THEN]]:
109+
// POWERPC_32-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
110+
// POWERPC_32-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
111+
// POWERPC_32-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
112+
// POWERPC_32-NEXT: br label %[[IF_END]]
113+
// POWERPC_32: [[IF_END]]:
114+
// POWERPC_32-NEXT: br label %[[FOR_INC:.*]]
115+
// POWERPC_32: [[FOR_INC]]:
116+
// POWERPC_32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
117+
// POWERPC_32-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
118+
// POWERPC_32-NEXT: store i32 [[INC3]], ptr [[I]], align 4
119+
// POWERPC_32-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
120+
// POWERPC_32: [[FOR_END]]:
121+
// POWERPC_32-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
122+
// POWERPC_32-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
123+
// POWERPC_32-NEXT: ret i32 [[CONV4]]
124+
//
125+
// Counts how many of the first four elements of `colauths` are greater than
// zero and returns that count. `result` is an unsigned short, so it is widened
// to int on return (the zext feeding `ret` in the expected IR above).
// Keep the loop shape as written: the autogenerated assertions above match the
// basic-block and value names produced for exactly this source.
int test_Greater_than(unsigned short *colauths) {
126+
unsigned short result = 0;
127+
for (int i = 0; i < 4; i++) {
128+
if (colauths[i] > 0) {
129+
result++;
130+
}
131+
}
132+
return result;
133+
}
134+
//.
135+
// POWERPC_64: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
136+
// POWERPC_64: [[META3]] = !{!"llvm.loop.mustprogress"}
137+
//.
138+
// POWERPC_64LE: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
139+
// POWERPC_64LE: [[META3]] = !{!"llvm.loop.mustprogress"}
140+
//.
141+
// POWERPC_32: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
142+
// POWERPC_32: [[META3]] = !{!"llvm.loop.mustprogress"}
143+
//.
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
2+
; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64LE
3+
4+
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \
5+
; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64
6+
7+
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
8+
; RUN: < %s | FileCheck %s --check-prefix=POWERPC_32
9+
10+
; Counts the non-zero i16 elements among the first %ncols entries of %colauths
; and returns the count (0 when %ncols is not positive). The IR is already
; vectorized: a main vector loop consuming 64 elements per iteration, an
; epilogue vector loop consuming 8, and a scalar remainder loop.
; The assertions below pin the bodies llc emits for the two vector loops on each
; triple. NOTE(review): the vcmpequh + xxlnor pair is presumably the lowering of
; the vector `icmp ne ..., zeroinitializer` -- confirm against llc output.
define i32 @test_Greater_than(ptr %colauths, i32 signext %ncols) {
11+
; POWERPC_64LE-LABEL: test_Greater_than:
12+
; POWERPC_64LE: .LBB0_6: # %vector.body
13+
; POWERPC_64LE-DAG: #
14+
; POWERPC_64LE-DAG: lxv 50, -64(4)
15+
; POWERPC_64LE-DAG: vcmpequh 18, 18, 3
16+
; POWERPC_64LE-DAG: xxlnor 50, 50, 50
17+
; POWERPC_64LE-DAG: vmrghh 19, 18, 18
18+
; POWERPC_64LE-DAG: vmrglh 18, 18, 18
19+
; POWERPC_64LE-DAG: xxland 51, 51, 34
20+
; POWERPC_64LE-DAG: xxland 50, 50, 34
21+
; POWERPC_64LE-DAG: vadduwm 5, 5, 19
22+
; POWERPC_64LE: .LBB0_10: # %vec.epilog.vector.body
23+
; POWERPC_64LE-DAG: #
24+
; POWERPC_64LE-DAG: lxv 32, 0(4)
25+
; POWERPC_64LE-DAG: addi 4, 4, 16
26+
; POWERPC_64LE-DAG: vcmpequh 0, 0, 4
27+
; POWERPC_64LE-DAG: xxlnor 32, 32, 32
28+
; POWERPC_64LE-DAG: vmrglh 1, 0, 0
29+
; POWERPC_64LE-DAG: vmrghh 0, 0, 0
30+
; POWERPC_64LE-DAG: xxland 33, 33, 34
31+
; POWERPC_64LE-DAG: xxland 32, 32, 34
32+
; POWERPC_64LE-DAG: vadduwm 5, 5, 0
33+
; POWERPC_64LE-DAG: vadduwm 3, 3, 1
34+
; POWERPC_64LE-DAG: bdnz .LBB0_10
35+
; POWERPC_64LE: blr
36+
;
37+
; POWERPC_64-LABEL: test_Greater_than:
38+
; POWERPC_64: L..BB0_6: # %vector.body
39+
; POWERPC_64-DAG: #
40+
; POWERPC_64-DAG: lxv 50, -64(4)
41+
; POWERPC_64-DAG: vcmpequh 18, 18, 3
42+
; POWERPC_64-DAG: xxlnor 50, 50, 50
43+
; POWERPC_64-DAG: vmrglh 19, 18, 18
44+
; POWERPC_64-DAG: vmrghh 18, 18, 18
45+
; POWERPC_64-DAG: xxland 51, 51, 34
46+
; POWERPC_64-DAG: xxland 50, 50, 34
47+
; POWERPC_64-DAG: vadduwm 5, 5, 19
48+
; POWERPC_64: L..BB0_10: # %vec.epilog.vector.body
49+
; POWERPC_64-DAG: #
50+
; POWERPC_64-DAG: lxv 32, 0(4)
51+
; POWERPC_64-DAG: addi 4, 4, 16
52+
; POWERPC_64-DAG: vcmpequh 0, 0, 4
53+
; POWERPC_64-DAG: xxlnor 32, 32, 32
54+
; POWERPC_64-DAG: vmrghh 1, 0, 0
55+
; POWERPC_64-DAG: vmrglh 0, 0, 0
56+
; POWERPC_64-DAG: xxland 33, 33, 34
57+
; POWERPC_64-DAG: xxland 32, 32, 34
58+
; POWERPC_64-DAG: vadduwm 5, 5, 0
59+
; POWERPC_64-DAG: vadduwm 3, 3, 1
60+
; POWERPC_64-DAG: bdnz L..BB0_10
61+
; POWERPC_64: blr
62+
;
63+
; POWERPC_32-LABEL: test_Greater_than:
64+
; POWERPC_32: L..BB0_7: # %vector.body
65+
; POWERPC_32-DAG: #
66+
; POWERPC_32-DAG: lxv 50, 0(10)
67+
; POWERPC_32-DAG: addic 9, 9, 64
68+
; POWERPC_32-DAG: addze 5, 5
69+
; POWERPC_32-DAG: xor 11, 9, 6
70+
; POWERPC_32-DAG: or. 11, 11, 5
71+
; POWERPC_32-DAG: vcmpequh 18, 18, 3
72+
; POWERPC_32-DAG: xxlnor 50, 50, 50
73+
; POWERPC_32-DAG: vmrglh 19, 18, 18
74+
; POWERPC_32-DAG: vmrghh 18, 18, 18
75+
; POWERPC_32-DAG: xxland 51, 51, 34
76+
; POWERPC_32-DAG: xxland 50, 50, 34
77+
; POWERPC_32-DAG: vadduwm 5, 5, 19
78+
; POWERPC_32: L..BB0_11: # %vec.epilog.vector.body
79+
; POWERPC_32-DAG: #
80+
; POWERPC_32-DAG: slwi 5, 9, 1
81+
; POWERPC_32-DAG: addic 9, 9, 8
82+
; POWERPC_32-DAG: addze 7, 7
83+
; POWERPC_32-DAG: lxvx 32, 3, 5
84+
; POWERPC_32-DAG: xor 5, 9, 6
85+
; POWERPC_32-DAG: or. 5, 5, 7
86+
; POWERPC_32-DAG: vcmpequh 0, 0, 3
87+
; POWERPC_32-DAG: xxlnor 32, 32, 32
88+
; POWERPC_32-DAG: vmrghh 1, 0, 0
89+
; POWERPC_32-DAG: vmrglh 0, 0, 0
90+
; POWERPC_32-DAG: xxland 33, 33, 34
91+
; POWERPC_32-DAG: xxland 32, 32, 34
92+
; POWERPC_32-DAG: vadduwm 5, 5, 0
93+
; POWERPC_32-DAG: vadduwm 4, 4, 1
94+
; POWERPC_32-DAG: bne 0, L..BB0_11
95+
; POWERPC_32: blr
96+
; A non-positive %ncols goes straight to the exit, which then returns 0.
entry:
97+
%cmp5 = icmp sgt i32 %ncols, 0
98+
br i1 %cmp5, label %iter.check, label %for.cond.cleanup
99+
100+
; Fewer than 8 elements: skip both vector loops and use the scalar loop only.
iter.check: ; preds = %entry
101+
%wide.trip.count = zext nneg i32 %ncols to i64
102+
%min.iters.check = icmp ult i32 %ncols, 8
103+
br i1 %min.iters.check, label %for.body.preheader, label %vector.main.loop.iter.check
104+
105+
; Entry to the scalar loop: the phis pick up the start index and the running
; count from whichever path got here (nothing done yet, after the main vector
; loop, or after the epilogue vector loop).
for.body.preheader: ; preds = %vec.epilog.iter.check, %vec.epilog.middle.block, %iter.check
106+
%indvars.iv.ph = phi i64 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec31, %vec.epilog.middle.block ]
107+
%num_cols_needed.06.ph = phi i32 [ 0, %iter.check ], [ %33, %vec.epilog.iter.check ], [ %40, %vec.epilog.middle.block ]
108+
br label %for.body
109+
110+
; Fewer than 64 elements: skip the main vector loop and start at the epilogue
; vector loop.
vector.main.loop.iter.check: ; preds = %iter.check
111+
%min.iters.check9 = icmp ult i32 %ncols, 64
112+
br i1 %min.iters.check9, label %vec.epilog.ph, label %vector.ph
113+
114+
; %n.vec is the trip count with its low six bits cleared, i.e. rounded down to
; a multiple of 64.
vector.ph: ; preds = %vector.main.loop.iter.check
115+
%n.vec = and i64 %wide.trip.count, 2147483584
116+
br label %vector.body
117+
118+
; Main vector loop: eight <8 x i16> loads (64 elements) per iteration, each
; compared against zero and accumulated into its own <8 x i32> counter.
vector.body: ; preds = %vector.body, %vector.ph
119+
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
120+
%vec.phi = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %24, %vector.body ]
121+
%vec.phi10 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %25, %vector.body ]
122+
%vec.phi11 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %26, %vector.body ]
123+
%vec.phi12 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %27, %vector.body ]
124+
%vec.phi13 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %28, %vector.body ]
125+
%vec.phi14 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %29, %vector.body ]
126+
%vec.phi15 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %30, %vector.body ]
127+
%vec.phi16 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %31, %vector.body ]
128+
%0 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index
129+
%1 = getelementptr inbounds nuw i8, ptr %0, i64 16
130+
%2 = getelementptr inbounds nuw i8, ptr %0, i64 32
131+
%3 = getelementptr inbounds nuw i8, ptr %0, i64 48
132+
%4 = getelementptr inbounds nuw i8, ptr %0, i64 64
133+
%5 = getelementptr inbounds nuw i8, ptr %0, i64 80
134+
%6 = getelementptr inbounds nuw i8, ptr %0, i64 96
135+
%7 = getelementptr inbounds nuw i8, ptr %0, i64 112
136+
%wide.load = load <8 x i16>, ptr %0, align 2, !tbaa !5
137+
%wide.load17 = load <8 x i16>, ptr %1, align 2, !tbaa !5
138+
%wide.load18 = load <8 x i16>, ptr %2, align 2, !tbaa !5
139+
%wide.load19 = load <8 x i16>, ptr %3, align 2, !tbaa !5
140+
%wide.load20 = load <8 x i16>, ptr %4, align 2, !tbaa !5
141+
%wide.load21 = load <8 x i16>, ptr %5, align 2, !tbaa !5
142+
%wide.load22 = load <8 x i16>, ptr %6, align 2, !tbaa !5
143+
%wide.load23 = load <8 x i16>, ptr %7, align 2, !tbaa !5
144+
%8 = icmp ne <8 x i16> %wide.load, zeroinitializer
145+
%9 = icmp ne <8 x i16> %wide.load17, zeroinitializer
146+
%10 = icmp ne <8 x i16> %wide.load18, zeroinitializer
147+
%11 = icmp ne <8 x i16> %wide.load19, zeroinitializer
148+
%12 = icmp ne <8 x i16> %wide.load20, zeroinitializer
149+
%13 = icmp ne <8 x i16> %wide.load21, zeroinitializer
150+
%14 = icmp ne <8 x i16> %wide.load22, zeroinitializer
151+
%15 = icmp ne <8 x i16> %wide.load23, zeroinitializer
152+
%16 = zext <8 x i1> %8 to <8 x i32>
153+
%17 = zext <8 x i1> %9 to <8 x i32>
154+
%18 = zext <8 x i1> %10 to <8 x i32>
155+
%19 = zext <8 x i1> %11 to <8 x i32>
156+
%20 = zext <8 x i1> %12 to <8 x i32>
157+
%21 = zext <8 x i1> %13 to <8 x i32>
158+
%22 = zext <8 x i1> %14 to <8 x i32>
159+
%23 = zext <8 x i1> %15 to <8 x i32>
160+
%24 = add <8 x i32> %vec.phi, %16
161+
%25 = add <8 x i32> %vec.phi10, %17
162+
%26 = add <8 x i32> %vec.phi11, %18
163+
%27 = add <8 x i32> %vec.phi12, %19
164+
%28 = add <8 x i32> %vec.phi13, %20
165+
%29 = add <8 x i32> %vec.phi14, %21
166+
%30 = add <8 x i32> %vec.phi15, %22
167+
%31 = add <8 x i32> %vec.phi16, %23
168+
%index.next = add nuw i64 %index, 64
169+
%32 = icmp eq i64 %index.next, %n.vec
170+
br i1 %32, label %middle.block, label %vector.body, !llvm.loop !9
171+
172+
; Fold the eight accumulators into one vector, reduce it to a scalar count, and
; exit if the main vector loop covered the whole trip count.
middle.block: ; preds = %vector.body
173+
%bin.rdx = add <8 x i32> %25, %24
174+
%bin.rdx24 = add <8 x i32> %26, %bin.rdx
175+
%bin.rdx25 = add <8 x i32> %27, %bin.rdx24
176+
%bin.rdx26 = add <8 x i32> %28, %bin.rdx25
177+
%bin.rdx27 = add <8 x i32> %29, %bin.rdx26
178+
%bin.rdx28 = add <8 x i32> %30, %bin.rdx27
179+
%bin.rdx29 = add <8 x i32> %31, %bin.rdx28
180+
%33 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %bin.rdx29)
181+
%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
182+
br i1 %cmp.n, label %for.cond.cleanup, label %vec.epilog.iter.check
183+
184+
; Mask 56 selects bits 3..5 of the trip count; when they are all clear no full
; group of 8 remains, so go to the scalar loop instead of the epilogue vector loop.
vec.epilog.iter.check: ; preds = %middle.block
185+
%n.vec.remaining = and i64 %wide.trip.count, 56
186+
%min.epilog.iters.check = icmp eq i64 %n.vec.remaining, 0
187+
br i1 %min.epilog.iters.check, label %for.body.preheader, label %vec.epilog.ph
188+
189+
; Seed lane 0 of the epilogue accumulator with the count carried over from the
; main vector loop (0 if that loop was skipped); the other lanes start at 0.
vec.epilog.ph: ; preds = %vec.epilog.iter.check, %vector.main.loop.iter.check
190+
%vec.epilog.resume.val = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ]
191+
%bc.merge.rdx = phi i32 [ %33, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ]
192+
%n.vec31 = and i64 %wide.trip.count, 2147483640
193+
%34 = insertelement <8 x i32> <i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %bc.merge.rdx, i64 0
194+
br label %vec.epilog.vector.body
195+
196+
; Epilogue vector loop: one <8 x i16> load (8 elements) per iteration.
vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph
197+
%index32 = phi i64 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next35, %vec.epilog.vector.body ]
198+
%vec.phi33 = phi <8 x i32> [ %34, %vec.epilog.ph ], [ %38, %vec.epilog.vector.body ]
199+
%35 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index32
200+
%wide.load34 = load <8 x i16>, ptr %35, align 2, !tbaa !5
201+
%36 = icmp ne <8 x i16> %wide.load34, zeroinitializer
202+
%37 = zext <8 x i1> %36 to <8 x i32>
203+
%38 = add <8 x i32> %vec.phi33, %37
204+
%index.next35 = add nuw i64 %index32, 8
205+
%39 = icmp eq i64 %index.next35, %n.vec31
206+
br i1 %39, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !13
207+
208+
; Reduce the epilogue accumulator to a scalar; any elements still left are
; handled by the scalar loop.
vec.epilog.middle.block: ; preds = %vec.epilog.vector.body
209+
%40 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %38)
210+
%cmp.n36 = icmp eq i64 %n.vec31, %wide.trip.count
211+
br i1 %cmp.n36, label %for.cond.cleanup, label %for.body.preheader
212+
213+
; Common exit: the phi selects the final count from whichever path finished.
for.cond.cleanup: ; preds = %for.body, %middle.block, %vec.epilog.middle.block, %entry
214+
%num_cols_needed.0.lcssa = phi i32 [ 0, %entry ], [ %33, %middle.block ], [ %40, %vec.epilog.middle.block ], [ %spec.select, %for.body ]
215+
ret i32 %num_cols_needed.0.lcssa
216+
217+
; Scalar remainder loop: one element per iteration, adding 1 to the count for
; each non-zero element.
for.body: ; preds = %for.body.preheader, %for.body
218+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
219+
%num_cols_needed.06 = phi i32 [ %spec.select, %for.body ], [ %num_cols_needed.06.ph, %for.body.preheader ]
220+
%arrayidx = getelementptr inbounds nuw i16, ptr %colauths, i64 %indvars.iv
221+
%41 = load i16, ptr %arrayidx, align 2, !tbaa !5
222+
%tobool.not = icmp ne i16 %41, 0
223+
%inc = zext i1 %tobool.not to i32
224+
%spec.select = add nuw nsw i32 %num_cols_needed.06, %inc
225+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
226+
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
227+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !14
228+
}
229+
230+
; TBAA access tag (!5) for the "short" loads in the function above, with its
; type chain: "short" -> "omnipotent char" -> "Simple C/C++ TBAA" root.
!5 = !{!6, !6, i64 0}
231+
!6 = !{!"short", !7, i64 0}
232+
!7 = !{!"omnipotent char", !8, i64 0}
233+
!8 = !{!"Simple C/C++ TBAA"}
234+
; Loop metadata: !9 is attached to the main vector loop, !13 to the epilogue
; vector loop, and !14 to the scalar remainder loop. All three share the
; mustprogress, isvectorized and unroll.runtime.disable properties.
!9 = distinct !{!9, !10, !11, !12}
235+
!10 = !{!"llvm.loop.mustprogress"}
236+
!11 = !{!"llvm.loop.isvectorized", i32 1}
237+
!12 = !{!"llvm.loop.unroll.runtime.disable"}
238+
!13 = distinct !{!13, !10, !11, !12}
239+
!14 = distinct !{!14, !10, !12, !11}

0 commit comments

Comments
 (0)