|
; Runs llc for POWER9 (-mcpu=pwr9) with machine-instruction verification on
; three target triples - powerpc64le Linux, 64-bit AIX and 32-bit AIX - and
; pipes each result into FileCheck under its own check prefix.
| 1 | +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ |
| 2 | +; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64LE |
| 3 | + |
| 4 | +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \ |
| 5 | +; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64 |
| 6 | + |
| 7 | +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \ |
| 8 | +; RUN: < %s | FileCheck %s --check-prefix=POWERPC_32 |
| 9 | + |
; i32 @test_Greater_than(ptr %colauths, i32 signext %ncols)
;
; Counts how many of the first %ncols i16 elements at %colauths are non-zero
; and returns that count; returns 0 when %ncols <= 0.
;
; The IR is already in vectorized form and has three loop tiers:
;   vector.body            - 64 elements per iteration: eight <8 x i16> loads
;                            feeding eight <8 x i32> accumulators
;   vec.epilog.vector.body - 8 elements per iteration, a single accumulator
;   for.body               - scalar loop, one element per iteration
;
; The assertions below only look inside the two vector loop bodies. Within
; each body they are order-independent (-DAG) and expect a vector load,
; vcmpequh, xxlnor, vmrghh/vmrglh, xxland and vadduwm with exact register
; numbers.
; NOTE(review): presumably vcmpequh + xxlnor is the lowering of the
; `icmp ne ..., zeroinitializer` and merge/and/add is the zext-and-accumulate
; step - confirm against actual llc output.
; NOTE(review): the name says "Greater_than", but every compare in this IR is
; `icmp ne` against zero - confirm the name is intentional.
; NOTE(review): the bare `#` -DAG pattern after each block label matches any
; '#' character, so it adds little on its own - confirm it is wanted.
| 10 | +define i32 @test_Greater_than(ptr %colauths, i32 signext %ncols) { |
; Expectations for the powerpc64le-unknown-linux-gnu run.
| 11 | +; POWERPC_64LE-LABEL: test_Greater_than: |
| 12 | +; POWERPC_64LE: .LBB0_6: # %vector.body |
| 13 | +; POWERPC_64LE-DAG: # |
| 14 | +; POWERPC_64LE-DAG: lxv 50, -64(4) |
| 15 | +; POWERPC_64LE-DAG: vcmpequh 18, 18, 3 |
| 16 | +; POWERPC_64LE-DAG: xxlnor 50, 50, 50 |
| 17 | +; POWERPC_64LE-DAG: vmrghh 19, 18, 18 |
| 18 | +; POWERPC_64LE-DAG: vmrglh 18, 18, 18 |
| 19 | +; POWERPC_64LE-DAG: xxland 51, 51, 34 |
| 20 | +; POWERPC_64LE-DAG: xxland 50, 50, 34 |
| 21 | +; POWERPC_64LE-DAG: vadduwm 5, 5, 19 |
| 22 | +; POWERPC_64LE: .LBB0_10: # %vec.epilog.vector.body |
| 23 | +; POWERPC_64LE-DAG: # |
| 24 | +; POWERPC_64LE-DAG: lxv 32, 0(4) |
| 25 | +; POWERPC_64LE-DAG: addi 4, 4, 16 |
| 26 | +; POWERPC_64LE-DAG: vcmpequh 0, 0, 4 |
| 27 | +; POWERPC_64LE-DAG: xxlnor 32, 32, 32 |
| 28 | +; POWERPC_64LE-DAG: vmrglh 1, 0, 0 |
| 29 | +; POWERPC_64LE-DAG: vmrghh 0, 0, 0 |
| 30 | +; POWERPC_64LE-DAG: xxland 33, 33, 34 |
| 31 | +; POWERPC_64LE-DAG: xxland 32, 32, 34 |
| 32 | +; POWERPC_64LE-DAG: vadduwm 5, 5, 0 |
| 33 | +; POWERPC_64LE-DAG: vadduwm 3, 3, 1 |
| 34 | +; POWERPC_64LE-DAG: bdnz .LBB0_10 |
| 35 | +; POWERPC_64LE: blr |
| 36 | +; |
; Expectations for the powerpc64-ibm-aix run. Block labels use the L..BB
; spelling here, and the vmrglh/vmrghh destination registers are swapped
; relative to the little-endian group.
| 37 | +; POWERPC_64-LABEL: test_Greater_than: |
| 38 | +; POWERPC_64: L..BB0_6: # %vector.body |
| 39 | +; POWERPC_64-DAG: # |
| 40 | +; POWERPC_64-DAG: lxv 50, -64(4) |
| 41 | +; POWERPC_64-DAG: vcmpequh 18, 18, 3 |
| 42 | +; POWERPC_64-DAG: xxlnor 50, 50, 50 |
| 43 | +; POWERPC_64-DAG: vmrglh 19, 18, 18 |
| 44 | +; POWERPC_64-DAG: vmrghh 18, 18, 18 |
| 45 | +; POWERPC_64-DAG: xxland 51, 51, 34 |
| 46 | +; POWERPC_64-DAG: xxland 50, 50, 34 |
| 47 | +; POWERPC_64-DAG: vadduwm 5, 5, 19 |
| 48 | +; POWERPC_64: L..BB0_10: # %vec.epilog.vector.body |
| 49 | +; POWERPC_64-DAG: # |
| 50 | +; POWERPC_64-DAG: lxv 32, 0(4) |
| 51 | +; POWERPC_64-DAG: addi 4, 4, 16 |
| 52 | +; POWERPC_64-DAG: vcmpequh 0, 0, 4 |
| 53 | +; POWERPC_64-DAG: xxlnor 32, 32, 32 |
| 54 | +; POWERPC_64-DAG: vmrghh 1, 0, 0 |
| 55 | +; POWERPC_64-DAG: vmrglh 0, 0, 0 |
| 56 | +; POWERPC_64-DAG: xxland 33, 33, 34 |
| 57 | +; POWERPC_64-DAG: xxland 32, 32, 34 |
| 58 | +; POWERPC_64-DAG: vadduwm 5, 5, 0 |
| 59 | +; POWERPC_64-DAG: vadduwm 3, 3, 1 |
| 60 | +; POWERPC_64-DAG: bdnz L..BB0_10 |
| 61 | +; POWERPC_64: blr |
| 62 | +; |
; Expectations for the powerpc-ibm-aix (32-bit) run. Both loop bodies also
; expect an addic/addze/xor/or. sequence, and the epilogue loop expects a bne
; back-edge where the 64-bit groups expect bdnz.
| 63 | +; POWERPC_32-LABEL: test_Greater_than: |
| 64 | +; POWERPC_32: L..BB0_7: # %vector.body |
| 65 | +; POWERPC_32-DAG: # |
| 66 | +; POWERPC_32-DAG: lxv 50, 0(10) |
| 67 | +; POWERPC_32-DAG: addic 9, 9, 64 |
| 68 | +; POWERPC_32-DAG: addze 5, 5 |
| 69 | +; POWERPC_32-DAG: xor 11, 9, 6 |
| 70 | +; POWERPC_32-DAG: or. 11, 11, 5 |
| 71 | +; POWERPC_32-DAG: vcmpequh 18, 18, 3 |
| 72 | +; POWERPC_32-DAG: xxlnor 50, 50, 50 |
| 73 | +; POWERPC_32-DAG: vmrglh 19, 18, 18 |
| 74 | +; POWERPC_32-DAG: vmrghh 18, 18, 18 |
| 75 | +; POWERPC_32-DAG: xxland 51, 51, 34 |
| 76 | +; POWERPC_32-DAG: xxland 50, 50, 34 |
| 77 | +; POWERPC_32-DAG: vadduwm 5, 5, 19 |
| 78 | +; POWERPC_32: L..BB0_11: # %vec.epilog.vector.body |
| 79 | +; POWERPC_32-DAG: # |
| 80 | +; POWERPC_32-DAG: slwi 5, 9, 1 |
| 81 | +; POWERPC_32-DAG: addic 9, 9, 8 |
| 82 | +; POWERPC_32-DAG: addze 7, 7 |
| 83 | +; POWERPC_32-DAG: lxvx 32, 3, 5 |
| 84 | +; POWERPC_32-DAG: xor 5, 9, 6 |
| 85 | +; POWERPC_32-DAG: or. 5, 5, 7 |
| 86 | +; POWERPC_32-DAG: vcmpequh 0, 0, 3 |
| 87 | +; POWERPC_32-DAG: xxlnor 32, 32, 32 |
| 88 | +; POWERPC_32-DAG: vmrghh 1, 0, 0 |
| 89 | +; POWERPC_32-DAG: vmrglh 0, 0, 0 |
| 90 | +; POWERPC_32-DAG: xxland 33, 33, 34 |
| 91 | +; POWERPC_32-DAG: xxland 32, 32, 34 |
| 92 | +; POWERPC_32-DAG: vadduwm 5, 5, 0 |
| 93 | +; POWERPC_32-DAG: vadduwm 4, 4, 1 |
| 94 | +; POWERPC_32-DAG: bne 0, L..BB0_11 |
| 95 | +; POWERPC_32: blr |
; Nothing to count unless %ncols is strictly positive.
| 96 | + entry: |
| 97 | + %cmp5 = icmp sgt i32 %ncols, 0 |
| 98 | + br i1 %cmp5, label %iter.check, label %for.cond.cleanup |
| 99 | + |
; Fewer than 8 elements: go straight to the scalar loop.
| 100 | +iter.check: ; preds = %entry |
| 101 | + %wide.trip.count = zext nneg i32 %ncols to i64 |
| 102 | + %min.iters.check = icmp ult i32 %ncols, 8 |
| 103 | + br i1 %min.iters.check, label %for.body.preheader, label %vector.main.loop.iter.check |
| 104 | + |
; Scalar-loop entry: the starting index and running count come from whichever
; tier handed over (none, the main vector loop, or the epilogue vector loop).
| 105 | +for.body.preheader: ; preds = %vec.epilog.iter.check, %vec.epilog.middle.block, %iter.check |
| 106 | + %indvars.iv.ph = phi i64 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec31, %vec.epilog.middle.block ] |
| 107 | + %num_cols_needed.06.ph = phi i32 [ 0, %iter.check ], [ %33, %vec.epilog.iter.check ], [ %40, %vec.epilog.middle.block ] |
| 108 | + br label %for.body |
| 109 | + |
; Fewer than 64 elements: skip the main vector loop and use the epilogue loop.
| 110 | +vector.main.loop.iter.check: ; preds = %iter.check |
| 111 | + %min.iters.check9 = icmp ult i32 %ncols, 64 |
| 112 | + br i1 %min.iters.check9, label %vec.epilog.ph, label %vector.ph |
| 113 | + |
; %n.vec = trip count rounded down to a multiple of 64 (mask 0x7FFFFFC0).
| 114 | +vector.ph: ; preds = %vector.main.loop.iter.check |
| 115 | + %n.vec = and i64 %wide.trip.count, 2147483584 |
| 116 | + br label %vector.body |
| 117 | + |
; Main vector loop: 64 elements per iteration, eight independent accumulators.
| 118 | +vector.body: ; preds = %vector.body, %vector.ph |
| 119 | + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| 120 | + %vec.phi = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %24, %vector.body ] |
| 121 | + %vec.phi10 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %25, %vector.body ] |
| 122 | + %vec.phi11 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %26, %vector.body ] |
| 123 | + %vec.phi12 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %27, %vector.body ] |
| 124 | + %vec.phi13 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %28, %vector.body ] |
| 125 | + %vec.phi14 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %29, %vector.body ] |
| 126 | + %vec.phi15 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %30, %vector.body ] |
| 127 | + %vec.phi16 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %31, %vector.body ] |
; Addresses of eight consecutive 16-byte chunks starting at &colauths[%index].
| 128 | + %0 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index |
| 129 | + %1 = getelementptr inbounds nuw i8, ptr %0, i64 16 |
| 130 | + %2 = getelementptr inbounds nuw i8, ptr %0, i64 32 |
| 131 | + %3 = getelementptr inbounds nuw i8, ptr %0, i64 48 |
| 132 | + %4 = getelementptr inbounds nuw i8, ptr %0, i64 64 |
| 133 | + %5 = getelementptr inbounds nuw i8, ptr %0, i64 80 |
| 134 | + %6 = getelementptr inbounds nuw i8, ptr %0, i64 96 |
| 135 | + %7 = getelementptr inbounds nuw i8, ptr %0, i64 112 |
| 136 | + %wide.load = load <8 x i16>, ptr %0, align 2, !tbaa !5 |
| 137 | + %wide.load17 = load <8 x i16>, ptr %1, align 2, !tbaa !5 |
| 138 | + %wide.load18 = load <8 x i16>, ptr %2, align 2, !tbaa !5 |
| 139 | + %wide.load19 = load <8 x i16>, ptr %3, align 2, !tbaa !5 |
| 140 | + %wide.load20 = load <8 x i16>, ptr %4, align 2, !tbaa !5 |
| 141 | + %wide.load21 = load <8 x i16>, ptr %5, align 2, !tbaa !5 |
| 142 | + %wide.load22 = load <8 x i16>, ptr %6, align 2, !tbaa !5 |
| 143 | + %wide.load23 = load <8 x i16>, ptr %7, align 2, !tbaa !5 |
; Per-lane "element != 0" masks.
| 144 | + %8 = icmp ne <8 x i16> %wide.load, zeroinitializer |
| 145 | + %9 = icmp ne <8 x i16> %wide.load17, zeroinitializer |
| 146 | + %10 = icmp ne <8 x i16> %wide.load18, zeroinitializer |
| 147 | + %11 = icmp ne <8 x i16> %wide.load19, zeroinitializer |
| 148 | + %12 = icmp ne <8 x i16> %wide.load20, zeroinitializer |
| 149 | + %13 = icmp ne <8 x i16> %wide.load21, zeroinitializer |
| 150 | + %14 = icmp ne <8 x i16> %wide.load22, zeroinitializer |
| 151 | + %15 = icmp ne <8 x i16> %wide.load23, zeroinitializer |
; Widen each i1 mask to 0/1 in i32 lanes so it can be added to the counters.
| 152 | + %16 = zext <8 x i1> %8 to <8 x i32> |
| 153 | + %17 = zext <8 x i1> %9 to <8 x i32> |
| 154 | + %18 = zext <8 x i1> %10 to <8 x i32> |
| 155 | + %19 = zext <8 x i1> %11 to <8 x i32> |
| 156 | + %20 = zext <8 x i1> %12 to <8 x i32> |
| 157 | + %21 = zext <8 x i1> %13 to <8 x i32> |
| 158 | + %22 = zext <8 x i1> %14 to <8 x i32> |
| 159 | + %23 = zext <8 x i1> %15 to <8 x i32> |
| 160 | + %24 = add <8 x i32> %vec.phi, %16 |
| 161 | + %25 = add <8 x i32> %vec.phi10, %17 |
| 162 | + %26 = add <8 x i32> %vec.phi11, %18 |
| 163 | + %27 = add <8 x i32> %vec.phi12, %19 |
| 164 | + %28 = add <8 x i32> %vec.phi13, %20 |
| 165 | + %29 = add <8 x i32> %vec.phi14, %21 |
| 166 | + %30 = add <8 x i32> %vec.phi15, %22 |
| 167 | + %31 = add <8 x i32> %vec.phi16, %23 |
| 168 | + %index.next = add nuw i64 %index, 64 |
| 169 | + %32 = icmp eq i64 %index.next, %n.vec |
| 170 | + br i1 %32, label %middle.block, label %vector.body, !llvm.loop !9 |
| 171 | + |
; Fold the eight accumulators into one vector, then reduce its lanes to a
; scalar count. Finished if the main loop covered every element.
| 172 | +middle.block: ; preds = %vector.body |
| 173 | + %bin.rdx = add <8 x i32> %25, %24 |
| 174 | + %bin.rdx24 = add <8 x i32> %26, %bin.rdx |
| 175 | + %bin.rdx25 = add <8 x i32> %27, %bin.rdx24 |
| 176 | + %bin.rdx26 = add <8 x i32> %28, %bin.rdx25 |
| 177 | + %bin.rdx27 = add <8 x i32> %29, %bin.rdx26 |
| 178 | + %bin.rdx28 = add <8 x i32> %30, %bin.rdx27 |
| 179 | + %bin.rdx29 = add <8 x i32> %31, %bin.rdx28 |
| 180 | + %33 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %bin.rdx29) |
| 181 | + %cmp.n = icmp eq i64 %n.vec, %wide.trip.count |
| 182 | + br i1 %cmp.n, label %for.cond.cleanup, label %vec.epilog.iter.check |
| 183 | + |
; Mask 56 keeps bits 3-5 of the trip count; zero means no full group of 8
; elements is left, so the remainder goes to the scalar loop.
| 184 | +vec.epilog.iter.check: ; preds = %middle.block |
| 185 | + %n.vec.remaining = and i64 %wide.trip.count, 56 |
| 186 | + %min.epilog.iters.check = icmp eq i64 %n.vec.remaining, 0 |
| 187 | + br i1 %min.epilog.iters.check, label %for.body.preheader, label %vec.epilog.ph |
| 188 | + |
; Epilogue setup: resume at %n.vec (or 0 if the main loop was skipped).
; %n.vec31 = trip count rounded down to a multiple of 8 (mask 0x7FFFFFF8).
; Lane 0 of the accumulator is seeded with the count so far; other lanes are 0.
| 189 | +vec.epilog.ph: ; preds = %vec.epilog.iter.check, %vector.main.loop.iter.check |
| 190 | + %vec.epilog.resume.val = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] |
| 191 | + %bc.merge.rdx = phi i32 [ %33, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] |
| 192 | + %n.vec31 = and i64 %wide.trip.count, 2147483640 |
| 193 | + %34 = insertelement <8 x i32> <i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %bc.merge.rdx, i64 0 |
| 194 | + br label %vec.epilog.vector.body |
| 195 | + |
; Epilogue vector loop: 8 elements per iteration with the same
; compare / zero-extend / add pattern as the main loop.
| 196 | +vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph |
| 197 | + %index32 = phi i64 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next35, %vec.epilog.vector.body ] |
| 198 | + %vec.phi33 = phi <8 x i32> [ %34, %vec.epilog.ph ], [ %38, %vec.epilog.vector.body ] |
| 199 | + %35 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index32 |
| 200 | + %wide.load34 = load <8 x i16>, ptr %35, align 2, !tbaa !5 |
| 201 | + %36 = icmp ne <8 x i16> %wide.load34, zeroinitializer |
| 202 | + %37 = zext <8 x i1> %36 to <8 x i32> |
| 203 | + %38 = add <8 x i32> %vec.phi33, %37 |
| 204 | + %index.next35 = add nuw i64 %index32, 8 |
| 205 | + %39 = icmp eq i64 %index.next35, %n.vec31 |
| 206 | + br i1 %39, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !13 |
| 207 | + |
; Reduce the epilogue accumulator; finished if no scalar tail remains.
| 208 | +vec.epilog.middle.block: ; preds = %vec.epilog.vector.body |
| 209 | + %40 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %38) |
| 210 | + %cmp.n36 = icmp eq i64 %n.vec31, %wide.trip.count |
| 211 | + br i1 %cmp.n36, label %for.cond.cleanup, label %for.body.preheader |
| 212 | + |
; Single exit: pick the count produced by whichever path reached here.
| 213 | +for.cond.cleanup: ; preds = %for.body, %middle.block, %vec.epilog.middle.block, %entry |
| 214 | + %num_cols_needed.0.lcssa = phi i32 [ 0, %entry ], [ %33, %middle.block ], [ %40, %vec.epilog.middle.block ], [ %spec.select, %for.body ] |
| 215 | + ret i32 %num_cols_needed.0.lcssa |
| 216 | + |
; Scalar loop: one element per iteration, adding 1 for each non-zero i16 until
; the index reaches the full trip count.
| 217 | +for.body: ; preds = %for.body.preheader, %for.body |
| 218 | + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] |
| 219 | + %num_cols_needed.06 = phi i32 [ %spec.select, %for.body ], [ %num_cols_needed.06.ph, %for.body.preheader ] |
| 220 | + %arrayidx = getelementptr inbounds nuw i16, ptr %colauths, i64 %indvars.iv |
| 221 | + %41 = load i16, ptr %arrayidx, align 2, !tbaa !5 |
| 222 | + %tobool.not = icmp ne i16 %41, 0 |
| 223 | + %inc = zext i1 %tobool.not to i32 |
| 224 | + %spec.select = add nuw nsw i32 %num_cols_needed.06, %inc |
| 225 | + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| 226 | + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count |
| 227 | + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !14 |
| 228 | +} |
| 229 | + |
; TBAA chain for the i16 loads tagged `!tbaa !5`: access tag !5 -> "short"
; (!6) -> "omnipotent char" (!7) -> root "Simple C/C++ TBAA" (!8).
| 230 | +!5 = !{!6, !6, i64 0} |
| 231 | +!6 = !{!"short", !7, i64 0} |
| 232 | +!7 = !{!"omnipotent char", !8, i64 0} |
| 233 | +!8 = !{!"Simple C/C++ TBAA"} |
; Loop metadata. !9, !13 and !14 are the distinct loop IDs referenced by the
; `!llvm.loop` attachments on the back-edges of vector.body,
; vec.epilog.vector.body and for.body respectively. All three list the same
; property nodes: mustprogress (!10), isvectorized = 1 (!11) and
; unroll.runtime.disable (!12).
| 234 | +!9 = distinct !{!9, !10, !11, !12} |
| 235 | +!10 = !{!"llvm.loop.mustprogress"} |
| 236 | +!11 = !{!"llvm.loop.isvectorized", i32 1} |
| 237 | +!12 = !{!"llvm.loop.unroll.runtime.disable"} |
| 238 | +!13 = distinct !{!13, !10, !11, !12} |
| 239 | +!14 = distinct !{!14, !10, !12, !11} |
0 commit comments