Skip to content

Commit f6058ff

Browse files
committed
[X86] Use SADDSAT/SSUBSAT instead of ADDS/SUBS
Migrate the X86 backend from X86ISD opcodes ADDS and SUBS to generic ISD opcodes SADDSAT and SSUBSAT. This also improves codegen for @llvm.sadd.sat() and @llvm.ssub.sat() intrinsics. This is a followup to D55787 and part of PR40056. Differential Revision: https://reviews.llvm.org/D55833 llvm-svn: 349520
1 parent 20a6db5 commit f6058ff

File tree

8 files changed

+538
-38098
lines changed

8 files changed

+538
-38098
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -830,14 +830,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
830830
}
831831

832832
setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
833-
setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
833+
setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
834834
setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
835+
setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
836+
setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
837+
setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
835838
setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
839+
setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
836840
// Use widening instead of promotion.
837841
for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
838842
MVT::v4i16, MVT::v2i16 }) {
839-
setOperationAction(ISD::UADDSAT, VT, Custom);
840-
setOperationAction(ISD::USUBSAT, VT, Custom);
843+
setOperationAction(ISD::UADDSAT, VT, Custom);
844+
setOperationAction(ISD::SADDSAT, VT, Custom);
845+
setOperationAction(ISD::USUBSAT, VT, Custom);
846+
setOperationAction(ISD::SSUBSAT, VT, Custom);
841847
}
842848

843849
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
@@ -1212,9 +1218,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
12121218
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
12131219

12141220
setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1215-
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1221+
setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
12161222
setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1223+
setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1224+
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1225+
setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
12171226
setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1227+
setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
12181228

12191229
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
12201230
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
@@ -1334,7 +1344,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
13341344
setOperationAction(ISD::SELECT, VT, Custom);
13351345
setOperationAction(ISD::TRUNCATE, VT, Custom);
13361346
setOperationAction(ISD::UADDSAT, VT, Custom);
1347+
setOperationAction(ISD::SADDSAT, VT, Custom);
13371348
setOperationAction(ISD::USUBSAT, VT, Custom);
1349+
setOperationAction(ISD::SSUBSAT, VT, Custom);
13381350

13391351
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
13401352
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -1596,7 +1608,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
15961608
setOperationAction(ISD::MUL, VT, Custom);
15971609
setOperationAction(ISD::VSELECT, VT, Expand);
15981610
setOperationAction(ISD::UADDSAT, VT, Custom);
1611+
setOperationAction(ISD::SADDSAT, VT, Custom);
15991612
setOperationAction(ISD::USUBSAT, VT, Custom);
1613+
setOperationAction(ISD::SSUBSAT, VT, Custom);
16001614

16011615
setOperationAction(ISD::TRUNCATE, VT, Custom);
16021616
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1678,7 +1692,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
16781692
setOperationAction(ISD::UMIN, VT, Legal);
16791693
setOperationAction(ISD::SETCC, VT, Custom);
16801694
setOperationAction(ISD::UADDSAT, VT, Legal);
1695+
setOperationAction(ISD::SADDSAT, VT, Legal);
16811696
setOperationAction(ISD::USUBSAT, VT, Legal);
1697+
setOperationAction(ISD::SSUBSAT, VT, Legal);
16821698

16831699
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
16841700
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -23388,15 +23404,17 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
2338823404
return split256IntArith(Op, DAG);
2338923405
}
2339023406

23391-
static SDValue LowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
23407+
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
2339223408
MVT VT = Op.getSimpleValueType();
2339323409
if (VT.getScalarType() == MVT::i1) {
2339423410
SDLoc dl(Op);
2339523411
switch (Op.getOpcode()) {
2339623412
default: llvm_unreachable("Expected saturated arithmetic opcode");
2339723413
case ISD::UADDSAT:
23414+
case ISD::SADDSAT:
2339823415
return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
2339923416
case ISD::USUBSAT:
23417+
case ISD::SSUBSAT:
2340023418
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
2340123419
DAG.getNOT(dl, Op.getOperand(1), VT));
2340223420
}
@@ -26194,7 +26212,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2619426212
case ISD::ADD:
2619526213
case ISD::SUB: return LowerADD_SUB(Op, DAG);
2619626214
case ISD::UADDSAT:
26197-
case ISD::USUBSAT: return LowerUADDSAT_USUBSAT(Op, DAG);
26215+
case ISD::SADDSAT:
26216+
case ISD::USUBSAT:
26217+
case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG);
2619826218
case ISD::SMAX:
2619926219
case ISD::SMIN:
2620026220
case ISD::UMAX:
@@ -26277,11 +26297,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
2627726297
return;
2627826298
}
2627926299
case ISD::UADDSAT:
26300+
case ISD::SADDSAT:
2628026301
case ISD::USUBSAT:
26302+
case ISD::SSUBSAT:
2628126303
case X86ISD::VPMADDWD:
2628226304
case X86ISD::AVG: {
26283-
// Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD
26284-
// by widening.
26305+
// Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
26306+
// X86ISD::AVG/VPMADDWD by widening.
2628526307
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
2628626308

2628726309
EVT VT = N->getValueType(0);
@@ -27228,8 +27250,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
2722827250
case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
2722927251
case X86ISD::SCALEF: return "X86ISD::SCALEF";
2723027252
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
27231-
case X86ISD::ADDS: return "X86ISD::ADDS";
27232-
case X86ISD::SUBS: return "X86ISD::SUBS";
2723327253
case X86ISD::AVG: return "X86ISD::AVG";
2723427254
case X86ISD::MULHRS: return "X86ISD::MULHRS";
2723527255
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,10 +226,6 @@ namespace llvm {
226226
SCALEF,
227227
SCALEFS,
228228

229-
// Integer add/sub with signed saturation.
230-
ADDS,
231-
SUBS,
232-
233229
// Unsigned Integer average.
234230
AVG,
235231

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4830,9 +4830,9 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
48304830
SchedWriteVecALU, 1>;
48314831
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
48324832
SchedWriteVecALU, 0>;
4833-
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4833+
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
48344834
SchedWriteVecALU, HasBWI, 1>;
4835-
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4835+
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
48364836
SchedWriteVecALU, HasBWI, 0>;
48374837
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
48384838
SchedWriteVecALU, HasBWI, 1>;

llvm/lib/Target/X86/X86InstrFragmentsSIMD.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
227227
SDTCisVec<1>,
228228
SDTCisSameAs<2, 1>]>;
229229

230-
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
231-
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
232230
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
233231
def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
234232
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3623,9 +3623,9 @@ defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
36233623
SchedWriteVecALU, 1, NoVLX>;
36243624
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
36253625
SchedWriteVecALU, 1, NoVLX>;
3626-
defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
3626+
defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
36273627
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3628-
defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
3628+
defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
36293629
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
36303630
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
36313631
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
@@ -3645,9 +3645,9 @@ defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
36453645
SchedWriteVecALU, 0, NoVLX>;
36463646
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
36473647
SchedWriteVecALU, 0, NoVLX>;
3648-
defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
3648+
defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
36493649
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
3650-
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
3650+
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
36513651
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
36523652
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
36533653
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;

llvm/lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
319319
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
320320
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
321321
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
322-
X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
323-
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
322+
X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
323+
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
324324
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
325325
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
326326
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
@@ -361,8 +361,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
361361
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
362362
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
363363
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
364-
X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
365-
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
364+
X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
365+
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
366366
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
367367
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
368368
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -920,8 +920,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
920920
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
921921
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
922922
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
923-
X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
924-
X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
923+
X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
924+
X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
925925
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
926926
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
927927
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
@@ -1004,8 +1004,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
10041004
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
10051005
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
10061006
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
1007-
X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
1008-
X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
1007+
X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
1008+
X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
10091009
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
10101010
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
10111011
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
@@ -1168,8 +1168,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
11681168
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
11691169
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
11701170
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
1171-
X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
1172-
X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
1171+
X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
1172+
X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
11731173
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
11741174
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
11751175
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1191,8 +1191,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
11911191
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
11921192
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
11931193
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
1194-
X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
1195-
X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
1194+
X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
1195+
X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
11961196
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
11971197
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
11981198
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),

0 commit comments

Comments
 (0)