Skip to content

Commit 665ab08

Browse files
committed
[X86] Use UADDSAT/USUBSAT instead of ADDUS/SUBUS
Replace the X86ISD opcodes ADDUS and SUBUS with generic ISD opcodes UADDSAT and USUBSAT. As a side-effect, this also makes codegen for the @llvm.uadd.sat and @llvm.usub.sat intrinsics reasonable. This only replaces use in the X86 backend, and does not move any of the ADDUS/SUBUS X86 specific combines into generic codegen. Differential Revision: https://reviews.llvm.org/D55787 llvm-svn: 349481
1 parent a7d2a23 commit 665ab08

File tree

7 files changed

+496
-25742
lines changed

7 files changed

+496
-25742
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 69 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
829829
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
830830
}
831831

832+
setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
833+
setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
834+
setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
835+
setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
836+
// Use widening instead of promotion.
837+
for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
838+
MVT::v4i16, MVT::v2i16 }) {
839+
setOperationAction(ISD::UADDSAT, VT, Custom);
840+
setOperationAction(ISD::USUBSAT, VT, Custom);
841+
}
842+
832843
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
833844
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
834845
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -1200,6 +1211,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
12001211
setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
12011212
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
12021213

1214+
setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1215+
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1216+
setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1217+
setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1218+
12031219
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
12041220
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
12051221
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
@@ -1317,6 +1333,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
13171333
setOperationAction(ISD::SETCC, VT, Custom);
13181334
setOperationAction(ISD::SELECT, VT, Custom);
13191335
setOperationAction(ISD::TRUNCATE, VT, Custom);
1336+
setOperationAction(ISD::UADDSAT, VT, Custom);
1337+
setOperationAction(ISD::USUBSAT, VT, Custom);
13201338

13211339
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
13221340
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -1577,6 +1595,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
15771595
setOperationAction(ISD::SUB, VT, Custom);
15781596
setOperationAction(ISD::MUL, VT, Custom);
15791597
setOperationAction(ISD::VSELECT, VT, Expand);
1598+
setOperationAction(ISD::UADDSAT, VT, Custom);
1599+
setOperationAction(ISD::USUBSAT, VT, Custom);
15801600

15811601
setOperationAction(ISD::TRUNCATE, VT, Custom);
15821602
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1657,6 +1677,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
16571677
setOperationAction(ISD::SMIN, VT, Legal);
16581678
setOperationAction(ISD::UMIN, VT, Legal);
16591679
setOperationAction(ISD::SETCC, VT, Custom);
1680+
setOperationAction(ISD::UADDSAT, VT, Legal);
1681+
setOperationAction(ISD::USUBSAT, VT, Legal);
16601682

16611683
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
16621684
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -19147,7 +19169,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
1914719169
break;
1914819170
}
1914919171

19150-
SDValue Result = DAG.getNode(X86ISD::SUBUS, dl, VT, Op0, Op1);
19172+
SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
1915119173
return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
1915219174
DAG.getConstant(0, dl, VT));
1915319175
}
@@ -23366,6 +23388,26 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
2336623388
return split256IntArith(Op, DAG);
2336723389
}
2336823390

23391+
/// Custom lowering for ISD::UADDSAT / ISD::USUBSAT nodes.
///
/// Two cases are handled:
///  * The scalar (element) type is i1: the saturating operation is rewritten
///    as a plain bitwise operation.
///  * Otherwise the value type must be a 256-bit integer vector (enforced by
///    the assert below) and the node is handed to split256IntArith.
///
/// \param Op  The UADDSAT or USUBSAT node to lower; any other opcode reaching
///            the i1 path hits llvm_unreachable.
/// \param DAG The SelectionDAG used to create the replacement nodes.
/// \returns The replacement value.
static SDValue LowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
23392+
MVT VT = Op.getSimpleValueType();
23393+
// With 1-bit unsigned values saturation reduces to boolean logic.
if (VT.getScalarType() == MVT::i1) {
23394+
SDLoc dl(Op);
23395+
switch (Op.getOpcode()) {
23396+
default: llvm_unreachable("Expected saturated arithmetic opcode");
23397+
case ISD::UADDSAT:
23398+
// a +sat b on one bit: 1+1 clamps to 1, so the result is a | b.
return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
23399+
case ISD::USUBSAT:
23400+
// a -sat b on one bit: 0-1 clamps to 0, so the result is a & ~b.
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
23401+
DAG.getNOT(dl, Op.getOperand(1), VT));
23402+
}
23403+
}
23404+
23405+
// Non-i1 case: only 256-bit integer vectors are expected here.
// NOTE(review): split256IntArith is defined elsewhere; presumably it splits
// the operation into two 128-bit halves -- confirm against its definition.
assert(Op.getSimpleValueType().is256BitVector() &&
23406+
Op.getSimpleValueType().isInteger() &&
23407+
"Only handle AVX 256-bit vector integer operation");
23408+
return split256IntArith(Op, DAG);
23409+
}
23410+
2336923411
static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
2337023412
MVT VT = Op.getSimpleValueType();
2337123413
if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
@@ -26147,6 +26189,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2614726189
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
2614826190
case ISD::ADD:
2614926191
case ISD::SUB: return LowerADD_SUB(Op, DAG);
26192+
case ISD::UADDSAT:
26193+
case ISD::USUBSAT: return LowerUADDSAT_USUBSAT(Op, DAG);
2615026194
case ISD::SMAX:
2615126195
case ISD::SMIN:
2615226196
case ISD::UMAX:
@@ -26228,11 +26272,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
2622826272
}
2622926273
return;
2623026274
}
26275+
case ISD::UADDSAT:
26276+
case ISD::USUBSAT:
2623126277
case X86ISD::VPMADDWD:
26232-
case X86ISD::ADDUS:
26233-
case X86ISD::SUBUS:
2623426278
case X86ISD::AVG: {
26235-
// Legalize types for X86ISD::AVG/ADDUS/SUBUS/VPMADDWD by widening.
26279+
// Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD
26280+
// by widening.
2623626281
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
2623726282

2623826283
EVT VT = N->getValueType(0);
@@ -26966,8 +27011,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
2696627011
case X86ISD::ANDNP: return "X86ISD::ANDNP";
2696727012
case X86ISD::BLENDI: return "X86ISD::BLENDI";
2696827013
case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
26969-
case X86ISD::ADDUS: return "X86ISD::ADDUS";
26970-
case X86ISD::SUBUS: return "X86ISD::SUBUS";
2697127014
case X86ISD::HADD: return "X86ISD::HADD";
2697227015
case X86ISD::HSUB: return "X86ISD::HSUB";
2697327016
case X86ISD::FHADD: return "X86ISD::FHADD";
@@ -34043,9 +34086,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
3404334086
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
3404434087
SDValue CondRHS = Cond->getOperand(1);
3404534088

34046-
auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
34047-
ArrayRef<SDValue> Ops) {
34048-
return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
34089+
auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
34090+
ArrayRef<SDValue> Ops) {
34091+
return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops);
3404934092
};
3405034093

3405134094
// Look for a general sub with unsigned saturation first.
@@ -34054,22 +34097,22 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
3405434097
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
3405534098
Other->getOpcode() == ISD::SUB && OpRHS == CondRHS)
3405634099
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34057-
SUBUSBuilder);
34100+
USUBSATBuilder);
3405834101

3405934102
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
3406034103
if (isa<BuildVectorSDNode>(CondRHS)) {
3406134104
// If the RHS is a constant we have to reverse the const
3406234105
// canonicalization.
3406334106
// x > C-1 ? x+-C : 0 --> subus x, C
34064-
auto MatchSUBUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
34107+
auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
3406534108
return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1);
3406634109
};
3406734110
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
34068-
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchSUBUS)) {
34111+
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) {
3406934112
OpRHS = DAG.getNode(ISD::SUB, DL, VT,
3407034113
DAG.getConstant(0, DL, VT), OpRHS);
3407134114
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34072-
SUBUSBuilder);
34115+
USUBSATBuilder);
3407334116
}
3407434117

3407534118
// Another special case: If C was a sign bit, the sub has been
@@ -34085,7 +34128,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
3408534128
// Note that we have to rebuild the RHS constant here to ensure we
3408634129
// don't rely on particular values of undef lanes.
3408734130
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34088-
SUBUSBuilder);
34131+
USUBSATBuilder);
3408934132
}
3409034133
}
3409134134
}
@@ -34118,9 +34161,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
3411834161
if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
3411934162
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
3412034163

34121-
auto ADDUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
34122-
ArrayRef<SDValue> Ops) {
34123-
return DAG.getNode(X86ISD::ADDUS, DL, Ops[0].getValueType(), Ops);
34164+
auto UADDSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
34165+
ArrayRef<SDValue> Ops) {
34166+
return DAG.getNode(ISD::UADDSAT, DL, Ops[0].getValueType(), Ops);
3412434167
};
3412534168

3412634169
// Canonicalize condition operands.
@@ -34135,20 +34178,20 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
3413534178
if (CC == ISD::SETULE && Other == CondRHS &&
3413634179
(OpLHS == CondLHS || OpRHS == CondLHS))
3413734180
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34138-
ADDUSBuilder);
34181+
UADDSATBuilder);
3413934182

3414034183
if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
3414134184
CondLHS == OpLHS) {
3414234185
// If the RHS is a constant we have to reverse the const
3414334186
// canonicalization.
3414434187
// x > ~C ? x+C : ~0 --> addus x, C
34145-
auto MatchADDUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
34188+
auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
3414634189
return Cond->getAPIntValue() == ~Op->getAPIntValue();
3414734190
};
3414834191
if (CC == ISD::SETULE &&
34149-
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchADDUS))
34192+
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
3415034193
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34151-
ADDUSBuilder);
34194+
UADDSATBuilder);
3415234195
}
3415334196
}
3415434197
}
@@ -40764,16 +40807,16 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
4076440807
} else
4076540808
return SDValue();
4076640809

40767-
auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
40768-
ArrayRef<SDValue> Ops) {
40769-
return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
40810+
auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
40811+
ArrayRef<SDValue> Ops) {
40812+
return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops);
4077040813
};
4077140814

4077240815
// PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
4077340816
// special preprocessing in some cases.
4077440817
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
4077540818
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
40776-
{ SubusLHS, SubusRHS }, SUBUSBuilder);
40819+
{ SubusLHS, SubusRHS }, USUBSATBuilder);
4077740820

4077840821
// Special preprocessing case can be only applied
4077940822
// if the value was zero extended from 16 bit,
@@ -40805,7 +40848,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
4080540848
SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
4080640849
SDValue Psubus =
4080740850
SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType,
40808-
{ NewSubusLHS, NewSubusRHS }, SUBUSBuilder);
40851+
{ NewSubusLHS, NewSubusRHS }, USUBSATBuilder);
4080940852
// Zero extend the result, it may be used somewhere as 32 bit,
4081040853
// if not zext and following trunc will shrink.
4081140854
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType);

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,10 +226,6 @@ namespace llvm {
226226
SCALEF,
227227
SCALEFS,
228228

229-
// Integer add/sub with unsigned saturation.
230-
ADDUS,
231-
SUBUS,
232-
233229
// Integer add/sub with signed saturation.
234230
ADDS,
235231
SUBS,

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4834,9 +4834,9 @@ defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
48344834
SchedWriteVecALU, HasBWI, 1>;
48354835
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
48364836
SchedWriteVecALU, HasBWI, 0>;
4837-
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
4837+
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
48384838
SchedWriteVecALU, HasBWI, 1>;
4839-
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
4839+
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
48404840
SchedWriteVecALU, HasBWI, 0>;
48414841
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
48424842
SchedWritePMULLD, HasAVX512, 1>, T8PD;

llvm/lib/Target/X86/X86InstrFragmentsSIMD.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
227227
SDTCisVec<1>,
228228
SDTCisSameAs<2, 1>]>;
229229

230-
def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
231-
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
232230
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
233231
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
234232
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3627,9 +3627,9 @@ defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
36273627
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
36283628
defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
36293629
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3630-
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8,
3630+
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
36313631
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
3632-
defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16,
3632+
defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
36333633
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
36343634
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
36353635
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
@@ -3649,9 +3649,9 @@ defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
36493649
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
36503650
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
36513651
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
3652-
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
3652+
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
36533653
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
3654-
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
3654+
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
36553655
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
36563656
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
36573657
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;

0 commit comments

Comments
 (0)