@@ -829,6 +829,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
829
829
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
830
830
}
831
831
832
+ setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
833
+ setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
834
+ setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
835
+ setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
836
+ // Use widening instead of promotion.
837
+ for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
838
+ MVT::v4i16, MVT::v2i16 }) {
839
+ setOperationAction(ISD::UADDSAT, VT, Custom);
840
+ setOperationAction(ISD::USUBSAT, VT, Custom);
841
+ }
842
+
832
843
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
833
844
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
834
845
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -1200,6 +1211,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1200
1211
setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1201
1212
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1202
1213
1214
+ setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1215
+ setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1216
+ setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1217
+ setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1218
+
1203
1219
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1204
1220
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1205
1221
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
@@ -1317,6 +1333,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1317
1333
setOperationAction(ISD::SETCC, VT, Custom);
1318
1334
setOperationAction(ISD::SELECT, VT, Custom);
1319
1335
setOperationAction(ISD::TRUNCATE, VT, Custom);
1336
+ setOperationAction(ISD::UADDSAT, VT, Custom);
1337
+ setOperationAction(ISD::USUBSAT, VT, Custom);
1320
1338
1321
1339
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1322
1340
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -1577,6 +1595,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1577
1595
setOperationAction(ISD::SUB, VT, Custom);
1578
1596
setOperationAction(ISD::MUL, VT, Custom);
1579
1597
setOperationAction(ISD::VSELECT, VT, Expand);
1598
+ setOperationAction(ISD::UADDSAT, VT, Custom);
1599
+ setOperationAction(ISD::USUBSAT, VT, Custom);
1580
1600
1581
1601
setOperationAction(ISD::TRUNCATE, VT, Custom);
1582
1602
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1657,6 +1677,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
1657
1677
setOperationAction(ISD::SMIN, VT, Legal);
1658
1678
setOperationAction(ISD::UMIN, VT, Legal);
1659
1679
setOperationAction(ISD::SETCC, VT, Custom);
1680
+ setOperationAction(ISD::UADDSAT, VT, Legal);
1681
+ setOperationAction(ISD::USUBSAT, VT, Legal);
1660
1682
1661
1683
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
1662
1684
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -19147,7 +19169,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
19147
19169
break;
19148
19170
}
19149
19171
19150
- SDValue Result = DAG.getNode(X86ISD::SUBUS , dl, VT, Op0, Op1);
19172
+ SDValue Result = DAG.getNode(ISD::USUBSAT , dl, VT, Op0, Op1);
19151
19173
return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
19152
19174
DAG.getConstant(0, dl, VT));
19153
19175
}
@@ -23366,6 +23388,26 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
23366
23388
return split256IntArith(Op, DAG);
23367
23389
}
23368
23390
23391
+ static SDValue LowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) { // Custom lowering for ISD::UADDSAT / ISD::USUBSAT nodes.
23392
+ MVT VT = Op.getSimpleValueType();
23393
+ if (VT.getScalarType() == MVT::i1) { // 1-bit elements: saturating arithmetic reduces to plain bitwise logic.
23394
+ SDLoc dl(Op);
23395
+ switch (Op.getOpcode()) {
23396
+ default: llvm_unreachable("Expected saturated arithmetic opcode");
23397
+ case ISD::UADDSAT:
23398
+ return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1)); // uaddsat(a, b) on i1 == a | b (1 + 1 saturates to 1).
23399
+ case ISD::USUBSAT:
23400
+ return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), // usubsat(a, b) on i1 == a & ~b (result is 1 only for 1 - 0).
23401
+ DAG.getNOT(dl, Op.getOperand(1), VT));
23402
+ }
23403
+ }
23404
+ // Any other type reaching this point is expected to be a 256-bit integer vector.
23405
+ assert(Op.getSimpleValueType().is256BitVector() && // NOTE(review): re-queries the type already cached in VT above.
23406
+ Op.getSimpleValueType().isInteger() &&
23407
+ "Only handle AVX 256-bit vector integer operation");
23408
+ return split256IntArith(Op, DAG); // Same helper LowerADD_SUB uses; presumably splits the op into narrower halves - confirm.
23409
+ }
23410
+
23369
23411
static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
23370
23412
MVT VT = Op.getSimpleValueType();
23371
23413
if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
@@ -26147,6 +26189,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
26147
26189
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
26148
26190
case ISD::ADD:
26149
26191
case ISD::SUB: return LowerADD_SUB(Op, DAG);
26192
+ case ISD::UADDSAT:
26193
+ case ISD::USUBSAT: return LowerUADDSAT_USUBSAT(Op, DAG);
26150
26194
case ISD::SMAX:
26151
26195
case ISD::SMIN:
26152
26196
case ISD::UMAX:
@@ -26228,11 +26272,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
26228
26272
}
26229
26273
return;
26230
26274
}
26275
+ case ISD::UADDSAT:
26276
+ case ISD::USUBSAT:
26231
26277
case X86ISD::VPMADDWD:
26232
- case X86ISD::ADDUS:
26233
- case X86ISD::SUBUS:
26234
26278
case X86ISD::AVG: {
26235
- // Legalize types for X86ISD::AVG/ADDUS/SUBUS/VPMADDWD by widening.
26279
+ // Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD
26280
+ // by widening.
26236
26281
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
26237
26282
26238
26283
EVT VT = N->getValueType(0);
@@ -26966,8 +27011,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
26966
27011
case X86ISD::ANDNP: return "X86ISD::ANDNP";
26967
27012
case X86ISD::BLENDI: return "X86ISD::BLENDI";
26968
27013
case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
26969
- case X86ISD::ADDUS: return "X86ISD::ADDUS";
26970
- case X86ISD::SUBUS: return "X86ISD::SUBUS";
26971
27014
case X86ISD::HADD: return "X86ISD::HADD";
26972
27015
case X86ISD::HSUB: return "X86ISD::HSUB";
26973
27016
case X86ISD::FHADD: return "X86ISD::FHADD";
@@ -34043,9 +34086,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
34043
34086
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
34044
34087
SDValue CondRHS = Cond->getOperand(1);
34045
34088
34046
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
34047
- ArrayRef<SDValue> Ops) {
34048
- return DAG.getNode(X86ISD::SUBUS , DL, Ops[0].getValueType(), Ops);
34089
+ auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
34090
+ ArrayRef<SDValue> Ops) {
34091
+ return DAG.getNode(ISD::USUBSAT , DL, Ops[0].getValueType(), Ops);
34049
34092
};
34050
34093
34051
34094
// Look for a general sub with unsigned saturation first.
@@ -34054,22 +34097,22 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
34054
34097
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
34055
34098
Other->getOpcode() == ISD::SUB && OpRHS == CondRHS)
34056
34099
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34057
- SUBUSBuilder );
34100
+ USUBSATBuilder );
34058
34101
34059
34102
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
34060
34103
if (isa<BuildVectorSDNode>(CondRHS)) {
34061
34104
// If the RHS is a constant we have to reverse the const
34062
34105
// canonicalization.
34063
34106
// x > C-1 ? x+-C : 0 --> subus x, C
34064
- auto MatchSUBUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
34107
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
34065
34108
return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1);
34066
34109
};
34067
34110
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
34068
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchSUBUS )) {
34111
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT )) {
34069
34112
OpRHS = DAG.getNode(ISD::SUB, DL, VT,
34070
34113
DAG.getConstant(0, DL, VT), OpRHS);
34071
34114
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34072
- SUBUSBuilder );
34115
+ USUBSATBuilder );
34073
34116
}
34074
34117
34075
34118
// Another special case: If C was a sign bit, the sub has been
@@ -34085,7 +34128,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
34085
34128
// Note that we have to rebuild the RHS constant here to ensure we
34086
34129
// don't rely on particular values of undef lanes.
34087
34130
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34088
- SUBUSBuilder );
34131
+ USUBSATBuilder );
34089
34132
}
34090
34133
}
34091
34134
}
@@ -34118,9 +34161,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
34118
34161
if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
34119
34162
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
34120
34163
34121
- auto ADDUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
34122
- ArrayRef<SDValue> Ops) {
34123
- return DAG.getNode(X86ISD::ADDUS , DL, Ops[0].getValueType(), Ops);
34164
+ auto UADDSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
34165
+ ArrayRef<SDValue> Ops) {
34166
+ return DAG.getNode(ISD::UADDSAT , DL, Ops[0].getValueType(), Ops);
34124
34167
};
34125
34168
34126
34169
// Canonicalize condition operands.
@@ -34135,20 +34178,20 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
34135
34178
if (CC == ISD::SETULE && Other == CondRHS &&
34136
34179
(OpLHS == CondLHS || OpRHS == CondLHS))
34137
34180
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34138
- ADDUSBuilder );
34181
+ UADDSATBuilder );
34139
34182
34140
34183
if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
34141
34184
CondLHS == OpLHS) {
34142
34185
// If the RHS is a constant we have to reverse the const
34143
34186
// canonicalization.
34144
34187
// x > ~C ? x+C : ~0 --> addus x, C
34145
- auto MatchADDUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
34188
+ auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
34146
34189
return Cond->getAPIntValue() == ~Op->getAPIntValue();
34147
34190
};
34148
34191
if (CC == ISD::SETULE &&
34149
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchADDUS ))
34192
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT ))
34150
34193
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
34151
- ADDUSBuilder );
34194
+ UADDSATBuilder );
34152
34195
}
34153
34196
}
34154
34197
}
@@ -40764,16 +40807,16 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
40764
40807
} else
40765
40808
return SDValue();
40766
40809
40767
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
40768
- ArrayRef<SDValue> Ops) {
40769
- return DAG.getNode(X86ISD::SUBUS , DL, Ops[0].getValueType(), Ops);
40810
+ auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
40811
+ ArrayRef<SDValue> Ops) {
40812
+ return DAG.getNode(ISD::USUBSAT , DL, Ops[0].getValueType(), Ops);
40770
40813
};
40771
40814
40772
40815
// PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
40773
40816
// special preprocessing in some cases.
40774
40817
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
40775
40818
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
40776
- { SubusLHS, SubusRHS }, SUBUSBuilder );
40819
+ { SubusLHS, SubusRHS }, USUBSATBuilder );
40777
40820
40778
40821
// Special preprocessing case can be only applied
40779
40822
// if the value was zero extended from 16 bit,
@@ -40805,7 +40848,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
40805
40848
SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
40806
40849
SDValue Psubus =
40807
40850
SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType,
40808
- { NewSubusLHS, NewSubusRHS }, SUBUSBuilder );
40851
+ { NewSubusLHS, NewSubusRHS }, USUBSATBuilder );
40809
40852
// Zero extend the result, it may be used somewhere as 32 bit,
40810
40853
// if not zext and following trunc will shrink.
40811
40854
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType);
0 commit comments