
[InstSimplify] Optimize maximumnum and minimumnum #139581

Open · wants to merge 6 commits into main

15 changes: 15 additions & 0 deletions llvm/include/llvm/IR/PatternMatch.h
@@ -707,10 +707,25 @@ m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
struct is_nan {
bool isValue(const APFloat &C) const { return C.isNaN(); }
};

struct is_snan {
bool isValue(const APFloat &C) const { return C.isSignaling(); }
};

struct is_qnan {
bool isValue(const APFloat &C) const { return C.isNaN() && !C.isSignaling(); }
};

/// Match an arbitrary NaN constant. This includes quiet and signalling nans.
/// For vectors, this includes constants with undefined elements.
inline cstfp_pred_ty<is_nan> m_NaN() { return cstfp_pred_ty<is_nan>(); }

/// Match quiet NaN constants, including vectors with undefined elements.
inline cstfp_pred_ty<is_qnan> m_qNaN() { return cstfp_pred_ty<is_qnan>(); }

/// Match signalling NaN constants, including vectors with undefined elements.
inline cstfp_pred_ty<is_snan> m_sNaN() { return cstfp_pred_ty<is_snan>(); }

struct is_nonnan {
bool isValue(const APFloat &C) const { return !C.isNaN(); }
};
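
As a rough usage sketch (illustrative only, not part of this diff; the helper name is made up), the new m_sNaN/m_qNaN predicates compose with the existing PatternMatch combinators like any other constant matcher:

#include "llvm/IR/PatternMatch.h"

// Hypothetical helper: returns true if V is a maxnum call whose second
// operand is a signaling-NaN constant (checked per element for vectors,
// with undefined elements allowed by cstfp_pred_ty).
static bool isMaxnumWithSNaNOperand(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  return match(V, m_Intrinsic<llvm::Intrinsic::maxnum>(m_Value(X), m_sNaN()));
}
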
56 changes: 42 additions & 14 deletions llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6415,7 +6415,8 @@ static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) {
static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0,
Value *Op1) {
assert((IID == Intrinsic::maxnum || IID == Intrinsic::minnum ||
IID == Intrinsic::maximum || IID == Intrinsic::minimum) &&
IID == Intrinsic::maximum || IID == Intrinsic::minimum ||
IID == Intrinsic::maximumnum || IID == Intrinsic::minimumnum) &&
"Unsupported intrinsic");

auto *M0 = dyn_cast<IntrinsicInst>(Op0);
@@ -6711,7 +6712,16 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
case Intrinsic::maxnum:
case Intrinsic::minnum:
case Intrinsic::maximum:
case Intrinsic::minimum: {
case Intrinsic::minimum:
case Intrinsic::maximumnum:
case Intrinsic::minimumnum: {
// In several cases here, we deviate from exact IEEE 754 semantics
// to enable optimizations (as allowed by the LLVM IR spec).
//
// For instance, we may return one of the arguments unmodified instead of
// inserting an llvm.canonicalize to transform input sNaNs into qNaNs,
// or may assume all NaN inputs are qNaNs.

// If the arguments are the same, this is a no-op.
if (Op0 == Op1)
return Op0;
@@ -6725,32 +6735,50 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
return Op0;

bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum;

// minnum(X, nan) -> X
// maxnum(X, nan) -> X
// minimum(X, nan) -> nan
// maximum(X, nan) -> nan
if (match(Op1, m_NaN()))
return PropagateNaN ? propagateNaN(cast<Constant>(Op1)) : Op0;
bool PropagateSNaN = IID == Intrinsic::minnum || IID == Intrinsic::maxnum;
bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
IID == Intrinsic::minimumnum;

// minnum(x, qnan) -> x
// maxnum(x, qnan) -> x
// minnum(x, snan) -> qnan
// maxnum(x, snan) -> qnan
// minimum(X, nan) -> qnan
// maximum(X, nan) -> qnan
if (PropagateSNaN && match(Op1, m_sNaN()))
return propagateNaN(cast<Constant>(Op1));
if (match(Op1, m_NaN())) {
if (PropagateNaN)
return propagateNaN(cast<Constant>(Op1));
// In cases like mixed <sNaN, qNaN> vectors, avoid the optimization to
// allow correct sNaN propagation where necessary.
else if (PropagateSNaN && !match(Op1, m_qNaN()))
break;
else
return Op0;
Comment on lines +6753 to +6758

Contributor:
With this chain of NaN matchers, it might actually be easier to just do the element-wise handling; propagateNaN already has the element-wise code in it.

Contributor Author:
I think element-wise comparisons would take this slightly out of the current scope of the patch, as they introduce cases like maxnum(<x, y>, <sNaN, qNaN>) -> <qNaN, y>, which require insert/extract chains to be created, and there may be no clear perf win in re-creating a partially folded vector (where some elements are NaN) versus just using maxnum on the original vector despite some elements being NaN. To avoid these partially folded cases, we'd need to check whether the whole vector is sNaN or qNaN (ignoring poison/undef elements), which is what the current patch already does with the matchers.

It might be worth considering these kinds of element-wise folds in the future (and perhaps for partial-Inf, partial-poison, and partial-FMAX vectors in addition to NaNs), but I think that would be better suited to a separate patch.

}

// In the following folds, inf can be replaced with the largest finite
// float, if the ninf flag is set.
const APFloat *C;
if (match(Op1, m_APFloat(C)) &&
(C->isInfinity() || (Call && Call->hasNoInfs() && C->isLargest()))) {
// minnum(X, -inf) -> -inf
// maxnum(X, +inf) -> +inf
// minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
// maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
// minimum(X, -inf) -> -inf if nnan
// maximum(X, +inf) -> +inf if nnan
// minimumnum(X, -inf) -> -inf
// maximumnum(X, +inf) -> +inf
if (C->isNegative() == IsMin &&
(!PropagateNaN || (Call && Call->hasNoNaNs())))
return ConstantFP::get(ReturnType, *C);

// minnum(X, +inf) -> X if nnan
// maxnum(X, -inf) -> X if nnan
// minimum(X, +inf) -> X
// maximum(X, -inf) -> X
// minimum(X, +inf) -> X (ignoring quieting of sNaNs)
// maximum(X, -inf) -> X (ignoring quieting of sNaNs)
// maximumnum(X, -inf) -> X if nnan
// minimumnum(X, +inf) -> X if nnan
if (C->isNegative() != IsMin &&
(PropagateNaN || (Call && Call->hasNoNaNs())))
return Op0;
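
To make the constant-NaN cases above concrete, here is a small standalone sketch (an assumed illustration, not code from this patch; foldMaxnumWithConstNaN is a made-up name) of what the minnum/maxnum fold computes when the second operand is a NaN constant:

#include "llvm/ADT/APFloat.h"
#include <cassert>

// maxnum(x, qNaN) -> x      (the quiet NaN is ignored)
// maxnum(x, sNaN) -> qNaN   (the signaling NaN is quieted and propagated)
static llvm::APFloat foldMaxnumWithConstNaN(const llvm::APFloat &X,
                                            const llvm::APFloat &NaNOp) {
  assert(NaNOp.isNaN() && "second operand must be a NaN constant");
  if (NaNOp.isSignaling()) {
    llvm::APFloat Quiet = NaNOp;
    Quiet.makeQuiet(); // roughly mirrors propagateNaN() quieting the sNaN
    return Quiet;
  }
  return X; // quiet NaN: return the other operand unchanged
}
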
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/ValueTracking.cpp
@@ -9310,6 +9310,10 @@ Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
case Intrinsic::minimum: return Intrinsic::maximum;
case Intrinsic::maxnum: return Intrinsic::minnum;
case Intrinsic::minnum: return Intrinsic::maxnum;
case Intrinsic::maximumnum:
return Intrinsic::minimumnum;
case Intrinsic::minimumnum:
return Intrinsic::maximumnum;
default: llvm_unreachable("Unexpected intrinsic");
}
}
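
As a quick illustration (an assumed test-style snippet, not part of the patch), the extended mapping keeps maximumnum and minimumnum as inverses of each other, in line with the existing pairs:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Intrinsics.h"
#include <cassert>

static void checkInverseMinMaxMapping() {
  using namespace llvm;
  // maximumnum <-> minimumnum, matching the existing maxnum <-> minnum pair.
  assert(getInverseMinMaxIntrinsic(Intrinsic::maximumnum) == Intrinsic::minimumnum);
  assert(getInverseMinMaxIntrinsic(Intrinsic::minimumnum) == Intrinsic::maximumnum);
}
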
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -497,12 +497,10 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace
ret void
}

; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.
; FIXME: Should there be more checks here? minnum with sNaN operand is simplified to qNaN.

; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
; GCN: v_mov_b32_e32 v{{.+}}, 0x7fc00000
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2032,8 +2032,7 @@ define float @v_fneg_self_minimumnum_f32_ieee(float %a) #0 {
; GCN-LABEL: v_fneg_self_minimumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_max_f32_e32 v0, v0, v0
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %a)
%min.fneg = fneg float %min
@@ -2044,8 +2043,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_max_f32_e32 v0, v0, v0
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %a)
%min.fneg = fneg float %min
@@ -2377,8 +2375,7 @@ define float @v_fneg_self_maximumnum_f32_ieee(float %a) #0 {
; GCN-LABEL: v_fneg_self_maximumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_min_f32_e32 v0, v0, v0
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %a)
%max.fneg = fneg float %max
@@ -2389,8 +2386,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_min_f32_e32 v0, v0, v0
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %a)
%max.fneg = fneg float %max