-
Notifications
You must be signed in to change notification settings - Fork 13.5k
AMDGPU: Handle minimumnum/maximumnum in fneg combines #139133
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Handle minimumnum/maximumnum in fneg combines #139133
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesFull diff: https://github.com/llvm/llvm-project/pull/139133.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d4091ed6e770c..107a789523435 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -661,6 +661,8 @@ static bool fnegFoldsIntoOpcode(unsigned Opc) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case ISD::SELECT:
case ISD::FSIN:
case ISD::FTRUNC:
@@ -4807,10 +4809,14 @@ static unsigned inverseMinMax(unsigned Opc) {
return ISD::FMINIMUM;
case ISD::FMINIMUM:
return ISD::FMAXIMUM;
+ case ISD::FMAXIMUMNUM:
+ return ISD::FMINIMUMNUM;
+ case ISD::FMINIMUMNUM:
+ return ISD::FMAXIMUMNUM;
case AMDGPUISD::FMAX_LEGACY:
return AMDGPUISD::FMIN_LEGACY;
case AMDGPUISD::FMIN_LEGACY:
- return AMDGPUISD::FMAX_LEGACY;
+ return AMDGPUISD::FMAX_LEGACY;
default:
llvm_unreachable("invalid min/max opcode");
}
@@ -4932,6 +4938,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
case ISD::FMINNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMIN_LEGACY: {
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 30e685f974599..46da9d33639b6 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2167,8 +2167,8 @@ define half @v_fneg_inv2pi_minimumnum_f16(half %a) #0 {
; SI-LABEL: v_fneg_inv2pi_minimumnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_max_f32_e32 v0, 0xbe230000, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
@@ -2188,8 +2188,8 @@ define half @v_fneg_neg_inv2pi_minimumnum_f16(half %a) #0 {
; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_max_f32_e32 v0, 0x3e230000, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
@@ -2208,10 +2208,10 @@ define double @v_fneg_inv2pi_minimumnum_f64(double %a) #0 {
; SI-LABEL: v_fneg_inv2pi_minimumnum_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; SI-NEXT: s_mov_b32 s4, 0x6dc9c882
; SI-NEXT: s_mov_b32 s5, 0xbfc45f30
-; SI-NEXT: v_max_f64 v[0:1], -v[0:1], s[4:5]
+; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_inv2pi_minimumnum_f64:
@@ -2230,17 +2230,17 @@ define double @v_fneg_neg_inv2pi_minimumnum_f64(double %a) #0 {
; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; SI-NEXT: s_mov_b32 s4, 0x6dc9c882
; SI-NEXT: s_mov_b32 s5, 0x3fc45f30
-; SI-NEXT: v_max_f64 v[0:1], -v[0:1], s[4:5]
+; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; VI-NEXT: v_max_f64 v[0:1], -v[0:1], 0.15915494309189532
+; VI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; VI-NEXT: v_max_f64 v[0:1], v[0:1], 0.15915494309189532
; VI-NEXT: s_setpc_b64 s[30:31]
%min = call double @llvm.minimumnum.f64(double 0xbfc45f306dc9c882, double %a)
%fneg = fneg double %min
@@ -2313,9 +2313,9 @@ define { float, float } @v_fneg_minimumnum_multi_use_minimumnum_f32_ieee(float %
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
+; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT: v_max_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2330,9 +2330,9 @@ define <2 x float> @v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee(float %a,
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
+; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT: v_max_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2513,9 +2513,9 @@ define { float, float } @v_fneg_maximumnum_multi_use_maximumnum_f32_ieee(float %
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
+; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %b)
@@ -2530,9 +2530,9 @@ define <2 x float> @v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee(float %a,
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
+; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %b)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, although I do not see practical improvements in the tests.
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1 | ||
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1 | ||
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 | ||
; GCN-NEXT: v_min_f32_e32 v0, v0, v1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is an encoding size improvement
5f57920
to
a2a1d3b
Compare
0008a50
to
9a987f2
Compare
No description provided.