Skip to content

AMDGPU: Handle minimumnum/maximumnum in fneg combines #139133

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented May 8, 2025

No description provided.

@arsenm arsenm marked this pull request as ready for review May 8, 2025 19:18
@llvmbot
Copy link
Member

llvmbot commented May 8, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/139133.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+9-1)
  • (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll (+22-22)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d4091ed6e770c..107a789523435 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -661,6 +661,8 @@ static bool fnegFoldsIntoOpcode(unsigned Opc) {
   case ISD::FMAXNUM_IEEE:
   case ISD::FMINIMUM:
   case ISD::FMAXIMUM:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM:
   case ISD::SELECT:
   case ISD::FSIN:
   case ISD::FTRUNC:
@@ -4807,10 +4809,14 @@ static unsigned inverseMinMax(unsigned Opc) {
     return ISD::FMINIMUM;
   case ISD::FMINIMUM:
     return ISD::FMAXIMUM;
+  case ISD::FMAXIMUMNUM:
+    return ISD::FMINIMUMNUM;
+  case ISD::FMINIMUMNUM:
+    return ISD::FMAXIMUMNUM;
   case AMDGPUISD::FMAX_LEGACY:
     return AMDGPUISD::FMIN_LEGACY;
   case AMDGPUISD::FMIN_LEGACY:
-    return  AMDGPUISD::FMAX_LEGACY;
+    return AMDGPUISD::FMAX_LEGACY;
   default:
     llvm_unreachable("invalid min/max opcode");
   }
@@ -4932,6 +4938,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
   case ISD::FMINNUM_IEEE:
   case ISD::FMINIMUM:
   case ISD::FMAXIMUM:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM:
   case AMDGPUISD::FMAX_LEGACY:
   case AMDGPUISD::FMIN_LEGACY: {
     // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 30e685f974599..46da9d33639b6 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2167,8 +2167,8 @@ define half @v_fneg_inv2pi_minimumnum_f16(half %a) #0 {
 ; SI-LABEL: v_fneg_inv2pi_minimumnum_f16:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
+; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-NEXT:    v_max_f32_e32 v0, 0xbe230000, v0
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2188,8 +2188,8 @@ define half @v_fneg_neg_inv2pi_minimumnum_f16(half %a) #0 {
 ; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f16:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
+; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-NEXT:    v_max_f32_e32 v0, 0x3e230000, v0
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2208,10 +2208,10 @@ define double @v_fneg_inv2pi_minimumnum_f64(double %a) #0 {
 ; SI-LABEL: v_fneg_inv2pi_minimumnum_f64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
 ; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
 ; SI-NEXT:    s_mov_b32 s5, 0xbfc45f30
-; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], s[4:5]
+; SI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_inv2pi_minimumnum_f64:
@@ -2230,17 +2230,17 @@ define double @v_fneg_neg_inv2pi_minimumnum_f64(double %a) #0 {
 ; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
 ; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
 ; SI-NEXT:    s_mov_b32 s5, 0x3fc45f30
-; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], s[4:5]
+; SI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
-; VI-NEXT:    v_max_f64 v[0:1], -v[0:1], 0.15915494309189532
+; VI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; VI-NEXT:    v_max_f64 v[0:1], v[0:1], 0.15915494309189532
 ; VI-NEXT:    s_setpc_b64 s[30:31]
   %min = call double @llvm.minimumnum.f64(double 0xbfc45f306dc9c882, double %a)
   %fneg = fneg double %min
@@ -2313,9 +2313,9 @@ define { float, float } @v_fneg_minimumnum_multi_use_minimumnum_f32_ieee(float %
 ; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
+; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2330,9 +2330,9 @@ define <2 x float> @v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee(float %a,
 ; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
+; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2513,9 +2513,9 @@ define { float, float } @v_fneg_maximumnum_multi_use_maximumnum_f32_ieee(float %
 ; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
+; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float %a, float %b)
@@ -2530,9 +2530,9 @@ define <2 x float> @v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee(float %a,
 ; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
+; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
 ; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float %a, float %b)

Copy link
Collaborator

@rampitec rampitec left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, although I do not see practical improvements in the tests.

; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an encoding size improvement

Copy link
Contributor Author

arsenm commented May 9, 2025

Merge activity

  • May 9, 2:00 AM EDT: A user started a stack merge that includes this pull request via Graphite.
  • May 9, 2:04 AM EDT: Graphite rebased this pull request as part of a merge.
  • May 9, 2:07 AM EDT: @arsenm merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/amdgpu/add-baseline-fneg-minimumnum-maximumnum-tests branch from 5f57920 to a2a1d3b Compare May 9, 2025 06:02
Base automatically changed from users/arsenm/amdgpu/add-baseline-fneg-minimumnum-maximumnum-tests to main May 9, 2025 06:04
@arsenm arsenm force-pushed the users/arsenm/amdgpu/handle-minimumnum-maximumnum-fneg-combines branch from 0008a50 to 9a987f2 Compare May 9, 2025 06:04
@arsenm arsenm merged commit 912df60 into main May 9, 2025
5 of 10 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/handle-minimumnum-maximumnum-fneg-combines branch May 9, 2025 06:07
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants