AMDGPU: Add baseline tests for fneg with min/max intrinsics #139132

arsenm · 2025-05-08T19:17:27Z

Copy the existing fneg tests for the minnum and maxnum intrinsics into
equivalent versions that use the minimum/maximum and
minimumnum/maximumnum intrinsics.

arsenm · 2025-05-08T19:17:42Z

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-05-08T19:18:03Z

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Copy the minnum and maxnum tests into versions with minimum/maximum
and minimumnum/maximumnum.

Patch is 44.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139132.diff

1 Files Affected:

(modified) llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll (+1166-4)

diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 14f7cbcd0f438..30e685f974599 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -1381,6 +1381,1168 @@ define <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %
   ret <2 x float> %ins1
 }
 
+; --------------------------------------------------------------------------------
+; fminimum tests
+; --------------------------------------------------------------------------------
+
+; fneg of llvm.minimum.f32(%a, %b), using attribute group #0 (defined outside
+; this excerpt; presumably the IEEE-mode configuration, going by the name).
+; The checks expect no standalone negate: v_max_f32 and v_cmp_o_f32 take -v0
+; and -v1 as operands, and a v_cndmask on vcc selects between the max result
+; and the constant 0x7fc00000.
+define float @v_fneg_minimum_f32_ieee(float %a, float %b) #0 {
+; GCN-LABEL: v_fneg_minimum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v2, -v0, -v1
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float %a, float %b)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32(%a, %b), using attribute group #4 (defined outside
+; this excerpt; presumably the non-IEEE-mode configuration, going by the name).
+; The expected sequence is v_max_f32 / v_cmp_o_f32 on -v0 and -v1 followed by
+; a v_cndmask against the constant 0x7fc00000.
+define float @v_fneg_minimum_f32_no_ieee(float %a, float %b) #4 {
+; GCN-LABEL: v_fneg_minimum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v2, -v0, -v1
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float %a, float %b)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32(%a, %a), i.e. both operands are the same value
+; (attribute group #0). The checks expect no min/max instruction at all, only
+; a v_xor_b32 of v0 with 0x80000000.
+define float @v_fneg_self_minimum_f32_ieee(float %a) #0 {
+; GCN-LABEL: v_fneg_self_minimum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float %a, float %a)
+  %min.fneg = fneg float %min
+  ret float %min.fneg
+}
+
+; fneg of llvm.minimum.f32(%a, %a), i.e. both operands are the same value
+; (attribute group #4). The checks expect no min/max instruction at all, only
+; a v_xor_b32 of v0 with 0x80000000.
+define float @v_fneg_self_minimum_f32_no_ieee(float %a) #4 {
+; GCN-LABEL: v_fneg_self_minimum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float %a, float %a)
+  %min.fneg = fneg float %min
+  ret float %min.fneg
+}
+
+; fneg of llvm.minimum.f32 with the positive constant 4.0 as one operand
+; (attribute group #0). The checks expect v_max_f32 of -v0 and -4.0, a
+; v_cmp_o_f32 of -v0 against itself, and a v_cndmask against 0x7fc00000.
+define float @v_fneg_posk_minimum_f32_ieee(float %a) #0 {
+; GCN-LABEL: v_fneg_posk_minimum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v1, -v0, -4.0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float 4.0, float %a)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32 with the positive constant 4.0 as one operand
+; (attribute group #4). The checks expect v_max_f32 of -v0 and -4.0, a
+; v_cmp_o_f32 of -v0 against itself, and a v_cndmask against 0x7fc00000.
+define float @v_fneg_posk_minimum_f32_no_ieee(float %a) #4 {
+; GCN-LABEL: v_fneg_posk_minimum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v1, -v0, -4.0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float 4.0, float %a)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32 with the negative constant -4.0 as one operand
+; (attribute group #0). The checks expect v_max_f32 of -v0 and 4.0 (the
+; constant appears with its sign flipped), a v_cmp_o_f32 of -v0 against
+; itself, and a v_cndmask against 0x7fc00000.
+define float @v_fneg_negk_minimum_f32_ieee(float %a) #0 {
+; GCN-LABEL: v_fneg_negk_minimum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v1, -v0, 4.0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float -4.0, float %a)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32 with the negative constant -4.0 as one operand
+; (attribute group #4). The checks expect v_max_f32 of -v0 and 4.0 (the
+; constant appears with its sign flipped), a v_cmp_o_f32 of -v0 against
+; itself, and a v_cndmask against 0x7fc00000.
+define float @v_fneg_negk_minimum_f32_no_ieee(float %a) #4 {
+; GCN-LABEL: v_fneg_negk_minimum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v1, -v0, 4.0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float -4.0, float %a)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32(0.0, %a) where the call carries the nnan flag
+; (attribute group #0). The checks expect a plain v_min_f32 with 0 followed
+; by a separate v_xor_b32 with 0x80000000; there is no compare/select pair
+; and the negation is not folded into the min.
+define float @v_fneg_0_minimum_f32(float %a) #0 {
+; GCN-LABEL: v_fneg_0_minimum_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call nnan float @llvm.minimum.f32(float 0.0, float %a)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32(-0.0, %a) (attribute group #0). The checks expect
+; v_max_f32 of -v0 and 0, a v_cmp_o_f32 of -v0 against itself, and a
+; v_cndmask against 0x7fc00000.
+define float @v_fneg_neg0_minimum_f32_ieee(float %a) #0 {
+; GCN-LABEL: v_fneg_neg0_minimum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v1, -v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float -0.0, float %a)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32 with the constant 0x3FC45F3060000000 (printed as
+; 0.15915494 in the VI checks) as one operand (attribute group #0).
+; The SI checks load 0xbe22f983 into s4 and use v_max_f32 on -v0.
+; The VI checks keep v_min_f32 with the constant as an operand and apply the
+; negation afterwards with v_xor_b32 0x80000000.
+define float @v_fneg_inv2pi_minimum_f32(float %a) #0 {
+; SI-LABEL: v_fneg_inv2pi_minimum_f32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s4, 0xbe22f983
+; SI-NEXT:    v_max_f32_e64 v1, -v0, s4
+; SI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; SI-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fneg_inv2pi_minimum_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_min_f32_e32 v1, 0.15915494, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; VI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float 0x3FC45F3060000000, float %a)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32 with the constant 0xBFC45F3060000000 (the negated
+; form of the 0.15915494 constant) as one operand (attribute group #0).
+; Both check variants use v_max_f32 on -v0 with the positive constant: the SI
+; checks load 0x3e22f983 into s4 first, while the VI checks use 0.15915494
+; directly as an instruction operand. Neither has a trailing v_xor.
+define float @v_fneg_neg_inv2pi_minimum_f32(float %a) #0 {
+; SI-LABEL: v_fneg_neg_inv2pi_minimum_f32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s4, 0x3e22f983
+; SI-NEXT:    v_max_f32_e64 v1, -v0, s4
+; SI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; SI-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fneg_neg_inv2pi_minimum_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_max_f32_e64 v1, -v0, 0.15915494
+; VI-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; VI-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float 0xBFC45F3060000000, float %a)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; Half-precision variant: fneg of llvm.minimum.f16 with the constant 0xH3118
+; (printed as 0.15915494 in the VI checks) as one operand (attribute group #0).
+; The SI checks contain no f16 arithmetic: the input goes through
+; v_cvt_f16_f32 (with -v0) and v_cvt_f32_f16, then an f32 max against
+; 0xbe230000 and a select against 0x7fc00000.
+; The VI checks use v_min_f16, select against 0x7e00, and then negate with
+; v_xor_b32 0x8000.
+define half @v_fneg_inv2pi_minimum_f16(half %a) #0 {
+; SI-LABEL: v_fneg_inv2pi_minimum_f16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT:    v_max_f32_e32 v2, 0xbe230000, v0
+; SI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fneg_inv2pi_minimum_f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_min_f16_e32 v1, 0.15915494, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x7e00
+; VI-NEXT:    v_cmp_o_f16_e32 vcc, v0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %min = call half @llvm.minimum.f16(half 0xH3118, half %a)
+  %fneg = fneg half %min
+  ret half %fneg
+}
+
+; Half-precision variant: fneg of llvm.minimum.f16 with the constant 0xHB118
+; (0xH3118 with the sign bit set) as one operand (attribute group #0).
+; The SI checks convert through v_cvt_f16_f32 (with -v0) and v_cvt_f32_f16,
+; then use an f32 max against 0x3e230000 and a select against 0x7fc00000.
+; The VI checks use v_max_f16 on -v0 with 0.15915494 as an operand and select
+; against 0x7e00, with no trailing v_xor.
+define half @v_fneg_neg_inv2pi_minimum_f16(half %a) #0 {
+; SI-LABEL: v_fneg_neg_inv2pi_minimum_f16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT:    v_max_f32_e32 v2, 0x3e230000, v0
+; SI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fneg_neg_inv2pi_minimum_f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_max_f16_e64 v1, -v0, 0.15915494
+; VI-NEXT:    v_mov_b32_e32 v2, 0x7e00
+; VI-NEXT:    v_cmp_o_f16_e64 vcc, -v0, -v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %min = call half @llvm.minimum.f16(half 0xHB118, half %a)
+  %fneg = fneg half %min
+  ret half %fneg
+}
+
+; Double-precision variant: fneg of llvm.minimum.f64 with the constant
+; 0x3fc45f306dc9c882 (printed as 0.15915494309189532 in the VI checks) as one
+; operand (attribute group #0).
+; The SI checks build the sign-flipped constant in s[4:5] (high word
+; 0xbfc45f30), use v_max_f64 on -v[0:1], and select 0 / 0x7ff80000 for the
+; low / high result halves under a v_cmp_u_f64 condition.
+; The VI checks keep v_min_f64 with the constant as an operand; the negation
+; shows up as the -v3 operand of the high-half v_cndmask together with the
+; constant 0xfff80000.
+define double @v_fneg_inv2pi_minimum_f64(double %a) #0 {
+; SI-LABEL: v_fneg_inv2pi_minimum_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
+; SI-NEXT:    s_mov_b32 s5, 0xbfc45f30
+; SI-NEXT:    v_max_f64 v[2:3], -v[0:1], s[4:5]
+; SI-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; SI-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
+; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fneg_inv2pi_minimum_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_min_f64 v[2:3], v[0:1], 0.15915494309189532
+; VI-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, 0xfff80000
+; VI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v1, -v3, v1, vcc
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %min = call double @llvm.minimum.f64(double 0x3fc45f306dc9c882, double %a)
+  %fneg = fneg double %min
+  ret double %fneg
+}
+
+; Double-precision variant: fneg of llvm.minimum.f64 with the constant
+; 0xbfc45f306dc9c882 (the sign-flipped form of 0x3fc45f306dc9c882) as one
+; operand (attribute group #0).
+; Both check variants use v_max_f64 on -v[0:1] with the positive constant:
+; the SI checks build it in s[4:5] (high word 0x3fc45f30), while the VI
+; checks use 0.15915494309189532 directly as an operand. Both then select
+; 0 / 0x7ff80000 for the low / high halves under a v_cmp_u_f64 condition.
+define double @v_fneg_neg_inv2pi_minimum_f64(double %a) #0 {
+; SI-LABEL: v_fneg_neg_inv2pi_minimum_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
+; SI-NEXT:    s_mov_b32 s5, 0x3fc45f30
+; SI-NEXT:    v_max_f64 v[2:3], -v[0:1], s[4:5]
+; SI-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; SI-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
+; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fneg_neg_inv2pi_minimum_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_max_f64 v[2:3], -v[0:1], 0.15915494309189532
+; VI-NEXT:    v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, 0x7ff80000
+; VI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %min = call double @llvm.minimum.f64(double 0xbfc45f306dc9c882, double %a)
+  %fneg = fneg double %min
+  ret double %fneg
+}
+
+; fneg of llvm.minimum.f32(-0.0, %a) (attribute group #4). The checks expect
+; v_max_f32 of -v0 and 0, a v_cmp_o_f32 of -v0 against itself, and a
+; v_cndmask against 0x7fc00000.
+define float @v_fneg_neg0_minimum_f32_no_ieee(float %a) #4 {
+; GCN-LABEL: v_fneg_neg0_minimum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v1, -v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float -0.0, float %a)
+  %fneg = fneg float %min
+  ret float %fneg
+}
+
+; fneg of llvm.minimum.f32(0.0, %a) whose only use is an fmul by %b
+; (attribute group #0). The checks expect the min to stay un-negated
+; (v_min_f32 with 0, compare and select on plain v0) and the negation to
+; appear as the -v0 operand of the final v_mul_f32.
+define float @v_fneg_0_minimum_foldable_use_f32_ieee(float %a, float %b) #0 {
+; GCN-LABEL: v_fneg_0_minimum_foldable_use_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e32 v2, 0, v0
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float 0.0, float %a)
+  %fneg = fneg float %min
+  %mul = fmul float %fneg, %b
+  ret float %mul
+}
+
+; fneg of llvm.minimum.f32 with the constant 0x3FC45F3060000000 (printed as
+; 0.15915494 in the VI checks), where the negated value feeds an fmul by %b
+; (attribute group #0).
+; The SI checks use v_max_f32 on -v0 with 0xbe22f983 in s4 and then a
+; v_mul_f32 with no operand negation.
+; The VI checks keep v_min_f32 on plain v0 and put the negation on the -v0
+; operand of v_mul_f32.
+define float @v_fneg_inv2pi_minimum_foldable_use_f32(float %a, float %b) #0 {
+; SI-LABEL: v_fneg_inv2pi_minimum_foldable_use_f32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s4, 0xbe22f983
+; SI-NEXT:    v_max_f32_e64 v2, -v0, s4
+; SI-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; SI-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fneg_inv2pi_minimum_foldable_use_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_min_f32_e32 v2, 0.15915494, v0
+; VI-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; VI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; VI-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float 0x3FC45F3060000000, float %a)
+  %fneg = fneg float %min
+  %mul = fmul float %fneg, %b
+  ret float %mul
+}
+
+; fneg of llvm.minimum.f32(0.0, %a) whose only use is an fmul by %b
+; (attribute group #4). The checks expect the min to stay un-negated
+; (v_min_f32 with 0, compare and select on plain v0) and the negation to
+; appear as the -v0 operand of the final v_mul_f32.
+define float @v_fneg_0_minimum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
+; GCN-LABEL: v_fneg_0_minimum_foldable_use_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e32 v2, 0, v0
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float 0.0, float %a)
+  %fneg = fneg float %min
+  %mul = fmul float %fneg, %b
+  ret float %mul
+}
+
+; The llvm.minimum.f32 result has two users: the fneg and an fmul by 4.0.
+; Both values are returned in a { float, float } struct (attribute group #0).
+; The checks expect the negated form to be computed once (v_max_f32 on -v0
+; and -v1 plus select against 0x7fc00000) and the second value to be derived
+; from it with a v_mul_f32 by -4.0.
+define { float, float } @v_fneg_minimum_multi_use_minimum_f32_ieee(float %a, float %b) #0 {
+; GCN-LABEL: v_fneg_minimum_multi_use_minimum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v2, -v0, -v1
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float %a, float %b)
+  %fneg = fneg float %min
+  %use1 = fmul float %min, 4.0
+  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
+  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
+  ret { float, float } %insert.1
+}
+
+; The llvm.minimum.f32 result has two users: the fneg and an fmul by 4.0.
+; Both values are returned as the two elements of a <2 x float>
+; (attribute group #4).
+; The checks expect the negated form to be computed once (v_max_f32 on -v0
+; and -v1 plus select against 0x7fc00000) and the second element to be
+; derived from it with a v_mul_f32 by -4.0.
+define <2 x float> @v_fneg_minimum_multi_use_minimum_f32_no_ieee(float %a, float %b) #4 {
+; GCN-LABEL: v_fneg_minimum_multi_use_minimum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e64 v2, -v0, -v1
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %min = call float @llvm.minimum.f32(float %a, float %b)
+  %fneg = fneg float %min
+  %use1 = fmul float %min, 4.0
+  %ins0 = insertelement <2 x float> poison, float %fneg, i32 0
+  %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
+  ret <2 x float> %ins1
+}
+
+; --------------------------------------------------------------------------------
+; fmaximum tests
+; --------------------------------------------------------------------------------
+
+; fneg of llvm.maximum.f32(%a, %b), using attribute group #0 (defined outside
+; this excerpt; presumably the IEEE-mode configuration, going by the name).
+; The checks expect no standalone negate: v_min_f32 and v_cmp_o_f32 take -v0
+; and -v1 as operands, and a v_cndmask on vcc selects between the min result
+; and the constant 0x7fc00000.
+define float @v_fneg_maximum_f32_ieee(float %a, float %b) #0 {
+; GCN-LABEL: v_fneg_maximum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e64 v2, -v0, -v1
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %max = call float @llvm.maximum.f32(float %a, float %b)
+  %fneg = fneg float %max
+  ret float %fneg
+}
+
+; fneg of llvm.maximum.f32(%a, %b), using attribute group #4 (defined outside
+; this excerpt; presumably the non-IEEE-mode configuration, going by the name).
+; The expected sequence is v_min_f32 / v_cmp_o_f32 on -v0 and -v1 followed by
+; a v_cndmask against the constant 0x7fc00000.
+define float @v_fneg_maximum_f32_no_ieee(float %a, float %b) #4 {
+; GCN-LABEL: v_fneg_maximum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e64 v2, -v0, -v1
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %max = call float @llvm.maximum.f32(float %a, float %b)
+  %fneg = fneg float %max
+  ret float %fneg
+}
+
+; fneg of llvm.maximum.f32(%a, %a), i.e. both operands are the same value
+; (attribute group #0). The checks expect no min/max instruction at all, only
+; a v_xor_b32 of v0 with 0x80000000.
+define float @v_fneg_self_maximum_f32_ieee(float %a) #0 {
+; GCN-LABEL: v_fneg_self_maximum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %max = call float @llvm.maximum.f32(float %a, float %a)
+  %max.fneg = fneg float %max
+  ret float %max.fneg
+}
+
+; fneg of llvm.maximum.f32(%a, %a), i.e. both operands are the same value
+; (attribute group #4). The checks expect no min/max instruction at all, only
+; a v_xor_b32 of v0 with 0x80000000.
+define float @v_fneg_self_maximum_f32_no_ieee(float %a) #4 {
+; GCN-LABEL: v_fneg_self_maximum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %max = call float @llvm.maximum.f32(float %a, float %a)
+  %max.fneg = fneg float %max
+  ret float %max.fneg
+}
+
+; fneg of llvm.maximum.f32 with the positive constant 4.0 as one operand
+; (attribute group #0). The checks expect v_min_f32 of -v0 and -4.0, a
+; v_cmp_o_f32 of -v0 against itself, and a v_cndmask against 0x7fc00000.
+define float @v_fneg_posk_maximum_f32_ieee(float %a) #0 {
+; GCN-LABEL: v_fneg_posk_maximum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e64 v1, -v0, -4.0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %max = call float @llvm.maximum.f32(float 4.0, float %a)
+  %fneg = fneg float %max
+  ret float %fneg
+}
+
+; fneg of llvm.maximum.f32 with the positive constant 4.0 as one operand
+; (attribute group #4). The checks expect v_min_f32 of -v0 and -4.0, a
+; v_cmp_o_f32 of -v0 against itself, and a v_cndmask against 0x7fc00000.
+define float @v_fneg_posk_maximum_f32_no_ieee(float %a) #4 {
+; GCN-LABEL: v_fneg_posk_maximum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e64 v1, -v0, -4.0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %max = call float @llvm.maximum.f32(float 4.0, float %a)
+  %fneg = fneg float %max
+  ret float %fneg
+}
+
+; fneg of llvm.maximum.f32 with the negative constant -4.0 as one operand
+; (attribute group #0). The checks expect v_min_f32 of -v0 and 4.0 (the
+; constant appears with its sign flipped), a v_cmp_o_f32 of -v0 against
+; itself, and a v_cndmask against 0x7fc00000.
+define float @v_fneg_negk_maximum_f32_ieee(float %a) #0 {
+; GCN-LABEL: v_fneg_negk_maximum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e64 v1, -v0, 4.0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %max = call float @llvm.maximum.f32(float -4.0, float %a)
+  %fneg = fneg float %max
+  ret float %fneg
+}
+
+; fneg of llvm.maximum.f32 with the negative constant -4.0 as one operand
+; (attribute group #4). The checks expect v_min_f32 of -v0 and 4.0 (the
+; constant appears with its sign flipped), a v_cmp_o_f32 of -v0 against
+; itself, and a v_cndmask against 0x7fc00000.
+define float @v_fneg_negk_maximum_f32_no_ieee(float %a) #4 {
+; GCN-LABEL: v_fneg_negk_maximum_f32_no_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e64 v1, -v0, 4.0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %max = call float @llvm.maximum.f32(float -4.0, float %a)
+  %fneg = fneg float %max
+  ret float %fneg
+}
+
+; fneg of llvm.maximum.f32(0.0, %a) where the call carries the nnan flag
+; (attribute group #0). The checks expect a plain v_max_f32 with 0 followed
+; by a separate v_xor_b32 with 0x80000000; there is no compare/select pair
+; and the negation is not folded into the max.
+define float @v_fneg_0_maximum_f32(float %a) #0 {
+; GCN-LABEL: v_fneg_0_maximum_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %max = call nnan float @llvm.maximum.f32(float 0.0, float %a)
+  %fneg = fneg float %max
+  ret float %fneg
+}
+
+define float @v_fneg_neg0_maximum_f32_ieee(float %a) #0 {
+; GCN-LABEL: v_fneg_neg0_maximum_f32_ieee:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_min_f32_e64 v1, -v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; GCN-NEXT:    v_cmp_o_f32_e64 vcc, -v0, -v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v2,...
[truncated]

arsenm · 2025-05-09T06:00:53Z

Merge activity

May 9, 2:00 AM EDT: A user started a stack merge that includes this pull request via Graphite.
May 9, 2:02 AM EDT: Graphite rebased this pull request as part of a merge.
May 9, 2:04 AM EDT: @arsenm merged this pull request with Graphite.

Copy the minnum and maxnum tests into versions with minimum/maximum and minimumnum/maximumnum.

arsenm added the backend:AMDGPU label May 8, 2025 — with Graphite App

arsenm mentioned this pull request May 8, 2025

AMDGPU: Handle minimumnum/maximumnum in fneg combines #139133

Merged

arsenm requested review from jayfoad, Pierre-vh, rampitec, shiltian and Sisyph May 8, 2025 19:17

arsenm marked this pull request as ready for review May 8, 2025 19:18

rampitec approved these changes May 8, 2025

View reviewed changes

AMDGPU: Add baseline tests for fneg with min/max intrinsics

a2a1d3b

Copy the minnum and maxnum tests into versions with minimum/maximum and minimumnum/maximumnum.

arsenm force-pushed the users/arsenm/amdgpu/add-baseline-fneg-minimumnum-maximumnum-tests branch from 5f57920 to a2a1d3b Compare May 9, 2025 06:02

arsenm merged commit 07f36f2 into main May 9, 2025
6 of 10 checks passed

arsenm deleted the users/arsenm/amdgpu/add-baseline-fneg-minimumnum-maximumnum-tests branch May 9, 2025 06:04

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

AMDGPU: Add baseline tests for fneg with min/max intrinsics #139132

AMDGPU: Add baseline tests for fneg with min/max intrinsics #139132

arsenm commented May 8, 2025

arsenm commented May 8, 2025 •

edited

Loading

llvmbot commented May 8, 2025

arsenm commented May 9, 2025 •

edited

Loading

AMDGPU: Add baseline tests for fneg with min/max intrinsics #139132

AMDGPU: Add baseline tests for fneg with min/max intrinsics #139132

Conversation

arsenm commented May 8, 2025

arsenm commented May 8, 2025 • edited Loading

llvmbot commented May 8, 2025

arsenm commented May 9, 2025 • edited Loading

Merge activity

arsenm commented May 8, 2025 •

edited

Loading

arsenm commented May 9, 2025 •

edited

Loading