-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[Codegen] Remove redundant pseudo mov instruction #139716
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-aarch64 Author: Rohit Aggarwal (rohitaggarwal007) ChangesRemove redundant pseudo mov instructions after PostRegalloc. Patch is 52.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139716.diff 33 Files Affected:
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c8c8ed99d93ea..c3b6115338f05 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -186,11 +186,13 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
- if (MO.isDef()) {
+ if (MO.isDef() && DefedReg == MCRegister::NoRegister) {
if (i == 0 && !MO.isImplicit() && !MO.isDead())
DefedReg = MO.getReg();
else
return false;
+ } else if (MI->isPseudo() && MI->isMoveImmediate()) {
+ return DefedReg.isValid();
} else if (MO.getReg() && MO.getReg() != FrameReg)
return false;
} else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 0f208f8ed9052..08346f1a857eb 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -80,7 +80,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: mov w8, #1 ; =0x1
; CHECK-NEXT: stp xzr, xzr, [sp]
; CHECK-NEXT: stp x8, xzr, [sp, #16]
; CHECK-NEXT: bl _fprintf
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 9abb50651146a..6889110752ae0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -41,7 +41,6 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_store_dword v0, v0, s[6:7]
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 963b3a55259fa..852ffd5fa7183 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -113,8 +113,7 @@ define amdgpu_kernel void @stored_fi_to_fi() #0 {
; GCN-LABEL: {{^}}stored_fi_to_global:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword [[FI]]
+; GCN: buffer_store_dword v{{[0-9]+}}
define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) #0 {
%tmp = alloca float, addrspace(5)
store float 0.0, ptr addrspace(5) %tmp
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
index 81f768f303ca1..98771dcb441cc 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
@@ -31,7 +31,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(ptr add
; GCN-NEXT: v_cmpx_ne_u32_e32 0, v1
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] offset:28 glc
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index 49370e2fbf1b6..d212d7d52d841 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -33,7 +33,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_add_f32 v0, v1, s[2:3] offset:28
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 0f9407b77aa83..8f92ee42c066a 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -72,8 +72,7 @@ done:
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
-; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GFX9: global_load_sbyte {{v[0-9]+}}, [[ZERO]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
+; GFX9: global_load_sbyte {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
; GCN: {{^}}.LBB2_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 77b78f1f8a333..a4ae669617263 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -323,8 +323,6 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v36, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[37:38], v[6:7], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29
-; SDAG-NEXT: v_mov_b32_e32 v12, 0
-; SDAG-NEXT: v_mov_b32_e32 v13, 0
; SDAG-NEXT: v_mov_b32_e32 v14, 0
; SDAG-NEXT: v_mov_b32_e32 v15, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1107,8 +1105,6 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v28, vcc, 64, v22
; SDAG-NEXT: v_lshr_b64 v[29:30], v[6:7], v22
; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v12
-; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v10, 0
; SDAG-NEXT: v_mov_b32_e32 v11, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1679,8 +1675,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v37, vcc, 64, v32
; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v32
; SDAG-NEXT: v_add_i32_e32 v36, vcc, -1, v31
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v22, 0
; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1874,8 +1868,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v51, vcc, 64, v38
; SDAG-NEXT: v_lshr_b64 v[22:23], v[4:5], v38
; SDAG-NEXT: v_add_i32_e32 v50, vcc, -1, v37
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2562,8 +2554,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v35, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[26:27], v[2:3], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v8
-; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2737,8 +2727,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v39, vcc, 64, v34
; SDAG-NEXT: v_lshr_b64 v[26:27], v[6:7], v34
; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v12
-; SDAG-NEXT: v_mov_b32_e32 v22, 0
-; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index 13884eb788d8a..a3c7eb8c56fb0 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -53,7 +53,6 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 0
@@ -77,7 +76,6 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index 6384fdba7a45a..e8aa64f0b8f15 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -3234,20 +3234,20 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
-; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
-; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, 0
-; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v10, 0
-; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v12, 0
-; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v14, 0
-; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v16, 0
-; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v18, 0
-; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v20, 0
-; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v22, 0
-; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v24, 0
-; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
-; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
-; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 0
+; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v8, 0
+; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 0
+; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v12, 0
+; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 0
+; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 0
+; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 0
+; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 0
+; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v22, 0
+; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v24, 0
+; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v26, 0
+; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v28, 0
+; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT: v_mov_b32_e32 v31, 0
; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi
; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo
; GFX11-NEXT: v_writelane_b32 v60, s31, 1
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index a3ebaec4811a9..97b7e26d1230b 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -225,12 +225,10 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; MUBUF-NEXT: ; %bb.2: ; %split
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d4, v1
-; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_movk_i32 s4, 0x4000
; MUBUF-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d0, v1
-; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_or_b32 s4, s4, 0x12c0
; MUBUF-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index 2d95ec6f699dc..3d6e7c532348f 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -395,7 +395,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
-; GFX908-NEXT: s_mov_b64 s[16:17], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: v_writelane_b32 v2, s31, 0
@@ -743,7 +742,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
-; GFX908-NEXT: s_mov_b64 s[4:5], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index 4aa1ddee2efe3..e145380dca59e 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -267,7 +267,6 @@ define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GCN-NEXT: v_cndmask_b32_e32 v0, 16, v2, vcc_lo
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: scratch_store_b32 v0, v1, off
; GCN-NEXT: scratch_load_b32 v0, off, off
; GCN-NEXT: v_mov_b32_e32 v1, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 35234236b848f..ddb7d6b9c3936 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -388,11 +388,7 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_writelane_b32 v40, s30, 0
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
@@ -423,6 +419,9 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
@@ -528,10 +527,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 0
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: v_mov_b32_e32 v5, 0
@@ -560,6 +555,9 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: v_mov_b32_e32 v28, 0
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
entry:
@@ -928,7 +926,6 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed@rel32@hi+12
; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index 50056b62b3397..e9aebeef6ea6d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -9971,7 +9971,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x80c00
@@ -9989,7 +9988,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10007,7 +10005,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81400
@@ -10025,7 +10022,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10043,7 +10039,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81c00
@@ -10061,7 +10056,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10105,7 +10099,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr ...
[truncated]
|
@llvm/pr-subscribers-backend-amdgpu Author: Rohit Aggarwal (rohitaggarwal007) ChangesRemove redundant pseudo mov instructions after PostRegalloc. Patch is 52.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139716.diff 33 Files Affected:
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c8c8ed99d93ea..c3b6115338f05 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -186,11 +186,13 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
- if (MO.isDef()) {
+ if (MO.isDef() && DefedReg == MCRegister::NoRegister) {
if (i == 0 && !MO.isImplicit() && !MO.isDead())
DefedReg = MO.getReg();
else
return false;
+ } else if (MI->isPseudo() && MI->isMoveImmediate()) {
+ return DefedReg.isValid();
} else if (MO.getReg() && MO.getReg() != FrameReg)
return false;
} else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 0f208f8ed9052..08346f1a857eb 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -80,7 +80,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: mov w8, #1 ; =0x1
; CHECK-NEXT: stp xzr, xzr, [sp]
; CHECK-NEXT: stp x8, xzr, [sp, #16]
; CHECK-NEXT: bl _fprintf
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 9abb50651146a..6889110752ae0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -41,7 +41,6 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_store_dword v0, v0, s[6:7]
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 963b3a55259fa..852ffd5fa7183 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -113,8 +113,7 @@ define amdgpu_kernel void @stored_fi_to_fi() #0 {
; GCN-LABEL: {{^}}stored_fi_to_global:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword [[FI]]
+; GCN: buffer_store_dword v{{[0-9]+}}
define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) #0 {
%tmp = alloca float, addrspace(5)
store float 0.0, ptr addrspace(5) %tmp
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
index 81f768f303ca1..98771dcb441cc 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
@@ -31,7 +31,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(ptr add
; GCN-NEXT: v_cmpx_ne_u32_e32 0, v1
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] offset:28 glc
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index 49370e2fbf1b6..d212d7d52d841 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -33,7 +33,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_add_f32 v0, v1, s[2:3] offset:28
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 0f9407b77aa83..8f92ee42c066a 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -72,8 +72,7 @@ done:
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
-; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GFX9: global_load_sbyte {{v[0-9]+}}, [[ZERO]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
+; GFX9: global_load_sbyte {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
; GCN: {{^}}.LBB2_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 77b78f1f8a333..a4ae669617263 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -323,8 +323,6 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v36, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[37:38], v[6:7], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29
-; SDAG-NEXT: v_mov_b32_e32 v12, 0
-; SDAG-NEXT: v_mov_b32_e32 v13, 0
; SDAG-NEXT: v_mov_b32_e32 v14, 0
; SDAG-NEXT: v_mov_b32_e32 v15, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1107,8 +1105,6 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v28, vcc, 64, v22
; SDAG-NEXT: v_lshr_b64 v[29:30], v[6:7], v22
; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v12
-; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v10, 0
; SDAG-NEXT: v_mov_b32_e32 v11, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1679,8 +1675,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v37, vcc, 64, v32
; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v32
; SDAG-NEXT: v_add_i32_e32 v36, vcc, -1, v31
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v22, 0
; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1874,8 +1868,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v51, vcc, 64, v38
; SDAG-NEXT: v_lshr_b64 v[22:23], v[4:5], v38
; SDAG-NEXT: v_add_i32_e32 v50, vcc, -1, v37
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2562,8 +2554,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v35, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[26:27], v[2:3], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v8
-; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2737,8 +2727,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v39, vcc, 64, v34
; SDAG-NEXT: v_lshr_b64 v[26:27], v[6:7], v34
; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v12
-; SDAG-NEXT: v_mov_b32_e32 v22, 0
-; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index 13884eb788d8a..a3c7eb8c56fb0 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -53,7 +53,6 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 0
@@ -77,7 +76,6 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index 6384fdba7a45a..e8aa64f0b8f15 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -3234,20 +3234,20 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
-; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
-; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, 0
-; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v10, 0
-; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v12, 0
-; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v14, 0
-; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v16, 0
-; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v18, 0
-; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v20, 0
-; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v22, 0
-; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v24, 0
-; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
-; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
-; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 0
+; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v8, 0
+; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 0
+; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v12, 0
+; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 0
+; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 0
+; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 0
+; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 0
+; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v22, 0
+; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v24, 0
+; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v26, 0
+; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v28, 0
+; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT: v_mov_b32_e32 v31, 0
; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi
; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo
; GFX11-NEXT: v_writelane_b32 v60, s31, 1
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index a3ebaec4811a9..97b7e26d1230b 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -225,12 +225,10 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; MUBUF-NEXT: ; %bb.2: ; %split
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d4, v1
-; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_movk_i32 s4, 0x4000
; MUBUF-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d0, v1
-; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_or_b32 s4, s4, 0x12c0
; MUBUF-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index 2d95ec6f699dc..3d6e7c532348f 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -395,7 +395,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
-; GFX908-NEXT: s_mov_b64 s[16:17], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: v_writelane_b32 v2, s31, 0
@@ -743,7 +742,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
-; GFX908-NEXT: s_mov_b64 s[4:5], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index 4aa1ddee2efe3..e145380dca59e 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -267,7 +267,6 @@ define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GCN-NEXT: v_cndmask_b32_e32 v0, 16, v2, vcc_lo
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: scratch_store_b32 v0, v1, off
; GCN-NEXT: scratch_load_b32 v0, off, off
; GCN-NEXT: v_mov_b32_e32 v1, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 35234236b848f..ddb7d6b9c3936 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -388,11 +388,7 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_writelane_b32 v40, s30, 0
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
@@ -423,6 +419,9 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
@@ -528,10 +527,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 0
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: v_mov_b32_e32 v5, 0
@@ -560,6 +555,9 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: v_mov_b32_e32 v28, 0
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
entry:
@@ -928,7 +926,6 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed@rel32@hi+12
; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index 50056b62b3397..e9aebeef6ea6d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -9971,7 +9971,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x80c00
@@ -9989,7 +9988,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10007,7 +10005,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81400
@@ -10025,7 +10022,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10043,7 +10039,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81c00
@@ -10061,7 +10056,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10105,7 +10099,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr ...
[truncated]
|
if (i == 0 && !MO.isImplicit() && !MO.isDead()) | ||
DefedReg = MO.getReg(); | ||
else | ||
return false; | ||
} else if (MI->isPseudo() && MI->isMoveImmediate()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
isPseudo does not give you any semantic information, you cannot use it here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, will remove it
@@ -186,11 +186,13 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg, | |||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |||
const MachineOperand &MO = MI->getOperand(i); | |||
if (MO.isReg()) { | |||
if (MO.isDef()) { | |||
if (MO.isDef() && DefedReg == MCRegister::NoRegister) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't understand why this is necessary, in general this would be a malformed instruction
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In some case instruction's MachineOperands MO.isDef() is getting true for more than one operand. Instruction having more than one define. So it just add a check that if the DefedReg is not assigned then try to find the value of register and assign it to DefedReg. Once it is assigned, do not reassign it.
As per my understanding, in any instruction's MOs there is only def and all other are use().
Please correct me if my understanding is incorrect.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instructions can have multiple defs, but this code specifically only wants to handle instructions with a single def. This should probably early exit if it encounters a second def
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}} | ||
; GCN: buffer_store_dword [[FI]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This lost the point of the test?
@@ -72,8 +72,7 @@ done: | |||
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset: | |||
; GCN: s_and_saveexec_b64 | |||
; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}} | |||
; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} | |||
; GFX9: global_load_sbyte {{v[0-9]+}}, [[ZERO]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}} | |||
; GFX9: global_load_sbyte {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This still needs the zero check, the mov is just above here now?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@rohitaggarwal007 please can merge against trunk - I've tried to clean up some tests to reduce the superfluous x86 diffs
9a1de88
to
7038d50
Compare
…oject into RemovePseudoMov
Remove redundant pseudo mov instructions after PostRegalloc.