-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU][True16][CodeGen] replace subreg_to_reg with reg_sequence #138746
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) Changes: Since subreg_to_reg is considered broken in LLVM, replace subreg_to_reg with reg_sequence. Patch is 141.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138746.diff 8 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e6d54860df221..9ea77e06f753d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7787,11 +7787,15 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
if (16 == RI.getRegSizeInBits(*SrcRegRC) &&
32 == RI.getRegSizeInBits(*NewDstRC)) {
Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
- get(TargetOpcode::SUBREG_TO_REG), NewDstReg)
- .add(MachineOperand::CreateImm(0))
- .add(Inst.getOperand(1))
- .add(MachineOperand::CreateImm(AMDGPU::lo16));
+ get(AMDGPU::IMPLICIT_DEF), Undef);
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::REG_SEQUENCE), NewDstReg)
+ .addReg(Inst.getOperand(1).getReg())
+ .addImm(AMDGPU::lo16)
+ .addReg(Undef)
+ .addImm(AMDGPU::hi16);
Inst.eraseFromParent();
MRI.replaceRegWith(DstReg, NewDstReg);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
index b19a5a44e706a..8788dc2c059d6 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
@@ -112618,575 +112618,570 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) {
; GFX11-TRUE16-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB51_2
; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.true
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v16
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v33, 16, v17
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v34, 16, v18
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v98, 16, v14
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v32, 0x40c00000, v32
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_lshlrev_b32 v16, 16, v16
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v17 :: v_dual_add_f32 v34, 0x40c00000, v34
-; GFX11-TRUE16-NEXT: v_bfe_u32 v17, v32, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v32
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v32, 16, v16
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v48, 0xffff0000, v20
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v51, 16, v23
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v33, 16, 1
-; GFX11-TRUE16-NEXT: v_bfe_u32 v48, v35, 16, 1
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_add_f32 v32, 0x40c00000, v32
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v34, 0x40c00000, v16 :: v_dual_lshlrev_b32 v35, 16, v18
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v52, 0xffff0000, v24
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v33, 16, 1
+; GFX11-TRUE16-NEXT: v_bfe_u32 v16, v32, 16, 1
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v32
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32
-; GFX11-TRUE16-NEXT: v_add3_u32 v17, v17, v32, 0x7fff
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v49, 0x400000, v35
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v32, v17, v38, vcc_lo
-; GFX11-TRUE16-NEXT: v_add3_u32 v38, v39, v33, 0x7fff
-; GFX11-TRUE16-NEXT: v_add3_u32 v39, v48, v35, 0x7fff
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v16, 0x40c00000, v16
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
-; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v16, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v16
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_bfe_u32 v102, v14, 16, 1
-; GFX11-TRUE16-NEXT: v_add3_u32 v36, v36, v16, 0x7fff
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v32.h
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v32, 0x400000, v33
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v17, v36, v37, vcc_lo
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35
-; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v34, 16, 1
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v35, v39, v49, vcc_lo
-; GFX11-TRUE16-NEXT: v_add3_u32 v37, v37, v34, 0x7fff
; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v34
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xffff0000, v18
+; GFX11-TRUE16-NEXT: v_add3_u32 v37, v37, v33, 0x7fff
+; GFX11-TRUE16-NEXT: v_add3_u32 v16, v16, v32, 0x7fff
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v32, 0x400000, v33
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v66, 0xffff0000, v30
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v35 :: v_dual_cndmask_b32 v16, v16, v38
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v17
+; GFX11-TRUE16-NEXT: v_bfe_u32 v17, v34, 16, 1
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v18
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v68, 0xffff0000, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v70, 0xffff0000, v2
+; GFX11-TRUE16-NEXT: v_add3_u32 v17, v17, v34, 0x7fff
+; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v36, 16, 1
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v80, 16, v5
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v82, 16, v7
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v17, v17, v39, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v18
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v18, v38, v32, vcc_lo
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v35.h
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v35, 16, v19
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff0000, v19
-; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v36, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v36
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v36
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v82, 0x40c00000, v82 :: v_dual_lshlrev_b32 v83, 16, v8
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v18, v37, v32 :: v_dual_add_f32 v37, 0x40c00000, v38
+; GFX11-TRUE16-NEXT: v_add3_u32 v32, v34, v36, 0x7fff
+; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v35, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_add3_u32 v33, v33, v36, 0x7fff
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v48, 0x40c00000, v19
-; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v35, 16, 1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v38, vcc_lo
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34
-; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v48, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v48
-; GFX11-TRUE16-NEXT: v_add3_u32 v36, v36, v35, 0x7fff
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v33.h
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v19, v37, v39, vcc_lo
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v37, 16, v20
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
-; GFX11-TRUE16-NEXT: v_add3_u32 v34, v34, v48, 0x7fff
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v48, v48
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v35
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v49, 0x40c00000, v20 :: v_dual_cndmask_b32 v34, v34, v38
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v39, 0xffff0000, v19
+; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v37, 16, 1
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v83, 0x40c00000, v83 :: v_dual_add_f32 v8, 0x40c00000, v8
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v32, v32, v33, vcc_lo
+; GFX11-TRUE16-NEXT: v_add3_u32 v33, v34, v35, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v34, 0x400000, v35
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v34.h
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v20, v36, v39, vcc_lo
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v36, 16, v21
-; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v49, 16, 1
-; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v37, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v49
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v49, v49
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v48, 0x40c00000, v36
-; GFX11-TRUE16-NEXT: v_add3_u32 v35, v35, v49, 0x7fff
-; GFX11-TRUE16-NEXT: v_add3_u32 v38, v38, v37, 0x7fff
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v37
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v35, v35, v39, vcc_lo
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v38, 16, v19
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v84, 16, v9
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v86, 16, v11
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v19, v33, v34, vcc_lo
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38
+; GFX11-TRUE16-NEXT: v_add3_u32 v33, v36, v37, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v34, 0x400000, v37
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v36, 0x40c00000, v39 :: v_dual_lshlrev_b32 v39, 16, v20
+; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v38, 16, 1
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v86, 0x40c00000, v86 :: v_dual_lshlrev_b32 v87, 16, v12
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v34, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v36, 16, 1
+; GFX11-TRUE16-NEXT: v_add3_u32 v34, v35, v38, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v38
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v39, 0x40c00000, v39
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v48
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v48, 16, v21
; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v35.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v36, v38, v36 :: v_dual_add_f32 v21, 0x40c00000, v21
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v87, 0x40c00000, v87 :: v_dual_lshlrev_b32 v96, 16, v13
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v20, v34, v35, vcc_lo
+; GFX11-TRUE16-NEXT: v_add3_u32 v34, v37, v36, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v36
+; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v39, 16, 1
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v39
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v48, 0x40c00000, v48 :: v_dual_add_f32 v49, 0x40c00000, v21
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v34, v34, v35 :: v_dual_lshlrev_b32 v21, 16, v22
+; GFX11-TRUE16-NEXT: v_add3_u32 v35, v37, v39, 0x7fff
+; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v38, 16, 1
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v39, v39
; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v48, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v50, 0x400000, v48
-; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v21, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v49, 0x400000, v21
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21
-; GFX11-TRUE16-NEXT: v_add3_u32 v39, v39, v48, 0x7fff
-; GFX11-TRUE16-NEXT: v_add3_u32 v37, v37, v21, 0x7fff
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v38, 16, v22
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v21, v37, v49 :: v_dual_and_b32 v22, 0xffff0000, v22
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v50, 0x40c00000, v21
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v97, 0x400000, v87
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v35, v35, v36, vcc_lo
+; GFX11-TRUE16-NEXT: v_add3_u32 v36, v37, v38, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v38
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v48
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v53, 0xffff0000, v25
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v36, v36, v37, vcc_lo
+; GFX11-TRUE16-NEXT: v_add3_u32 v37, v39, v48, 0x7fff
+; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v49, 16, 1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v48, v48
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v51, 0x40c00000, v22 :: v_dual_lshlrev_b32 v48, 16, v23
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38
-; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v22, v39, v50 :: v_dual_and_b32 v23, 0xffff0000, v23
-; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v51, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v49, 0x400000, v51
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v38, 16, 1
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v48, 0x40c00000, v22
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 16, v28
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff0000, v29
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v67, 16, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v21, v37, v38, vcc_lo
+; GFX11-TRUE16-NEXT: v_add3_u32 v37, v39, v49, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v49
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v49, v49
+; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v50, 16, 1
+; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v69, 0xffff0000, v1
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v22, v37, v38 :: v_dual_lshlrev_b32 v71, 16, v4
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v49, 0x40c00000, v51
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v51, 0xffff0000, v23
+; GFX11-TRUE16-NEXT: v_add3_u32 v37, v39, v50, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v50
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v50, 0x40c00000, v51 :: v_dual_lshlrev_b32 v51, 16, v24
+; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v48, 16, 1
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v23, v37, v38, vcc_lo
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v48
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v48, v48
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v51, 0x40c00000, v51
+; GFX11-TRUE16-NEXT: v_add3_u32 v37, v39, v48, 0x7fff
+; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v49, 16, 1
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v22.h
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v71, 0x40c00000, v71 :: v_dual_add_f32 v4, 0x40c00000, v4
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v37, v37, v38, vcc_lo
+; GFX11-TRUE16-NEXT: v_add3_u32 v38, v39, v49, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v49
+; GFX11-TRUE16-NEXT: v_bfe_u32 v48, v50, 16, 1
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v49, v49
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v49, 0x40c00000, v52 :: v_dual_lshlrev_b32 v52, 16, v25
+; GFX11-TRUE16-NEXT: v_lshl_or_b32 v21, v34, 16, v21
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v24, v38, v39 :: v_dual_and_b32 v5, 0xffff0000, v5
+; GFX11-TRUE16-NEXT: v_add3_u32 v38, v48, v50, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v50
+; GFX11-TRUE16-NEXT: v_bfe_u32 v48, v51, 16, 1
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50
+; GFX11-TRUE16-NEXT: v_bfe_u32 v50, v49, 16, 1
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v52, 0x40c00000, v52
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v80, 0x40c00000, v80 :: v_dual_add_f32 v5, 0x40c00000, v5
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v38, v38, v39, vcc_lo
+; GFX11-TRUE16-NEXT: v_add3_u32 v39, v48, v51, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v48, 0x400000, v51
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v51, v51
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v50, 0x400000, v38
-; GFX11-TRUE16-NEXT: v_add3_u32 v39, v39, v51, 0x7fff
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v52, 0x40c00000, v23
-; GFX11-TRUE16-NEXT: v_add3_u32 v37, v37, v38, 0x7fff
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v21.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v39, v39, v49, vcc_lo
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v48, 0x40c00000, v48 :: v_dual_cndmask_b32 v23, v37, v50
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v39.h
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v39, 16, v24
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
-; GFX11-TRUE16-NEXT: v_bfe_u32 v49, v48, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v51, 0x400000, v48
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4)
-; GFX11-TRUE16-NEXT: v_add_f32_e32 v53, 0x40c00000, v24
-; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v52, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v50, 0x400000, v52
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v51, 0x40c00000, v53
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v81, 16, v6
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
+; GFX11-TRUE16-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v25, v39, v48, vcc_lo
+; GFX11-TRUE16-NEXT: v_add3_u32 v39, v50, v49, 0x7fff
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v48, 0x400000, v49
+; GFX11-TRUE16-NEXT: v_bfe_u32 v50, v52, 16, 1
+; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v49, v49
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v49, 0x400000, v52
+; GFX11-TRUE16-NEXT: v_dual_add_f32 v81, 0x40c00000, v81 :: v_dual_add_f32 v6, 0x40c00000, v6
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v39, v39, v48, vcc_lo
+; GFX11-TRUE16-NEXT: v_add3_u32 v48, v50, v52, 0x7fff
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v52, v52
-; GFX11-TRUE16-NEXT: v_add3_u32 v49, v49, v48, 0x7fff
-; GFX11-TRUE16-NEXT: v_add3_u32 v38, v38, v52, 0x7fff
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_dual_add_f32 v39, 0x40c00000, v39 :: v_dual_cndmask_b32 v38, v38, v50
-; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v48, v48
-; GFX11-TRUE16-NEXT: v_bfe_u32 v48, v53, 16, 1
-; GFX11-TRUE16-NEXT: v_bfe_u32 v50, v39, 16, 1
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v52, 0x400000, v39
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v38.h
-; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v24, v49, v51 :: v_dual_lshlrev_b32 v49, 16, v25
-; GFX11-TRUE16-NEXT: v_add3_u32 v48, v48, v53, 0x7fff
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5...
[truncated]
|
10c5275
to
661b551
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm but this shouldn't really be the set of tests changing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM as well. I honestly would have expected it to be NFC. But the changes look to be minor.
Hi Matt. Are there any tests you expect to change here? I haven't checked these tests in detail since the changes seem to be minor. I suspect the changes come from the rewrite-virtual-reg pass and twoaddress. |
661b551
to
78f38f6
Compare
added the missing mir test change |
Since subreg_to_reg is considered broken in LLVM, replace subreg_to_reg with reg_sequence.