-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[RISCV][Peephole] Checking regclass compatibility in VMV #138844
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Piyou Chen (BeMg) ChangesWithout checking the regclass compatibility, this pass may generate bad machine code.
Full diff: https://github.com/llvm/llvm-project/pull/138844.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 7c05ff1f1a70e..768bd2c8da0a8 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -565,6 +565,11 @@ bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
if (MI.getOperand(1).getReg() != RISCV::NoRegister)
return false;
+ const TargetRegisterClass *RC1 = MRI->getRegClass(MI.getOperand(0).getReg());
+ const TargetRegisterClass *RC2 = MRI->getRegClass(MI.getOperand(2).getReg());
+ if (!RC1->hasSubClassEq(RC2))
+ return false;
+
// If the input was a pseudo with a policy operand, we can give it a tail
// agnostic policy if MI's undef tail subsumes the input's.
MachineInstr *Src = MRI->getVRegDef(MI.getOperand(2).getReg());
@@ -607,6 +612,11 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (!MRI->hasOneUse(MI.getOperand(2).getReg()))
return false;
+ const TargetRegisterClass *RC1 = MRI->getRegClass(MI.getOperand(0).getReg());
+ const TargetRegisterClass *RC2 = MRI->getRegClass(MI.getOperand(2).getReg());
+ if (!RC1->hasSubClassEq(RC2))
+ return false;
+
MachineInstr *Src = MRI->getVRegDef(MI.getOperand(2).getReg());
if (!Src || Src->hasUnmodeledSideEffects() ||
Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-different-class.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-different-class.mir
new file mode 100644
index 0000000000000..6f83842112efe
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-different-class.mir
@@ -0,0 +1,145 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-vector-peephole \
+# RUN: -verify-machineinstrs | FileCheck %s
+
+--- |
+ source_filename = "reduced.ll"
+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+ target triple = "riscv64-unknown-linux-gnu"
+
+ define i32 @main() #0 {
+ entry:
+ %0 = tail call <vscale x 1 x i32> @llvm.riscv.vmv.v.v.nxv1i32.i64(<vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer, i64 0)
+ %1 = tail call <vscale x 1 x i32> @llvm.riscv.vxor.mask.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> %0, <vscale x 1 x i32> zeroinitializer, <vscale x 1 x i32> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i64 0, i64 0)
+ %2 = tail call <vscale x 1 x i64> @llvm.riscv.vwmacc.mask.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x i32> %1, <vscale x 1 x i32> zeroinitializer, <vscale x 1 x i1> zeroinitializer, i64 0, i64 0)
+ %3 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_3t.nxv1i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) zeroinitializer, <vscale x 1 x i64> %2, i32 0)
+ call void @llvm.riscv.vsseg3.triscv.vector.tuple_nxv8i8_3t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %3, ptr null, i64 0, i64 6)
+ ret i32 0
+ }
+
+ declare <vscale x 1 x i32> @llvm.riscv.vmv.v.v.nxv1i32.i64(<vscale x 1 x i32>, <vscale x 1 x i32>, i64) #1
+
+ declare <vscale x 1 x i32> @llvm.riscv.vxor.mask.nxv1i32.nxv1i32.i64(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i64, i64 immarg) #1
+
+ declare <vscale x 1 x i64> @llvm.riscv.vwmacc.mask.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64>, <vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i64, i64 immarg) #1
+
+ declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_3t.nxv1i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), <vscale x 1 x i64>, i32 immarg) #2
+
+ declare void @llvm.riscv.vsseg3.triscv.vector.tuple_nxv8i8_3t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr captures(none), i64, i64 immarg) #3
+
+ attributes #0 = { "target-features"="+v" }
+ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) "target-features"="+v" }
+ attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-features"="+v" }
+ attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) "target-features"="+v" }
+
+...
+---
+name: main
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vr, preferred-register: '', flags: [ ] }
+ - { id: 1, class: vrnov0, preferred-register: '', flags: [ ] }
+ - { id: 2, class: vr, preferred-register: '', flags: [ ] }
+ - { id: 3, class: vrnov0, preferred-register: '', flags: [ ] }
+ - { id: 4, class: vmv0, preferred-register: '', flags: [ ] }
+ - { id: 5, class: vrnov0, preferred-register: '', flags: [ ] }
+ - { id: 6, class: vrnov0, preferred-register: '', flags: [ ] }
+ - { id: 7, class: vmv0, preferred-register: '', flags: [ ] }
+ - { id: 8, class: vr, preferred-register: '', flags: [ ] }
+ - { id: 9, class: vrn3m1, preferred-register: '', flags: [ ] }
+ - { id: 10, class: vrn3m1, preferred-register: '', flags: [ ] }
+ - { id: 11, class: vrn3m1, preferred-register: '', flags: [ ] }
+ - { id: 12, class: vrn3m1, preferred-register: '', flags: [ ] }
+ - { id: 13, class: vrn3m1, preferred-register: '', flags: [ ] }
+ - { id: 14, class: gpr, preferred-register: '', flags: [ ] }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ varArgsFrameIndex: 0
+ varArgsSaveSize: 0
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: main
+ ; CHECK: [[PseudoVMV_V_I_MF2_:%[0-9]+]]:vr = PseudoVMV_V_I_MF2 $noreg, 0, -1, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: [[PseudoVMV_V_V_MF2_:%[0-9]+]]:vrnov0 = PseudoVMV_V_V_MF2 $noreg, [[PseudoVMV_V_I_MF2_]], 0, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: [[PseudoVMCLR_M_B64_:%[0-9]+]]:vr = PseudoVMCLR_M_B64 -1, 0 /* e8 */
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vmv0 = COPY [[PseudoVMCLR_M_B64_]]
+ ; CHECK-NEXT: [[PseudoVXOR_VV_MF2_MASK:%[0-9]+]]:vrnov0 = PseudoVXOR_VV_MF2_MASK [[PseudoVMV_V_V_MF2_]], [[PseudoVMV_V_I_MF2_]], [[PseudoVMV_V_I_MF2_]], [[COPY]], 0, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vrnov0 = PseudoVMV_V_I_M1 $noreg, 0, -1, 6 /* e64 */, 0 /* tu, mu */
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vmv0 = COPY [[PseudoVMCLR_M_B64_]]
+ ; CHECK-NEXT: early-clobber %6:vrnov0 = PseudoVWMACC_VV_MF2_MASK [[PseudoVMV_V_I_M1_]], killed [[PseudoVXOR_VV_MF2_MASK]], [[PseudoVMV_V_I_MF2_]], [[COPY1]], 0, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: [[PseudoVMV_V_I_M1_1:%[0-9]+]]:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG2]], killed %6, %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: PseudoVSSEG3E64_V_M1 killed [[INSERT_SUBREG3]], [[COPY2]], 0, 6 /* e64 */ :: (store unknown-size into `ptr null`, align 8)
+ ; CHECK-NEXT: $x10 = COPY [[COPY2]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:vr = PseudoVMV_V_I_MF2 $noreg, 0, -1, 5 /* e32 */, 0 /* tu, mu */
+ %1:vrnov0 = PseudoVMV_V_V_MF2 $noreg, %0, 0, 5 /* e32 */, 0 /* tu, mu */
+ %2:vr = PseudoVMCLR_M_B64 -1, 0 /* e8 */
+ %4:vmv0 = COPY %2
+ %3:vrnov0 = PseudoVXOR_VV_MF2_MASK %1, %0, %0, %4, 0, 5 /* e32 */, 0 /* tu, mu */
+ %5:vrnov0 = PseudoVMV_V_I_M1 $noreg, 0, -1, 6 /* e64 */, 0 /* tu, mu */
+ %7:vmv0 = COPY %2
+ early-clobber %6:vrnov0 = PseudoVWMACC_VV_MF2_MASK %5, killed %3, %0, %7, 0, 5 /* e32 */, 0 /* tu, mu */
+ %8:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 0 /* tu, mu */
+ %10:vrn3m1 = IMPLICIT_DEF
+ %9:vrn3m1 = INSERT_SUBREG %10, %8, %subreg.sub_vrm1_0
+ %11:vrn3m1 = INSERT_SUBREG %9, %8, %subreg.sub_vrm1_1
+ %12:vrn3m1 = INSERT_SUBREG %11, %8, %subreg.sub_vrm1_2
+ %13:vrn3m1 = INSERT_SUBREG %12, killed %6, %subreg.sub_vrm1_0
+ %14:gpr = COPY $x0
+ PseudoVSSEG3E64_V_M1 killed %13, %14, 0, 6 /* e64 */ :: (store unknown-size into `ptr null`, align 8)
+ $x10 = COPY %14
+ PseudoRET implicit $x10
+...
|
llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-different-class.mir
Outdated
Show resolved
Hide resolved
llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-different-class.mir
Outdated
Show resolved
Hide resolved
Update the testcase |
%5:gpr = COPY $x0 | ||
%7:vmv0 = COPY $v8 | ||
%6:vrnov0 = PseudoVLSE32_V_MF2_MASK %3, %5, %5, killed %7, 0, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 4) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, but can we use a PseudoVADD_VV_M1_MASK to simplify the test case a bit?
%5:gpr = COPY $x0 | |
%7:vmv0 = COPY $v8 | |
%6:vrnov0 = PseudoVLSE32_V_MF2_MASK %3, %5, %5, killed %7, 0, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 4) | |
%4:vrnov0 = PseudoVADD_VV_M1_MASK %3, $noreg, $noreg, $noreg, 0, 5 /* e32 */, 0 /* tu, mu */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this line will cause a compiler crash due to not filtering $noreg
on the mask operand.
if (!isAllOnesMask(MRI->getVRegDef( |
%4:vmv0 = COPY $v8 | ||
%3:vrnov0 = PseudoVXOR_VV_MF2_MASK %1, %0, %0, %4, 0, 5 /* e32 */, 0 /* tu, mu */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, but can we use a PseudoVADD_VV_M1_MASK to simplify the test case a bit?
%4:vmv0 = COPY $v8 | |
%3:vrnov0 = PseudoVXOR_VV_MF2_MASK %1, %0, %0, %4, 0, 5 /* e32 */, 0 /* tu, mu */ | |
%4:vrnov0 = PseudoVADD_VV_M1_MASK %3, $noreg, $noreg, $noreg, 0, 5 /* e32 */, 0 /* tu, mu */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it will cause a compiler crash due to not filtering $noreg
on the mask operand.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I see, then you can just use a COPY of a live in for the mask
name: diff_regclass | ||
body: | | ||
bb.0.entry: | ||
liveins: $v8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you replace %4:vmv0 with $noreg then I think you can drop the livein
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto
bd9cd19
to
04b2d43
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Co-authored-by: Luke Lau <[email protected]>
Without checking the regclass compatibility, this pass may generate bad machine code.