-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[PowerPC] Update DMF VSX ACC data transfer instructions #138897
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
lei137
wants to merge
1
commit into
llvm:main
Choose a base branch
from
lei137:mvTFdmf
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+50
−42
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
For cpu=future, acc registers no longer overlap VSRs and are prefixed with `dm`. The original xxmfacc/xxmtacc instructions are now extended mnemonics for their dm* equivalents.
@llvm/pr-subscribers-backend-powerpc Author: Lei Huang (lei137) Changes: For cpu=future, acc registers no longer overlap VSRs and are prefixed with `dm`. Full diff: https://github.com/llvm/llvm-project/pull/138897.diff 5 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 4d4a3efd1098e..9d4d2d864fc32 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1108,7 +1108,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(
case PPC::CRSET:
case PPC::CRUNSET:
case PPC::XXSETACCZ:
- case PPC::XXSETACCZW:
+ case PPC::DMXXSETACCZ:
return true;
}
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
index 23b951871d5f4..6df76956a6e39 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -535,25 +535,25 @@ let Predicates = [MMA, IsNotISAFuture] in {
}
let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
- // For Future and up XXMFACCW and XXMTACCW will not have patterns.
// On Future CPU the wacc registers no longer overlap with the vsr registers
- // and so register allocation would have to know to match 4 vsr registers
- // with one wacc register.
- // On top of that Future CPU has a more convenient way to move between vsrs
- // and wacc registers using xxextfdmr512 and xxinstdmr512.
- def XXMFACCW :
- XForm_AT3<31, 0, 177, (outs wacc:$ATo), (ins wacc:$AT), "xxmfacc $AT",
- IIC_VecGeneral, []>,
+ // so register allocation needs to match 4 vsr registers with one wacc
+ // register. XXMTACC/XXMFACC will be aliased to these new instructions.
+ def DMXXMFACC:
+ XForm_AT3<31, 0, 177, (outs wacc:$ATo), (ins wacc:$AT), "dmxxmfacc $AT",
+ IIC_VecGeneral,
+ [(set v512i1:$ATo, (int_ppc_mma_xxmfacc v512i1:$AT))]>,
RegConstraint<"$ATo = $AT">, NoEncode<"$ATo">;
- def XXMTACCW :
- XForm_AT3<31, 1, 177, (outs wacc:$AT), (ins wacc:$ATi), "xxmtacc $AT",
- IIC_VecGeneral, []>,
+ def DMXXMTACC:
+ XForm_AT3<31, 1, 177, (outs wacc:$AT), (ins wacc:$ATi), "dmxxmtacc $AT",
+ IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
- def XXSETACCZW :
- XForm_AT3<31, 3, 177, (outs wacc:$AT), (ins), "xxsetaccz $AT",
- IIC_VecGeneral, [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
+ def DMXXSETACCZ:
+ XForm_AT3<31, 3, 177, (outs wacc:$AT), (ins), "dmxxsetaccz $AT",
+ IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
}
def XVI8GER4WSPP :
@@ -572,6 +572,12 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
}
}
+let Predicates = [MMA, IsISAFuture] in {
+ def : InstAlias<"dmxxmmfacc $AT ", (XXMFACC acc:$AT)>;
+ def : InstAlias<"dmxxmmtacc $AT ", (XXMTACC acc:$AT)>;
+ def : InstAlias<"dmxxsetaccz $AT ", (XXSETACCZ acc:$AT)>;
+}
+
let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
def PMXVI8GER4SPP :
MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT),
@@ -1093,5 +1099,5 @@ let Predicates = [MMA, IsISAFuture] in {
def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
v16i8:$vs3, v16i8:$vs2)),
(DMXXINSTDMR512 ConcatsMMA.VecsToVecPair0, ConcatsMMA.VecsToVecPair1)>;
- def : Pat<(v512i1 immAllZerosV), (XXSETACCZW)>;
+ def : Pat<(v512i1 immAllZerosV), (DMXXSETACCZ)>;
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 8b690b7b833b3..81929964ef1bc 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -20,6 +20,8 @@ def sub_64 : SubRegIndex<64>;
def sub_64_hi_phony : SubRegIndex<64,64>;
def sub_vsx0 : SubRegIndex<128>;
def sub_vsx1 : SubRegIndex<128, 128>;
+def sub_vsx2 : ComposedSubRegIndex<sub_vsx1, sub_vsx0>;
+def sub_vsx3 : ComposedSubRegIndex<sub_vsx2, sub_vsx0>;
def sub_gp8_x0 : SubRegIndex<64>;
def sub_gp8_x1 : SubRegIndex<64, 64>;
def sub_fp0 : SubRegIndex<64>;
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
index 41e702c94339d..9a528f4fd911f 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
@@ -769,7 +769,7 @@ declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
define void @int_xxsetaccz(ptr %ptr) {
; CHECK-LABEL: int_xxsetaccz:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxsetaccz wacc0
+; CHECK-NEXT: dmxxsetaccz wacc0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxv v4, 48(r3)
; CHECK-NEXT: stxv v5, 32(r3)
@@ -779,7 +779,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-BE-LABEL: int_xxsetaccz:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsetaccz wacc0
+; CHECK-BE-NEXT: dmxxsetaccz wacc0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxv v5, 48(r3)
; CHECK-BE-NEXT: stxv v4, 32(r3)
@@ -789,7 +789,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-O0-LABEL: int_xxsetaccz:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: xxsetaccz wacc0
+; CHECK-O0-NEXT: dmxxsetaccz wacc0
; CHECK-O0-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r3)
@@ -803,7 +803,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-O0-BE-LABEL: int_xxsetaccz:
; CHECK-O0-BE: # %bb.0: # %entry
-; CHECK-O0-BE-NEXT: xxsetaccz wacc0
+; CHECK-O0-BE-NEXT: dmxxsetaccz wacc0
; CHECK-O0-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-O0-BE-NEXT: xxlor vs0, v5, v5
; CHECK-O0-BE-NEXT: stxv vs0, 48(r3)
@@ -817,7 +817,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-AIX64-LABEL: int_xxsetaccz:
; CHECK-AIX64: # %bb.0: # %entry
-; CHECK-AIX64-NEXT: xxsetaccz 0
+; CHECK-AIX64-NEXT: dmxxsetaccz 0
; CHECK-AIX64-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX64-NEXT: stxv 5, 48(3)
; CHECK-AIX64-NEXT: stxv 4, 32(3)
@@ -827,7 +827,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-AIX32-LABEL: int_xxsetaccz:
; CHECK-AIX32: # %bb.0: # %entry
-; CHECK-AIX32-NEXT: xxsetaccz 0
+; CHECK-AIX32-NEXT: dmxxsetaccz 0
; CHECK-AIX32-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX32-NEXT: stxv 5, 48(3)
; CHECK-AIX32-NEXT: stxv 4, 32(3)
@@ -845,7 +845,7 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble
define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
; CHECK-LABEL: disass_acc:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxsetaccz wacc0
+; CHECK-NEXT: dmxxsetaccz wacc0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxv v5, 0(r3)
; CHECK-NEXT: stxv v4, 0(r4)
@@ -855,7 +855,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-BE-LABEL: disass_acc:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsetaccz wacc0
+; CHECK-BE-NEXT: dmxxsetaccz wacc0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: stxv v3, 0(r4)
@@ -865,7 +865,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-O0-LABEL: disass_acc:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: xxsetaccz wacc0
+; CHECK-O0-NEXT: dmxxsetaccz wacc0
; CHECK-O0-NEXT: dmxxextfdmr512 vsp32, vsp36, wacc0, 0
; CHECK-O0-NEXT: vmr v2, v0
; CHECK-O0-NEXT: xxlor vs0, v1, v1
@@ -879,7 +879,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-O0-BE-LABEL: disass_acc:
; CHECK-O0-BE: # %bb.0: # %entry
-; CHECK-O0-BE-NEXT: xxsetaccz wacc0
+; CHECK-O0-BE-NEXT: dmxxsetaccz wacc0
; CHECK-O0-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
; CHECK-O0-BE-NEXT: vmr v2, v1
; CHECK-O0-BE-NEXT: xxlor vs0, v0, v0
@@ -893,7 +893,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-AIX64-LABEL: disass_acc:
; CHECK-AIX64: # %bb.0: # %entry
-; CHECK-AIX64-NEXT: xxsetaccz 0
+; CHECK-AIX64-NEXT: dmxxsetaccz 0
; CHECK-AIX64-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX64-NEXT: stxv 2, 0(3)
; CHECK-AIX64-NEXT: stxv 3, 0(4)
@@ -903,7 +903,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-AIX32-LABEL: disass_acc:
; CHECK-AIX32: # %bb.0: # %entry
-; CHECK-AIX32-NEXT: xxsetaccz 0
+; CHECK-AIX32-NEXT: dmxxsetaccz 0
; CHECK-AIX32-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX32-NEXT: stxv 2, 0(3)
; CHECK-AIX32-NEXT: stxv 3, 0(4)
@@ -931,7 +931,7 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)
define void @testcse(ptr %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxsetaccz wacc0
+; CHECK-NEXT: dmxxsetaccz wacc0
; CHECK-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxv v4, 48(r3)
@@ -946,7 +946,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-BE-LABEL: testcse:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsetaccz wacc0
+; CHECK-BE-NEXT: dmxxsetaccz wacc0
; CHECK-BE-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxv v5, 48(r3)
@@ -961,7 +961,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-O0-LABEL: testcse:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: xxsetaccz wacc0
+; CHECK-O0-NEXT: dmxxsetaccz wacc0
; CHECK-O0-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-O0-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-O0-NEXT: xxlor vs3, v4, v4
@@ -980,7 +980,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-O0-BE-LABEL: testcse:
; CHECK-O0-BE: # %bb.0: # %entry
-; CHECK-O0-BE-NEXT: xxsetaccz wacc0
+; CHECK-O0-BE-NEXT: dmxxsetaccz wacc0
; CHECK-O0-BE-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-O0-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-O0-BE-NEXT: xxlor vs3, v5, v5
@@ -999,7 +999,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-AIX64-LABEL: testcse:
; CHECK-AIX64: # %bb.0: # %entry
-; CHECK-AIX64-NEXT: xxsetaccz 0
+; CHECK-AIX64-NEXT: dmxxsetaccz 0
; CHECK-AIX64-NEXT: xvf32gerpp 0, 2, 2
; CHECK-AIX64-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX64-NEXT: stxv 5, 48(3)
@@ -1014,7 +1014,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-AIX32-LABEL: testcse:
; CHECK-AIX32: # %bb.0: # %entry
-; CHECK-AIX32-NEXT: xxsetaccz 0
+; CHECK-AIX32-NEXT: dmxxsetaccz 0
; CHECK-AIX32-NEXT: xvf32gerpp 0, 2, 2
; CHECK-AIX32-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX32-NEXT: stxv 5, 48(3)
diff --git a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
index cf69d3ad09878..0287f067d0713 100644
--- a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
+++ b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
@@ -6,17 +6,17 @@
# Keep track of all of the lanemasks for various subregsiters.
#
# CHECK: %3 [80r,80d:0) 0@80r L000000000000000C [80r,80d:0) 0@80r weight:0.000000e+00
-# CHECK: %4 [96r,96d:0) 0@96r L0000000000003000 [96r,96d:0) 0@96r weight:0.000000e+00
+# CHECK: %4 [96r,96d:0) 0@96r L0000000000007000 [96r,96d:0) 0@96r weight:0.000000e+00
# CHECK: %5 [112r,112d:0) 0@112r L000000000000000C [112r,112d:0) 0@112r weight:0.000000e+00
-# CHECK: %6 [128r,128d:0) 0@128r L0000000000003000 [128r,128d:0) 0@128r weight:0.000000e+00
+# CHECK: %6 [128r,128d:0) 0@128r L0000000000007000 [128r,128d:0) 0@128r weight:0.000000e+00
# CHECK: %7 [144r,144d:0) 0@144r L0000000000000004 [144r,144d:0) 0@144r weight:0.000000e+00
-# CHECK: %8 [160r,160d:0) 0@160r L0000000000001000 [160r,160d:0) 0@160r weight:0.000000e+00
+# CHECK: %8 [160r,160d:0) 0@160r L0000000000002000 [160r,160d:0) 0@160r weight:0.000000e+00
# CHECK: %9 [176r,176d:0) 0@176r L0000000000000004 [176r,176d:0) 0@176r weight:0.000000e+00
-# CHECK: %10 [192r,192d:0) 0@192r L0000000000001000 [192r,192d:0) 0@192r weight:0.000000e+00
-# CHECK: %11 [208r,208d:0) 0@208r L0000000000004000 [208r,208d:0) 0@208r weight:0.000000e+00
-# CHECK: %12 [224r,224d:0) 0@224r L0000000000010000 [224r,224d:0) 0@224r weight:0.000000e+00
-# CHECK: %13 [240r,240d:0) 0@240r L000000000000300C [240r,240d:0) 0@240r weight:0.000000e+00
-# CHECK: %14 [256r,256d:0) 0@256r L000000000003C000 [256r,256d:0) 0@256r weight:0.000000e+00
+# CHECK: %10 [192r,192d:0) 0@192r L0000000000002000 [192r,192d:0) 0@192r weight:0.000000e+00
+# CHECK: %11 [208r,208d:0) 0@208r L0000000000008000 [208r,208d:0) 0@208r weight:0.000000e+00
+# CHECK: %12 [224r,224d:0) 0@224r L0000000000020000 [224r,224d:0) 0@224r weight:0.000000e+00
+# CHECK: %13 [240r,240d:0) 0@240r L000000000000700C [240r,240d:0) 0@240r weight:0.000000e+00
+# CHECK: %14 [256r,256d:0) 0@256r L0000000000078000 [256r,256d:0) 0@256r weight:0.000000e+00
# CHECK: 0B bb.0
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
For cpu=future, acc registers no longer overlap VSRs and are prefixed with `dm`. The original xxmfacc/xxmtacc instructions are now extended mnemonics for their dm* equivalents.