[RISCV] Don't increase vslide or splat vl if +vl-dependent-latency is present #147089
base: main
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

Stacked on #146746

If the subtarget's latency is dependent on vl, then we shouldn't try to fold away vsetvli toggles if it means increasing vl.

Patch is 371.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147089.diff

10 Files Affected:
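For illustration, here is the difference on one of the vp.splice tests updated below (test_vp_splice_v2i64, taken from the new check lines). Without +minimize-vl the vslidedown shares the vslideup's vl in a1, so one vsetvli toggle disappears; with +minimize-vl the vslidedown keeps its own, potentially smaller vl in a0, at the cost of a second vsetvli:

Default (NOMINVL):
  vsetvli zero, a1, e64, m1, ta, ma
  vslidedown.vi v8, v8, 5
  addi a0, a0, -5
  vslideup.vx v8, v9, a0
  ret

With +minimize-vl (MINVL):
  addi a0, a0, -5
  vsetvli zero, a0, e64, m1, ta, ma
  vslidedown.vi v8, v8, 5
  vsetvli zero, a1, e64, m1, ta, ma
  vslideup.vx v8, v9, a0
  ret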
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 6e103dd7d8c44..d6cb1aa95ad5b 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1695,6 +1695,10 @@ foreach nf = {2-8} in
"true", "vlseg"#nf#"eN.v and vsseg"#nf#"eN.v are "
"implemented as a wide memory op and shuffle">;
+def TuneMinimizeVL
+ : SubtargetFeature<"minimize-vl", "MinimizeVL", "true",
+ "Prefer reducing vl even it requires more vsetvli instructions">;
+
def Experimental
: SubtargetFeature<"experimental", "HasExperimental",
"true", "Experimental intrinsics">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 326dd7149ef96..1ba8aba13f8d3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12329,9 +12329,9 @@ SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
- SDValue SlideDown =
- getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
- DownOffset, TrueMask, UpOffset);
+ SDValue SlideDown = getVSlidedown(
+ DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
+ Subtarget.minimizeVL() ? UpOffset : DAG.getRegister(RISCV::X0, XLenVT));
return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
RISCVVType::TAIL_AGNOSTIC);
@@ -13355,7 +13355,7 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
if (ImmValue != 0)
Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
- UpOffset);
+ Subtarget.minimizeVL() ? UpOffset : EVL2);
SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 78d64ea67324f..88461e8461038 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -421,10 +421,11 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
// * We can't modify SEW here since the slide amount is in units of SEW.
// * VL=1 is special only because we have existing support for zero vs
// non-zero VL. We could generalize this if we had a VL > C predicate.
- // * The LMUL1 restriction is for machines whose latency may depend on VL.
+ // * The LMUL1 restriction is for machines whose latency may depend on LMUL.
// * As above, this is only legal for tail "undefined" not "agnostic".
+ // * We avoid increasing vl if the subtarget has +minimize-vl
if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&
- VLOp.getImm() == 1 && hasUndefinedPassthru(MI)) {
+ VLOp.getImm() == 1 && hasUndefinedPassthru(MI) && !ST->minimizeVL()) {
Res.VLAny = false;
Res.VLZeroness = true;
Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
@@ -438,7 +439,7 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
// careful to not increase the number of active vector registers (unlike for
// vmv.s.x.)
if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&
- VLOp.getImm() == 1 && hasUndefinedPassthru(MI)) {
+ VLOp.getImm() == 1 && hasUndefinedPassthru(MI) && !ST->minimizeVL()) {
Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
Res.SEWLMULRatio = false;
Res.VLAny = false;
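As a hedged sketch of the VL=1 slide case the comments above describe (a hypothetical instruction sequence, not one of the tests in this patch):

  vsetvli zero, a0, e64, m1, ta, ma
  vadd.vv v8, v8, v9
  vsetivli zero, 1, e64, m1, ta, ma    # only element 0 of the slide result is needed
  vslidedown.vi v10, v8, 2

Without +minimize-vl, the pass may drop the vsetivli and run the vslidedown at the surrounding vl in a0, because with an undefined passthru only the non-zeroness of vl matters for element 0. With +minimize-vl that relaxation is skipped, so the slide is not executed at a larger vl than it asked for.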
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 57b415dc713ac..f4f31e25bbde7 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -274,7 +274,8 @@ def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
defvar SiFiveIntelligenceTuneFeatures = !listconcat(SiFive7TuneFeatures,
[TuneDLenFactor2,
TuneOptimizedZeroStrideLoad,
- TuneOptimizedNF2SegmentLoadStore]);
+ TuneOptimizedNF2SegmentLoadStore,
+ TuneMinimizeVL]);
def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
[Feature64Bit,
FeatureStdExtI,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll
index 8160e62a43106..ba9f950390a52 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll
@@ -1,32 +1,52 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \
-; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \
-; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,NOMINVL,ZVFH
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,NOMINVL,ZVFHMIN
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin,+minimize-vl -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,MINVL,ZVFH
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+minimize-vl -verify-machineinstrs \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,MINVL,ZVFHMIN
define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v2i64:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v2i64:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5
+; MINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; MINVL-NEXT: vslideup.vx v8, v9, a0
+; MINVL-NEXT: ret
%v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <2 x i64> %v
}
define <2 x i64> @test_vp_splice_v2i64_negative_offset(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v2i64_negative_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetivli zero, 5, e64, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 5
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v2i64_negative_offset:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vx v8, v8, a0
+; NOMINVL-NEXT: vslideup.vi v8, v9, 5
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v2i64_negative_offset:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetivli zero, 5, e64, m1, ta, ma
+; MINVL-NEXT: vslidedown.vx v8, v8, a0
+; MINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; MINVL-NEXT: vslideup.vi v8, v9, 5
+; MINVL-NEXT: ret
%v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 -5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <2 x i64> %v
@@ -44,260 +64,419 @@ define <2 x i64> @test_vp_splice_v2i64_zero_offset(<2 x i64> %va, <2 x i64> %vb,
}
define <2 x i64> @test_vp_splice_v2i64_masked(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v2i64_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v2i64_masked:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v2i64_masked:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; MINVL-NEXT: vsetvli zero, a1, e64, m1, ta, mu
+; MINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; MINVL-NEXT: ret
%v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb)
ret <2 x i64> %v
}
define <4 x i32> @test_vp_splice_v4i32(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v4i32:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v4i32:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5
+; MINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; MINVL-NEXT: vslideup.vx v8, v9, a0
+; MINVL-NEXT: ret
%v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <4 x i32> %v
}
define <4 x i32> @test_vp_splice_v4i32_negative_offset(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v4i32_negative_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetivli zero, 5, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 5
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v4i32_negative_offset:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vx v8, v8, a0
+; NOMINVL-NEXT: vslideup.vi v8, v9, 5
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v4i32_negative_offset:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetivli zero, 5, e32, m1, ta, ma
+; MINVL-NEXT: vslidedown.vx v8, v8, a0
+; MINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; MINVL-NEXT: vslideup.vi v8, v9, 5
+; MINVL-NEXT: ret
%v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 -5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <4 x i32> %v
}
define <4 x i32> @test_vp_splice_v4i32_masked(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v4i32_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v4i32_masked:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v4i32_masked:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; MINVL-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; MINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; MINVL-NEXT: ret
%v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb)
ret <4 x i32> %v
}
define <8 x i16> @test_vp_splice_v8i16(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v8i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v8i16:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v8i16:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5
+; MINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; MINVL-NEXT: vslideup.vx v8, v9, a0
+; MINVL-NEXT: ret
%v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <8 x i16> %v
}
define <8 x i16> @test_vp_splice_v8i16_negative_offset(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v8i16_negative_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetivli zero, 5, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 5
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v8i16_negative_offset:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vx v8, v8, a0
+; NOMINVL-NEXT: vslideup.vi v8, v9, 5
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v8i16_negative_offset:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetivli zero, 5, e16, m1, ta, ma
+; MINVL-NEXT: vslidedown.vx v8, v8, a0
+; MINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; MINVL-NEXT: vslideup.vi v8, v9, 5
+; MINVL-NEXT: ret
%v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <8 x i16> %v
}
define <8 x i16> @test_vp_splice_v8i16_masked(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v8i16_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v8i16_masked:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v8i16_masked:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; MINVL-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; MINVL-NEXT: vslideup.vx v8, v9, a0, v0.t
+; MINVL-NEXT: ret
%v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb)
ret <8 x i16> %v
}
define <16 x i8> @test_vp_splice_v16i8(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v16i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v16i8:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vslideup.vx v8, v9, a0
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v16i8:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; MINVL-NEXT: vslidedown.vi v8, v8, 5
+; MINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; MINVL-NEXT: vslideup.vx v8, v9, a0
+; MINVL-NEXT: ret
%v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <16 x i8> %v
}
define <16 x i8> @test_vp_splice_v16i8_negative_offset(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v16i8_negative_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetivli zero, 5, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 5
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v16i8_negative_offset:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vx v8, v8, a0
+; NOMINVL-NEXT: vslideup.vi v8, v9, 5
+; NOMINVL-NEXT: ret
+;
+; MINVL-LABEL: test_vp_splice_v16i8_negative_offset:
+; MINVL: # %bb.0:
+; MINVL-NEXT: addi a0, a0, -5
+; MINVL-NEXT: vsetivli zero, 5, e8, m1, ta, ma
+; MINVL-NEXT: vslidedown.vx v8, v8, a0
+; MINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; MINVL-NEXT: vslideup.vi v8, v9, 5
+; MINVL-NEXT: ret
%v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 -5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb)
ret <16 x i8> %v
}
define <16 x i8> @test_vp_splice_v16i8_masked(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
-; CHECK-LABEL: test_vp_splice_v16i8_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
-; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
-; CHECK-NEXT: ret
+; NOMINVL-LABEL: test_vp_splice_v16i8_masked:
+; NOMINVL: # %bb.0:
+; NOMINVL-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; NOMINVL-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; NOMINVL-NEXT: addi a0, a0, -5
+; NOMINVL-NEXT: vsetvli zero...
[truncated]
LGTM, but does it really make a big difference for this case?
If the subtarget's latency is dependent on vl, then we shouldn't try to fold away vsetvli toggles if it means increasing vl.
Force-pushed from f9b45f0 to 20e6166.
Not really, but at least it keeps the remaining vp.splice tests consistent with respect to +vl-dependent-latency!