[RISCV] Add optimization for memset inline #146673
base: main
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from 20b5a7a to 00a26f3, and again from 00a26f3 to e6d4a85.
Commit: Optimize RISCV memset inline implementation based on the issue discussed in llvm#144562.
                          const AttributeList & /*FuncAttributes*/) const {
  virtual EVT getOptimalMemOpType(const MemOp &Op,
                                  const AttributeList & /*FuncAttributes*/,
                                  LLVMContext *Context = nullptr) const {
You have a default argument of nullptr here, but don't check if it's null in RISCVISelLowering.
I think it should be like:

virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                                const AttributeList & /*FuncAttributes*/) const

Just always pass the context, like allowsMemoryAccess above.
@llvm/pr-subscribers-backend-mips @llvm/pr-subscribers-backend-hexagon

Author: Boyao Wang (BoyaoWang430)

Changes: Optimize RISCV memset inline implementation based on the issue discussed in #144562.

Patch is 48.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146673.diff

26 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 84c53e1e45452..2f7f859e8d58d 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2019,9 +2019,9 @@ class LLVM_ABI TargetLoweringBase {
/// a result of memset, memcpy, and memmove lowering.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
- virtual EVT
- getOptimalMemOpType(const MemOp &Op,
- const AttributeList & /*FuncAttributes*/) const {
+ virtual EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList & /*FuncAttributes*/,
+ LLVMContext *Context = nullptr) const {
return MVT::Other;
}
@@ -4108,10 +4108,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// Note that this is always the case when Limit is ~0.
/// It returns the types of the sequence of memory ops to perform
/// memset / memcpy by reference.
- virtual bool
- findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
- const MemOp &Op, unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes) const;
+ virtual bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, const MemOp &Op,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const;
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6df21b624137f..7ca2fa46c5f72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8334,7 +8334,7 @@ static SDValue getMemcpyLoadsAndStores(
*SrcAlign, isVol, CopyFromConstant);
if (!TLI.findOptimalMemOpLowering(
MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), &C))
return SDValue();
if (DstAlignCanChange) {
@@ -8529,7 +8529,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
/*IsVolatile*/ true),
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes()))
+ MF.getFunction().getAttributes(), &C))
return SDValue();
if (DstAlignCanChange) {
@@ -8634,6 +8634,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
+ LLVMContext &C = *DAG.getContext();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
@@ -8646,7 +8647,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
if (!TLI.findOptimalMemOpLowering(
MemOps, Limit,
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
- DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
+ DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes(), &C))
return SDValue();
if (DstAlignCanChange) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 000f8cc6786a5..f25c42e8d8ea1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -211,12 +211,13 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
- unsigned SrcAS, const AttributeList &FuncAttributes) const {
+ unsigned SrcAS, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() < Op.getDstAlign())
return false;
- EVT VT = getOptimalMemOpType(Op, FuncAttributes);
+ EVT VT = getOptimalMemOpType(Op, FuncAttributes, Context);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fb8bd81c033af..044af4bea3a2c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17587,7 +17587,8 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
}
EVT AArch64TargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 89f90ee2b7707..47eb60684388b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -233,8 +233,8 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldConsiderGEPOffsetSplit() const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
LLT getOptimalMemOpLLT(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b083a9014737b..15d0d5feae830 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1982,8 +1982,9 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
Alignment, Flags, IsFast);
}
-EVT SITargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+EVT SITargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
// FIXME: Should account for address space here.
// The default fallback uses the private pointer size as a guess for a type to
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c66f300ec4cb1..2ebcf716afcc9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -357,8 +357,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *IsFast = nullptr) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 62418ff4eec85..a85bf405556b1 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -19219,9 +19219,9 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
return false;
}
-
-EVT ARMTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+EVT ARMTargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
// See if we can use NEON instructions for this...
if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 604910e04d4cc..e7bee969a9e29 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -473,7 +473,8 @@ class VectorType;
unsigned *Fast) const override;
EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 23cbce7094e6b..f400a271786ff 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -114,8 +114,8 @@ class BPFTargetLowering : public TargetLowering {
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override {
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override {
return Op.size() >= 8 ? MVT::i64 : MVT::i32;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index e7d0ec6ee0fe5..4543474ef114c 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3742,7 +3742,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
/// does not need to be loaded. It returns EVT::Other if the type should be
/// determined using generic target-independent logic.
EVT HexagonTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (Op.size() >= 8 && Op.isAligned(Align(8)))
return MVT::i64;
if (Op.size() >= 4 && Op.isAligned(Align(4)))
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index a2c9b57d04caa..e5b477d5fd57a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -325,8 +325,8 @@ class HexagonTargetLowering : public TargetLowering {
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 9b4b336727235..8fa5a6ef4c589 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -4519,8 +4519,9 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
-EVT MipsTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+EVT MipsTargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (Subtarget.hasMips64())
return MVT::i64;
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 241e9343ae384..2580894b15bb5 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -699,7 +699,8 @@ class TargetRegisterClass;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5a4a63469ad6e..fbfc825b2823c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18190,8 +18190,9 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
-EVT PPCTargetLowering::getOptimalMemOpType(
- const MemOp &Op, const AttributeList &FuncAttributes) const {
+EVT PPCTargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4c88bd372b106..13876fcc57783 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1089,7 +1089,8 @@ namespace llvm {
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index dac6ed6d40199..b01af803d0596 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1663,7 +1663,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
- MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
+ MaxStoresPerMemset = Subtarget.hasVInstructions()
+ ? Subtarget.getRealMinVLen() / 8
+ : Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
@@ -23776,9 +23778,12 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
return Subtarget.enableUnalignedVectorMem();
}
+EVT RISCVTargetLowering::getOptimalMemOpType(
+ const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
+ if (!Context)
+ llvm_unreachable("LLVMContext must not be null here");
-EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const {
if (!Subtarget.hasVInstructions())
return MVT::Other;
@@ -23807,6 +23812,17 @@ EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
return MVT::Other;
+ // If Op size is greater than LMUL8 memory operation, we don't support inline
+ // of memset. Return EVT based on Op size to avoid redundant splitting and
+ // merging operations if Op size is no greater than LMUL8 memory operation.
+ if (Op.isMemset()) {
+ if (Op.size() > 8 * MinVLenInBytes)
+ return MVT::Other;
+ if (Op.size() % 8 == 0)
+ return EVT::getVectorVT(*Context, MVT::i64, Op.size() / 8);
+ return EVT::getVectorVT(*Context, MVT::i8, Op.size());
+ }
+
// Prefer i8 for non-zero memset as it allows us to avoid materializing
// a large scalar constant and instead use vmv.v.x/i to do the
// broadcast. For everything else, prefer ELenVT to minimize VL and thus
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index f67d7f155c9d0..ae2360d6e85eb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -331,8 +331,8 @@ class RISCVTargetLowering : public TargetLowering {
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
unsigned *Fast = nullptr) const override;
- EVT getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context = nullptr) const override;
bool splitValueIntoRegisterParts(
SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index c6044514fa951..6dd29d3a2ef51 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1424,7 +1424,8 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
bool SystemZTargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
- unsigned SrcAS, const AttributeList &FuncAttributes) const {
+ unsigned SrcAS, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
const int MVCFastLen = 16;
if (Limit != ~unsigned(0)) {
@@ -1437,12 +1438,13 @@ bool SystemZTargetLowering::findOptimalMemOpLowering(
return false; // Memset zero: Use XC
}
- return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
- SrcAS, FuncAttributes);
+ return TargetLowering::findOptimalMemOpLowering(
+ MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, Context);
}
-EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
- const AttributeList &FuncAttributes) const {
+EVT SystemZTargetLowering::getOptimalMemOpType(
+ const MemOp &Op, const AttributeList &FuncAttributes,
+ LLVMContext *Context) const {
return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f2f0bf6d8b410..98e7c891745d8 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -509,12 +509,12 @@ class SystemZTargetLowering : public TargetLowering {
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
...
[truncated]
@llvm/pr-subscribers-backend-systemz
 virtual EVT
-getOptimalMemOpType(const MemOp &Op,
+getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                     const AttributeList & /*FuncAttributes*/) const {
   return MVT::Other;
 }
Can you precommit these signature changes?
if (Op.size() > 8 * MinVLenInBytes)
  return MVT::Other;
if (Op.size() % 8 == 0)
  return EVT::getVectorVT(Context, MVT::i64, Op.size() / 8);
Do we need to check for Zve64x or V before using an i64 element type?
// of memset. Return EVT based on Op size to avoid redundant splitting and
// merging operations if Op size is no greater than LMUL8 memory operation.
if (Op.isMemset()) {
  if (Op.size() > 8 * MinVLenInBytes)
Do we need to use Subtarget.getMaxLMULForFixedLengthVectors() instead of 8?

On a target that doesn't scale LMUL dynamically with VL, is using a single LMUL=8 vse8 that isn't full better than using multiple smaller-LMUL vse8 instructions? For example, with VLEN=128 and 20 bytes to copy, is an LMUL=8 VL=20 vse8 better or worse than an LMUL=4 VL=16 vse8 plus an LMUL=1 VL=4 vse8?
Optimize RISCV memset inline implementation based on the issue discussed in #144562.