-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[libc][NFC] refactor Cortex memcpy code
#148204
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
gchatelet
wants to merge
2
commits into
llvm:main
Choose a base branch
from
gchatelet:arm_improve_memset
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
+102
−80
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-libc
Author: Guillaume Chatelet (gchatelet)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/148204.diff
4 Files Affected:
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index a967247db53f4..633d9f12949d2 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -7,6 +7,7 @@ add_header_library(
aarch64/inline_memcpy.h
aarch64/inline_memmove.h
aarch64/inline_memset.h
+ arm/common.h
arm/inline_memcpy.h
generic/aligned_access.h
generic/byte_per_byte.h
diff --git a/libc/src/string/memory_utils/arm/common.h b/libc/src/string/memory_utils/arm/common.h
new file mode 100644
index 0000000000000..dafd4aaf02343
--- /dev/null
+++ b/libc/src/string/memory_utils/arm/common.h
@@ -0,0 +1,52 @@
+//===-- Common constants and defines for arm --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_COMMON_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_COMMON_H
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE_VAR
+#include "src/string/memory_utils/utils.h" // CPtr, Ptr, distance_to_align
+
+#include <stddef.h> // size_t
+
+// https://libc.llvm.org/compiler_support.html
+// Support for [[likely]] / [[unlikely]]
+// [X] GCC 12.2
+// [X] Clang 12
+// [ ] Clang 11
+#define LIBC_ATTR_LIKELY [[likely]]
+#define LIBC_ATTR_UNLIKELY [[unlikely]]
+
+#if defined(LIBC_COMPILER_IS_CLANG)
+#if LIBC_COMPILER_CLANG_VER < 1200
+#undef LIBC_ATTR_LIKELY
+#undef LIBC_ATTR_UNLIKELY
+#define LIBC_ATTR_LIKELY
+#define LIBC_ATTR_UNLIKELY
+#endif
+#endif
+
+namespace LIBC_NAMESPACE_DECL {
+
+LIBC_INLINE_VAR constexpr size_t kWordSize = sizeof(uint32_t); // Word size in bytes (size of a uint32_t); the alignment unit used by the helpers in this header.
+
+enum class BumpSize : bool { kNo = false, kYes = true }; // kYes: consume_by_aligned_block (arm/inline_memcpy.h) reduces `size` to `size % bytes` after copying; kNo: `size` is left unchanged.
+enum class BlockOp : bool { kFull = false, kByWord = true }; // kFull: copy the whole block with a single copy_assume_aligned<bytes>; kByWord: copy it in kWordSize steps.
+
+LIBC_INLINE auto misaligned(CPtr ptr) { // Thin wrapper over distance_to_align_down, fixed to kWordSize.
+ return distance_to_align_down<kWordSize>(ptr); // Callers test the result for non-zero (e.g. `if (misaligned(src))`).
+}
+
+LIBC_INLINE CPtr bitwise_or(CPtr a, CPtr b) { // Returns a CPtr whose integer representation is the bitwise OR of those of `a` and `b`.
+ return cpp::bit_cast<CPtr>(cpp::bit_cast<uintptr_t>(a) | // Both pointers are reinterpreted as uintptr_t, OR-ed, and cast back;
+ cpp::bit_cast<uintptr_t>(b)); // NOTE(review): presumably lets a caller check the alignment of two pointers at once — confirm at call sites.
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_COMMON_H
diff --git a/libc/src/string/memory_utils/arm/inline_memcpy.h b/libc/src/string/memory_utils/arm/inline_memcpy.h
index 61efebe29b485..ecf938d9ba3a6 100644
--- a/libc/src/string/memory_utils/arm/inline_memcpy.h
+++ b/libc/src/string/memory_utils/arm/inline_memcpy.h
@@ -10,57 +10,35 @@
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/optimization.h" // LIBC_LOOP_NOUNROLL
+#include "src/string/memory_utils/arm/common.h" // LIBC_ATTR_LIKELY, LIBC_ATTR_UNLIKELY
#include "src/string/memory_utils/utils.h" // memcpy_inline, distance_to_align
#include <stddef.h> // size_t
-// https://libc.llvm.org/compiler_support.html
-// Support for [[likely]] / [[unlikely]]
-// [X] GCC 12.2
-// [X] Clang 12
-// [ ] Clang 11
-#define LIBC_ATTR_LIKELY [[likely]]
-#define LIBC_ATTR_UNLIKELY [[unlikely]]
-
-#if defined(LIBC_COMPILER_IS_CLANG)
-#if LIBC_COMPILER_CLANG_VER < 1200
-#undef LIBC_ATTR_LIKELY
-#undef LIBC_ATTR_UNLIKELY
-#define LIBC_ATTR_LIKELY
-#define LIBC_ATTR_UNLIKELY
-#endif
-#endif
-
namespace LIBC_NAMESPACE_DECL {
namespace {
-LIBC_INLINE_VAR constexpr size_t kWordSize = sizeof(uint32_t);
-
-enum Strategy {
- ForceWordLdStChain,
- AssumeWordAligned,
- AssumeUnaligned,
-};
+template <size_t bytes>
+LIBC_INLINE void copy_assume_aligned(void *dst, const void *src) {
+ constexpr size_t alignment = bytes > kWordSize ? kWordSize : bytes;
+ memcpy_inline<bytes>(assume_aligned<alignment>(dst),
+ assume_aligned<alignment>(src));
+}
-template <size_t bytes, Strategy strategy = AssumeUnaligned>
-LIBC_INLINE void copy_and_bump_pointers(Ptr &dst, CPtr &src) {
- if constexpr (strategy == AssumeUnaligned) {
- memcpy_inline<bytes>(assume_aligned<1>(dst), assume_aligned<1>(src));
- } else if constexpr (strategy == AssumeWordAligned) {
- static_assert(bytes >= kWordSize);
- memcpy_inline<bytes>(assume_aligned<kWordSize>(dst),
- assume_aligned<kWordSize>(src));
- } else if constexpr (strategy == ForceWordLdStChain) {
+template <size_t bytes, BlockOp block_op = BlockOp::kFull>
+LIBC_INLINE void copy_block_and_bump_pointers(Ptr &dst, CPtr &src) {
+ if constexpr (block_op == BlockOp::kFull) {
+ copy_assume_aligned<bytes>(dst, src);
+ } else {
// We restrict loads/stores to 4 byte to prevent the use of load/store
- // multiple (LDM, STM) and load/store double (LDRD, STRD). First, they may
- // fault (see notes below) and second, they use more registers which in turn
- // adds push/pop instructions in the hot path.
- static_assert((bytes % kWordSize == 0) && (bytes >= kWordSize));
+ // multiple (LDM, STM) and load/store double (LDRD, STRD). First, they
+ // may fault (see notes below) and second, they use more registers which
+ // in turn adds push/pop instructions in the hot path.
+ static_assert(bytes >= kWordSize);
LIBC_LOOP_UNROLL
- for (size_t i = 0; i < bytes / kWordSize; ++i) {
- const size_t offset = i * kWordSize;
- memcpy_inline<kWordSize>(dst + offset, src + offset);
+ for (size_t offset = 0; offset < bytes; offset += kWordSize) {
+ copy_assume_aligned<kWordSize>(dst + offset, src + offset);
}
}
// In the 1, 2, 4 byte copy case, the compiler can fold pointer offsetting
@@ -72,30 +50,19 @@ LIBC_INLINE void copy_and_bump_pointers(Ptr &dst, CPtr &src) {
src += bytes;
}
-LIBC_INLINE void copy_bytes_and_bump_pointers(Ptr &dst, CPtr &src,
- const size_t size) {
+template <size_t bytes, BlockOp block_op, BumpSize bump_size = BumpSize::kYes>
+LIBC_INLINE void consume_by_aligned_block(Ptr &dst, CPtr &src, size_t &size) {
LIBC_LOOP_NOUNROLL
- for (size_t i = 0; i < size; ++i)
- *dst++ = *src++;
-}
-
-template <size_t block_size, Strategy strategy>
-LIBC_INLINE void copy_blocks_and_update_args(Ptr &dst, CPtr &src,
- size_t &size) {
- LIBC_LOOP_NOUNROLL
- for (size_t i = 0; i < size / block_size; ++i)
- copy_and_bump_pointers<block_size, strategy>(dst, src);
- // Update `size` once at the end instead of once per iteration.
- size %= block_size;
-}
-
-LIBC_INLINE CPtr bitwise_or(CPtr a, CPtr b) {
- return cpp::bit_cast<CPtr>(cpp::bit_cast<uintptr_t>(a) |
- cpp::bit_cast<uintptr_t>(b));
+ for (size_t i = 0; i < size / bytes; ++i)
+ copy_block_and_bump_pointers<bytes, block_op>(dst, src);
+ if constexpr (bump_size == BumpSize::kYes) {
+ size %= bytes;
+ }
}
-LIBC_INLINE auto misaligned(CPtr a) {
- return distance_to_align_down<kWordSize>(a);
+LIBC_INLINE void copy_bytes_and_bump_pointers(Ptr &dst, CPtr &src,
+ size_t size) {
+ consume_by_aligned_block<1, BlockOp::kFull, BumpSize::kNo>(dst, src, size);
}
} // namespace
@@ -125,20 +92,21 @@ LIBC_INLINE auto misaligned(CPtr a) {
if (src_alignment == 0)
LIBC_ATTR_LIKELY {
// Both `src` and `dst` are now word-aligned.
- copy_blocks_and_update_args<64, AssumeWordAligned>(dst, src, size);
- copy_blocks_and_update_args<16, AssumeWordAligned>(dst, src, size);
- copy_blocks_and_update_args<4, AssumeWordAligned>(dst, src, size);
+ consume_by_aligned_block<64, BlockOp::kFull>(dst, src, size);
+ consume_by_aligned_block<16, BlockOp::kFull>(dst, src, size);
+ consume_by_aligned_block<4, BlockOp::kFull>(dst, src, size);
}
else {
// `dst` is aligned but `src` is not.
LIBC_LOOP_NOUNROLL
while (size >= kWordSize) {
- // Recompose word from multiple loads depending on the alignment.
+ // Recompose word from multiple loads depending on the
+ // alignment.
const uint32_t value =
src_alignment == 2
? load_aligned<uint32_t, uint16_t, uint16_t>(src)
: load_aligned<uint32_t, uint8_t, uint16_t, uint8_t>(src);
- memcpy_inline<kWordSize>(assume_aligned<kWordSize>(dst), &value);
+ copy_assume_aligned<kWordSize>(dst, &value);
dst += kWordSize;
src += kWordSize;
size -= kWordSize;
@@ -169,31 +137,33 @@ LIBC_INLINE auto misaligned(CPtr a) {
if (size < 8)
LIBC_ATTR_UNLIKELY {
if (size & 1)
- copy_and_bump_pointers<1>(dst, src);
+ copy_block_and_bump_pointers<1>(dst, src);
if (size & 2)
- copy_and_bump_pointers<2>(dst, src);
+ copy_block_and_bump_pointers<2>(dst, src);
if (size & 4)
- copy_and_bump_pointers<4>(dst, src);
+ copy_block_and_bump_pointers<4>(dst, src);
return;
}
if (misaligned(src))
LIBC_ATTR_UNLIKELY {
const size_t offset = distance_to_align_up<kWordSize>(dst);
if (offset & 1)
- copy_and_bump_pointers<1>(dst, src);
+ copy_block_and_bump_pointers<1>(dst, src);
if (offset & 2)
- copy_and_bump_pointers<2>(dst, src);
+ copy_block_and_bump_pointers<2>(dst, src);
size -= offset;
}
}
- copy_blocks_and_update_args<64, ForceWordLdStChain>(dst, src, size);
- copy_blocks_and_update_args<16, ForceWordLdStChain>(dst, src, size);
- copy_blocks_and_update_args<4, AssumeUnaligned>(dst, src, size);
+ // `dst` and `src` are not necessarily both aligned at that point but this
+ // implementation assumes hardware support for unaligned loads and stores.
+ consume_by_aligned_block<64, BlockOp::kByWord>(dst, src, size);
+ consume_by_aligned_block<16, BlockOp::kByWord>(dst, src, size);
+ consume_by_aligned_block<4, BlockOp::kFull>(dst, src, size);
if (size & 1)
- copy_and_bump_pointers<1>(dst, src);
+ copy_block_and_bump_pointers<1>(dst, src);
if (size & 2)
LIBC_ATTR_UNLIKELY
- copy_and_bump_pointers<2>(dst, src);
+ copy_block_and_bump_pointers<2>(dst, src);
}
[[maybe_unused]] LIBC_INLINE void inline_memcpy_arm(void *__restrict dst_,
@@ -210,8 +180,4 @@ LIBC_INLINE auto misaligned(CPtr a) {
} // namespace LIBC_NAMESPACE_DECL
-// Cleanup local macros
-#undef LIBC_ATTR_LIKELY
-#undef LIBC_ATTR_UNLIKELY
-
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_INLINE_MEMCPY_H
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b13a909770e58..5fa6dc1ee04fa 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -4268,6 +4268,7 @@ libc_support_library(
"src/string/memory_utils/aarch64/inline_memcpy.h",
"src/string/memory_utils/aarch64/inline_memmove.h",
"src/string/memory_utils/aarch64/inline_memset.h",
+ "src/string/memory_utils/arm/common.h",
"src/string/memory_utils/arm/inline_memcpy.h",
"src/string/memory_utils/generic/aligned_access.h",
"src/string/memory_utils/generic/byte_per_byte.h",
|
memcpy
code
memcpy
codememcpy
code
lntue
approved these changes
Jul 11, 2025
petrhosek
approved these changes
Jul 11, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This patch is in preparation for the Cortex memset implementation. It does not change the generated code.