From e2809e3a1015697832ee4d37b75ba1cd0caac0f0 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 28 Feb 2025 16:27:30 +0700 Subject: [PATCH] Inline CRC computation for small fixed-length input on x86 pg_crc32c.h now has a simplified copy of the loop in pg_crc32c_sse42.c suitable for inlining where possible. This may slightly reduce contention for the WAL insertion lock, but that hasn't been tested. The motivation for this change is avoid regressing for a future commit that will use a function pointer for non-constant input in all x86 builds. While it's technically possible to make a similar change for Arm and LoongArch, there are some questions about how inlining should work since those platforms prefer stricter alignment. There are also no immediate plans to add additional implementations for them. Reviewed-by: Nathan Bossart Reviewed-by: Raghuveer Devulapalli Discussion: https://postgr.es/m/CANWCAZZEiTzhZcuwTiJ2=opiNpAUn1vuDRu1N02z61AthwRZLA@mail.gmail.com Discussion: https://postgr.es/m/CANWCAZYRhLHArpyfV4uRK-Rw9N5oV5HMkkKtBehcuTjNOMwCZg@mail.gmail.com --- src/include/port/pg_crc32c.h | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index 65ebeacf4b1..0ab7513f523 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -43,12 +43,42 @@ typedef uint32 pg_crc32c; #if defined(USE_SSE42_CRC32C) /* Use Intel SSE4.2 instructions. */ + +#include + #define COMP_CRC32C(crc, data, len) \ - ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) + ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +pg_attribute_no_sanitize_alignment() +static inline +pg_crc32c +pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) +{ + if (__builtin_constant_p(len) && len < 32) + { + const unsigned char *p = data; + + /* + * For small constant inputs, inline the computation to avoid a + * function call and allow the compiler to unroll loops. + */ +#if SIZEOF_VOID_P >= 8 + for (; len >= 8; p += 8, len -= 8) + crc = _mm_crc32_u64(crc, *(const uint64 *) p); +#endif + for (; len >= 4; p += 4, len -= 4) + crc = _mm_crc32_u32(crc, *(const uint32 *) p); + for (; len > 0; --len) + crc = _mm_crc32_u8(crc, *p++); + return crc; + } + else + return pg_comp_crc32c_sse42(crc, data, len); +} + #elif defined(USE_ARMV8_CRC32C) /* Use ARMv8 CRC Extension instructions. */ -- 2.30.2