Skip to content

Commit 246f56e

Browse files
committed
btrfs-progs: crypto: add PCL based implementation for crc32c
Copy faster implementation of crc32c from the Linux kernel as of 6.5-rc7 (x86_64, arch/x86/crypto/crc32c-pcl-intel-asm_64.S). This needs assembler build support, so detect target architecture so cross-compilation still works. Add a special CPU flag so the old and new implementations can be benchmarked and verified separately. Sample benchmark: CPU flags: 0x1ff CPU features: SSE2 SSSE3 SSE41 SSE42 SHA AVX AVX2 CRC32C_PCL Block size: 4096 Iterations: 1000000 Implementation: builtin Units: CPU cycles NULL-NOP: cycles: 77177218, cycles/i 77 NULL-MEMCPY: cycles: 226313072, cycles/i 226, 62133.395 MiB/s CRC32C-ref: cycles: 24418596066, cycles/i 24418, 575.859 MiB/s CRC32C-NI: cycles: 1188335920, cycles/i 1188, 11833.073 MiB/s CRC32C-PCL: cycles: 463193456, cycles/i 463, 30358.037 MiB/s XXHASH: cycles: 851606646, cycles/i 851, 16511.916 MiB/s SHA256-ref: cycles: 74476234956, cycles/i 74476, 188.808 MiB/s SHA256-NI: cycles: 34198637428, cycles/i 34198, 411.177 MiB/s BLAKE2-ref: cycles: 14761411664, cycles/i 14761, 952.597 MiB/s BLAKE2-SSE2: cycles: 18101896796, cycles/i 18101, 776.807 MiB/s BLAKE2-SSE41: cycles: 12599091062, cycles/i 12599, 1116.087 MiB/s BLAKE2-AVX2: cycles: 9668247506, cycles/i 9668, 1454.418 MiB/s The new implementation is about 2.5x faster. Note: the new version does not work on musl because of linkage problems (relocations in .rodata), so musl builds still use the old implementation. Signed-off-by: David Sterba <[email protected]>
1 parent 5c9b480 commit 246f56e

File tree

9 files changed

+541
-4
lines changed

9 files changed

+541
-4
lines changed

Makefile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ DISABLE_WARNING_FLAGS := $(call cc-disable-warning, format-truncation) \
8888
ENABLE_WARNING_FLAGS := $(call cc-option, -Wimplicit-fallthrough) \
8989
$(call cc-option, -Wmissing-prototypes)
9090

91+
ASFLAGS =
92+
9193
# Common build flags
9294
CFLAGS = $(SUBST_CFLAGS) \
9395
-std=gnu11 \
@@ -383,6 +385,14 @@ CRYPTO_OBJECTS = crypto/sha224-256.o crypto/blake2b-ref.o crypto/blake2b-sse2.o
383385
CRYPTO_CFLAGS = -DCRYPTOPROVIDER_BUILTIN=1
384386
endif
385387

388+
ifeq ($(TARGET_CPU),x86_64)
389+
# FIXME: linkage is broken on musl for some reason
390+
ifeq ($(HAVE_GLIBC),1)
391+
CRYPTO_OBJECTS += crypto/crc32c-pcl-intel-asm_64.o
392+
ASFLAGS += -fPIC
393+
endif
394+
endif
395+
386396
CHECKER_FLAGS += $(btrfs_convert_cflags)
387397

388398
# collect values of the variables above
@@ -450,6 +460,13 @@ endif
450460
-MT $($(dir $@).deps/$(notdir $@):.o.d=.static.o) \
451461
-MT $(dir $@).deps/$(notdir $@) $(CFLAGS) $<
452462

463+
.S.o:
464+
@echo " [AS] $@"
465+
$(Q)$(CC) $(CFLAGS) $(ASFLAGS) -c $< -o $@
466+
467+
%.static.o: %.S
468+
@echo " [AS] $@"
469+
$(Q)$(CC) $(CFLAGS) $(ASFLAGS) -c $< -o $@
453470
#
454471
# Pick from per-file variables, btrfs_*_cflags
455472
#

Makefile.inc.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ HAVE_CFLAG_msse2 = @HAVE_CFLAG_msse2@
2828
HAVE_CFLAG_msse41 = @HAVE_CFLAG_msse41@
2929
HAVE_CFLAG_mavx2 = @HAVE_CFLAG_mavx2@
3030
HAVE_CFLAG_msha = @HAVE_CFLAG_msha@
31+
TARGET_CPU = @target_cpu@
32+
HAVE_GLIBC = @HAVE_GLIBC@
3133

3234
SUBST_CFLAGS = @CFLAGS@
3335
SUBST_LDFLAGS = @LDFLAGS@

common/cpu-utils.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ void cpu_print_flags(void) {
5858
FLAG(SHA);
5959
FLAG(AVX);
6060
FLAG(AVX2);
61+
FLAG(CRC32C_PCL);
6162
putchar(10);
6263
}
6364
#undef FLAG
@@ -88,6 +89,7 @@ void cpu_detect_flags(void)
8889
if (b & (1UL << 29))
8990
__cpu_flags |= CPU_FLAG_SHA;
9091

92+
__cpu_flags |= CPU_FLAG_CRC32C_PCL;
9193
__cpu_flags_orig = __cpu_flags;
9294
}
9395

common/cpu-utils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ enum cpu_feature {
3737
ENUM_CPU_BIT(CPU_FLAG_SHA),
3838
ENUM_CPU_BIT(CPU_FLAG_AVX),
3939
ENUM_CPU_BIT(CPU_FLAG_AVX2),
40+
41+
/* Special features */
42+
ENUM_CPU_BIT(CPU_FLAG_CRC32C_PCL),
4043
};
4144

4245
#undef ENUM_CPU_BIT

configure.ac

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ AC_PREFIX_DEFAULT([/usr/local])
4040

4141
AC_PROG_CC
4242
AC_CANONICAL_HOST
43+
AC_CANONICAL_TARGET
4344
AC_C_CONST
4445
AC_C_VOLATILE
4546
AC_C_BIGENDIAN
@@ -79,6 +80,9 @@ AC_CHECK_FUNCS([reallocarray])
7980

8081
AC_CHECK_FUNCS([clock_gettime])
8182

83+
AX_CHECK_DEFINE([features.h],[__GLIBC__],[HAVE_GLIBC=1],[HAVE_GLIBC=0])
84+
AC_SUBST([HAVE_GLIBC])
85+
8286
AX_GCC_BUILTIN([__builtin_add_overflow])
8387
AX_GCC_BUILTIN([__builtin_sub_overflow])
8488
AX_GCC_BUILTIN([__builtin_mul_overflow])

0 commit comments

Comments
 (0)