From 8c5d5eca51d9e4cd9aa046dba8f939b3f4012256 Mon Sep 17 00:00:00 2001
From: Hans Kristian Rosbach <hk-git@circlestorm.org>
Date: Fri, 21 Jul 2023 13:43:15 +0200
Subject: [PATCH 1/3] Clean up SSE4.2 support, and no longer use asm fallback
or gcc builtin.
Defines changing meaning:
X86_SSE42 used to mean the compiler supports crc asm fallback.
X86_SSE42_CRC_INTRIN used to mean compiler supports SSE4.2 intrinsics.
X86_SSE42 now means compiler supports SSE4.2 intrinsics.
This therefore also fixes the adler32_sse42 checks, since those depended
on SSE4.2 intrinsics but were mistakenly checking the X86_SSE42 define.
Now the X86_SSE42 define actually means what it appears to.
---
CMakeLists.txt | 5 +----
arch/x86/insert_string_sse42.c | 36 +++++----------------------------
cmake/detect-intrinsics.cmake | 23 +++------------------
configure | 37 ++++++++--------------------------
win32/Makefile.msc | 1 -
5 files changed, 17 insertions(+), 85 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 420a5c78..1e42239a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -827,15 +827,12 @@ if(WITH_OPTIM)
endif()
if(WITH_SSE42)
check_sse42_intrinsics()
- if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
+ if(HAVE_SSE42_INTRIN)
add_definitions(-DX86_SSE42)
set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
- if(HAVE_SSE42CRC_INTRIN)
- add_definitions(-DX86_SSE42_CRC_INTRIN)
- endif()
else()
set(WITH_SSE42 OFF)
endif()
diff --git a/arch/x86/insert_string_sse42.c b/arch/x86/insert_string_sse42.c
index 565d92f9..ae092a7e 100644
--- a/arch/x86/insert_string_sse42.c
+++ b/arch/x86/insert_string_sse42.c
@@ -5,38 +5,13 @@
*
*/
+#ifdef X86_SSE42
#include "../../zbuild.h"
-#include <immintrin.h>
-#ifdef _MSC_VER
-# include <nmmintrin.h>
-#endif
+#include <nmmintrin.h>
#include "../../deflate.h"
-#ifdef X86_SSE42_CRC_INTRIN
-# ifdef _MSC_VER
-# define HASH_CALC(s, h, val)\
- h = _mm_crc32_u32(h, val)
-# else
-# define HASH_CALC(s, h, val)\
- h = __builtin_ia32_crc32si(h, val)
-# endif
-#else
-# ifdef _MSC_VER
-# define HASH_CALC(s, h, val) {\
- __asm mov edx, h\
- __asm mov eax, val\
- __asm crc32 eax, edx\
- __asm mov h, eax\
- }
-# else
-# define HASH_CALC(s, h, val) \
- __asm__ __volatile__ (\
- "crc32 %1,%0\n\t"\
- : "+r" (h)\
- : "r" (val)\
- );
-# endif
-#endif
+#define HASH_CALC(s, h, val)\
+ h = _mm_crc32_u32(h, val)
#define HASH_CALC_VAR h
#define HASH_CALC_VAR_INIT uint32_t h = 0
@@ -45,6 +20,5 @@
#define INSERT_STRING insert_string_sse42
#define QUICK_INSERT_STRING quick_insert_string_sse42
-#ifdef X86_SSE42
-# include "../../insert_string_tpl.h"
+#include "../../insert_string_tpl.h"
#endif
diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake
index 9cbc5908..52c54dc8 100644
--- a/cmake/detect-intrinsics.cmake
+++ b/cmake/detect-intrinsics.cmake
@@ -481,35 +481,18 @@ macro(check_sse42_intrinsics)
set(SSE42FLAG "-msse4.2")
endif()
endif()
- # Check whether compiler supports SSE4.2 CRC inline asm
+ # Check whether compiler supports SSE4.2 intrinsics
set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}")
check_c_source_compile_or_run(
- "int main(void) {
- unsigned val = 0, h = 0;
- #if defined(_MSC_VER)
- { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov h, eax }
- #else
- __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
- #endif
- return (int)h;
- }"
- HAVE_SSE42CRC_INLINE_ASM
- )
- # Check whether compiler supports SSE4.2 CRC intrinsics
- check_c_source_compile_or_run(
- "#include <immintrin.h>
+ "#include <nmmintrin.h>
int main(void) {
unsigned crc = 0;
char c = 'c';
- #if defined(_MSC_VER)
crc = _mm_crc32_u32(crc, c);
- #else
- crc = __builtin_ia32_crc32qi(crc, c);
- #endif
(void)crc;
return 0;
}"
- HAVE_SSE42CRC_INTRIN
+ HAVE_SSE42_INTRIN
)
set(CMAKE_REQUIRED_FLAGS)
endmacro()
diff --git a/configure b/configure
index 8714590e..6b4e7fff 100755
--- a/configure
+++ b/configure
@@ -1431,38 +1431,23 @@ EOF
}
check_sse42_intrinsics() {
- # Check whether compiler supports SSE4.2 CRC inline asm
- cat > $test.c << EOF
-int main(void) {
- unsigned val = 0, h = 0;
- __asm__ __volatile__ ( "crc32 %1,%0" : "+r" (h) : "r" (val) );
- return (int) h;
-}
-EOF
- if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
- echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log
- HAVE_SSE42CRC_INLINE_ASM=1
- else
- echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log
- HAVE_SSE42CRC_INLINE_ASM=0
- fi
-
- # Check whether compiler supports SSE4.2 CRC intrinsics
+ # Check whether compiler supports SSE4.2 intrinsics
cat > $test.c << EOF
+#include <nmmintrin.h>
int main(void) {
unsigned crc = 0;
char c = 'c';
- crc = __builtin_ia32_crc32qi(crc, c);
+ crc = _mm_crc32_u32(crc, c);
(void)crc;
return 0;
}
EOF
if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
- echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log
- HAVE_SSE42CRC_INTRIN=1
+ echo "Checking for SSE4.2 intrinsics ... Yes." | tee -a configure.log
+ HAVE_SSE42_INTRIN=1
else
- echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log
- HAVE_SSE42CRC_INTRIN=0
+ echo "Checking for SSE4.2 intrinsics ... No." | tee -a configure.log
+ HAVE_SSE42_INTRIN=0
fi
}
@@ -1606,15 +1591,9 @@ case "${ARCH}" in
check_sse42_intrinsics
- if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
+ if test ${HAVE_SSE42_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42"
SFLAGS="${SFLAGS} -DX86_SSE42"
-
- if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then
- CFLAGS="${CFLAGS} -DX86_SSE42_CRC_INTRIN"
- SFLAGS="${SFLAGS} -DX86_SSE42_CRC_INTRIN"
- fi
-
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo"
fi
diff --git a/win32/Makefile.msc b/win32/Makefile.msc
index 9ed26f28..3035072b 100644
--- a/win32/Makefile.msc
+++ b/win32/Makefile.msc
@@ -31,7 +31,6 @@ WFLAGS = \
-DX86_PCLMULQDQ_CRC \
-DX86_SSE2 \
-DX86_SSE42 \
- -DX86_SSE42_CRC_INTRIN \
-DX86_SSSE3 \
-DX86_AVX2
--
2.39.2