summaryrefslogblamecommitdiff
path: root/var/spack/repos/builtin/packages/zlib-ng/pr-1542.patch
blob: 675c2c1a3d0ac53392b9ac18ed8a31ad169a6f4b (plain) (tree)































































































































































































































                                                                                                                     
From 8c5d5eca51d9e4cd9aa046dba8f939b3f4012256 Mon Sep 17 00:00:00 2001
From: Hans Kristian Rosbach <hk-git@circlestorm.org>
Date: Fri, 21 Jul 2023 13:43:15 +0200
Subject: [PATCH 1/3] Clean up SSE4.2 support, and no longer use asm fallback
 or gcc builtin.

Defines changing meaning:
X86_SSE42 used to mean the compiler supports crc asm fallback.
X86_SSE42_CRC_INTRIN used to mean compiler supports SSE4.2 intrinsics.

X86_SSE42 now means compiler supports SSE4.2 intrinsics.

This therefore also fixes the adler32_sse42 checks, since those were depending
on SSE4.2 intrinsics but was mistakenly checking the X86_SSE42 define.
Now the X86_SSE42 define actually means what it appears to.
---
 CMakeLists.txt                 |  5 +----
 arch/x86/insert_string_sse42.c | 36 +++++----------------------------
 cmake/detect-intrinsics.cmake  | 23 +++------------------
 configure                      | 37 ++++++++--------------------------
 win32/Makefile.msc             |  1 -
 5 files changed, 17 insertions(+), 85 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 420a5c78..1e42239a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -827,15 +827,12 @@ if(WITH_OPTIM)
         endif()
         if(WITH_SSE42)
             check_sse42_intrinsics()
-            if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
+            if(HAVE_SSE42_INTRIN)
                 add_definitions(-DX86_SSE42)
                 set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
                 add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
                 list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
                 set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
-                if(HAVE_SSE42CRC_INTRIN)
-                    add_definitions(-DX86_SSE42_CRC_INTRIN)
-                endif()
             else()
                 set(WITH_SSE42 OFF)
             endif()
diff --git a/arch/x86/insert_string_sse42.c b/arch/x86/insert_string_sse42.c
index 565d92f9..ae092a7e 100644
--- a/arch/x86/insert_string_sse42.c
+++ b/arch/x86/insert_string_sse42.c
@@ -5,38 +5,13 @@
  *
  */
 
+#ifdef X86_SSE42
 #include "../../zbuild.h"
-#include <immintrin.h>
-#ifdef _MSC_VER
-#  include <nmmintrin.h>
-#endif
+#include <nmmintrin.h>
 #include "../../deflate.h"
 
-#ifdef X86_SSE42_CRC_INTRIN
-#  ifdef _MSC_VER
-#    define HASH_CALC(s, h, val)\
-        h = _mm_crc32_u32(h, val)
-#  else
-#    define HASH_CALC(s, h, val)\
-        h = __builtin_ia32_crc32si(h, val)
-#  endif
-#else
-#  ifdef _MSC_VER
-#    define HASH_CALC(s, h, val) {\
-        __asm mov edx, h\
-        __asm mov eax, val\
-        __asm crc32 eax, edx\
-        __asm mov h, eax\
-    }
-#  else
-#    define HASH_CALC(s, h, val) \
-        __asm__ __volatile__ (\
-            "crc32 %1,%0\n\t"\
-            : "+r" (h)\
-            : "r" (val)\
-        );
-#  endif
-#endif
+#define HASH_CALC(s, h, val)\
+    h = _mm_crc32_u32(h, val)
 
 #define HASH_CALC_VAR       h
 #define HASH_CALC_VAR_INIT  uint32_t h = 0
@@ -45,6 +20,5 @@
 #define INSERT_STRING       insert_string_sse42
 #define QUICK_INSERT_STRING quick_insert_string_sse42
 
-#ifdef X86_SSE42
-#  include "../../insert_string_tpl.h"
+#include "../../insert_string_tpl.h"
 #endif
diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake
index 9cbc5908..52c54dc8 100644
--- a/cmake/detect-intrinsics.cmake
+++ b/cmake/detect-intrinsics.cmake
@@ -481,35 +481,18 @@ macro(check_sse42_intrinsics)
             set(SSE42FLAG "-msse4.2")
         endif()
     endif()
-    # Check whether compiler supports SSE4.2 CRC inline asm
+    # Check whether compiler supports SSE4.2 intrinsics
     set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}")
     check_c_source_compile_or_run(
-        "int main(void) {
-            unsigned val = 0, h = 0;
-        #if defined(_MSC_VER)
-            { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov h, eax }
-        #else
-            __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
-        #endif
-            return (int)h;
-        }"
-        HAVE_SSE42CRC_INLINE_ASM
-    )
-    # Check whether compiler supports SSE4.2 CRC intrinsics
-    check_c_source_compile_or_run(
-        "#include <immintrin.h>
+        "#include <nmmintrin.h>
         int main(void) {
             unsigned crc = 0;
             char c = 'c';
-        #if defined(_MSC_VER)
             crc = _mm_crc32_u32(crc, c);
-        #else
-            crc = __builtin_ia32_crc32qi(crc, c);
-        #endif
             (void)crc;
             return 0;
         }"
-        HAVE_SSE42CRC_INTRIN
+        HAVE_SSE42_INTRIN
     )
     set(CMAKE_REQUIRED_FLAGS)
 endmacro()
diff --git a/configure b/configure
index 8714590e..6b4e7fff 100755
--- a/configure
+++ b/configure
@@ -1431,38 +1431,23 @@ EOF
 }
 
 check_sse42_intrinsics() {
-    # Check whether compiler supports SSE4.2 CRC inline asm
-    cat > $test.c << EOF
-int main(void) {
-    unsigned val = 0, h = 0;
-    __asm__ __volatile__ ( "crc32 %1,%0" : "+r" (h) : "r" (val) );
-    return (int) h;
-}
-EOF
-    if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
-        echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log
-        HAVE_SSE42CRC_INLINE_ASM=1
-    else
-        echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log
-        HAVE_SSE42CRC_INLINE_ASM=0
-    fi
-
-    # Check whether compiler supports SSE4.2 CRC intrinsics
+    # Check whether compiler supports SSE4.2 intrinsics
     cat > $test.c << EOF
+#include <nmmintrin.h>
 int main(void) {
     unsigned crc = 0;
     char c = 'c';
-    crc = __builtin_ia32_crc32qi(crc, c);
+    crc = _mm_crc32_u32(crc, c);
     (void)crc;
     return 0;
 }
 EOF
     if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
-        echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log
-        HAVE_SSE42CRC_INTRIN=1
+        echo "Checking for SSE4.2 intrinsics ... Yes." | tee -a configure.log
+        HAVE_SSE42_INTRIN=1
     else
-        echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log
-        HAVE_SSE42CRC_INTRIN=0
+        echo "Checking for SSE4.2 intrinsics ... No." | tee -a configure.log
+        HAVE_SSE42_INTRIN=0
     fi
 }
 
@@ -1606,15 +1591,9 @@ case "${ARCH}" in
 
             check_sse42_intrinsics
 
-            if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
+            if test ${HAVE_SSE42_INTRIN} -eq 1; then
                 CFLAGS="${CFLAGS} -DX86_SSE42"
                 SFLAGS="${SFLAGS} -DX86_SSE42"
-
-                if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then
-                  CFLAGS="${CFLAGS} -DX86_SSE42_CRC_INTRIN"
-                  SFLAGS="${SFLAGS} -DX86_SSE42_CRC_INTRIN"
-                fi
-
                 ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o"
                 ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo"
             fi
diff --git a/win32/Makefile.msc b/win32/Makefile.msc
index 9ed26f28..3035072b 100644
--- a/win32/Makefile.msc
+++ b/win32/Makefile.msc
@@ -31,7 +31,6 @@ WFLAGS  = \
 	-DX86_PCLMULQDQ_CRC \
 	-DX86_SSE2 \
 	-DX86_SSE42 \
-	-DX86_SSE42_CRC_INTRIN \
 	-DX86_SSSE3 \
 	-DX86_AVX2
 
-- 
2.39.2