From 5672c6435658f98953ff2662c2f31d3ccb0d436a Mon Sep 17 00:00:00 2001 From: Jonas Thies <16190001+jthies@users.noreply.github.com> Date: Sat, 15 Jul 2023 06:10:57 +0200 Subject: add a phist patch to avoid trying to compile SSE code if that is not … (#38806) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add a phist patch to avoid trying to compile SSE code if that is not available. * phist: make the avoid-sse patch more robust because compiler on ARM system still tried to compile SSE code --- .../repos/builtin/packages/phist/avoid-sse.patch | 346 +++++++++++++++++++++ var/spack/repos/builtin/packages/phist/package.py | 4 + 2 files changed, 350 insertions(+) create mode 100644 var/spack/repos/builtin/packages/phist/avoid-sse.patch (limited to 'var') diff --git a/var/spack/repos/builtin/packages/phist/avoid-sse.patch b/var/spack/repos/builtin/packages/phist/avoid-sse.patch new file mode 100644 index 0000000000..44ae90ec8b --- /dev/null +++ b/var/spack/repos/builtin/packages/phist/avoid-sse.patch @@ -0,0 +1,346 @@ +commit eaef462cc07509fe8f380fbf520a2617b910b139 +Author: Jonas Thies <16190001+jthies@users.noreply.github.com> +Date: Sun Jul 9 21:33:30 2023 +0200 + + exit early from builtin kernels requiring SSE so that they are not compiled if it is not available + (this broke phist compilation on ARM systems, even though we never called these kernels if SSE was disabled) + +diff --git a/src/kernels/builtin/axpy_kernels_nt.c b/src/kernels/builtin/axpy_kernels_nt.c +index 64d5fbd0..17c5024a 100644 +--- a/src/kernels/builtin/axpy_kernels_nt.c ++++ b/src/kernels/builtin/axpy_kernels_nt.c +@@ -19,7 +19,9 @@ + #endif + #include + #include ++#ifdef PHIST_HAVE_SSE + #include ++#endif + #include + + static inline _Bool is_aligned(const void *restrict pointer, size_t byte_count) +@@ -30,6 +32,10 @@ static inline _Bool is_aligned(const void *restrict pointer, size_t byte_count) + + void daxpy_nt_2_c(int nrows, const double *restrict alpha, const double *restrict x, double *restrict y) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(y,16) ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y); +@@ -54,11 +60,16 @@ void daxpy_nt_2_c(int nrows, const double *restrict alpha, const double *restric + // non-temporal store + _mm_stream_pd(y+2*i, y_); + } ++#endif + } + + + void daxpy_nt_4_c(int nrows, const double *restrict alpha, const double *restrict x, double *restrict y) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(y,16) ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y); +@@ -86,11 +97,16 @@ void daxpy_nt_4_c(int nrows, const double *restrict alpha, const double *restric + _mm_stream_pd(y+4*i+2*k, y_); + } + } ++#endif + } + + + void daxpy_nt_8_c(int nrows, const double *restrict alpha, const double *restrict x, double *restrict y) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(y,16) ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y); +@@ -118,11 +134,16 @@ void daxpy_nt_8_c(int nrows, const double *restrict alpha, const double *restric + _mm_stream_pd(y+8*i+2*k, y_); + } + } ++#endif + } + + + void daxpy_nt_strided_2_c(int nrows, const double *restrict alpha, const double *restrict x, int ldx, double *restrict y, int ldy) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(y,16) ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y); +@@ -140,11 +161,16 @@ void daxpy_nt_strided_2_c(int nrows, const double *restrict alpha, const double + // non-temporal store + _mm_stream_pd(y+ldy*i, y_); + } ++#endif + } + + + void daxpy_nt_strided_4_c(int nrows, const double *restrict alpha, const double *restrict x, int ldx, double *restrict y, int ldy) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(y,16) || ldy % 2 != 0 ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y); +@@ -165,11 +191,16 @@ void daxpy_nt_strided_4_c(int nrows, const double *restrict alpha, const double + _mm_stream_pd(y+ldy*i+2*k, y_); + } + } ++#endif + } + + + void daxpy_nt_strided_8_c(int nrows, const double *restrict alpha, const double *restrict x, int ldx, double *restrict y, int ldy) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(y,16) || ldy % 2 != 0 ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y); +@@ -190,11 +221,16 @@ void daxpy_nt_strided_8_c(int nrows, const double *restrict alpha, const double + _mm_stream_pd(y+ldy*i+2*k, y_); + } + } ++#endif + } + + + void dcopy_general_nt_c(int nrows, int nvec, const double *restrict x, int ldx, double *restrict y, int ldy) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( nvec % 2 != 0 ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)x); +@@ -217,5 +253,6 @@ void dcopy_general_nt_c(int nrows, int nvec, const double *restrict x, int ldx, + _mm_stream_pd(y+i*ldy+2*j, tmp); + } + } ++#endif + } + +diff --git a/src/kernels/builtin/spmvm_kernels_nt.c b/src/kernels/builtin/spmvm_kernels_nt.c +index d4d30bff..5d858878 100644 +--- a/src/kernels/builtin/spmvm_kernels_nt.c ++++ b/src/kernels/builtin/spmvm_kernels_nt.c +@@ -19,7 +19,9 @@ + #endif + #include + #include ++#ifdef PHIST_HAVE_SSE + #include ++#endif + #include + + #ifdef PHIST_HIGH_PRECISION_KERNELS +@@ -35,6 +37,10 @@ static inline _Bool is_aligned(const void *restrict pointer, size_t byte_count) + void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val, + const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(lhsv,16) ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv); +@@ -123,7 +129,7 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const + #endif + + // last row +-#ifdef PHIST_HIGH_PRECISION_KERNELS ++# ifdef PHIST_HIGH_PRECISION_KERNELS + if( nrows % 2 != 0 ) + { + double lhs, lhsC; +@@ -136,7 +142,7 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const + + lhsv[nrows-1] = alpha*(lhs+lhsC); + } +-#else ++# else + if( nrows % 2 != 0 ) + { + lhsv[nrows-1] = shifts[0]*rhsv[nrows-1]; +@@ -146,6 +152,7 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const + lhsv[nrows-1] += val[j]*halo[ (col_idx[j]-1) ]; + lhsv[nrows-1] *= alpha; + } ++# endif + #endif + } + +@@ -153,6 +160,10 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const + void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val, + const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv, int ldl) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(lhsv,32) || ldl % 2 != 0 ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv); +@@ -176,7 +187,7 @@ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const + __m128d shifts_ = _mm_loadu_pd(shifts); + __m128d alpha_ = _mm_set1_pd(alpha); + +-#ifdef PHIST_HIGH_PRECISION_KERNELS ++# ifdef PHIST_HIGH_PRECISION_KERNELS + #pragma omp parallel for schedule(static) + for(int i = 0; i < nrows; i++) + { +@@ -204,7 +215,7 @@ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const + // non-temporal store + _mm_stream_pd(lhsv+i*ldl, lhs); + } +-#else ++# else + #pragma omp parallel for schedule(static) + for(int i = 0; i < nrows; i++) + { +@@ -232,16 +243,21 @@ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const + // multiply with alpha + __m128d alpha_ = _mm_set1_pd(alpha); + lhs_ = _mm_mul_pd(alpha_,lhs_); +- ++ + // non-temporal store + _mm_stream_pd(lhsv+i*ldl, lhs_); + } ++# endif + #endif + } + + void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val, + const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv, int ldl) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(lhsv,32) || ldl % 4 != 0 ) + { + printf("%s: lhsv not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv); +@@ -261,7 +277,7 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const + } + + +-#ifdef PHIST_HIGH_PRECISION_KERNELS ++# ifdef PHIST_HIGH_PRECISION_KERNELS + + __m256d shifts_ = _mm256_loadu_pd(shifts); + __m256d alpha_ = _mm256_set1_pd(alpha); +@@ -294,7 +310,7 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const + _mm256_stream_pd(lhsv+i*ldl, lhs); + } + +-#else ++# else + + __m128d shifts_[2]; + shifts_[0] = _mm_loadu_pd(shifts); +@@ -341,6 +357,7 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const + } + } + ++# endif + #endif + } + +@@ -348,6 +365,10 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const + void dspmvm_nt_8_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val, + const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv, int ldl) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(lhsv,16) || ldl % 2 != 0 ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv); +@@ -412,12 +433,17 @@ void dspmvm_nt_8_c(int nrows, double alpha, const long *restrict row_ptr, const + _mm_stream_pd(lhsv+i*ldl+2*k, lhs_[k]); + } + } ++#endif + } + + + void dspmvm_nt_strided_2_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val, + const double *restrict shifts, const double *restrict rhsv, int ldr, const double *restrict halo, double *restrict lhsv, int ldl) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(lhsv,16) || ldl % 2 != 0 ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv); +@@ -460,15 +486,20 @@ void dspmvm_nt_strided_2_c(int nrows, double alpha, const long *restrict row_ptr + // multiply with alpha + __m128d alpha_ = _mm_set1_pd(alpha); + lhs_ = _mm_mul_pd(alpha_,lhs_); +- ++ + // non-temporal store + _mm_stream_pd(lhsv+i*ldl, lhs_); + } ++#endif + } + + void dspmvm_nt_strided_4_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val, + const double *restrict shifts, const double *restrict rhsv, int ldr, const double *restrict halo, double *restrict lhsv, int ldl) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(lhsv,16) || ldl % 2 != 0 ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv); +@@ -526,11 +557,16 @@ void dspmvm_nt_strided_4_c(int nrows, double alpha, const long *restrict row_ptr + _mm_stream_pd(lhsv+i*ldl+2*k, lhs_[k]); + } + } ++#endif + } + + void dspmvm_nt_strided_8_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val, + const double *restrict shifts, const double *restrict rhsv, int ldr, const double *restrict halo, double *restrict lhsv, int ldl) + { ++#ifndef PHIST_HAVE_SSE ++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__); ++ exit(1); ++#else + if( !is_aligned(lhsv,16) || ldl % 2 != 0 ) + { + printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv); +@@ -589,6 +625,7 @@ void dspmvm_nt_strided_8_c(int nrows, double alpha, const long *restrict row_ptr + _mm_stream_pd(lhsv+i*ldl+2*k, lhs_[k]); + } + } ++#endif + } + + diff --git a/var/spack/repos/builtin/packages/phist/package.py b/var/spack/repos/builtin/packages/phist/package.py index fa055e47f2..62b8b131d4 100644 --- a/var/spack/repos/builtin/packages/phist/package.py +++ b/var/spack/repos/builtin/packages/phist/package.py @@ -150,6 +150,10 @@ class Phist(CMakePackage): # ###################### Patches ########################## + # Avoid trying to compile some SSE code if SSE is not available + # This patch will be part of phist 1.11.3 and greater and only affects + # the 'builtin' kernel_lib. + patch("avoid-sse.patch", when="@:1.11.2 kernel_lib=builtin") # Only applies to 1.9.4: While SSE instructions are handled correctly, # build fails on ppc64le unless -DNO_WARN_X86_INTRINSICS is defined. patch("ppc64_sse.patch", when="@1.9.4") -- cgit v1.2.3-70-g09d2