summaryrefslogtreecommitdiff
path: root/var
diff options
context:
space:
mode:
author: Jonas Thies <16190001+jthies@users.noreply.github.com> 2023-07-15 06:10:57 +0200
committer: GitHub <noreply@github.com> 2023-07-14 21:10:57 -0700
commit: 5672c6435658f98953ff2662c2f31d3ccb0d436a (patch)
tree: e7f21f92e4a7054f7c4bd4678dbf97155ddc4392 /var
parent: 1f58ac5ed317f99d50333f390611e38da2ef23eb (diff)
download: spack-5672c6435658f98953ff2662c2f31d3ccb0d436a.tar.gz
spack-5672c6435658f98953ff2662c2f31d3ccb0d436a.tar.bz2
spack-5672c6435658f98953ff2662c2f31d3ccb0d436a.tar.xz
spack-5672c6435658f98953ff2662c2f31d3ccb0d436a.zip
add a phist patch to avoid trying to compile SSE code if that is not available (#38806)
* add a phist patch to avoid trying to compile SSE code if that is not available.
* phist: make the avoid-sse patch more robust because the compiler on ARM systems still tried to compile SSE code
Diffstat (limited to 'var')
-rw-r--r-- var/spack/repos/builtin/packages/phist/avoid-sse.patch 346
-rw-r--r-- var/spack/repos/builtin/packages/phist/package.py 4
2 files changed, 350 insertions, 0 deletions
diff --git a/var/spack/repos/builtin/packages/phist/avoid-sse.patch b/var/spack/repos/builtin/packages/phist/avoid-sse.patch
new file mode 100644
index 0000000000..44ae90ec8b
--- /dev/null
+++ b/var/spack/repos/builtin/packages/phist/avoid-sse.patch
@@ -0,0 +1,346 @@
+commit eaef462cc07509fe8f380fbf520a2617b910b139
+Author: Jonas Thies <16190001+jthies@users.noreply.github.com>
+Date: Sun Jul 9 21:33:30 2023 +0200
+
+ exit early from builtin kernels requiring SSE so that they are not compiled if it is not available
+ (this broke phist compilation on ARM systems, even though we never called these kernels if SSE was disabled)
+
+diff --git a/src/kernels/builtin/axpy_kernels_nt.c b/src/kernels/builtin/axpy_kernels_nt.c
+index 64d5fbd0..17c5024a 100644
+--- a/src/kernels/builtin/axpy_kernels_nt.c
++++ b/src/kernels/builtin/axpy_kernels_nt.c
+@@ -19,7 +19,9 @@
+ #endif
+ #include <stdint.h>
+ #include <stdio.h>
++#ifdef PHIST_HAVE_SSE
+ #include <emmintrin.h>
++#endif
+ #include <stdlib.h>
+
+ static inline _Bool is_aligned(const void *restrict pointer, size_t byte_count)
+@@ -30,6 +32,10 @@ static inline _Bool is_aligned(const void *restrict pointer, size_t byte_count)
+
+ void daxpy_nt_2_c(int nrows, const double *restrict alpha, const double *restrict x, double *restrict y)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(y,16) )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
+@@ -54,11 +60,16 @@ void daxpy_nt_2_c(int nrows, const double *restrict alpha, const double *restric
+ // non-temporal store
+ _mm_stream_pd(y+2*i, y_);
+ }
++#endif
+ }
+
+
+ void daxpy_nt_4_c(int nrows, const double *restrict alpha, const double *restrict x, double *restrict y)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(y,16) )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
+@@ -86,11 +97,16 @@ void daxpy_nt_4_c(int nrows, const double *restrict alpha, const double *restric
+ _mm_stream_pd(y+4*i+2*k, y_);
+ }
+ }
++#endif
+ }
+
+
+ void daxpy_nt_8_c(int nrows, const double *restrict alpha, const double *restrict x, double *restrict y)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(y,16) )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
+@@ -118,11 +134,16 @@ void daxpy_nt_8_c(int nrows, const double *restrict alpha, const double *restric
+ _mm_stream_pd(y+8*i+2*k, y_);
+ }
+ }
++#endif
+ }
+
+
+ void daxpy_nt_strided_2_c(int nrows, const double *restrict alpha, const double *restrict x, int ldx, double *restrict y, int ldy)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(y,16) )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
+@@ -140,11 +161,16 @@ void daxpy_nt_strided_2_c(int nrows, const double *restrict alpha, const double
+ // non-temporal store
+ _mm_stream_pd(y+ldy*i, y_);
+ }
++#endif
+ }
+
+
+ void daxpy_nt_strided_4_c(int nrows, const double *restrict alpha, const double *restrict x, int ldx, double *restrict y, int ldy)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(y,16) || ldy % 2 != 0 )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
+@@ -165,11 +191,16 @@ void daxpy_nt_strided_4_c(int nrows, const double *restrict alpha, const double
+ _mm_stream_pd(y+ldy*i+2*k, y_);
+ }
+ }
++#endif
+ }
+
+
+ void daxpy_nt_strided_8_c(int nrows, const double *restrict alpha, const double *restrict x, int ldx, double *restrict y, int ldy)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(y,16) || ldy % 2 != 0 )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
+@@ -190,11 +221,16 @@ void daxpy_nt_strided_8_c(int nrows, const double *restrict alpha, const double
+ _mm_stream_pd(y+ldy*i+2*k, y_);
+ }
+ }
++#endif
+ }
+
+
+ void dcopy_general_nt_c(int nrows, int nvec, const double *restrict x, int ldx, double *restrict y, int ldy)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( nvec % 2 != 0 )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)x);
+@@ -217,5 +253,6 @@ void dcopy_general_nt_c(int nrows, int nvec, const double *restrict x, int ldx,
+ _mm_stream_pd(y+i*ldy+2*j, tmp);
+ }
+ }
++#endif
+ }
+
+diff --git a/src/kernels/builtin/spmvm_kernels_nt.c b/src/kernels/builtin/spmvm_kernels_nt.c
+index d4d30bff..5d858878 100644
+--- a/src/kernels/builtin/spmvm_kernels_nt.c
++++ b/src/kernels/builtin/spmvm_kernels_nt.c
+@@ -19,7 +19,9 @@
+ #endif
+ #include <stdint.h>
+ #include <stdio.h>
++#ifdef PHIST_HAVE_SSE
+ #include <emmintrin.h>
++#endif
+ #include <stdlib.h>
+
+ #ifdef PHIST_HIGH_PRECISION_KERNELS
+@@ -35,6 +37,10 @@ static inline _Bool is_aligned(const void *restrict pointer, size_t byte_count)
+ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
+ const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(lhsv,16) )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
+@@ -123,7 +129,7 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const
+ #endif
+
+ // last row
+-#ifdef PHIST_HIGH_PRECISION_KERNELS
++# ifdef PHIST_HIGH_PRECISION_KERNELS
+ if( nrows % 2 != 0 )
+ {
+ double lhs, lhsC;
+@@ -136,7 +142,7 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const
+
+ lhsv[nrows-1] = alpha*(lhs+lhsC);
+ }
+-#else
++# else
+ if( nrows % 2 != 0 )
+ {
+ lhsv[nrows-1] = shifts[0]*rhsv[nrows-1];
+@@ -146,6 +152,7 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const
+ lhsv[nrows-1] += val[j]*halo[ (col_idx[j]-1) ];
+ lhsv[nrows-1] *= alpha;
+ }
++# endif
+ #endif
+ }
+
+@@ -153,6 +160,10 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const
+ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
+ const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv, int ldl)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(lhsv,32) || ldl % 2 != 0 )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
+@@ -176,7 +187,7 @@ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const
+ __m128d shifts_ = _mm_loadu_pd(shifts);
+ __m128d alpha_ = _mm_set1_pd(alpha);
+
+-#ifdef PHIST_HIGH_PRECISION_KERNELS
++# ifdef PHIST_HIGH_PRECISION_KERNELS
+ #pragma omp parallel for schedule(static)
+ for(int i = 0; i < nrows; i++)
+ {
+@@ -204,7 +215,7 @@ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const
+ // non-temporal store
+ _mm_stream_pd(lhsv+i*ldl, lhs);
+ }
+-#else
++# else
+ #pragma omp parallel for schedule(static)
+ for(int i = 0; i < nrows; i++)
+ {
+@@ -232,16 +243,21 @@ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const
+ // multiply with alpha
+ __m128d alpha_ = _mm_set1_pd(alpha);
+ lhs_ = _mm_mul_pd(alpha_,lhs_);
+-
++
+ // non-temporal store
+ _mm_stream_pd(lhsv+i*ldl, lhs_);
+ }
++# endif
+ #endif
+ }
+
+ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
+ const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv, int ldl)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(lhsv,32) || ldl % 4 != 0 )
+ {
+ printf("%s: lhsv not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
+@@ -261,7 +277,7 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const
+ }
+
+
+-#ifdef PHIST_HIGH_PRECISION_KERNELS
++# ifdef PHIST_HIGH_PRECISION_KERNELS
+
+ __m256d shifts_ = _mm256_loadu_pd(shifts);
+ __m256d alpha_ = _mm256_set1_pd(alpha);
+@@ -294,7 +310,7 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const
+ _mm256_stream_pd(lhsv+i*ldl, lhs);
+ }
+
+-#else
++# else
+
+ __m128d shifts_[2];
+ shifts_[0] = _mm_loadu_pd(shifts);
+@@ -341,6 +357,7 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const
+ }
+ }
+
++# endif
+ #endif
+ }
+
+@@ -348,6 +365,10 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const
+ void dspmvm_nt_8_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
+ const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv, int ldl)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(lhsv,16) || ldl % 2 != 0 )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
+@@ -412,12 +433,17 @@ void dspmvm_nt_8_c(int nrows, double alpha, const long *restrict row_ptr, const
+ _mm_stream_pd(lhsv+i*ldl+2*k, lhs_[k]);
+ }
+ }
++#endif
+ }
+
+
+ void dspmvm_nt_strided_2_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
+ const double *restrict shifts, const double *restrict rhsv, int ldr, const double *restrict halo, double *restrict lhsv, int ldl)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(lhsv,16) || ldl % 2 != 0 )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
+@@ -460,15 +486,20 @@ void dspmvm_nt_strided_2_c(int nrows, double alpha, const long *restrict row_ptr
+ // multiply with alpha
+ __m128d alpha_ = _mm_set1_pd(alpha);
+ lhs_ = _mm_mul_pd(alpha_,lhs_);
+-
++
+ // non-temporal store
+ _mm_stream_pd(lhsv+i*ldl, lhs_);
+ }
++#endif
+ }
+
+ void dspmvm_nt_strided_4_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
+ const double *restrict shifts, const double *restrict rhsv, int ldr, const double *restrict halo, double *restrict lhsv, int ldl)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(lhsv,16) || ldl % 2 != 0 )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
+@@ -526,11 +557,16 @@ void dspmvm_nt_strided_4_c(int nrows, double alpha, const long *restrict row_ptr
+ _mm_stream_pd(lhsv+i*ldl+2*k, lhs_[k]);
+ }
+ }
++#endif
+ }
+
+ void dspmvm_nt_strided_8_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
+ const double *restrict shifts, const double *restrict rhsv, int ldr, const double *restrict halo, double *restrict lhsv, int ldl)
+ {
++#ifndef PHIST_HAVE_SSE
++ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
++ exit(1);
++#else
+ if( !is_aligned(lhsv,16) || ldl % 2 != 0 )
+ {
+ printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
+@@ -589,6 +625,7 @@ void dspmvm_nt_strided_8_c(int nrows, double alpha, const long *restrict row_ptr
+ _mm_stream_pd(lhsv+i*ldl+2*k, lhs_[k]);
+ }
+ }
++#endif
+ }
+
+
diff --git a/var/spack/repos/builtin/packages/phist/package.py b/var/spack/repos/builtin/packages/phist/package.py
index fa055e47f2..62b8b131d4 100644
--- a/var/spack/repos/builtin/packages/phist/package.py
+++ b/var/spack/repos/builtin/packages/phist/package.py
@@ -150,6 +150,10 @@ class Phist(CMakePackage):
# ###################### Patches ##########################
+ # Avoid trying to compile some SSE code if SSE is not available
+ # This patch will be part of phist 1.11.3 and greater and only affects
+ # the 'builtin' kernel_lib.
+ patch("avoid-sse.patch", when="@:1.11.2 kernel_lib=builtin")
# Only applies to 1.9.4: While SSE instructions are handled correctly,
# build fails on ppc64le unless -DNO_WARN_X86_INTRINSICS is defined.
patch("ppc64_sse.patch", when="@1.9.4")