summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cédric Chevalier <cedric.chevalier@cea.fr> 2024-10-29 13:28:12 +0100
committer GitHub <noreply@github.com> 2024-10-29 13:28:12 +0100
commit 360dbe41f7b7a380651db3735ea6e0a7cf1b1920 (patch)
tree d030706eec9e513f6eed52a26e0d8f711ab478b6
parent ea1aa0714b83f3ea75045dccf160f46f8ef14356 (diff)
download spack-360dbe41f7b7a380651db3735ea6e0a7cf1b1920.tar.gz
spack-360dbe41f7b7a380651db3735ea6e0a7cf1b1920.tar.bz2
spack-360dbe41f7b7a380651db3735ea6e0a7cf1b1920.tar.xz
spack-360dbe41f7b7a380651db3735ea6e0a7cf1b1920.zip
kokkos: async malloc (#46464)
-rw-r--r-- var/spack/repos/builtin/packages/kokkos/package.py 13
1 files changed, 7 insertions, 6 deletions
diff --git a/var/spack/repos/builtin/packages/kokkos/package.py b/var/spack/repos/builtin/packages/kokkos/package.py
index ca9791fab1..01ee5235e9 100644
--- a/var/spack/repos/builtin/packages/kokkos/package.py
+++ b/var/spack/repos/builtin/packages/kokkos/package.py
@@ -227,6 +227,10 @@ class Kokkos(CMakePackage, CudaPackage, ROCmPackage):
conflicts("+cuda", when="cxxstd=17 ^cuda@:10")
conflicts("+cuda", when="cxxstd=20 ^cuda@:11")
+ # Expose a way to disable CudaMallocAsync that can cause problems
+ # with some MPI such as cray-mpich
+ variant("alloc_async", default=False, description="Use CudaMallocAsync", when="@4.2: +cuda")
+
# SYCL and OpenMPTarget require C++17 or higher
for cxxstdver in cxxstds[: cxxstds.index("17")]:
conflicts(
@@ -371,12 +375,9 @@ class Kokkos(CMakePackage, CudaPackage, ROCmPackage):
if self.spec.satisfies("%oneapi") or self.spec.satisfies("%intel"):
options.append(self.define("CMAKE_CXX_FLAGS", "-fp-model=precise"))
- # Kokkos 4.2.00+ changed the default to Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=on
- # which breaks GPU-aware with Cray-MPICH
- # See https://github.com/kokkos/kokkos/pull/6402
- # TODO: disable this once Cray-MPICH is fixed
- if self.spec.satisfies("@4.2.00:") and self.spec.satisfies("^[virtuals=mpi] cray-mpich"):
- options.append(self.define("Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC", False))
+ options.append(
+ self.define_from_variant("Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC", "alloc_async")
+ )
# Remove duplicate options
return lang.dedupe(options)