diff options
author | Cédric Chevalier <cedric.chevalier@cea.fr> | 2024-10-29 13:28:12 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-29 13:28:12 +0100 |
commit | 360dbe41f7b7a380651db3735ea6e0a7cf1b1920 (patch) | |
tree | d030706eec9e513f6eed52a26e0d8f711ab478b6 | |
parent | ea1aa0714b83f3ea75045dccf160f46f8ef14356 (diff) | |
download | spack-360dbe41f7b7a380651db3735ea6e0a7cf1b1920.tar.gz spack-360dbe41f7b7a380651db3735ea6e0a7cf1b1920.tar.bz2 spack-360dbe41f7b7a380651db3735ea6e0a7cf1b1920.tar.xz spack-360dbe41f7b7a380651db3735ea6e0a7cf1b1920.zip |
kokkos: async malloc (#46464)
-rw-r--r-- | var/spack/repos/builtin/packages/kokkos/package.py | 13 |
1 file changed, 7 insertions, 6 deletions
diff --git a/var/spack/repos/builtin/packages/kokkos/package.py b/var/spack/repos/builtin/packages/kokkos/package.py index ca9791fab1..01ee5235e9 100644 --- a/var/spack/repos/builtin/packages/kokkos/package.py +++ b/var/spack/repos/builtin/packages/kokkos/package.py @@ -227,6 +227,10 @@ class Kokkos(CMakePackage, CudaPackage, ROCmPackage): conflicts("+cuda", when="cxxstd=17 ^cuda@:10") conflicts("+cuda", when="cxxstd=20 ^cuda@:11") + # Expose a way to disable CudaMallocAsync that can cause problems + # with some MPI such as cray-mpich + variant("alloc_async", default=False, description="Use CudaMallocAsync", when="@4.2: +cuda") + # SYCL and OpenMPTarget require C++17 or higher for cxxstdver in cxxstds[: cxxstds.index("17")]: conflicts( @@ -371,12 +375,9 @@ class Kokkos(CMakePackage, CudaPackage, ROCmPackage): if self.spec.satisfies("%oneapi") or self.spec.satisfies("%intel"): options.append(self.define("CMAKE_CXX_FLAGS", "-fp-model=precise")) - # Kokkos 4.2.00+ changed the default to Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=on - # which breaks GPU-aware with Cray-MPICH - # See https://github.com/kokkos/kokkos/pull/6402 - # TODO: disable this once Cray-MPICH is fixed - if self.spec.satisfies("@4.2.00:") and self.spec.satisfies("^[virtuals=mpi] cray-mpich"): - options.append(self.define("Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC", False)) + options.append( + self.define_from_variant("Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC", "alloc_async") + ) # Remove duplicate options return lang.dedupe(options) |