author     Adam J. Stewart <ajstewart426@gmail.com>  2024-09-11 17:43:20 +0200
committer  GitHub <noreply@github.com>               2024-09-11 17:43:20 +0200
commit     5fa8890bd3f239464fc8375f934c692cec599a70 (patch)
tree       9af4dc3f66655d4d9e97d625108f8a2f71cf921f
parent     122c3c2dbb4269869b12b3b6175d731cd8301b6e (diff)
CUDA: support Grace Hopper 9.0a compute capability (#45540)
* CUDA: support Grace Hopper 9.0a compute capability
* Fix other packages
* Add type annotations
* Support ancient Python versions
* isort
* spec -> self.spec

Co-authored-by: Andrew W Elble <aweits@rit.edu>

* [@spackbot] updating style on behalf of adamjstewart

---------

Co-authored-by: Andrew W Elble <aweits@rit.edu>
Co-authored-by: adamjstewart <adamjstewart@users.noreply.github.com>
-rw-r--r--  lib/spack/spack/build_systems/cuda.py                      25
-rw-r--r--  var/spack/repos/builtin/packages/amr-wind/package.py        4
-rw-r--r--  var/spack/repos/builtin/packages/babelstream/package.py    21
-rw-r--r--  var/spack/repos/builtin/packages/paraview/package.py        8
-rw-r--r--  var/spack/repos/builtin/packages/py-jaxlib/package.py       6
-rw-r--r--  var/spack/repos/builtin/packages/py-tensorflow/package.py   6
-rw-r--r--  var/spack/repos/builtin/packages/py-torch/package.py        6
7 files changed, 49 insertions(+), 27 deletions(-)
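The core of the change: cuda_arch values such as "90a" are no longer purely numeric, so every call site that converted an arch to a compute capability via float() or int() had to be updated. A minimal standalone sketch of the failure mode this commit fixes (illustrative Python, not code from the diff):

# The old inline conversion handles numeric arches...
"{0:.1f}".format(float("90") / 10.0)  # -> '9.0'
# ...but raises as soon as a suffix is present:
float("90a")  # ValueError: could not convert string to float: '90a'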
diff --git a/lib/spack/spack/build_systems/cuda.py b/lib/spack/spack/build_systems/cuda.py
index 20f7ede139..9320a137a5 100644
--- a/lib/spack/spack/build_systems/cuda.py
+++ b/lib/spack/spack/build_systems/cuda.py
@@ -3,6 +3,9 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+import re
+from typing import Iterable, List
+
import spack.variant
from spack.directives import conflicts, depends_on, variant
from spack.multimethod import when
@@ -44,6 +47,7 @@ class CudaPackage(PackageBase):
"87",
"89",
"90",
+ "90a",
)
# FIXME: keep cuda and cuda_arch separate to make usage easier until
@@ -70,6 +74,27 @@ class CudaPackage(PackageBase):
for s in arch_list
]
+ @staticmethod
+ def compute_capabilities(arch_list: Iterable[str]) -> List[str]:
+ """Adds a decimal place to each CUDA arch.
+
+ >>> compute_capabilities(['90', '90a'])
+ ['9.0', '9.0a']
+
+ Args:
+ arch_list: A list of integer strings, optionally followed by a suffix.
+
+ Returns:
+ A list of float strings, optionally followed by a suffix
+ """
+ pattern = re.compile(r"(\d+)")
+ capabilities = []
+ for arch in arch_list:
+ _, number, letter = re.split(pattern, arch)
+ number = "{0:.1f}".format(float(number) / 10.0)
+ capabilities.append(number + letter)
+ return capabilities
+
depends_on("cuda", when="+cuda")
# CUDA version vs Architecture
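For reference, a hedged usage sketch of the new static method (assuming the import path from this diff; expected output shown as comments):

from spack.build_systems.cuda import CudaPackage

CudaPackage.compute_capabilities(["86", "90", "90a"])  # -> ['8.6', '9.0', '9.0a']
CudaPackage.compute_capabilities(["35"])  # -> ['3.5']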
diff --git a/var/spack/repos/builtin/packages/amr-wind/package.py b/var/spack/repos/builtin/packages/amr-wind/package.py
index b3875753bc..ba5c15da37 100644
--- a/var/spack/repos/builtin/packages/amr-wind/package.py
+++ b/var/spack/repos/builtin/packages/amr-wind/package.py
@@ -156,9 +156,7 @@ class AmrWind(CMakePackage, CudaPackage, ROCmPackage):
args.append(define("HDF5_IS_PARALLEL", spec.satisfies("+mpi")))
if spec.satisfies("+cuda"):
- amrex_arch = [
- "{0:.1f}".format(float(i) / 10.0) for i in spec.variants["cuda_arch"].value
- ]
+ amrex_arch = CudaPackage.compute_capabilities(spec.variants["cuda_arch"].value)
if amrex_arch:
args.append(define("AMReX_CUDA_ARCH", amrex_arch))
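The amr-wind edit is a one-for-one swap of the inline comprehension for the shared helper. A sketch of what AMReX_CUDA_ARCH now receives for a hypothetical spec with a suffixed arch (assuming spack's define helper serializes a Python list as a ;-separated CMake list):

amrex_arch = CudaPackage.compute_capabilities(["80", "90a"])  # ['8.0', '9.0a']
args.append(define("AMReX_CUDA_ARCH", amrex_arch))  # -DAMReX_CUDA_ARCH=8.0;9.0a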
diff --git a/var/spack/repos/builtin/packages/babelstream/package.py b/var/spack/repos/builtin/packages/babelstream/package.py
index b1f518de1f..ec85b2d356 100644
--- a/var/spack/repos/builtin/packages/babelstream/package.py
+++ b/var/spack/repos/builtin/packages/babelstream/package.py
@@ -343,7 +343,6 @@ class Babelstream(CMakePackage, CudaPackage, ROCmPackage):
args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
args.append("-DTARGET=NVIDIA")
cuda_arch_list = self.spec.variants["cuda_arch"].value
- int_cuda_arch = int(cuda_arch_list[0])
cuda_arch = "sm_" + cuda_arch_list[0]
args.append("-DCUDA_ARCH=" + cuda_arch)
@@ -393,20 +392,20 @@ class Babelstream(CMakePackage, CudaPackage, ROCmPackage):
if "cuda" in self.spec.variants["backend"].value:
args.append("-DKokkos_ENABLE_CUDA=ON")
cuda_arch_list = self.spec.variants["cuda_arch"].value
- int_cuda_arch = int(cuda_arch_list[0])
+ cuda_arch = cuda_arch_list[0]
# architecture kepler optimisations
- if int_cuda_arch in (30, 32, 35, 37):
- args.append("-D" + "Kokkos_ARCH_KEPLER" + str(int_cuda_arch) + "=ON")
+ if cuda_arch in ("30", "32", "35", "37"):
+ args.append("-D" + "Kokkos_ARCH_KEPLER" + cuda_arch + "=ON")
# architecture maxwell optimisations
- if int_cuda_arch in (50, 52, 53):
- args.append("-D" + "Kokkos_ARCH_MAXWELL" + str(int_cuda_arch) + "=ON")
+ if cuda_arch in ("50", "52", "53"):
+ args.append("-D" + "Kokkos_ARCH_MAXWELL" + cuda_arch + "=ON")
# architecture pascal optimisations
- if int_cuda_arch in (60, 61):
- args.append("-D" + "Kokkos_ARCH_PASCAL" + str(int_cuda_arch) + "=ON")
+ if cuda_arch in ("60", "61"):
+ args.append("-D" + "Kokkos_ARCH_PASCAL" + cuda_arch + "=ON")
# architecture volta optimisations
- if int_cuda_arch in (70, 72):
- args.append("-D" + "Kokkos_ARCH_VOLTA" + str(int_cuda_arch) + "=ON")
- if int_cuda_arch == 75:
+ if cuda_arch in ("70", "72"):
+ args.append("-D" + "Kokkos_ARCH_VOLTA" + cuda_arch + "=ON")
+ if cuda_arch == "75":
args.append("-DKokkos_ARCH_TURING75=ON")
if "omp" in self.spec.variants["backend"].value:
args.append("-DKokkos_ENABLE_OPENMP=ON")
diff --git a/var/spack/repos/builtin/packages/paraview/package.py b/var/spack/repos/builtin/packages/paraview/package.py
index 2c7bc311de..c0980ae42c 100644
--- a/var/spack/repos/builtin/packages/paraview/package.py
+++ b/var/spack/repos/builtin/packages/paraview/package.py
@@ -5,6 +5,7 @@
import itertools
import os
+import re
import sys
from subprocess import Popen
@@ -182,8 +183,11 @@ class Paraview(CMakePackage, CudaPackage, ROCmPackage):
# Starting from cmake@3.18, CUDA architecture management can be delegated to CMake.
# Hence, it is possible to rely on it instead of relying on custom logic updates from VTK-m for
# newer architectures (wrt mapping).
- for _arch in [arch for arch in CudaPackage.cuda_arch_values if int(arch) > 86]:
- conflicts("cmake@:3.17", when=f"cuda_arch={_arch}")
+ pattern = re.compile(r"\d+")
+ for _arch in CudaPackage.cuda_arch_values:
+ _number = re.match(pattern, _arch).group()
+ if int(_number) > 86:
+ conflicts("cmake@:3.17", when=f"cuda_arch={_arch}")
# We only support one single Architecture
for _arch, _other_arch in itertools.permutations(CudaPackage.cuda_arch_values, 2):
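Paraview only needs the numeric prefix of each arch for the version comparison, so "90a" still triggers the cmake@:3.17 conflict via its "90" prefix. A small standalone check of that extraction (illustrative only):

import re

for arch in ("86", "89", "90", "90a"):
    number = re.match(r"\d+", arch).group()
    print(arch, int(number) > 86)  # 86 False, 89 True, 90 True, 90a True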
diff --git a/var/spack/repos/builtin/packages/py-jaxlib/package.py b/var/spack/repos/builtin/packages/py-jaxlib/package.py
index ff67dd4604..951aa4d9d3 100644
--- a/var/spack/repos/builtin/packages/py-jaxlib/package.py
+++ b/var/spack/repos/builtin/packages/py-jaxlib/package.py
@@ -149,10 +149,8 @@ build --local_cpu_resources={make_jobs}
args.append("--enable_cuda")
args.append("--cuda_path={0}".format(self.spec["cuda"].prefix))
args.append("--cudnn_path={0}".format(self.spec["cudnn"].prefix))
- capabilities = ",".join(
- "{0:.1f}".format(float(i) / 10.0) for i in spec.variants["cuda_arch"].value
- )
- args.append("--cuda_compute_capabilities={0}".format(capabilities))
+ capabilities = CudaPackage.compute_capabilities(spec.variants["cuda_arch"].value)
+ args.append("--cuda_compute_capabilities={0}".format(",".join(capabilities)))
args.append(
"--bazel_startup_options="
"--output_user_root={0}".format(self.wrapped_package_object.buildtmp)
diff --git a/var/spack/repos/builtin/packages/py-tensorflow/package.py b/var/spack/repos/builtin/packages/py-tensorflow/package.py
index 5e7b2986b4..9935be8b29 100644
--- a/var/spack/repos/builtin/packages/py-tensorflow/package.py
+++ b/var/spack/repos/builtin/packages/py-tensorflow/package.py
@@ -630,10 +630,8 @@ class PyTensorflow(Package, CudaPackage, ROCmPackage, PythonExtension):
# Please note that each additional compute capability significantly
# increases your build time and binary size, and that TensorFlow
# only supports compute capabilities >= 3.5
- capabilities = ",".join(
- "{0:.1f}".format(float(i) / 10.0) for i in spec.variants["cuda_arch"].value
- )
- env.set("TF_CUDA_COMPUTE_CAPABILITIES", capabilities)
+ capabilities = CudaPackage.compute_capabilities(spec.variants["cuda_arch"].value)
+ env.set("TF_CUDA_COMPUTE_CAPABILITIES", ",".join(capabilities))
else:
env.set("TF_NEED_CUDA", "0")
diff --git a/var/spack/repos/builtin/packages/py-torch/package.py b/var/spack/repos/builtin/packages/py-torch/package.py
index 2e1631fa02..ec2f8e3bb3 100644
--- a/var/spack/repos/builtin/packages/py-torch/package.py
+++ b/var/spack/repos/builtin/packages/py-torch/package.py
@@ -481,10 +481,10 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage):
def torch_cuda_arch_list(self, env):
if "+cuda" in self.spec:
- torch_cuda_arch = ";".join(
- "{0:.1f}".format(float(i) / 10.0) for i in self.spec.variants["cuda_arch"].value
+ torch_cuda_arch = CudaPackage.compute_capabilities(
+ self.spec.variants["cuda_arch"].value
)
- env.set("TORCH_CUDA_ARCH_LIST", torch_cuda_arch)
+ env.set("TORCH_CUDA_ARCH_LIST", ";".join(torch_cuda_arch))
def setup_build_environment(self, env):
"""Set environment variables used to control the build.