-rw-r--r--  var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py  147
1 file changed, 133 insertions, 14 deletions
diff --git a/var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py b/var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py
index d1591b578c..a0ca87cd8b 100644
--- a/var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py
+++ b/var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py
@@ -13,11 +13,12 @@ class PyTorchNvidiaApex(PythonPackage, CudaPackage):
homepage = "https://github.com/nvidia/apex/"
git = "https://github.com/nvidia/apex/"
+ url = "https://github.com/NVIDIA/apex/archive/refs/tags/24.04.01.tar.gz"
license("BSD-3-Clause")
version("master", branch="master")
- version("24.04.01", tag="24.04.01")
+ version("24.04.01", sha256="065bc5c0146ee579d5db2b38ca3949da4dc799b871961a2c9eb19e18892166ce")
version("23.08", tag="23.08")
version("23.07", tag="23.07")
version("23.06", tag="23.06")
@@ -25,40 +26,158 @@ class PyTorchNvidiaApex(PythonPackage, CudaPackage):
version("22.03", tag="22.03")
version("2020-10-19", commit="8a1ed9e8d35dfad26fb973996319965e4224dcdd")
- depends_on("cxx", type="build") # generated
-
- depends_on("python@3:", type=("build", "run"))
- depends_on("py-setuptools", type="build")
- depends_on("py-packaging", type="build")
- depends_on("py-torch@0.4:", type=("build", "run"))
- depends_on("cuda@9:", when="+cuda")
- depends_on("py-pybind11", type=("build", "link", "run"))
+ depends_on("cxx", type="build")
variant("cuda", default=True, description="Build with CUDA")
+    # Optional extension modules, based on the table in the README on GitHub
+ variant(
+ "permutation_search_cuda", default=False, description="Build permutation search module"
+ )
+ variant("bnp", default=False, description="Build batch norm module")
+ variant("xentropy", default=False, description="Build cross entropy module")
+ variant("focal_loss_cuda", default=False, description="Build focal loss module")
+ variant("fused_index_mul_2d", default=False, description="Build fused_index_mul_2d module")
+ variant("fast_layer_norm", default=False, description="Build fast layer norm module")
+ variant("fmhalib", default=False, description="Build fmha module")
+ variant(
+ "fast_multihead_attn", default=False, description="Build fast multihead attention module"
+ )
+ variant("transducer", default=False, description="Build transducer module")
+ variant("cudnn_gbn_lib", default=False, description="Build cudnn gbn module")
+ variant("peer_memory_cuda", default=False, description="Build peer memory module")
+ variant("nccl_p2p_cuda", default=False, description="Build with nccl p2p")
+ variant("fast_bottleneck", default=False, description="Build fast_bottleneck module")
+    variant("fused_conv_bias_relu", default=False, description="Build fused_conv_bias_relu module")
+
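+    # Constraints between the optional extensions and the dependency versions they need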
+ requires(
+ "+peer_memory_cuda+nccl_p2p_cuda",
+ when="+fast_bottleneck",
+ msg="+fast_bottleneck requires both +peer_memory_cuda and +nccl_p2p_cuda to be enabled.",
+ )
+ requires("^cudnn@8.5:", when="+cudnn_gbn_lib")
+ requires("^cudnn@8.4:", when="+fused_conv_bias_relu")
+ requires("^nccl@2.10:", when="+nccl_p2p_cuda")
+
+    with default_args(type="build"):
+ depends_on("py-setuptools")
+ depends_on("py-packaging")
+ depends_on("py-pip")
+ with default_args(type=("build", "run")):
+ depends_on("python@3:")
+ depends_on("py-torch@0.4:")
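+    # Build py-torch with CUDA support for the same architectures requested here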
+ for _arch in CudaPackage.cuda_arch_values:
+ depends_on(f"py-torch+cuda cuda_arch={_arch}", when=f"+cuda cuda_arch={_arch}")
+
+ depends_on("py-pybind11", type=("build", "link", "run"))
+ depends_on("cuda@9:", when="+cuda")
+
# https://github.com/NVIDIA/apex/issues/1498
# https://github.com/NVIDIA/apex/pull/1499
patch("1499.patch", when="@2020-10-19")
+ conflicts(
+ "cuda_arch=none",
+ when="+cuda",
+ msg="Must specify CUDA compute capabilities of your GPU, see "
+ "https://developer.nvidia.com/cuda-gpus",
+ )
+
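+    # Spack stores CUDA architectures as e.g. "80"; TORCH_CUDA_ARCH_LIST expects
+    # the dotted form, e.g. "8.0;9.0"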
+ def torch_cuda_arch_list(self, env):
+ if self.spec.satisfies("+cuda"):
+ torch_cuda_arch = ";".join(
+ "{0:.1f}".format(float(i) / 10.0) for i in self.spec.variants["cuda_arch"].value
+ )
+ env.set("TORCH_CUDA_ARCH_LIST", torch_cuda_arch)
+
def setup_build_environment(self, env):
- if "+cuda" in self.spec:
+ if self.spec.satisfies("+cuda"):
env.set("CUDA_HOME", self.spec["cuda"].prefix)
+ self.torch_cuda_arch_list(env)
else:
env.unset("CUDA_HOME")
- @when("^python@:3.10")
+ def setup_run_environment(self, env):
+ self.torch_cuda_arch_list(env)
+
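+    # apex's setup.py selects which extensions to build from these flags. With pip
+    # up to 23.0 they are passed as global options; pip 23.1+ is handled by
+    # config_settings() below.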
+ @when("^py-pip@:23.0")
def global_options(self, spec, prefix):
args = []
if spec.satisfies("^py-torch@1.0:"):
args.append("--cpp_ext")
- if "+cuda" in spec:
+ if spec.satisfies("+cuda"):
args.append("--cuda_ext")
+
+ if spec.satisfies("+permutation_search_cuda"):
+ args.append("--permutation_search")
+ if spec.satisfies("+bnp"):
+ args.append("--bnp")
+ if spec.satisfies("+xentropy"):
+ args.append("--xentropy")
+ if spec.satisfies("+focal_loss_cuda"):
+ args.append("--focal_loss")
+ if spec.satisfies("+fused_index_mul_2d"):
+ args.append("--index_mul_2d")
+ if spec.satisfies("+fast_layer_norm"):
+ args.append("--fast_layer_norm")
+ if spec.satisfies("+fmhalib"):
+ args.append("--fmha")
+ if spec.satisfies("+fast_multihead_attn"):
+ args.append("--fast_multihead_attn")
+ if spec.satisfies("+transducer"):
+ args.append("--transducer")
+ if spec.satisfies("+cudnn_gbn_lib"):
+ args.append("--cudnn_gbn")
+ if spec.satisfies("+peer_memory_cuda"):
+ args.append("--peer_memory")
+ if spec.satisfies("+nccl_p2p_cuda"):
+ args.append("--nccl_p2p")
+ if spec.satisfies("+fast_bottleneck"):
+ args.append("--fast_bottleneck")
+ if spec.satisfies("+fused_conv_bias_relu"):
+ args.append("--fused_conv_bias_relu")
+
return args
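+    # pip 23.1+ builds go through PEP 517 config settings; the same apex flags
+    # are forwarded under the "--global-option" key.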
- @when("^python@3.11:")
+ @when("^py-pip@23.1:")
def config_settings(self, spec, prefix):
+ global_options = ""
+ if spec.satisfies("^py-torch@1.0:"):
+ global_options += "--cpp_ext"
+ if spec.satisfies("+cuda"):
+ global_options += " --cuda_ext"
+
+ if spec.satisfies("+permutation_search_cuda"):
+ global_options += " --permutation_search"
+ if spec.satisfies("+bnp"):
+ global_options += " --bnp"
+ if spec.satisfies("+xentropy"):
+ global_options += " --xentropy"
+ if spec.satisfies("+focal_loss_cuda"):
+ global_options += " --focal_loss"
+ if spec.satisfies("+fused_index_mul_2d"):
+ global_options += " --index_mul_2d"
+ if spec.satisfies("+fast_layer_norm"):
+ global_options += " --fast_layer_norm"
+ if spec.satisfies("+fmhalib"):
+ global_options += " --fmha"
+ if spec.satisfies("+fast_multihead_attn"):
+ global_options += " --fast_multihead_attn"
+ if spec.satisfies("+transducer"):
+ global_options += " --transducer"
+ if spec.satisfies("+cudnn_gbn_lib"):
+ global_options += " --cudnn_gbn"
+ if spec.satisfies("+peer_memory_cuda"):
+ global_options += " --peer_memory"
+ if spec.satisfies("+nccl_p2p_cuda"):
+ global_options += " --nccl_p2p"
+ if spec.satisfies("+fast_bottleneck"):
+ global_options += " --fast_bottleneck"
+ if spec.satisfies("+fused_conv_bias_relu"):
+ global_options += " --fused_conv_bias_relu"
+
return {
"builddir": "build",
"compile-args": f"-j{make_jobs}",
- "--global-option": "--cpp_ext --cuda_ext",
+ "--global-option": global_options,
}