-rw-r--r--  var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py  147
1 file changed, 133 insertions, 14 deletions
diff --git a/var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py b/var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py
index d1591b578c..a0ca87cd8b 100644
--- a/var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py
+++ b/var/spack/repos/builtin/packages/py-torch-nvidia-apex/package.py
@@ -13,11 +13,12 @@ class PyTorchNvidiaApex(PythonPackage, CudaPackage):
homepage = "https://github.com/nvidia/apex/"
git = "https://github.com/nvidia/apex/"
+ url = "https://github.com/NVIDIA/apex/archive/refs/tags/24.04.01.tar.gz"
license("BSD-3-Clause")
version("master", branch="master")
- version("24.04.01", tag="24.04.01")
+ version("24.04.01", sha256="065bc5c0146ee579d5db2b38ca3949da4dc799b871961a2c9eb19e18892166ce")
version("23.08", tag="23.08")
version("23.07", tag="23.07")
version("23.06", tag="23.06")
@@ -25,40 +26,158 @@ class PyTorchNvidiaApex(PythonPackage, CudaPackage):
version("22.03", tag="22.03")
version("2020-10-19", commit="8a1ed9e8d35dfad26fb973996319965e4224dcdd")
- depends_on("cxx", type="build") # generated
-
- depends_on("python@3:", type=("build", "run"))
- depends_on("py-setuptools", type="build")
- depends_on("py-packaging", type="build")
- depends_on("py-torch@0.4:", type=("build", "run"))
- depends_on("cuda@9:", when="+cuda")
- depends_on("py-pybind11", type=("build", "link", "run"))
+ depends_on("cxx", type="build")
variant("cuda", default=True, description="Build with CUDA")
+    # Optional extension modules, based on the table in the README on GitHub
+ variant(
+ "permutation_search_cuda", default=False, description="Build permutation search module"
+ )
+ variant("bnp", default=False, description="Build batch norm module")
+ variant("xentropy", default=False, description="Build cross entropy module")
+ variant("focal_loss_cuda", default=False, description="Build focal loss module")
+ variant("fused_index_mul_2d", default=False, description="Build fused_index_mul_2d module")
+ variant("fast_layer_norm", default=False, description="Build fast layer norm module")
+ variant("fmhalib", default=False, description="Build fmha module")
+ variant(
+ "fast_multihead_attn", default=False, description="Build fast multihead attention module"
+ )
+ variant("transducer", default=False, description="Build transducer module")
+ variant("cudnn_gbn_lib", default=False, description="Build cudnn gbn module")
+ variant("peer_memory_cuda", default=False, description="Build peer memory module")
+ variant("nccl_p2p_cuda", default=False, description="Build with nccl p2p")
+ variant("fast_bottleneck", default=False, description="Build fast_bottleneck module")
+    variant("fused_conv_bias_relu", default=False, description="Build fused_conv_bias_relu module")
+
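+    # Constraints between the optional extensions and the dependency versions they need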
+ requires(
+ "+peer_memory_cuda+nccl_p2p_cuda",
+ when="+fast_bottleneck",
+ msg="+fast_bottleneck requires both +peer_memory_cuda and +nccl_p2p_cuda to be enabled.",
+ )
+ requires("^cudnn@8.5:", when="+cudnn_gbn_lib")
+ requires("^cudnn@8.4:", when="+fused_conv_bias_relu")
+ requires("^nccl@2.10:", when="+nccl_p2p_cuda")
+
+    with default_args(type="build"):
+ depends_on("py-setuptools")
+ depends_on("py-packaging")
+ depends_on("py-pip")
+ with default_args(type=("build", "run")):
+ depends_on("python@3:")
+ depends_on("py-torch@0.4:")
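+    # Build py-torch with CUDA support for the same architectures requested here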
+ for _arch in CudaPackage.cuda_arch_values:
+ depends_on(f"py-torch+cuda cuda_arch={_arch}", when=f"+cuda cuda_arch={_arch}")
+
+ depends_on("py-pybind11", type=("build", "link", "run"))
+ depends_on("cuda@9:", when="+cuda")
+
# https://github.com/NVIDIA/apex/issues/1498
# https://github.com/NVIDIA/apex/pull/1499
patch("1499.patch", when="@2020-10-19")
+ conflicts(
+ "cuda_arch=none",
+ when="+cuda",
+ msg="Must specify CUDA compute capabilities of your GPU, see "
+ "https://developer.nvidia.com/cuda-gpus",
+ )
+
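+    # Spack stores CUDA architectures as e.g. "80"; TORCH_CUDA_ARCH_LIST expects
+    # the dotted form, e.g. "8.0;9.0"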
+ def torch_cuda_arch_list(self, env):
+ if self.spec.satisfies("+cuda"):
+ torch_cuda_arch = ";".join(
+ "{0:.1f}".format(float(i) / 10.0) for i in self.spec.variants["cuda_arch"].value
+ )
+ env.set("TORCH_CUDA_ARCH_LIST", torch_cuda_arch)
+
def setup_build_environment(self, env):
- if "+cuda" in self.spec:
+ if self.spec.satisfies("+cuda"):
env.set("CUDA_HOME", self.spec["cuda"].prefix)
+ self.torch_cuda_arch_list(env)
else:
env.unset("CUDA_HOME")
- @when("^python@:3.10")
+ def setup_run_environment(self, env):
+ self.torch_cuda_arch_list(env)
+
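+    # apex's setup.py selects which extensions to build from these flags. With pip
+    # up to 23.0 they are passed as global options; pip 23.1+ is handled by
+    # config_settings() below.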
+ @when("^py-pip@:23.0")
def global_options(self, spec, prefix):
args = []
if spec.satisfies("^py-torch@1.0:"):
args.append("--cpp_ext")
- if "+cuda" in spec:
+ if spec.satisfies("+cuda"):
args.append("--cuda_ext")
+
+ if spec.satisfies("+permutation_search_cuda"):
+ args.append("--permutation_search")
+ if spec.satisfies("+bnp"):
+ args.append("--bnp")
+ if spec.satisfies("+xentropy"):
+ args.append("--xentropy")
+ if spec.satisfies("+focal_loss_cuda"):
+ args.append("--focal_loss")
+ if spec.satisfies("+fused_index_mul_2d"):
+ args.append("--index_mul_2d")
+ if spec.satisfies("+fast_layer_norm"):
+ args.append("--fast_layer_norm")
+ if spec.satisfies("+fmhalib"):
+ args.append("--fmha")
+ if spec.satisfies("+fast_multihead_attn"):
+ args.append("--fast_multihead_attn")
+ if spec.satisfies("+transducer"):
+ args.append("--transducer")
+ if spec.satisfies("+cudnn_gbn_lib"):
+ args.append("--cudnn_gbn")
+ if spec.satisfies("+peer_memory_cuda"):
+ args.append("--peer_memory")
+ if spec.satisfies("+nccl_p2p_cuda"):
+ args.append("--nccl_p2p")
+ if spec.satisfies("+fast_bottleneck"):
+ args.append("--fast_bottleneck")
+ if spec.satisfies("+fused_conv_bias_relu"):
+ args.append("--fused_conv_bias_relu")
+
return args
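+    # pip 23.1+ builds go through PEP 517 config settings; the same apex flags
+    # are forwarded under the "--global-option" key.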
- @when("^python@3.11:")
+ @when("^py-pip@23.1:")
def config_settings(self, spec, prefix):
+ global_options = ""
+ if spec.satisfies("^py-torch@1.0:"):
+ global_options += "--cpp_ext"
+ if spec.satisfies("+cuda"):
+ global_options += " --cuda_ext"
+
+ if spec.satisfies("+permutation_search_cuda"):
+ global_options += " --permutation_search"
+ if spec.satisfies("+bnp"):
+ global_options += " --bnp"
+ if spec.satisfies("+xentropy"):
+ global_options += " --xentropy"
+ if spec.satisfies("+focal_loss_cuda"):
+ global_options += " --focal_loss"
+ if spec.satisfies("+fused_index_mul_2d"):
+ global_options += " --index_mul_2d"
+ if spec.satisfies("+fast_layer_norm"):
+ global_options += " --fast_layer_norm"
+ if spec.satisfies("+fmhalib"):
+ global_options += " --fmha"
+ if spec.satisfies("+fast_multihead_attn"):
+ global_options += " --fast_multihead_attn"
+ if spec.satisfies("+transducer"):
+ global_options += " --transducer"
+ if spec.satisfies("+cudnn_gbn_lib"):
+ global_options += " --cudnn_gbn"
+ if spec.satisfies("+peer_memory_cuda"):
+ global_options += " --peer_memory"
+ if spec.satisfies("+nccl_p2p_cuda"):
+ global_options += " --nccl_p2p"
+ if spec.satisfies("+fast_bottleneck"):
+ global_options += " --fast_bottleneck"
+ if spec.satisfies("+fused_conv_bias_relu"):
+ global_options += " --fused_conv_bias_relu"
+
return {
"builddir": "build",
"compile-args": f"-j{make_jobs}",
- "--global-option": "--cpp_ext --cuda_ext",
+ "--global-option": global_options,
}