From 58e531508928a931925cc8166fe49fed63bc674f Mon Sep 17 00:00:00 2001
From: "Adam J. Stewart"
Date: Tue, 31 Oct 2023 16:25:24 -0500
Subject: PyTorch: build with external gloo (#40759)

* PyTorch: build with external gloo
* Fix gloo compilation with GCC 11
* undeprecate
* py-torch+cuda+gloo requires gloo+cuda
---
 var/spack/repos/builtin/packages/gloo/package.py   |  5 ++++-
 .../repos/builtin/packages/py-horovod/package.py   |  2 +-
 .../repos/builtin/packages/py-torch/package.py     | 26 +++++++++++++---------
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/var/spack/repos/builtin/packages/gloo/package.py b/var/spack/repos/builtin/packages/gloo/package.py
index 4ca7d55f43..ec4503900e 100644
--- a/var/spack/repos/builtin/packages/gloo/package.py
+++ b/var/spack/repos/builtin/packages/gloo/package.py
@@ -13,7 +13,10 @@ class Gloo(CMakePackage, CudaPackage):
     git = "https://github.com/facebookincubator/gloo.git"
 
     version("master", branch="master")
-    version("2021-05-21", commit="c22a5cfba94edf8ea4f53a174d38aa0c629d070f")  # py-torch@1.10:
+    version("2023-05-19", commit="597accfd79f5b0f9d57b228dec088ca996686475")  # py-torch@2.1:
+    version("2023-01-17", commit="10909297fedab0a680799211a299203e53515032")  # py-torch@2.0
+    version("2022-05-18", commit="5b143513263133af2b95547e97c07cebeb72bf72")  # py-torch@1.13
+    version("2021-05-21", commit="c22a5cfba94edf8ea4f53a174d38aa0c629d070f")  # py-torch@1.10:1.12
     version("2021-05-04", commit="6f7095f6e9860ce4fd682a7894042e6eba0996f1")  # py-torch@1.9
     version("2020-09-18", commit="3dc0328fe6a9d47bd47c0c6ca145a0d8a21845c6")  # py-torch@1.7:1.8
     version("2020-03-17", commit="113bde13035594cafdca247be953610b53026553")  # py-torch@1.5:1.6
diff --git a/var/spack/repos/builtin/packages/py-horovod/package.py b/var/spack/repos/builtin/packages/py-horovod/package.py
index 0e0bc5fd7f..5e221c0296 100644
--- a/var/spack/repos/builtin/packages/py-horovod/package.py
+++ b/var/spack/repos/builtin/packages/py-horovod/package.py
@@ -225,7 +225,7 @@ class PyHorovod(PythonPackage, CudaPackage):
     conflicts(
         "controllers=gloo", when="@:0.20.0 platform=darwin", msg="Gloo cannot be compiled on MacOS"
     )
-    # FIXME
+    # https://github.com/horovod/horovod/issues/3996
     conflicts("^py-torch@2.1:")
 
     # https://github.com/horovod/horovod/pull/1835
diff --git a/var/spack/repos/builtin/packages/py-torch/package.py b/var/spack/repos/builtin/packages/py-torch/package.py
index 96cae5404b..21a68b069f 100644
--- a/var/spack/repos/builtin/packages/py-torch/package.py
+++ b/var/spack/repos/builtin/packages/py-torch/package.py
@@ -246,14 +246,14 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage):
     # depends_on("xnnpack@2021-02-22", when="@1.8:1.9+xnnpack")
     # depends_on("xnnpack@2020-03-23", when="@1.6:1.7+xnnpack")
     depends_on("mpi", when="+mpi")
-    # https://github.com/pytorch/pytorch/issues/60270
-    # depends_on("gloo@2023-05-19", when="@2.1:+gloo")
-    # depends_on("gloo@2023-01-17", when="@2.0+gloo")
-    # depends_on("gloo@2022-05-18", when="@1.13:1+gloo")
-    # depends_on("gloo@2021-05-21", when="@1.10:1.12+gloo")
-    # depends_on("gloo@2021-05-04", when="@1.9+gloo")
-    # depends_on("gloo@2020-09-18", when="@1.7:1.8+gloo")
-    # depends_on("gloo@2020-03-17", when="@1.6+gloo")
+    depends_on("gloo@2023-05-19", when="@2.1:+gloo")
+    depends_on("gloo@2023-01-17", when="@2.0+gloo")
+    depends_on("gloo@2022-05-18", when="@1.13:1+gloo")
+    depends_on("gloo@2021-05-21", when="@1.10:1.12+gloo")
+    depends_on("gloo@2021-05-04", when="@1.9+gloo")
+    depends_on("gloo@2020-09-18", when="@1.7:1.8+gloo")
+    depends_on("gloo@2020-03-17", when="@1.6+gloo")
+    depends_on("gloo+cuda", when="@1.6:+gloo+cuda")
     # https://github.com/pytorch/pytorch/issues/60331
     # depends_on("onnx@1.14.1", when="@2.1:+onnx_ml")
     # depends_on("onnx@1.13.1", when="@2.0+onnx_ml")
@@ -270,6 +270,13 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage):
     depends_on("py-six", type="test")
     depends_on("py-psutil", type="test")
 
+    # https://github.com/pytorch/pytorch/issues/90448
+    patch(
+        "https://github.com/pytorch/pytorch/pull/97270.patch?full_index=1",
+        sha256="beb3fb57746cf8443f5caa6e08b2f8f4d4822c1e11e0c912134bd166c6a0ade7",
+        when="@1.10:2.0",
+    )
+
     # Fix BLAS being overridden by MKL
     # https://github.com/pytorch/pytorch/issues/60328
     patch(
@@ -628,8 +635,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage):
         # env.set("USE_SYSTEM_LIBS", "ON")
         # https://github.com/pytorch/pytorch/issues/60329
         # env.set("USE_SYSTEM_CPUINFO", "ON")
-        # https://github.com/pytorch/pytorch/issues/60270
-        # env.set("USE_SYSTEM_GLOO", "ON")
+        env.set("USE_SYSTEM_GLOO", "ON")
         env.set("USE_SYSTEM_FP16", "ON")
         env.set("USE_SYSTEM_PTHREADPOOL", "ON")
         env.set("USE_SYSTEM_PSIMD", "ON")
-- 
cgit v1.2.3-60-g2f50