summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Van Essen <vanessen1@llnl.gov>2022-10-27 12:19:56 -0700
committerGitHub <noreply@github.com>2022-10-27 21:19:56 +0200
commit6408b51def038b9479b0e0cba2d079b103af56a6 (patch)
tree72dfdad71d9d0b455bab163836af470cf083a0c5
parent4be67facdc41fdac6d71de2367ba39659cb1ccba (diff)
downloadspack-6408b51def038b9479b0e0cba2d079b103af56a6.tar.gz
spack-6408b51def038b9479b0e0cba2d079b103af56a6.tar.bz2
spack-6408b51def038b9479b0e0cba2d079b103af56a6.tar.xz
spack-6408b51def038b9479b0e0cba2d079b103af56a6.zip
Support ROCm backing in DiHydrogen (#33563)
* Added support for building the DiHydrogen package, and the LBANN extensions to DiHydrogen, with ROCm libraries. Fixed a bug on Cray systems where CMake did not try hard enough to find an MPI-compatible compiler wrapper; on Cray platforms the build now passes -DMPI_ASSUME_NO_BUILTIN_MPI=ON so that CMake keeps searching. Added support for the roctracer package when using ROCm libraries.
* Fixed how ROCm support is defined for pre-v0.3 versions.
-rw-r--r--var/spack/repos/builtin/packages/dihydrogen/package.py26
-rw-r--r--var/spack/repos/builtin/packages/lbann/package.py12
2 files changed, 30 insertions, 8 deletions
diff --git a/var/spack/repos/builtin/packages/dihydrogen/package.py b/var/spack/repos/builtin/packages/dihydrogen/package.py
index 40ffd50d2d..082ea55559 100644
--- a/var/spack/repos/builtin/packages/dihydrogen/package.py
+++ b/var/spack/repos/builtin/packages/dihydrogen/package.py
@@ -83,9 +83,9 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
for val in ROCmPackage.amdgpu_targets:
depends_on("aluminum amdgpu_target=%s" % val, when="amdgpu_target=%s" % val)
- for when in ["+cuda", "+distconv"]:
- depends_on("cuda", when=when)
- depends_on("cudnn", when=when)
+ depends_on("roctracer-dev", when="+rocm +distconv")
+
+ depends_on("cudnn", when="+cuda +distconv")
depends_on("cub", when="^cuda@:10")
# Note that #1712 forces us to enumerate the different blas variants
@@ -108,8 +108,8 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
depends_on("cray-libsci", when="blas=libsci")
depends_on("cray-libsci +openmp", when="blas=libsci +openmp_blas")
- # Distconv builds require cuda
- conflicts("~cuda", when="+distconv")
+ # Distconv builds require cuda or rocm
+ conflicts("+distconv", when="~cuda ~rocm")
conflicts("+distconv", when="+half")
conflicts("+rocm", when="+half")
@@ -120,6 +120,8 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
depends_on("ninja", type="build")
depends_on("cmake@3.17.0:", type="build")
+ depends_on("spdlog", when="@:0.1,0.2:")
+
depends_on("llvm-openmp", when="%apple-clang +openmp")
# TODO: Debug linker errors when NVSHMEM is built with UCX
@@ -155,10 +157,14 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
"-DH2_ENABLE_DISTCONV_LEGACY=%s" % ("+distconv" in spec),
"-DH2_ENABLE_OPENMP=%s" % ("+openmp" in spec),
"-DH2_ENABLE_FP16=%s" % ("+half" in spec),
- "-DH2_ENABLE_HIP_ROCM=%s" % ("+rocm" in spec),
"-DH2_DEVELOPER_BUILD=%s" % ("+developer" in spec),
]
+ if spec.version < Version("0.3"):
+ args.append("-DH2_ENABLE_HIP_ROCM=%s" % ("+rocm" in spec))
+ else:
+ args.append("-DH2_ENABLE_ROCM=%s" % ("+rocm" in spec))
+
if not spec.satisfies("^cmake@3.23.0"):
# There is a bug with using Ninja generator in this version
# of CMake
@@ -181,7 +187,7 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
if spec.satisfies("%cce") and spec.satisfies("^cuda+allow-unsupported-compilers"):
args.append("-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler")
- if "+cuda" in spec or "+distconv" in spec:
+ if "+cuda" in spec:
args.append("-DcuDNN_DIR={0}".format(spec["cudnn"].prefix))
if spec.satisfies("^cuda@:10"):
@@ -209,6 +215,12 @@ class Dihydrogen(CMakePackage, CudaPackage, ROCmPackage):
"-DHIP_CXX_COMPILER={0}".format(self.spec["hip"].hipcc),
]
)
+ if "platform=cray" in spec:
+ args.extend(
+ [
+ "-DMPI_ASSUME_NO_BUILTIN_MPI=ON",
+ ]
+ )
archs = self.spec.variants["amdgpu_target"].value
if archs != "none":
arch_str = ",".join(archs)
diff --git a/var/spack/repos/builtin/packages/lbann/package.py b/var/spack/repos/builtin/packages/lbann/package.py
index 69039783ff..bed40b3f5e 100644
--- a/var/spack/repos/builtin/packages/lbann/package.py
+++ b/var/spack/repos/builtin/packages/lbann/package.py
@@ -167,7 +167,8 @@ class Lbann(CMakePackage, CudaPackage, ROCmPackage):
depends_on("dihydrogen +cuda", when="+dihydrogen +cuda")
depends_on("dihydrogen ~al", when="+dihydrogen ~al")
depends_on("dihydrogen +al", when="+dihydrogen +al")
- depends_on("dihydrogen +distconv +cuda", when="+distconv")
+ depends_on("dihydrogen +distconv +cuda", when="+distconv +cuda")
+ depends_on("dihydrogen +distconv +rocm", when="+distconv +rocm")
depends_on("dihydrogen ~half", when="+dihydrogen ~half")
depends_on("dihydrogen +half", when="+dihydrogen +half")
depends_on("dihydrogen ~nvshmem", when="+dihydrogen ~nvshmem")
@@ -191,6 +192,8 @@ class Lbann(CMakePackage, CudaPackage, ROCmPackage):
depends_on("aluminum amdgpu_target=%s" % val, when="+al amdgpu_target=%s" % val)
depends_on("dihydrogen amdgpu_target=%s" % val, when="+dihydrogen amdgpu_target=%s" % val)
+ depends_on("roctracer-dev", when="+rocm +distconv")
+
depends_on("cudnn", when="@0.90:0.100 +cuda")
depends_on("cudnn@8.0.2:", when="@:0.90,0.101: +cuda")
depends_on("cub", when="@0.94:0.98.2 +cuda ^cuda@:10")
@@ -334,6 +337,7 @@ class Lbann(CMakePackage, CudaPackage, ROCmPackage):
"-DLBANN_WITH_ONNX:BOOL=%s" % ("+onnx" in spec),
"-DLBANN_WITH_EMBEDDED_PYTHON:BOOL=%s" % ("+python" in spec),
"-DLBANN_WITH_PYTHON_FRONTEND:BOOL=%s" % ("+pfe" in spec),
+ "-DLBANN_WITH_ROCTRACER:BOOL=%s" % ("+rocm +distconv" in spec),
"-DLBANN_WITH_TBINF=OFF",
"-DLBANN_WITH_UNIT_TESTING:BOOL=%s" % ("+unit_tests" in spec),
"-DLBANN_WITH_VISION:BOOL=%s" % ("+vision" in spec),
@@ -424,6 +428,12 @@ class Lbann(CMakePackage, CudaPackage, ROCmPackage):
"-DHIP_CXX_COMPILER={0}".format(self.spec["hip"].hipcc),
]
)
+ if "platform=cray" in spec:
+ args.extend(
+ [
+ "-DMPI_ASSUME_NO_BUILTIN_MPI=ON",
+ ]
+ )
archs = self.spec.variants["amdgpu_target"].value
if archs != "none":
arch_str = ",".join(archs)