summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Brian Van Essen <vanessen1@llnl.gov> 2020-07-31 13:53:51 -0700
committer GitHub <noreply@github.com> 2020-07-31 13:53:51 -0700
commit 46e7fbe120076439f4f0013b95e6fae8291effc5 (patch)
tree ecb0cea1afee4734d9a48f78f75b6a63bff0c84a
parent d89bc1d998a1c9e8107ca71cf1fdd54715235b67 (diff)
download spack-46e7fbe120076439f4f0013b95e6fae8291effc5.tar.gz
spack-46e7fbe120076439f4f0013b95e6fae8291effc5.tar.bz2
spack-46e7fbe120076439f4f0013b95e6fae8291effc5.tar.xz
spack-46e7fbe120076439f4f0013b95e6fae8291effc5.zip
LBANN: add versions, update CUDA support and dependencies (#17813)
* Update LBANN, Hydrogen, Aluminum to inherit CudaPackage
* Update CMake constraints: LBANN, Hydrogen, and Aluminum now require cmake@3.16.0: (better support for pthreads with nvcc)
* Aluminum: add variants for host-enabled MPI and RMA features in an MPI-GPU RDMA-enabled library
* NCCL: add versions 2.7.5-1, 2.7.6-1, and 2.7.8-1
* Hydrogen: add version 1.4.0
* LBANN: add versions 0.99 and 0.100
* Aluminum: add versions 0.4.0 and 0.5.0
-rw-r--r-- var/spack/repos/builtin/packages/aluminum/package.py 27
-rw-r--r-- var/spack/repos/builtin/packages/dihydrogen/package.py 11
-rw-r--r-- var/spack/repos/builtin/packages/hydrogen/package.py 7
-rw-r--r-- var/spack/repos/builtin/packages/lbann/package.py 38
-rw-r--r-- var/spack/repos/builtin/packages/nccl/package.py 3
5 files changed, 58 insertions, 28 deletions
diff --git a/var/spack/repos/builtin/packages/aluminum/package.py b/var/spack/repos/builtin/packages/aluminum/package.py
index c5d6a45ed8..fbcf0f5be8 100644
--- a/var/spack/repos/builtin/packages/aluminum/package.py
+++ b/var/spack/repos/builtin/packages/aluminum/package.py
@@ -7,7 +7,7 @@ import os
from spack import *
-class Aluminum(CMakePackage):
+class Aluminum(CMakePackage, CudaPackage):
"""Aluminum provides a generic interface to high-performance
communication libraries, with a focus on allreduce
algorithms. Blocking and non-blocking algorithms and GPU-aware
@@ -20,6 +20,8 @@ class Aluminum(CMakePackage):
git = "https://github.com/LLNL/Aluminum.git"
version('master', branch='master')
+ version('0.5.0', sha256='dc365a5849eaba925355a8efb27005c5f22bcd1dca94aaed8d0d29c265c064c1')
+ version('0.4.0', sha256='4d6fab5481cc7c994b32fb23a37e9ee44041a9f91acf78f981a97cb8ef57bb7d')
version('0.3.3', sha256='26e7f263f53c6c6ee0fe216e981a558dfdd7ec997d0dd2a24285a609a6c68f3b')
version('0.3.2', sha256='09b6d1bcc02ac54ba269b1123eee7be20f0104b93596956c014b794ba96b037f')
version('0.2.1-1', sha256='066b750e9d1134871709a3e2414b96b166e0e24773efc7d512df2f1d96ee8eef')
@@ -27,12 +29,13 @@ class Aluminum(CMakePackage):
version('0.2', sha256='fc8f06c6d8faab17a2aedd408d3fe924043bf857da1094d5553f35c4d2af893b')
version('0.1', sha256='3880b736866e439dd94e6a61eeeb5bb2abccebbac82b82d52033bc6c94950bdb')
- variant('gpu', default=False, description='Builds with support for GPUs via CUDA and cuDNN')
variant('nccl', default=False, description='Builds with support for NCCL communication lib')
- variant('mpi_cuda', default=False, description='Builds with support for MPI-CUDA enabled library')
+ variant('ht', default=False, description='Builds with support for host-enabled MPI'
+ ' communication of accelerator data')
+ variant('mpi_gpu_rdma', default=False, description='Builds with support for using RMA'
+ ' features in a MPI-GPU RDMA enabled library')
- depends_on('cmake@3.9.0:', type='build')
- depends_on('cuda', when='+gpu')
+ depends_on('cmake@3.16.0:', type='build')
depends_on('mpi')
depends_on('nccl', when='+nccl')
depends_on('hwloc')
@@ -43,10 +46,20 @@ class Aluminum(CMakePackage):
def cmake_args(self):
spec = self.spec
args = [
- '-DALUMINUM_ENABLE_CUDA:BOOL=%s' % ('+gpu' in spec),
- '-DALUMINUM_ENABLE_MPI_CUDA:BOOL=%s' % ('+mpi_cuda' in spec),
+ '-DALUMINUM_ENABLE_CUDA:BOOL=%s' % ('+cuda' in spec),
'-DALUMINUM_ENABLE_NCCL:BOOL=%s' % ('+nccl' in spec)]
+        if spec.satisfies('@0.5:'):  # a bare '@0.5:' string is always true
+            args.extend([
+                '-DALUMINUM_ENABLE_HOST_TRANSFER:BOOL=%s' % ('+ht' in spec),
+                '-DALUMINUM_ENABLE_MPI_CUDA:BOOL=%s' %
+                ('+mpi_gpu_rdma' in spec),
+                '-DALUMINUM_ENABLE_MPI_CUDA_RMA:BOOL=%s' %
+                ('+mpi_gpu_rdma' in spec)])
+        else:  # before 0.5, +ht is passed as ALUMINUM_ENABLE_MPI_CUDA
+            args.extend([
+                '-DALUMINUM_ENABLE_MPI_CUDA:BOOL=%s' % ('+ht' in spec)])
+
# Add support for OS X to find OpenMP (LLVM installed via brew)
if self.spec.satisfies('%clang platform=darwin'):
clang = self.compiler.cc
diff --git a/var/spack/repos/builtin/packages/dihydrogen/package.py b/var/spack/repos/builtin/packages/dihydrogen/package.py
index 6de0cfb516..7f444d8aff 100644
--- a/var/spack/repos/builtin/packages/dihydrogen/package.py
+++ b/var/spack/repos/builtin/packages/dihydrogen/package.py
@@ -14,6 +14,7 @@ class Dihydrogen(CMakePackage, CudaPackage):
needs of the distributed machine learning effort, LBANN."""
homepage = "https://github.com/LLNL/DiHydrogen.git"
+ url = "https://github.com/LLNL/DiHydrogen.git"
git = "https://github.com/LLNL/DiHydrogen.git"
maintainers = ['bvanessen']
@@ -36,22 +37,27 @@ class Dihydrogen(CMakePackage, CudaPackage):
description='Enable ROCm/HIP language features.')
variant('shared', default=True,
description='Enables the build of shared libraries')
+ variant('docs', default=False,
+ description='Builds with support for building documentation')
# Override the default set of CUDA architectures with the relevant
# subset from lib/spack/spack/build_systems/cuda.py
cuda_arch_values = [
'60', '61', '62',
'70', '72', '75',
+ '80'
]
variant('cuda_arch',
description='CUDA architecture',
values=spack.variant.auto_or_any_combination_of(*cuda_arch_values))
+ depends_on('cmake@3.16.0:', type='build')
+
depends_on('mpi')
depends_on('catch2', type='test')
depends_on('aluminum', when='+al ~cuda')
- depends_on('aluminum +gpu +nccl +mpi_cuda', when='+al +cuda')
+ depends_on('aluminum +cuda +nccl +ht +mpi_gpu_rdma', when='+al +cuda')
depends_on('cuda', when=('+cuda' or '+legacy'))
depends_on('cudnn', when=('+cuda' or '+legacy'))
@@ -66,6 +72,9 @@ class Dihydrogen(CMakePackage, CudaPackage):
depends_on('ninja', type='build')
depends_on('cmake@3.14.0:', type='build')
+ depends_on('py-breathe', type='build', when='+docs')
+ depends_on('doxygen', type='build', when='+docs')
+
illegal_cuda_arch_values = [
'10', '11', '12', '13',
'20', '21',
diff --git a/var/spack/repos/builtin/packages/hydrogen/package.py b/var/spack/repos/builtin/packages/hydrogen/package.py
index 9d12235d72..41d5541648 100644
--- a/var/spack/repos/builtin/packages/hydrogen/package.py
+++ b/var/spack/repos/builtin/packages/hydrogen/package.py
@@ -7,7 +7,7 @@ import os
from spack import *
-class Hydrogen(CMakePackage):
+class Hydrogen(CMakePackage, CudaPackage):
"""Hydrogen: Distributed-memory dense and sparse-direct linear algebra
and optimization library. Based on the Elemental library."""
@@ -18,6 +18,7 @@ class Hydrogen(CMakePackage):
maintainers = ['bvanessen']
version('develop', branch='hydrogen')
+ version('1.4.0', sha256='c13374ff4a6c4d1076e47ba8c8d91a7082588b9958d1ed89cffb12f1d2e1452e')
version('1.3.4', sha256='7979f6656f698f0bbad6798b39d4b569835b3013ff548d98089fce7c283c6741')
version('1.3.3', sha256='a51a1cfd40ac74d10923dfce35c2c04a3082477683f6b35e7b558ea9f4bb6d51')
version('1.3.2', sha256='50bc5e87955f8130003d04dfd9dcad63107e92b82704f8107baf95b0ccf98ed6')
@@ -62,6 +63,8 @@ class Hydrogen(CMakePackage):
variant('half', default=True,
description='Builds with support for FP16 precision data types')
+ depends_on('cmake@3.16.0:', type='build')
+
# Note that #1712 forces us to enumerate the different blas variants
depends_on('openblas', when='blas=openblas ~openmp_blas ~int64_blas')
depends_on('openblas +ilp64', when='blas=openblas ~openmp_blas +int64_blas')
@@ -83,7 +86,7 @@ class Hydrogen(CMakePackage):
depends_on('netlib-lapack +external-blas', when='blas=essl')
depends_on('aluminum', when='+al ~cuda')
- depends_on('aluminum +gpu +nccl', when='+al +cuda')
+ depends_on('aluminum +cuda +nccl +ht +mpi_gpu_rdma', when='+al +cuda')
# Note that this forces us to use OpenBLAS until #1712 is fixed
depends_on('lapack', when='blas=openblas ~openmp_blas')
diff --git a/var/spack/repos/builtin/packages/lbann/package.py b/var/spack/repos/builtin/packages/lbann/package.py
index bc0da109ec..8eacb2fb71 100644
--- a/var/spack/repos/builtin/packages/lbann/package.py
+++ b/var/spack/repos/builtin/packages/lbann/package.py
@@ -8,7 +8,7 @@ import sys
from spack import *
-class Lbann(CMakePackage):
+class Lbann(CMakePackage, CudaPackage):
"""LBANN: Livermore Big Artificial Neural Network Toolkit. A distributed
memory, HPC-optimized, model and data parallel training toolkit for deep
neural networks."""
@@ -20,7 +20,8 @@ class Lbann(CMakePackage):
maintainers = ['bvanessen']
version('develop', branch='develop')
- version('0.99', branch='develop')
+ version('0.100', sha256='d1bab4fb6f1b80ae83a7286cc536a32830890f6e5b0c3107a17c2600d0796912')
+ version('0.99', sha256='3358d44f1bc894321ce07d733afdf6cb7de39c33e3852d73c9f31f530175b7cd')
version('0.98.1', sha256='9a2da8f41cd8bf17d1845edf9de6d60f781204ebd37bffba96d8872036c10c66')
version('0.98', sha256='8d64b9ac0f1d60db553efa4e657f5ea87e790afe65336117267e9c7ae6f68239')
version('0.97.1', sha256='2f2756126ac8bb993202cf532d72c4d4044e877f4d52de9fdf70d0babd500ce4')
@@ -32,7 +33,6 @@ class Lbann(CMakePackage):
version('0.92', sha256='9187c5bcbc562c2828fe619d53884ab80afb1bcd627a817edb935b80affe7b84')
version('0.91', sha256='b69f470829f434f266119a33695592f74802cff4b76b37022db00ab32de322f5')
- variant('gpu', default=False, description='Builds with support for GPUs via CUDA and cuDNN')
variant('nccl', default=False, description='Builds with support for NCCL communication lib')
variant('opencv', default=True, description='Builds with support for image processing routines with OpenCV')
variant('seq_init', default=False, description='Force serial initialization of weight matrices.')
@@ -52,6 +52,8 @@ class Lbann(CMakePackage):
conflicts('@:0.90,0.99:', when='~conduit')
+ depends_on('cmake@3.16.0:', type='build')
+
# It seems that there is a need for one statement per version bounds
depends_on('hydrogen +openmp_blas +shared +int64', when='@:0.90,0.95: ~al')
depends_on('hydrogen +openmp_blas +shared +int64 +al', when='@:0.90,0.95: +al')
@@ -77,12 +79,11 @@ class Lbann(CMakePackage):
when='build_type=Debug @0.91:0.94')
depends_on('aluminum', when='@:0.90,0.95: +al ~gpu')
- depends_on('aluminum +gpu +mpi_cuda', when='@:0.90,0.95: +al +gpu ~nccl')
- depends_on('aluminum +gpu +nccl +mpi_cuda', when='@:0.90,0.95: +al +gpu +nccl')
+ depends_on('aluminum +cuda +ht', when='@:0.90,0.95: +al +cuda ~nccl')
+ depends_on('aluminum +cuda +nccl +ht', when='@:0.90,0.95: +al +cuda +nccl')
- depends_on('cuda', when='+gpu')
- depends_on('cudnn', when='+gpu')
- depends_on('cub', when='@0.94:0.98.2 +gpu')
+ depends_on('cudnn', when='+cuda')
+ depends_on('cub', when='@0.94:0.98.2 +cuda')
depends_on('mpi')
depends_on('hwloc')
@@ -98,7 +99,7 @@ class Lbann(CMakePackage):
'~videostab ~videoio ~vtk', when='+opencv')
depends_on('cnpy')
- depends_on('nccl', when='@0.94:0.98.2 +gpu +nccl')
+ depends_on('nccl', when='@0.94:0.98.2 +cuda +nccl')
depends_on('conduit@0.4.0: +hdf5', when='@0.94:0.99 +conduit')
depends_on('conduit@0.4.0: +hdf5', when='@:0.90,0.99:')
@@ -118,6 +119,7 @@ class Lbann(CMakePackage):
depends_on('py-protobuf+cpp@3.6.1:', type=('build', 'run'), when='@:0.90,0.99:')
depends_on('py-breathe', type='build', when='+docs')
+ depends_on('doxygen', type='build', when='+docs')
depends_on('py-m2r', type='build', when='+docs')
depends_on('cereal')
@@ -135,7 +137,6 @@ class Lbann(CMakePackage):
cppflags.append('-DLBANN_SET_EL_RNG -ldl')
return [
- '-DCMAKE_INSTALL_MESSAGE=LAZY',
'-DCMAKE_CXX_FLAGS=%s' % ' '.join(cppflags),
'-DLBANN_VERSION=spack',
'-DCNPY_DIR={0}'.format(spec['cnpy'].prefix),
@@ -148,12 +149,12 @@ class Lbann(CMakePackage):
spec = self.spec
args = self.common_config_args
args.extend([
- '-DLBANN_WITH_TOPO_AWARE:BOOL=%s' % ('+gpu +nccl' in spec),
+ '-DLBANN_WITH_TOPO_AWARE:BOOL=%s' % ('+cuda +nccl' in spec),
'-DLBANN_WITH_ALUMINUM:BOOL=%s' % ('+al' in spec),
'-DLBANN_WITH_CONDUIT:BOOL=%s' % ('+conduit' in spec),
- '-DLBANN_WITH_CUDA:BOOL=%s' % ('+gpu' in spec),
- '-DLBANN_WITH_CUDNN:BOOL=%s' % ('+gpu' in spec),
- '-DLBANN_WITH_SOFTMAX_CUDA:BOOL=%s' % ('+gpu' in spec),
+ '-DLBANN_WITH_CUDA:BOOL=%s' % ('+cuda' in spec),
+ '-DLBANN_WITH_CUDNN:BOOL=%s' % ('+cuda' in spec),
+ '-DLBANN_WITH_SOFTMAX_CUDA:BOOL=%s' % ('+cuda' in spec),
'-DLBANN_SEQUENTIAL_INITIALIZATION:BOOL=%s' %
('+seq_init' in spec),
'-DLBANN_WITH_TBINF=OFF',
@@ -174,7 +175,8 @@ class Lbann(CMakePackage):
spec['elemental'].prefix)])
if spec.satisfies('@0.94:0.98.2'):
- args.extend(['-DLBANN_WITH_NCCL:BOOL=%s' % ('+gpu +nccl' in spec)])
+ args.extend(['-DLBANN_WITH_NCCL:BOOL=%s' %
+ ('+cuda +nccl' in spec)])
if '+vtune' in spec:
args.extend(['-DVTUNE_DIR={0}'.format(spec['vtune'].prefix)])
@@ -203,7 +205,7 @@ class Lbann(CMakePackage):
args.extend(['-DOpenCV_DIR:STRING={0}'.format(
spec['opencv'].prefix)])
- if '+gpu' in spec:
+ if '+cuda' in spec:
args.extend([
'-DCUDA_TOOLKIT_ROOT_DIR={0}'.format(
spec['cuda'].prefix)])
@@ -225,8 +227,8 @@ class Lbann(CMakePackage):
spec = self.spec
args = self.common_config_args
args.extend([
- '-DWITH_CUDA:BOOL=%s' % ('+gpu' in spec),
- '-DWITH_CUDNN:BOOL=%s' % ('+gpu' in spec),
+ '-DWITH_CUDA:BOOL=%s' % ('+cuda' in spec),
+ '-DWITH_CUDNN:BOOL=%s' % ('+cuda' in spec),
'-DELEMENTAL_USE_CUBLAS:BOOL=%s' % (
'+cublas' in spec['elemental']),
'-DWITH_TBINF=OFF',
diff --git a/var/spack/repos/builtin/packages/nccl/package.py b/var/spack/repos/builtin/packages/nccl/package.py
index d1f4de962d..f805f55b1c 100644
--- a/var/spack/repos/builtin/packages/nccl/package.py
+++ b/var/spack/repos/builtin/packages/nccl/package.py
@@ -14,6 +14,9 @@ class Nccl(MakefilePackage, CudaPackage):
maintainers = ['adamjstewart']
+ version('2.7.8-1', sha256='fa2bec307270f30fcf6280a85f24ea8801e0ce3b3027937c7325260a890b07e0')
+ version('2.7.6-1', sha256='60dd9b1743c2db6c05f60959edf98a4477f218115ef910d7ec2662f2fb5cf626')
+ version('2.7.5-1', sha256='26a8dec6fa0a776eb71205d618d58e26d372621719788a23b33db6fdce4426bf')
version('2.7.3-1', sha256='dc7b8794373306e323363314c3327796e416f745e8003490fc1407a22dd7acd6')
version('2.6.4-1', sha256='ed8c9dfd40e013003923ae006787b1a30d3cb363b47d2e4307eaa2624ebba2ba')
version('2.5.7-1', sha256='781a6bb2278566be4abbdf22b2fa19afc7306cff4b312c82bd782979b368014e')