diff options
author | Brian Van Essen <vanessen1@llnl.gov> | 2020-10-30 12:51:10 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-30 14:51:10 -0500 |
commit | 7dea225fcefd33266438e1c9d403d6139a45215c (patch) | |
tree | 8760305cebf96a4e0cb94126dd8c11f6a0021a4f | |
parent | a04e7686f80f9e14388e79570de436a62cead284 (diff) | |
download | spack-7dea225fcefd33266438e1c9d403d6139a45215c.tar.gz spack-7dea225fcefd33266438e1c9d403d6139a45215c.tar.bz2 spack-7dea225fcefd33266438e1c9d403d6139a45215c.tar.xz spack-7dea225fcefd33266438e1c9d403d6139a45215c.zip |
Update lbann version and simplify installation (#19579)
* Added hash values for LBANN v0.101 and Hydrogen v1.5.0. Updated the
LBANN package to be more successful in resolving a legal configuration
of MPI and HWLOC packages. This required the removal of the MPI
virtual package since it is unable to resolve dependencies with
minimum version requirements. As a result, enabling a reasonable
install line for LBANN requires explicit forwarding of MPI
variants to Hydrogen and Aluminum. Due to the lack of variant
forwarding, there are many explicitly replicated dependencies for both
LBANN and Hydrogen. Fixed the error in LBANN where gpu variant was
replaced by the cuda variant, but not all dependencies were fixed.
* Fixed the minimum cuDNN version for newer versions of LBANN.
* Added explicit versioning of the MPI libraries for DiHydrogen to avoid
all of the conflicts with minimum required versions of the OpenMPI library.
* Removed explicit MPI versions and went back to using the MPI virtual
dependency. Updated construction of variant forwarding to use
iterative construction of constraints and variants. This exacerbates
the challenges with backtracking in the current concretizer, but
should be fixed in the new concretizer.
* Added support for including the DiHydrogen library in LBANN as well as
support for the distributed convolution (DistConv) parallel
algorithms. Also include support for building with half precision.
* Moving dependencies around
* Added conflict statement to ensure that the variant dihydrogen is
required for distconv.
* Removed the preferred field
* Fixed Flake8 and cuDNN version bounds
4 files changed, 77 insertions, 40 deletions
diff --git a/var/spack/repos/builtin/packages/aluminum/package.py b/var/spack/repos/builtin/packages/aluminum/package.py index ee060b8368..f23b186e73 100644 --- a/var/spack/repos/builtin/packages/aluminum/package.py +++ b/var/spack/repos/builtin/packages/aluminum/package.py @@ -38,7 +38,7 @@ class Aluminum(CMakePackage, CudaPackage): depends_on('cmake@3.16.0:', type='build') depends_on('mpi') depends_on('nccl', when='+nccl') - depends_on('hwloc') + depends_on('hwloc@2.0:') generator = 'Ninja' depends_on('ninja', type='build') diff --git a/var/spack/repos/builtin/packages/dihydrogen/package.py b/var/spack/repos/builtin/packages/dihydrogen/package.py index 9cf751bea4..03e01cbd51 100644 --- a/var/spack/repos/builtin/packages/dihydrogen/package.py +++ b/var/spack/repos/builtin/packages/dihydrogen/package.py @@ -67,7 +67,11 @@ class Dihydrogen(CMakePackage, CudaPackage): depends_on('mpi') depends_on('catch2', type='test') - depends_on('aluminum', when='+al ~cuda') + # Specify the correct version of Aluminum + depends_on('aluminum@0.4:0.4.99', when='@0.1:0.1.99 +al') + depends_on('aluminum@0.5:', when='@:0.0,0.2: +al') + + # Add Aluminum variants depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda') depends_on('cuda', when=('+cuda' or '+legacy')) diff --git a/var/spack/repos/builtin/packages/hydrogen/package.py b/var/spack/repos/builtin/packages/hydrogen/package.py index 2618574803..6fc5323ed5 100644 --- a/var/spack/repos/builtin/packages/hydrogen/package.py +++ b/var/spack/repos/builtin/packages/hydrogen/package.py @@ -18,6 +18,7 @@ class Hydrogen(CMakePackage, CudaPackage): maintainers = ['bvanessen'] version('develop', branch='hydrogen') + version('1.5.0', sha256='03dd487fb23b9fdbc715554a8ea48c3196a1021502e61b0172ef3fdfbee75180') version('1.4.0', sha256='c13374ff4a6c4d1076e47ba8c8d91a7082588b9958d1ed89cffb12f1d2e1452e') version('1.3.4', sha256='7979f6656f698f0bbad6798b39d4b569835b3013ff548d98089fce7c283c6741') version('1.3.3', 
sha256='a51a1cfd40ac74d10923dfce35c2c04a3082477683f6b35e7b558ea9f4bb6d51') @@ -62,8 +63,9 @@ class Hydrogen(CMakePackage, CudaPackage): description='Use OpenMP taskloops instead of parallel for loops.') variant('half', default=True, description='Builds with support for FP16 precision data types') - depends_on('cmake@3.16.0:', type='build') + depends_on('mpi') + depends_on('hwloc@2.0:') # Note that #1712 forces us to enumerate the different blas variants depends_on('openblas', when='blas=openblas ~openmp_blas ~int64_blas') @@ -85,14 +87,17 @@ class Hydrogen(CMakePackage, CudaPackage): depends_on('essl threads=openmp +ilp64', when='blas=essl +openmp_blas +int64_blas') depends_on('netlib-lapack +external-blas', when='blas=essl') - depends_on('aluminum', when='+al ~cuda') + # Specify the correct version of Aluminum + depends_on('aluminum@:0.3.99', when='@:1.3.99 +al') + depends_on('aluminum@0.4:0.4.99', when='@1.4:1.4.99 +al') + depends_on('aluminum@0.5:', when='@:1.0,1.5.0: +al') + + # Add Aluminum variants depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda') # Note that this forces us to use OpenBLAS until #1712 is fixed depends_on('lapack', when='blas=openblas ~openmp_blas') - depends_on('mpi') - depends_on('scalapack', when='+scalapack') depends_on('gmp', when='+mpfr') depends_on('mpc', when='+mpfr') diff --git a/var/spack/repos/builtin/packages/lbann/package.py b/var/spack/repos/builtin/packages/lbann/package.py index 8eacb2fb71..2e377efbbd 100644 --- a/var/spack/repos/builtin/packages/lbann/package.py +++ b/var/spack/repos/builtin/packages/lbann/package.py @@ -20,6 +20,7 @@ class Lbann(CMakePackage, CudaPackage): maintainers = ['bvanessen'] version('develop', branch='develop') + version('0.101', sha256='69d3fe000a88a448dc4f7e263bcb342c34a177bd9744153654528cd86335a1f7') version('0.100', sha256='d1bab4fb6f1b80ae83a7286cc536a32830890f6e5b0c3107a17c2600d0796912') version('0.99', sha256='3358d44f1bc894321ce07d733afdf6cb7de39c33e3852d73c9f31f530175b7cd') 
version('0.98.1', sha256='9a2da8f41cd8bf17d1845edf9de6d60f781204ebd37bffba96d8872036c10c66') @@ -33,7 +34,6 @@ class Lbann(CMakePackage, CudaPackage): version('0.92', sha256='9187c5bcbc562c2828fe619d53884ab80afb1bcd627a817edb935b80affe7b84') version('0.91', sha256='b69f470829f434f266119a33695592f74802cff4b76b37022db00ab32de322f5') - variant('nccl', default=False, description='Builds with support for NCCL communication lib') variant('opencv', default=True, description='Builds with support for image processing routines with OpenCV') variant('seq_init', default=False, description='Force serial initialization of weight matrices.') variant('dtype', default='float', @@ -46,6 +46,14 @@ class Lbann(CMakePackage, CudaPackage): variant('conduit', default=True, description='Builds with support for Conduit Library ' '(note that for v0.99 conduit is required)') + variant('half', default=False, + description='Builds with support for FP16 precision data types') + variant('dihydrogen', default=False, + description='Builds with support for DiHydrogen Tensor Library') + variant('distconv', default=False, + description='Builds with support for spatial, filter, or channel ' + 'distributed convolutions') + variant('vtune', default=False, description='Builds with support for Intel VTune') variant('docs', default=False, description='Builds with support for building documentation') variant('extras', default=False, description='Add python modules for LBANN related tools') @@ -54,57 +62,68 @@ class Lbann(CMakePackage, CudaPackage): depends_on('cmake@3.16.0:', type='build') - # It seems that there is a need for one statement per version bounds - depends_on('hydrogen +openmp_blas +shared +int64', when='@:0.90,0.95: ~al') - depends_on('hydrogen +openmp_blas +shared +int64 +al', when='@:0.90,0.95: +al') - - depends_on('hydrogen +openmp_blas +shared +int64 build_type=Debug', - when='build_type=Debug @:0.90,0.95: ~al') - depends_on('hydrogen +openmp_blas +shared +int64 build_type=Debug +al', - 
when='build_type=Debug @:0.90,0.95: +al') - - depends_on('hydrogen +openmp_blas +shared +int64 +cuda', - when='+gpu @:0.90,0.95: ~al') - depends_on('hydrogen +openmp_blas +shared +int64 +cuda +al', - when='+gpu @:0.90,0.95: +al') + # Specify the correct versions of Hydrogen + depends_on('hydrogen@:1.3.4', when='@0.95:0.100') + depends_on('hydrogen@1.4.0:1.4.99', when='@0.101:0.101.99') + depends_on('hydrogen@1.5.0:', when='@:0.90,0.102:') - depends_on('hydrogen +openmp_blas +shared +int64 +cuda build_type=Debug', - when='build_type=Debug @:0.90,0.95: +gpu') - depends_on('hydrogen +openmp_blas +shared +int64 +cuda build_type=Debug +al', - when='build_type=Debug @:0.90,0.95: +gpu +al') + # Add Hydrogen variants + depends_on('hydrogen +openmp_blas +shared +int64') + depends_on('hydrogen +al', when='+al') + depends_on('hydrogen +cuda', when='+cuda') + depends_on('hydrogen +half', when='+half') + depends_on('hydrogen build_type=Debug', when='build_type=Debug') # Older versions depended on Elemental not Hydrogen depends_on('elemental +openmp_blas +shared +int64', when='@0.91:0.94') depends_on('elemental +openmp_blas +shared +int64 build_type=Debug', when='build_type=Debug @0.91:0.94') - depends_on('aluminum', when='@:0.90,0.95: +al ~gpu') - depends_on('aluminum +cuda +ht', when='@:0.90,0.95: +al +cuda ~nccl') - depends_on('aluminum +cuda +nccl +ht', when='@:0.90,0.95: +al +cuda +nccl') - - depends_on('cudnn', when='+cuda') + # Specify the correct version of Aluminum + depends_on('aluminum@:0.3.99', when='@0.95:0.100 +al') + depends_on('aluminum@0.4:0.4.99', when='@0.101:0.101.99 +al') + depends_on('aluminum@0.5:', when='@:0.90,0.102: +al') + + # Add Aluminum variants + depends_on('aluminum +cuda +nccl +ht +cuda_rma', when='+al +cuda') + + depends_on('dihydrogen +openmp', when='+dihydrogen') + depends_on('dihydrogen +cuda', when='+dihydrogen +cuda') + depends_on('dihydrogen +al', when='+dihydrogen +al') + depends_on('dihydrogen +legacy +cuda', when='+distconv') + 
depends_on('dihydrogen +half', when='+dihydrogen +half') + depends_on('dihydrogen@0.1', when='@0.101:0.101.99 +dihydrogen') + depends_on('dihydrogen@:0.0,0.2:', when='@:0.90,0.102: +dihydrogen') + conflicts('~dihydrogen', when='+distconv') + + depends_on('cudnn', when='@0.90:0.100.99 +cuda') + depends_on('cudnn@8.0.2:', when='@:0.90,0.101: +cuda') depends_on('cub', when='@0.94:0.98.2 +cuda') depends_on('mpi') - depends_on('hwloc') + depends_on('hwloc@2.0:') + + depends_on('half', when='+half') # LBANN wraps OpenCV calls in OpenMP parallel loops, build without OpenMP # Additionally disable video related options, they incorrectly link in a # bad OpenMP library when building with clang or Intel compilers - # Note that for Power systems we want the environment to add +powerpc +vsx - depends_on('opencv@3.2.0: +core +highgui +imgproc +jpeg +png +tiff +zlib ' - '+fast-math ~calib3d ~cuda ~dnn ~eigen' + depends_on('opencv@4.1.0: build_type=RelWithDebInfo +core +highgui +imgproc +jpeg ' + '+png +tiff +zlib +fast-math ~calib3d ~cuda ~dnn ~eigen' '~features2d ~flann ~gtk ~ipp ~ipp_iw ~jasper ~java ~lapack ~ml' '~openmp ~opencl ~opencl_svm ~openclamdblas ~openclamdfft' - '~pthreads_pf ~python ~qt ~stitching ~superres ~ts ~video' - '~videostab ~videoio ~vtk', when='+opencv') + '~pthreads_pf ~python ~qt +shared ~stitching ~superres ~ts' + '~video ~videostab ~videoio ~vtk', when='+opencv') + + # Note that for Power systems we want the environment to add +powerpc +vsx + depends_on('opencv@4.1.0: +powerpc +vsx', when='+opencv arch=ppc64le:') depends_on('cnpy') - depends_on('nccl', when='@0.94:0.98.2 +cuda +nccl') + depends_on('nccl', when='@0.94:0.98.2 +cuda') - depends_on('conduit@0.4.0: +hdf5', when='@0.94:0.99 +conduit') - depends_on('conduit@0.4.0: +hdf5', when='@:0.90,0.99:') + depends_on('conduit@0.4.0: +hdf5~hdf5_compat', when='@0.94:0.99 +conduit') + depends_on('conduit@0.4.0: +hdf5~hdf5_compat', when='@:0.90,0.99:') - depends_on('python@3: +shared', type=('build', 'run'), 
when='@:0.90,0.99:') + depends_on('python@3:3.7.9 +shared', type=('build', 'run'), when='@:0.90,0.99:') extends("python") depends_on('py-setuptools', type='build') depends_on('py-argparse', type='run', when='@:0.90,0.99: ^python@:2.6') @@ -116,7 +135,8 @@ class Lbann(CMakePackage, CudaPackage): depends_on('py-pandas@0.24.1:', type='run', when='@:0.90,0.99: +extras') depends_on('py-texttable@1.4.0:', type='run', when='@:0.90,0.99: +extras') depends_on('py-pytest', type='test', when='@:0.90,0.99:') - depends_on('py-protobuf+cpp@3.6.1:', type=('build', 'run'), when='@:0.90,0.99:') + depends_on('py-protobuf+cpp@3.10.0', type=('build', 'run'), when='@:0.90,0.99:') + depends_on('protobuf+shared@3.10.0', when='@:0.90,0.99:') depends_on('py-breathe', type='build', when='+docs') depends_on('doxygen', type='build', when='+docs') @@ -220,6 +240,14 @@ class Lbann(CMakePackage, CudaPackage): '-DNCCL_DIR={0}'.format( spec['nccl'].prefix)]) + if spec.satisfies('@:0.90') or spec.satisfies('@0.100:'): + args.extend([ + '-DLBANN_WITH_DIHYDROGEN:BOOL=%s' % ('+dihydrogen' in spec)]) + + if spec.satisfies('@:0.90') or spec.satisfies('@0.101:'): + args.extend([ + '-DLBANN_WITH_DISTCONV:BOOL=%s' % ('+distconv' in spec)]) + return args @when('@0.91:0.93') |