From 659d84518597983f884428af100df4a400a202a1 Mon Sep 17 00:00:00 2001
From: Brian Van Essen
Date: Thu, 18 Oct 2018 11:22:17 -0700
Subject: lbann: add aluminum dependency (#9523)

* Added support for the Aluminum library to LBANN and Hydrogen. Also
fixed several bugs in the grouping of dependencies of both packages.

* Updated the conduit package to have the proper dependency on the
python variant.

* Added new versions for NCCL

* Fixed a bug in how Hydrogen set the path for OpenBLAS.

* Added support for conduit in LBANN.
---
Review notes (not part of the commit message):
- hydrogen, lbann: request aluminum's `mpi_cuda` variant; `+mpi-cuda` names
  a variant that the aluminum package added here does not declare.
- lbann: pass -DLBANN_WITH_CONDUIT:BOOL as the enable flag so that
  -DLBANN_CONDUIT_DIR is defined only once, with the conduit prefix.
- nccl: the 2.2 checksum duplicates the one for 1.3.4-1 and is flagged
  for verification.

 .../repos/builtin/packages/aluminum/package.py     | 43 ++++++++++++++++++
 .../repos/builtin/packages/conduit/package.py      |  2 +-
 .../repos/builtin/packages/hydrogen/package.py     | 22 ++++++++-
 var/spack/repos/builtin/packages/lbann/package.py  | 53 ++++++++++++++++------
 var/spack/repos/builtin/packages/nccl/package.py   |  3 ++
 5 files changed, 107 insertions(+), 16 deletions(-)
 create mode 100644 var/spack/repos/builtin/packages/aluminum/package.py

diff --git a/var/spack/repos/builtin/packages/aluminum/package.py b/var/spack/repos/builtin/packages/aluminum/package.py
new file mode 100644
index 0000000000..9700988217
--- /dev/null
+++ b/var/spack/repos/builtin/packages/aluminum/package.py
@@ -0,0 +1,43 @@
+# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+from spack import *
+
+
+class Aluminum(CMakePackage):
+    """Aluminum provides a generic interface to high-performance
+    communication libraries, with a focus on allreduce
+    algorithms. Blocking and non-blocking algorithms and GPU-aware
+    algorithms are supported. Aluminum also contains custom
+    implementations of select algorithms to optimize for certain
+    situations."""
+
+    homepage = "https://github.com/LLNL/Aluminum"
+    url      = "https://github.com/LLNL/Aluminum/archive/v0.1.tar.gz"
+    git      = "https://github.com/LLNL/Aluminum.git"
+
+    version('master', branch='master')
+    version('0.1', sha256='3880b736866e439dd94e6a61eeeb5bb2abccebbac82b82d52033bc6c94950bdb')
+
+    variant('gpu', default=False, description='Builds with support for GPUs via CUDA and cuDNN')
+    variant('nccl', default=False, description='Builds with support for NCCL communication lib')
+    variant('mpi_cuda', default=False, description='Builds with support for MPI-CUDA enabled library')
+
+    depends_on('cmake@3.9.0:', type='build')
+    depends_on('cuda', when='+gpu')
+    depends_on('cudnn', when='+gpu')
+    depends_on('cub', when='+gpu')
+    depends_on('mpi', when='~mpi_cuda')
+    depends_on('mpi +cuda', when='+mpi_cuda')
+    depends_on('nccl', when='+nccl')
+    depends_on('hwloc')
+
+    def cmake_args(self):
+        spec = self.spec
+        args = [
+            '-DALUMINUM_ENABLE_CUDA:BOOL=%s' % ('+gpu' in spec),
+            '-DALUMINUM_ENABLE_MPI_CUDA:BOOL=%s' % ('+mpi_cuda' in spec),
+            '-DALUMINUM_ENABLE_NCCL:BOOL=%s' % ('+nccl' in spec)]
+        return args
diff --git a/var/spack/repos/builtin/packages/conduit/package.py b/var/spack/repos/builtin/packages/conduit/package.py
index 0e0c623ba4..d2820b92ec 100644
--- a/var/spack/repos/builtin/packages/conduit/package.py
+++ b/var/spack/repos/builtin/packages/conduit/package.py
@@ -77,7 +77,7 @@ class Conduit(Package):
     #######################
     # we need a shared version of python b/c linking with static python lib
     # causes duplicate state issues when running compiled python modules.
-    depends_on("python+shared")
+    depends_on("python+shared", when="+python")
     extends("python", when="+python")
     depends_on("py-numpy", when="+python", type=('build', 'run'))
 
diff --git a/var/spack/repos/builtin/packages/hydrogen/package.py b/var/spack/repos/builtin/packages/hydrogen/package.py
index ce2621f34c..9a65ea4fe3 100644
--- a/var/spack/repos/builtin/packages/hydrogen/package.py
+++ b/var/spack/repos/builtin/packages/hydrogen/package.py
@@ -17,6 +17,7 @@ class Hydrogen(CMakePackage):
     git      = "https://github.com/LLNL/Elemental.git"
 
     version('develop', branch='hydrogen')
+    version('1.0', sha256='d8a97de3133f2c6b6bb4b80d32b4a4cc25eb25e0df4f0cec0f8cb19bf34ece98')
     version('0.99', 'b678433ab1d498da47acf3dc5e056c23')
 
     variant('shared', default=True,
@@ -45,6 +46,10 @@ class Hydrogen(CMakePackage):
             description='Builds with support for GPUs via CUDA and cuDNN')
     variant('test', default=False,
             description='Builds test suite')
+    variant('al', default=False,
+            description='Builds with Aluminum communication library')
+    variant('omp_taskloops', default=False,
+            description='Use OpenMP taskloops instead of parallel for loops.')
 
     # Note that #1712 forces us to enumerate the different blas variants
     depends_on('openblas', when='blas=openblas ~openmp_blas ~int64_blas')
@@ -66,6 +71,10 @@ class Hydrogen(CMakePackage):
     depends_on('essl threads=openmp +ilp64', when='blas=essl +openmp_blas +int64_blas')
     depends_on('netlib-lapack +external-blas', when='blas=essl')
 
+    depends_on('aluminum@master', when='+al ~cuda')
+    depends_on('aluminum@master +gpu +mpi_cuda', when='+al +cuda ~nccl')
+    depends_on('aluminum@master +gpu +nccl +mpi_cuda', when='+al +cuda +nccl')
+
     # Note that this forces us to use OpenBLAS until #1712 is fixed
     depends_on('lapack', when='blas=openblas ~openmp_blas')
 
@@ -123,7 +132,7 @@ class Hydrogen(CMakePackage):
             args.extend([
                 '-DHydrogen_USE_OpenBLAS:BOOL=%s' % ('blas=openblas' in spec),
                 '-DOpenBLAS_DIR:STRING={0}'.format(
-                    spec['hydrogen'].prefix)])
+                    spec['openblas'].prefix)])
         elif 'blas=mkl' in spec:
             args.extend([
                 '-DHydrogen_USE_MKL:BOOL=%s' % ('blas=mkl' in spec)])
@@ -133,4 +142,15 @@ class Hydrogen(CMakePackage):
             args.extend([
                 '-DHydrogen_USE_ESSL:BOOL=%s' % ('blas=essl' in spec)])
 
+        if '+omp_taskloops' in spec:
+            args.extend([
+                '-DHydrogen_ENABLE_OMP_TASKLOOP:BOOL=%s' %
+                ('+omp_taskloops' in spec)])
+
+        if '+al' in spec:
+            args.extend([
+                '-DHydrogen_ENABLE_ALUMINUM:BOOL=%s' % ('+al' in spec),
+                '-DHYDROGEN_Aluminum_DIR={0}'.format(
+                    spec['aluminum'].prefix)])
+
         return args
diff --git a/var/spack/repos/builtin/packages/lbann/package.py b/var/spack/repos/builtin/packages/lbann/package.py
index 112b89e815..148b097051 100644
--- a/var/spack/repos/builtin/packages/lbann/package.py
+++ b/var/spack/repos/builtin/packages/lbann/package.py
@@ -18,6 +18,8 @@ class Lbann(CMakePackage):
     git      = "https://github.com/LLNL/lbann.git"
 
     version('develop', branch='develop')
+    version('0.95', sha256='d310b986948b5ee2bedec36383a7fe79403721c8dc2663a280676b4e431f83c2')
+    version('0.94', sha256='567e99b488ebe6294933c98a212281bffd5220fc13a0a5cd8441f9a3761ceccf')
     version('0.93', '1913a25a53d4025fa04c16f14afdaa55')
     version('0.92', 'c0eb1595a7c74640e96f280beb497564')
     version('0.91', '83b0ec9cd0b7625d41dfb06d2abd4134')
@@ -32,32 +34,44 @@ class Lbann(CMakePackage):
     variant('build_type', default='Release',
             description='The build type to build',
             values=('Debug', 'Release'))
+    variant('al', default=True, description='Builds with support for Aluminum Library')
+    variant('conduit', default=False, description='Builds with support for Conduit Library')
 
     # It seems that there is a need for one statement per version bounds
-    depends_on('hydrogen +openmp_blas +shared +int64', when='@0.95:')
-    depends_on('hydrogen +openmp_blas +shared +int64', when='@:0.90')
-    depends_on('hydrogen +openmp_blas +shared +int64 build_type=Debug',
-               when=('build_type=Debug' '@0.95:'))
+    depends_on('hydrogen +openmp_blas +shared +int64', when='@:0.90,0.95: ~al')
+    depends_on('hydrogen +openmp_blas +shared +int64 +al', when='@:0.90,0.95: +al')
+
     depends_on('hydrogen +openmp_blas +shared +int64 build_type=Debug',
-               when=('build_type=Debug' '@:0.90'))
-    depends_on('hydrogen +openmp_blas +shared +int64 +cuda',
-               when=('+gpu' '@0.95:'))
+               when='build_type=Debug @:0.90,0.95: ~al')
+    depends_on('hydrogen +openmp_blas +shared +int64 build_type=Debug +al',
+               when='build_type=Debug @:0.90,0.95: +al')
+
     depends_on('hydrogen +openmp_blas +shared +int64 +cuda',
-               when=('+gpu' '@:0.90'))
-    depends_on('hydrogen +openmp_blas +shared +int64 +cuda build_type=Debug',
-               when=('build_type=Debug' '@0.95:' '+gpu'))
+               when='+gpu @:0.90,0.95: ~al')
+    depends_on('hydrogen +openmp_blas +shared +int64 +cuda +al',
+               when='+gpu @:0.90,0.95: +al')
+
     depends_on('hydrogen +openmp_blas +shared +int64 +cuda build_type=Debug',
-               when=('build_type=Debug' '@:0.90' '+gpu'))
-    depends_on('elemental +openmp_blas +shared +int64', when=('@0.91:0.94'))
+               when='build_type=Debug @:0.90,0.95: +gpu')
+    depends_on('hydrogen +openmp_blas +shared +int64 +cuda build_type=Debug +al',
+               when='build_type=Debug @:0.90,0.95: +gpu +al')
+
+    # Older versions depended on Elemental not Hydrogen
+    depends_on('elemental +openmp_blas +shared +int64', when='@0.91:0.94')
     depends_on('elemental +openmp_blas +shared +int64 build_type=Debug',
-               when=('build_type=Debug' '@0.91:0.94'))
+               when='build_type=Debug @0.91:0.94')
+
+    depends_on('aluminum@master', when='@:0.90,0.95: +al ~gpu')
+    depends_on('aluminum@master +gpu +mpi_cuda', when='@:0.90,0.95: +al +gpu ~nccl')
+    depends_on('aluminum@master +gpu +nccl +mpi_cuda', when='@:0.90,0.95: +al +gpu +nccl')
 
     depends_on('cuda', when='+gpu')
     depends_on('cudnn', when='+gpu')
     depends_on('cub', when='+gpu')
     depends_on('mpi', when='~gpu')
     depends_on('mpi +cuda', when='+gpu')
-    depends_on('hwloc ~pci ~libxml2')
+    depends_on('hwloc')
+
     # LBANN wraps OpenCV calls in OpenMP parallel loops, build without OpenMP
     # Additionally disable video related options, they incorrectly link in a
     # bad OpenMP library when building with clang or Intel compilers
@@ -72,6 +86,8 @@ class Lbann(CMakePackage):
     depends_on('cnpy')
     depends_on('nccl', when='+gpu +nccl')
 
+    depends_on('conduit@master +hdf5', when='+conduit')
+
     @property
     def common_config_args(self):
         spec = self.spec
@@ -110,6 +126,15 @@ class Lbann(CMakePackage):
                 '-DElemental_DIR={0}/CMake/elemental'.format(
                     spec['elemental'].prefix)])
 
+        if '+al' in spec:
+            args.extend(['-DLBANN_WITH_ALUMINUM:BOOL=%s' % ('+al' in spec),
+                         '-DAluminum_DIR={0}'.format(spec['aluminum'].prefix)])
+
+        if '+conduit' in spec:
+            args.extend(['-DLBANN_WITH_CONDUIT:BOOL=%s' % ('+conduit' in spec),
+                         '-DLBANN_CONDUIT_DIR={0}'.format(
+                             spec['conduit'].prefix)])
+
         # Add support for OpenMP
         if (self.spec.satisfies('%clang')):
             if (sys.platform == 'darwin'):
diff --git a/var/spack/repos/builtin/packages/nccl/package.py b/var/spack/repos/builtin/packages/nccl/package.py
index f180e7c3af..d13d22596f 100644
--- a/var/spack/repos/builtin/packages/nccl/package.py
+++ b/var/spack/repos/builtin/packages/nccl/package.py
@@ -12,6 +12,9 @@ class Nccl(MakefilePackage):
     homepage = "https://github.com/NVIDIA/nccl"
     url      = "https://github.com/NVIDIA/nccl/archive/v1.3.4-1.tar.gz"
 
+    version('2.3.5-5',
+            sha256='bac9950b4d3980c25baa8e3e4541d2dfb4d21edf32ad3b89022d04920357142f')
+    version('2.2', '5b9ce7fbdce0fde68e0f66318e6ff422')  # TODO: md5 duplicates 1.3.4-1; verify
     version('1.3.4-1', '5b9ce7fbdce0fde68e0f66318e6ff422')
     version('1.3.0-1', 'f6fb1d56913a7d212ca0c300e76f01fb')
 
-- 
cgit v1.2.3-70-g09d2