From 95fec7adfcfc7c027dc69ff30eb8a20c7c917696 Mon Sep 17 00:00:00 2001 From: Tiziano Müller Date: Tue, 13 Aug 2019 19:59:08 +0200 Subject: CP2K/SIRIUS: add support for building with CUDA, fix CP2K build with MKL (#11418) * cp2k/sirius: add CUDA support * cp2k: fix building with MKL * sirius: make sure to link against Spack's blas/lapack/scalapack --- var/spack/repos/builtin/packages/cp2k/package.py | 86 +++++++++++++++++++--- .../cmake-fix-shared-library-installation.patch | 53 +++++++++++++ var/spack/repos/builtin/packages/sirius/package.py | 46 ++++++++++-- 3 files changed, 169 insertions(+), 16 deletions(-) create mode 100644 var/spack/repos/builtin/packages/sirius/cmake-fix-shared-library-installation.patch diff --git a/var/spack/repos/builtin/packages/cp2k/package.py b/var/spack/repos/builtin/packages/cp2k/package.py index 7c568c712a..3674ac9c1c 100644 --- a/var/spack/repos/builtin/packages/cp2k/package.py +++ b/var/spack/repos/builtin/packages/cp2k/package.py @@ -10,7 +10,7 @@ import copy import spack.util.environment -class Cp2k(MakefilePackage): +class Cp2k(MakefilePackage, CudaPackage): """CP2K is a quantum chemistry and solid state physics software package that can perform atomistic simulations of solid state, liquid, molecular, periodic, material, crystal, and biological systems @@ -44,6 +44,23 @@ class Cp2k(MakefilePackage): description=('Enable planewave electronic structure' ' calculations via SIRIUS')) + # override cuda_arch from CudaPackage since we only support one arch + # at a time and only specific ones for which we have parameter files + # for optimal kernels + variant('cuda_arch', + description='CUDA architecture', + default='none', + values=('none', '35', '37', '60'), + multi=False) + variant('cuda_arch_35_k20x', default=False, + description=('CP2K (resp. DBCSR) has specific parameter sets for' + ' different GPU models. 
Enable this when building' + ' with cuda_arch=35 for a K20x instead of a K40')) + variant('cuda_fft', default=False, + description=('Use CUDA also for FFTs in the PW part of CP2K')) + variant('cuda_blas', default=False, + description=('Use CUBLAS for general matrix operations in DBCSR')) + depends_on('python', type='build') depends_on('fftw@3:', when='~openmp') @@ -91,19 +108,23 @@ class Cp2k(MakefilePackage): # to get JSON-based UPF format support used in combination with SIRIUS depends_on('json-fortran', when='+sirius') + # the bundled libcusmm uses numpy in the parameter prediction (v7+) + depends_on('py-numpy', when='@7:+cuda', type='build') + # PEXSI, ELPA and SIRIUS need MPI in CP2K conflicts('~mpi', '+pexsi') conflicts('~mpi', '+elpa') conflicts('~mpi', '+sirius') conflicts('+sirius', '@:6.999') # sirius support was introduced in 7+ + conflicts('~cuda', '+cuda_fft') + conflicts('~cuda', '+cuda_blas') + # Apparently cp2k@4.1 needs an "experimental" version of libwannier.a # which is only available contacting the developer directly. See INSTALL # in the stage of cp2k@4.1 depends_on('wannier90', when='@3.0+mpi', type='build') - # TODO : add dependency on CUDA - # CP2K needs compiler specific compilation flags, e.g. 
optflags conflicts('%clang') conflicts('%cray') @@ -161,19 +182,23 @@ class Cp2k(MakefilePackage): elif '^mpi@2:' in spec: cppflags.append('-D__MPI_VERSION=2') - if '^intel-mkl' in spec: - cppflags.append('-D__FFTSG') - cflags = optimization_flags[self.spec.compiler.name][:] cxxflags = optimization_flags[self.spec.compiler.name][:] fcflags = optimization_flags[self.spec.compiler.name][:] + nvflags = ['-O3'] ldflags = [] libs = [] + gpuver = '' if '%intel' in spec: cflags.append('-fp-model precise') cxxflags.append('-fp-model precise') - fcflags.extend(['-fp-model source', '-heap-arrays 64']) + fcflags += [ + '-fp-model source', + '-heap-arrays 64', + '-g', + '-traceback', + ] elif '%gcc' in spec: fcflags.extend([ '-ffree-form', @@ -184,8 +209,12 @@ class Cp2k(MakefilePackage): fcflags.extend(['-Mfreeform', '-Mextend']) if '+openmp' in spec: + cflags.append(self.compiler.openmp_flag) + cxxflags.append(self.compiler.openmp_flag) fcflags.append(self.compiler.openmp_flag) ldflags.append(self.compiler.openmp_flag) + nvflags.append('-Xcompiler="{0}"'.format( + self.compiler.openmp_flag)) ldflags.append(fftw.libs.search_flags) @@ -231,6 +260,11 @@ class Cp2k(MakefilePackage): ldflags.append((lapack + blas).search_flags) libs.extend([str(x) for x in (fftw.libs, lapack, blas)]) + if self.spec.variants['blas'].value == 'mkl': + cppflags += ['-D__MKL'] + elif self.spec.variants['blas'].value == 'accelerate': + cppflags += ['-D__ACCELERATE'] + # MPI if '+mpi' in self.spec: cppflags.extend([ @@ -309,15 +343,38 @@ class Cp2k(MakefilePackage): sirius = spec['sirius'] cppflags.append('-D__SIRIUS') fcflags += ['-I{0}'.format(os.path.join(sirius.prefix, 'fortran'))] - libs += [ - os.path.join(sirius.libs.directories[0], - 'libsirius_f.{0}'.format(dso_suffix)) - ] + libs += list(sirius.libs) cppflags.append('-D__JSON') fcflags += ['$(shell pkg-config --cflags json-fortran)'] libs += ['$(shell pkg-config --libs json-fortran)'] + if self.spec.satisfies('+cuda'): + cppflags += 
['-D__ACC'] + libs += ['-lcudart', '-lnvrtc', '-lcuda'] + + if self.spec.satisfies('+cuda_blas'): + cppflags += ['-D__DBCSR_ACC=2'] + libs += ['-lcublas'] + else: + cppflags += ['-D__DBCSR_ACC'] + + if self.spec.satisfies('+cuda_fft'): + cppflags += ['-D__PW_CUDA'] + libs += ['-lcufft', '-lcublas'] + + cuda_arch = self.spec.variants['cuda_arch'].value + if cuda_arch: + gpuver = { + '35': 'K40', + '37': 'K80', + '60': 'P100', + }[cuda_arch] + + if (cuda_arch == '35' + and self.spec.satisfies('+cuda_arch_35_k20x')): + gpuver = 'K20X' + if 'smm=libsmm' in spec: lib_dir = os.path.join( 'lib', self.makefile_architecture, self.makefile_version @@ -349,6 +406,7 @@ class Cp2k(MakefilePackage): cflags.extend(cppflags) cxxflags.extend(cppflags) fcflags.extend(cppflags) + nvflags.extend(cppflags) with open(self.makefile, 'w') as mkf: if '+plumed' in self.spec: @@ -373,11 +431,16 @@ class Cp2k(MakefilePackage): mkf.write('FC = {0}\n'.format(fc)) mkf.write('LD = {0}\n'.format(fc)) + if self.spec.satisfies('+cuda'): + mkf.write('NVCC = {0}\n'.format( + os.path.join(self.spec['cuda'].prefix, 'bin', 'nvcc'))) + # Write compiler flags to file mkf.write('DFLAGS = {0}\n\n'.format(' '.join(dflags))) mkf.write('CPPFLAGS = {0}\n\n'.format(' '.join(cppflags))) mkf.write('CFLAGS = {0}\n\n'.format(' '.join(cflags))) mkf.write('CXXFLAGS = {0}\n\n'.format(' '.join(cxxflags))) + mkf.write('NVFLAGS = {0}\n\n'.format(' '.join(nvflags))) mkf.write('FCFLAGS = {0}\n\n'.format(' '.join(fcflags))) mkf.write('LDFLAGS = {0}\n\n'.format(' '.join(ldflags))) if '%intel' in spec: @@ -385,6 +448,7 @@ class Cp2k(MakefilePackage): ' '.join(ldflags) + ' -nofor_main') ) mkf.write('LIBS = {0}\n\n'.format(' '.join(libs))) + mkf.write('GPUVER = {0}\n\n'.format(gpuver)) mkf.write('DATA_DIR = {0}\n\n'.format(self.prefix.share.data)) @property diff --git a/var/spack/repos/builtin/packages/sirius/cmake-fix-shared-library-installation.patch 
b/var/spack/repos/builtin/packages/sirius/cmake-fix-shared-library-installation.patch new file mode 100644 index 0000000000..bffa67e5da --- /dev/null +++ b/var/spack/repos/builtin/packages/sirius/cmake-fix-shared-library-installation.patch @@ -0,0 +1,53 @@ +From 4b51d07369b5972f3917cc8f2425caa814ae0975 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Tiziano=20M=C3=BCller?= +Date: Thu, 16 May 2019 10:53:04 +0200 +Subject: [PATCH] cmake: fix shared library installation + +fixes the error during `make install`: + + TARGETS given no LIBRARY DESTINATION for shared library target + +when building shared libraries. + +... and respect the current OS/distro library dir. +--- + src/CMakeLists.txt | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index 65307dd3..2b7a5279 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -2,6 +2,8 @@ + # working correctly + # list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC") + ++include(GNUInstallDirs) # required to get a proper LIBDIR variable ++ + # keep two libraries: libsirius and libsirius_f + + if(USE_CUDA) +@@ -9,13 +11,18 @@ if(USE_CUDA) + file(GLOB_RECURSE CUFILES_KERNELS "Kernels/*.cu") + add_library(sirius_cu "${CUFILES_KERNELS};${CUFILES_SDDK}") + set_target_properties(sirius_cu PROPERTIES POSITION_INDEPENDENT_CODE ON) +- INSTALL (TARGETS sirius_cu ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/) ++ INSTALL (TARGETS sirius_cu ++ ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" ++ LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ++ ) + endif() + if(CREATE_FORTRAN_BINDINGS) + add_library(sirius_f "sirius_api.cpp;sirius.f90") + SIRIUS_SETUP_TARGET(sirius_f) + INSTALL (TARGETS sirius_f ARCHIVE DESTINATION +- ${CMAKE_INSTALL_PREFIX}/lib/) ++ ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" ++ LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ++ ) + set_target_properties(sirius_f PROPERTIES POSITION_INDEPENDENT_CODE ON) + set_target_properties(sirius_f PROPERTIES 
Fortran_MODULE_DIRECTORY mod_files) + target_link_libraries(sirius_f PUBLIC OpenMP::OpenMP_CXX) +-- +2.16.4 + diff --git a/var/spack/repos/builtin/packages/sirius/package.py b/var/spack/repos/builtin/packages/sirius/package.py index d8ccc7e376..b8c8a4ae97 100644 --- a/var/spack/repos/builtin/packages/sirius/package.py +++ b/var/spack/repos/builtin/packages/sirius/package.py @@ -8,7 +8,7 @@ import os from spack import * -class Sirius(CMakePackage): +class Sirius(CMakePackage, CudaPackage): """Domain specific library for electronic structure calculations""" homepage = "https://github.com/electronic-structure/SIRIUS" @@ -38,12 +38,14 @@ class Sirius(CMakePackage): depends_on('elpa~openmp', when='+elpa~openmp') depends_on('libvdwxc+mpi', when='+vdwxc') depends_on('scalapack', when='+scalapack') + depends_on("cuda", when="+cuda") # TODO: - # add support for MKL, CUDA, MAGMA, CRAY_LIBSCI, Python bindings, testing + # add support for MKL, MAGMA, CRAY_LIBSCI, Python bindings, testing patch("strip-spglib-include-subfolder.patch") patch("link-libraries-fortran.patch") + patch("cmake-fix-shared-library-installation.patch") @property def libs(self): @@ -52,12 +54,17 @@ class Sirius(CMakePackage): if self.spec.satisfies('+fortran'): libraries += ['libsirius_f'] + if self.spec.satisfies('+cuda'): + libraries += ['libsirius_cu'] + return find_libraries( libraries, root=self.prefix, shared=self.spec.satisfies('+shared'), recursive=True ) def cmake_args(self): + spec = self.spec + def _def(variant, flag=None): """Returns "-DUSE_VARIANT:BOOL={ON,OFF}" depending on whether +variant is set. 
If the CMake flag differs from the variant @@ -68,7 +75,7 @@ class Sirius(CMakePackage): flag if flag else "USE_{0}".format( variant.strip('+~').upper() ), - "ON" if self.spec.satisfies(variant) else "OFF" + "ON" if spec.satisfies(variant) else "OFF" ) args = [ @@ -78,13 +85,42 @@ class Sirius(CMakePackage): _def('+vdwxc'), _def('+scalapack'), _def('+fortran', 'CREATE_FORTRAN_BINDINGS'), + _def('+cuda') ] - if self.spec.satisfies('+elpa'): + lapack = spec['lapack'] + blas = spec['blas'] + + args += [ + '-DLAPACK_FOUND=true', + '-DLAPACK_INCLUDE_DIRS={0}'.format(lapack.prefix.include), + '-DLAPACK_LIBRARIES={0}'.format(lapack.libs.joined(';')), + '-DBLAS_FOUND=true', + '-DBLAS_INCLUDE_DIRS={0}'.format(blas.prefix.include), + '-DBLAS_LIBRARIES={0}'.format(blas.libs.joined(';')), + ] + + if '+scalapack' in spec: + args += [ + '-DSCALAPACK_FOUND=true', + '-DSCALAPACK_INCLUDE_DIRS={0}'.format( + spec['scalapack'].prefix.include), + '-DSCALAPACK_LIBRARIES={0}'.format( + spec['scalapack'].libs.joined(';')), + ] + + if spec.satisfies('+elpa'): elpa_incdir = os.path.join( - self.spec['elpa'].headers.directories[0], + spec['elpa'].headers.directories[0], 'elpa' ) args += ["-DELPA_INCLUDE_DIR={0}".format(elpa_incdir)] + if spec.satisfies('+cuda'): + cuda_arch = spec.variants['cuda_arch'].value + if cuda_arch: + args += [ + '-DCMAKE_CUDA_FLAGS=-arch=sm_{0}'.format(cuda_arch[0]) + ] + return args -- cgit v1.2.3-70-g09d2