From 98c4d15655d4ca70cf173ca7d6d097353b6fec1d Mon Sep 17 00:00:00 2001 From: "Nichols A. Romero" Date: Sat, 15 Feb 2020 22:56:58 -0600 Subject: Flang - Support libomptarget offload to NVidia GPU (#15015) * Modify Flang NVidia GPU variant to make use of built-in CudaPackage * Add OpenMP Offload patch if March 2019 compiler is selected. * Flang parallel build has a race condition. * llvm-flang now uses built-in CudaPackage. * Add variant for different build releases. * Fix OpenMP target offload for NVidia GPUs. * Additional common flags that are needed with comments. * NVidia BC required for libomp target requires special treatment. Use clang built in previous step to re-compile libomptarget. --- var/spack/repos/builtin/packages/flang/package.py | 22 +++--- .../repos/builtin/packages/llvm-flang/package.py | 85 ++++++++++++++++++++-- 2 files changed, 93 insertions(+), 14 deletions(-) diff --git a/var/spack/repos/builtin/packages/flang/package.py b/var/spack/repos/builtin/packages/flang/package.py index 853a8dfb35..f0f49ae506 100644 --- a/var/spack/repos/builtin/packages/flang/package.py +++ b/var/spack/repos/builtin/packages/flang/package.py @@ -8,7 +8,7 @@ from spack import * import os -class Flang(CMakePackage): +class Flang(CMakePackage, CudaPackage): """Flang is a Fortran compiler targeting LLVM.""" homepage = "https://github.com/flang-compiler/flang" @@ -24,10 +24,10 @@ class Flang(CMakePackage): version('20180921', sha256='f33bd1f054e474f1e8a204bb6f78d42f8f6ecf7a894fdddc3999f7c272350784') version('20180612', sha256='6af858bea013548e091371a97726ac784edbd4ff876222575eaae48a3c2920ed') - # Variants - variant('nvptx', - default=False, - description='Target OpenMP offload to NVidia GPUs') + # Patched only relevant for March 2019 release with OpenMP Offload support + patch('https://github.com/flang-compiler/flang/commit/b342225a64692d2b9c3aff7658a8e4f94a8923eb.diff', + sha256='3bd2c7453131eaaf11328785a3031fa2298bdd0c02cfd5e2b478e6e847d5da43', + when='@20190329 +cuda') 
# Build dependency depends_on('cmake@3.8:', type='build') @@ -45,11 +45,15 @@ class Flang(CMakePackage): depends_on('pgmath@20180921', when='@20180921') depends_on('pgmath@20180612', when='@20180612') - depends_on('cuda', when='+nvptx', type=('run')) + depends_on('llvm-flang +cuda', when='+cuda') # conflicts - conflicts('+nvptx', when='@:20181226', - msg='OMP offload to NVidia GPUs available March 2019 or later') + conflicts('+cuda', when='@:20181226', + msg='OpenMP offload to NVidia GPUs available 20190329 or later') + + # Spurious problems running in parallel the Makefile + # generated by the configure + parallel = False def cmake_args(self): spec = self.spec @@ -67,7 +71,7 @@ class Flang(CMakePackage): spec['python'].command.path) ] - if '+nvptx' in spec: + if '+cuda' in spec: options.append('-DFLANG_OPENMP_GPU_NVIDIA=ON') else: options.append('-DFLANG_OPENMP_GPU_NVIDIA=OFF') diff --git a/var/spack/repos/builtin/packages/llvm-flang/package.py b/var/spack/repos/builtin/packages/llvm-flang/package.py index 4c19e7b54b..10001e2340 100644 --- a/var/spack/repos/builtin/packages/llvm-flang/package.py +++ b/var/spack/repos/builtin/packages/llvm-flang/package.py @@ -7,7 +7,7 @@ from spack import * -class LlvmFlang(CMakePackage): +class LlvmFlang(CMakePackage, CudaPackage): """LLVM-Flang is the Flang fork of LLVM needed by the Flang package.""" homepage = "https://github.com/flang-compiler" @@ -30,10 +30,23 @@ class LlvmFlang(CMakePackage): variant('all_targets', default=False, description='Build all supported targets') - # Build dependency + variant('build_type', default='Release', + description='The CMake build type to build', + values=('Debug', 'Release', 'RelWithDebInfo', 'MinSizeRel')) + + # Universal dependency depends_on('cmake@3.8:', type='build') depends_on('python@2.7:', type='build') + # openmp dependencies + depends_on('perl-data-dumper', type=('build')) + depends_on('hwloc') + + # libomptarget dependencies + depends_on('libelf', when='+cuda') + 
depends_on('libffi', when='+cuda') + depends_on('cuda@:9', when='+cuda') # llvm 7 not compatible with newer version of cuda + # LLVM-Flang Componentes: Driver, OpenMP resource(name='flang-driver', git='https://github.com/flang-compiler/flang-driver.git', @@ -142,10 +155,18 @@ class LlvmFlang(CMakePackage): def cmake_args(self): spec = self.spec - args = [] + # universal + args = [ + '-DLLVM_ENABLE_RTTI:BOOL=ON', + '-DLLVM_ENABLE_EH:BOOL=ON', + '-DCLANG_DEFAULT_OPENMP_RUNTIME:STRING=libomp', + ] args.append('-DPYTHON_EXECUTABLE={0}'.format( spec['python'].command.path)) + # needed by flang-driver + args.append('-DFLANG_LLVM_EXTENSIONS=ON') + if '+all_targets' not in spec: # all is default in cmake if spec.target.family == 'x86' or spec.target.family == 'x86_64': target = 'X86' @@ -162,7 +183,61 @@ class LlvmFlang(CMakePackage): raise InstallError( 'Unsupported architecture: ' + spec.target.family) - args.append( - '-DLLVM_TARGETS_TO_BUILD:STRING=' + target) + if '+cuda' in spec: + args.append( + '-DLLVM_TARGETS_TO_BUILD:STRING=NVPTX;' + target) + else: + args.append( + '-DLLVM_TARGETS_TO_BUILD:STRING=' + target) + + # used by openmp + args.append('-DLIBOMP_USE_HWLOC=On') + args.append('-DLIBOMP_FORTRAN_MODULES=ON') + args.append('-DLIBOMP_ENABLE_SHARED=TRUE') + + # used by libomptarget for NVidia gpu + if '+cuda' in spec: + args.append('-DOPENMP_ENABLE_LIBOMPTARGET=ON') + cuda_arch_list = spec.variants['cuda_arch'].value + args.append('-DCUDA_TOOLKIT_ROOT_DIR=%s' % spec['cuda'].prefix) + args.append('-DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES={0}'.format( + ','.join(cuda_arch_list))) + args.append('-DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_{0}'.format( + cuda_arch_list[-1])) + else: + args.append('-DOPENMP_ENABLE_LIBOMPTARGET=OFF') return args + + @run_after("install") + def post_install(self): + spec = self.spec + + # Manual bootstrap needed to get NVidia BC compiled with the + # clang that was just built + if '+cuda' in spec: + ompdir = 'build-bootstrapped-omp' + # 
rebuild libomptarget to get bytecode runtime library files + with working_dir(ompdir, create=True): + args = [ + self.stage.source_path + '/projects/openmp', + '-DCMAKE_C_COMPILER:PATH={0}'.format( + spec.prefix.bin + '/clang'), + '-DCMAKE_CXX_COMPILER:PATH={0}'.format( + spec.prefix.bin + '/clang++'), + '-DCMAKE_INSTALL_PREFIX:PATH={0}'.format( + spec.prefix) + ] + args = args + self.cmake_args() + # args = self.cmake_args() + # enable CUDA bitcode + args.append('-DLIBOMPTARGET_NVPTX_ENABLE_BCLIB=true') + # work around bad libelf detection in libomptarget + args.append( + '-DCMAKE_CXX_FLAGS:String=-I{0} -I{1}'.format( + spec['libelf'].prefix.include, + spec['hwloc'].prefix.include)) + + cmake(*args) + make() + make('install') -- cgit v1.2.3-70-g09d2