From 49114ffff7ba365576137277a5a886350be7ba7e Mon Sep 17 00:00:00 2001 From: Veselin Dobrev Date: Tue, 8 Nov 2022 22:56:58 -0800 Subject: MFEM: more updates for v4.5 (#33603) * [mfem] updates related to building with cuda * [hypre] tweak to support building with external ROCm/HIP * [mfem] more tweaks related to building with +rocm * [mfem] temporary (?) workaround for issue #33684 * [mfem] fix style * [mfem] fix +shared+miniapps install --- var/spack/repos/builtin/packages/hypre/package.py | 3 +- var/spack/repos/builtin/packages/mfem/package.py | 68 ++++++++++++---- .../repos/builtin/packages/mfem/test_builds.sh | 90 ++++++++++++++-------- 3 files changed, 113 insertions(+), 48 deletions(-) diff --git a/var/spack/repos/builtin/packages/hypre/package.py b/var/spack/repos/builtin/packages/hypre/package.py index 03a6fc70d8..fd168400cc 100644 --- a/var/spack/repos/builtin/packages/hypre/package.py +++ b/var/spack/repos/builtin/packages/hypre/package.py @@ -242,7 +242,8 @@ class Hypre(AutotoolsPackage, CudaPackage, ROCmPackage): rocm_pkgs = ["rocsparse", "rocthrust", "rocprim", "rocrand"] rocm_inc = "" for pkg in rocm_pkgs: - rocm_inc += spec[pkg].headers.include_flags + " " + if "^" + pkg in spec: + rocm_inc += spec[pkg].headers.include_flags + " " configure_args.extend( [ "--with-hip", diff --git a/var/spack/repos/builtin/packages/mfem/package.py b/var/spack/repos/builtin/packages/mfem/package.py index d188c2fc88..984e715394 100644 --- a/var/spack/repos/builtin/packages/mfem/package.py +++ b/var/spack/repos/builtin/packages/mfem/package.py @@ -440,6 +440,12 @@ class Mfem(Package, CudaPackage, ROCmPackage): def setup_build_environment(self, env): env.unset("MFEM_DIR") env.unset("MFEM_BUILD_DIR") + # Workaround for changes made by the 'kokkos-nvcc-wrapper' package + # which can be a dependency e.g. through PETSc that uses Kokkos: + if "^kokkos-nvcc-wrapper" in self.spec: + env.set("MPICH_CXX", spack_cxx) + env.set("OMPI_CXX", spack_cxx) + env.set("MPICXX_CXX", spack_cxx) # # Note: Although MFEM does support CMake configuration, MFEM @@ -866,15 +872,33 @@ class Mfem(Package, CudaPackage, ROCmPackage): if "+rocm" in spec: amdgpu_target = ",".join(spec.variants["amdgpu_target"].value) options += ["HIP_CXX=%s" % spec["hip"].hipcc, "HIP_ARCH=%s" % amdgpu_target] + hip_libs = LibraryList([]) + # To use a C++ compiler that supports -xhip flag one can use + # something like this: + # options += [ + # "HIP_CXX=%s" % (spec["mpi"].mpicxx if "+mpi" in spec else spack_cxx), + # "HIP_FLAGS=-xhip --offload-arch=%s" % amdgpu_target, + # ] + # hip_libs += find_libraries("libamdhip64", spec["hip"].prefix.lib) if "^hipsparse" in spec: # hipsparse is needed @4.4.0:+rocm - # Note: MFEM's defaults.mk want to find librocsparse.* in - # $(HIP_DIR)/lib, so we set HIP_DIR to be the prefix of - # rocsparse (which is a dependency of hipsparse). - options += [ - "HIP_DIR=%s" % spec["rocsparse"].prefix, - "HIP_OPT=%s" % spec["hipsparse"].headers.cpp_flags, - "HIP_LIB=%s" % ld_flags_from_library_list(spec["hipsparse"].libs), - ] + hipsparse = spec["hipsparse"] + options += ["HIP_OPT=%s" % hipsparse.headers.cpp_flags] + hip_libs += hipsparse.libs + # Note: MFEM's defaults.mk wants to find librocsparse.* in + # $(HIP_DIR)/lib, so we set HIP_DIR to be $ROCM_PATH when using + # external HIP, or the prefix of rocsparse (which is a + # dependency of hipsparse) when using Spack-built HIP. + if spec["hip"].external: + options += ["HIP_DIR=%s" % env["ROCM_PATH"]] + else: + options += ["HIP_DIR=%s" % hipsparse["rocsparse"].prefix] + if "%cce" in spec: + # We assume the proper Cray CCE module (cce) is loaded: + craylibs_path = env["CRAYLIBS_" + env["MACHTYPE"].capitalize()] + craylibs = ["libmodules", "libfi", "libcraymath", "libf", "libu", "libcsup"] + hip_libs += find_libraries(craylibs, craylibs_path) + if hip_libs: + options += ["HIP_LIB=%s" % ld_flags_from_library_list(hip_libs)] if "+occa" in spec: options += [ @@ -883,12 +907,18 @@ class Mfem(Package, CudaPackage, ROCmPackage): ] if "+raja" in spec: - raja_opt = "-I%s" % spec["raja"].prefix.include - if spec["raja"].satisfies("^camp"): - raja_opt += " -I%s" % spec["camp"].prefix.include + raja = spec["raja"] + raja_opt = "-I%s" % raja.prefix.include + raja_lib = find_libraries( + "libRAJA", raja.prefix, shared=("+shared" in raja), recursive=True + ) + if raja.satisfies("^camp"): + camp = raja["camp"] + raja_opt += " -I%s" % camp.prefix.include + raja_lib += find_optional_library("libcamp", camp.prefix) options += [ "RAJA_OPT=%s" % raja_opt, - "RAJA_LIB=%s" % ld_flags_from_dirs([spec["raja"].prefix.lib], ["RAJA"]), + "RAJA_LIB=%s" % ld_flags_from_library_list(raja_lib), ] if "+amgx" in spec: @@ -975,10 +1005,13 @@ class Mfem(Package, CudaPackage, ROCmPackage): if "+hiop" in spec: hiop = spec["hiop"] - lapack_blas = spec["lapack"].libs + spec["blas"].libs + hiop_libs = hiop.libs + hiop_libs += spec["lapack"].libs + spec["blas"].libs + if "^magma" in hiop: + hiop_libs += hiop["magma"].libs options += [ "HIOP_OPT=-I%s" % hiop.prefix.include, - "HIOP_LIB=%s" % ld_flags_from_library_list(hiop.libs + lapack_blas), + "HIOP_LIB=%s" % ld_flags_from_library_list(hiop_libs), ] make("config", *options, parallel=False) @@ -996,6 +1029,9 @@ class Mfem(Package, CudaPackage, ROCmPackage): make("-C", "examples", "ex1p" if ("+mpi" in self.spec) else "ex1", parallel=False) # make('check', parallel=False) else: + # As of v4.5.0 and ROCm up to 5.2.3, the following miniapp crashes + # the HIP compiler, so it has to be disabled for testing with HIP: + # filter_file("PAR_MINIAPPS = hooke", "PAR_MINIAPPS =", "miniapps/hooke/makefile") make("all") make("test", parallel=False) @@ -1013,7 +1049,11 @@ class Mfem(Package, CudaPackage, ROCmPackage): with working_dir("config"): os.rename("config.mk", "config.mk.orig") copy(str(self.config_mk), "config.mk") + # Add '/mfem' to MFEM_INC_DIR for miniapps that include directly + # headers like "general/forall.hpp": + filter_file("(MFEM_INC_DIR.*)$", "\\1/mfem", "config.mk") shutil.copystat("config.mk.orig", "config.mk") + # TODO: miniapps linking to libmfem-common.* will not work. prefix_share = join_path(prefix, "share", "mfem") diff --git a/var/spack/repos/builtin/packages/mfem/test_builds.sh b/var/spack/repos/builtin/packages/mfem/test_builds.sh index 5c43f87512..ba2c6ceef3 100755 --- a/var/spack/repos/builtin/packages/mfem/test_builds.sh +++ b/var/spack/repos/builtin/packages/mfem/test_builds.sh @@ -19,9 +19,12 @@ hdf5_spec='^hdf5@1.8.19:1.8' # petsc spec petsc_spec='^petsc+suite-sparse+mumps' petsc_spec_cuda='^petsc+cuda+suite-sparse+mumps' -# strumpack spec without cuda (use @master until version > 6.3.1 is released) -strumpack_spec='^strumpack@master~slate~openmp~cuda' -strumpack_cuda_spec='^strumpack@master~slate~openmp' +# superlu-dist specs +superlu_spec_cuda='^superlu-dist+cuda cuda_arch='"${cuda_arch}" +superlu_spec_rocm='^superlu-dist+rocm amdgpu_target='"${rocm_arch}" +# strumpack spec without cuda (use version > 6.3.1) +strumpack_spec='^strumpack~slate~openmp~cuda' +strumpack_cuda_spec='^strumpack~slate~openmp' strumpack_rocm_spec='^strumpack+rocm~slate~openmp~cuda' builds=( @@ -116,27 +119,28 @@ builds_cuda=( ^raja+cuda~openmp ^hypre+cuda' # hypre without cuda: + # NOTE: PETSc tests may need PETSC_OPTIONS="-use_gpu_aware_mpi 0" # TODO: restore '+libceed' when the libCEED CUDA unit tests take less time. - # TODO: add back '+gslib' when the gslib test is fixed. - # TODO: restore '+superlu-dist' when the unit test is fixed. - # TODO: add back "+petsc+slepc $petsc_spec_cuda" when it works. + # TODO: remove "^hiop+shared" when the default static build is fixed. ${mfem}'+cuda+openmp+raja+occa cuda_arch='"${cuda_arch}"' \ - +strumpack+suite-sparse \ + +superlu-dist+strumpack+suite-sparse+gslib+petsc+slepc \ +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - ^raja+cuda+openmp'" $strumpack_cuda_spec"' \ - '"$hdf5_spec" + ^raja+cuda+openmp ^hiop+shared'" $strumpack_cuda_spec"' \ + '"$superlu_spec_cuda $petsc_spec_cuda $hdf5_spec" # hypre with cuda: # TODO: restore '+libceed' when the libCEED CUDA unit tests take less time. - # TODO: add back '+gslib' when the gslib test is fixed. - # TODO: restore '+superlu-dist' when we support it with '^hypre+cuda'. + # TODO: restore '+superlu-dist $superlu_spec_cuda' when we support it with + # '^hypre+cuda'. # TODO: add back "+strumpack $strumpack_cuda_spec" when it's supported. # TODO: add back "+petsc+slepc $petsc_spec_cuda" when it works. + # NOTE: PETSc tests may need PETSC_OPTIONS="-use_gpu_aware_mpi 0" # TODO: add back "+sundials" when it's supported with '^hypre+cuda'. + # TODO: remove "^hiop+shared" when the default static build is fixed. ${mfem}'+cuda+openmp+raja+occa cuda_arch='"${cuda_arch}"' \ - +suite-sparse \ + +suite-sparse+gslib \ +pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - ^raja+cuda+openmp ^hypre+cuda \ + ^raja+cuda+openmp ^hiop+shared ^hypre+cuda \ '"$hdf5_spec" # @@ -155,32 +159,38 @@ builds_cuda=( ^raja+cuda~openmp ^hypre+cuda' # hypre without cuda: + # NOTE: PETSc tests may need PETSC_OPTIONS="-use_gpu_aware_mpi 0" # TODO: restore '+libceed' when the libCEED CUDA unit tests take less time. - # TODO: add back '+gslib' when the gslib test is fixed. - # TODO: restore '+superlu-dist' when the unit test is fixed. - # TODO: add back "+petsc+slepc $petsc_spec_cuda" when it works. + # TODO: remove "^hiop+shared" when the default static build is fixed. ${mfem_dev}'+cuda+openmp+raja+occa cuda_arch='"${cuda_arch}"' \ - +strumpack+suite-sparse \ + +superlu-dist+strumpack+suite-sparse+gslib+petsc+slepc \ +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - ^raja+cuda+openmp'" $strumpack_cuda_spec"' \ - '"$hdf5_spec" + ^raja+cuda+openmp ^hiop+shared'" $strumpack_cuda_spec"' \ + '"$superlu_spec_cuda $petsc_spec_cuda $hdf5_spec" # hypre with cuda: # TODO: restore '+libceed' when the libCEED CUDA unit tests take less time. - # TODO: add back '+gslib' when the gslib test is fixed. - # TODO: restore '+superlu-dist' when we support it with '^hypre+cuda'. + # TODO: restore '+superlu-dist $superlu_spec_cuda' when we support it with + # '^hypre+cuda'. # TODO: add back "+strumpack $strumpack_cuda_spec" when it's supported. # TODO: add back "+petsc+slepc $petsc_spec_cuda" when it works. + # NOTE: PETSc tests may need PETSC_OPTIONS="-use_gpu_aware_mpi 0" # TODO: add back "+sundials" when it's supported with '^hypre+cuda'. + # TODO: remove "^hiop+shared" when the default static build is fixed. ${mfem_dev}'+cuda+openmp+raja+occa cuda_arch='"${cuda_arch}"' \ - +suite-sparse \ + +suite-sparse+gslib \ +pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - ^raja+cuda+openmp ^hypre+cuda \ + ^raja+cuda+openmp ^hiop+shared ^hypre+cuda \ '"$hdf5_spec" ) builds_rocm=( + # NOTE: the miniapp 'hooke' crashes the HIP compiler, so it needs to be + # disabled in Spack, e.g. with + # filter_file("PAR_MINIAPPS = hooke", "PAR_MINIAPPS =", + # "miniapps/hooke/makefile") + # hypre without rocm: ${mfem}'+rocm amdgpu_target='"${rocm_arch}" @@ -192,24 +202,30 @@ builds_rocm=( ^raja+rocm~openmp ^occa~cuda ^hypre+rocm' # hypre without rocm: - # TODO: add back '+gslib' when the gslib test is fixed. - # TODO: restore '+superlu-dist' when the unit test is fixed. # TODO: add "+petsc+slepc $petsc_spec_rocm" when it is supported. + # TODO: add back '+conduit' when it is no longer linked with tcmalloc*. + # TODO: add back '+hiop' when it is no longer linked with tcmalloc* through + # its magma dependency. + # TODO: add back '+ginkgo' when the Ginkgo example works. ${mfem}'+rocm+openmp+raja+occa+libceed amdgpu_target='"${rocm_arch}"' \ - +strumpack+suite-sparse \ - +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ + +superlu-dist+strumpack+suite-sparse+gslib \ + +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind \ ^raja+rocm~openmp ^occa~cuda'" $strumpack_rocm_spec"' \ - '"$hdf5_spec" + '"$superlu_spec_rocm $hdf5_spec" # hypre with rocm: - # TODO: add back '+gslib' when the gslib test is fixed. - # TODO: restore '+superlu-dist' when we support it with '^hypre+rocm'. + # TODO: restore '+superlu-dist $superlu_spec_rocm' when we support it with + # '^hypre+rocm'. # TODO: add back "+strumpack $strumpack_rocm_spec" when it's supported. # TODO: add back "+petsc+slepc $petsc_spec_rocm" when it works. + # TODO: add back '+conduit' when it is no longer linked with tcmalloc*. + # TODO: add back '+hiop' when it is no longer linked with tcmalloc* through + # its magma dependency. + # TODO: add back '+ginkgo' when the Ginkgo example works. # TODO: add back "+sundials" when it's supported with '^hypre+rocm'. ${mfem}'+rocm+openmp+raja+occa+libceed amdgpu_target='"${rocm_arch}"' \ - +suite-sparse \ - +pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ + +suite-sparse+gslib \ + +pumi+mpfr+netcdf+zlib+gnutls+libunwind \ ^raja+rocm~openmp ^occa~cuda ^hypre+rocm \ '"$hdf5_spec" @@ -230,14 +246,22 @@ run_builds=("${builds[@]}" "${builds2[@]}") # run_builds=("${builds_cuda[@]}") # run_builds=("${builds_rocm[@]}") +# PETSc CUDA tests on Lassen need this: +# export PETSC_OPTIONS="-use_gpu_aware_mpi 0" + for bld in "${run_builds[@]}"; do + eval bbb="\"${bld}\"" + printf "\n%s\n" "${SEP}" printf " %s\n" "${bld}" printf "%s\n" "${SEP}" - eval bbb="\"${bld}\"" spack spec --fresh -I $bbb || exit 1 printf "%s\n" "${sep}" spack install $spack_jobs --fresh --test=root $bbb || exit 2 + + # echo ./bin/spack spec --fresh -I $bbb + # echo ./bin/spack install $spack_jobs --fresh --test=root $bbb + # echo done # Uninstall all mfem builds: -- cgit v1.2.3-60-g2f50