diff options
5 files changed, 265 insertions, 8 deletions
diff --git a/var/spack/repos/builtin/packages/openmpi/accelerator-build-components-as-dso-s-by-default.patch b/var/spack/repos/builtin/packages/openmpi/accelerator-build-components-as-dso-s-by-default.patch new file mode 100644 index 0000000000..a4b5bf7342 --- /dev/null +++ b/var/spack/repos/builtin/packages/openmpi/accelerator-build-components-as-dso-s-by-default.patch @@ -0,0 +1,81 @@ +From 7e2e390e468db44c8540d2510841a22d146fa6ed Mon Sep 17 00:00:00 2001 +From: Howard Pritchard <howardp@lanl.gov> +Date: Tue, 7 Nov 2023 10:06:47 -0500 +Subject: [PATCH] accelerator: build components as dso's by default + +also need to switch rcache/gpsum and rcache/rgpusum + +to DSO by default. + +Fix a problem in opal_mca.m4 where the enable-mca-dso list wasn't being +processed correctly starting with 5.0.0. + +related to #12036 + +Signed-off-by: Howard Pritchard <howardp@lanl.gov> + +diff --git a/config/opal_mca.m4 b/config/opal_mca.m4 +index 935b8c65..b425fe63 100644 +--- a/config/opal_mca.m4 ++++ b/config/opal_mca.m4 +@@ -13,7 +13,7 @@ dnl All rights reserved. + dnl Copyright (c) 2010-2021 Cisco Systems, Inc. All rights reserved + dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + dnl Copyright (c) 2018-2022 Amazon.com, Inc. or its affiliates. All Rights reserved. +-dnl Copyright (c) 2021 Triad National Security, LLC. All rights ++dnl Copyright (c) 2021-2023 Triad National Security, LLC. All rights + dnl reserved. + dnl $COPYRIGHT$ + dnl +@@ -167,6 +167,9 @@ of type-component pairs. For example, --enable-mca-no-build=pml-ob1]) + # Second, set the DSO_all and STATIC_all variables. conflict + # resolution (prefer static) is done in the big loop below + # ++ # Exception here is the components of the accelerator framework, ++ # which by default are built to be dynamic, except for null. ++ # + AC_MSG_CHECKING([which components should be run-time loadable]) + if test "$enable_static" != "no"; then + DSO_all=0 +@@ -174,9 +177,6 @@ of type-component pairs. 
For example, --enable-mca-no-build=pml-ob1]) + elif test "$OPAL_ENABLE_DLOPEN_SUPPORT" = 0; then + DSO_all=0 + msg="none (dlopen disabled)" +- elif test -z "$enable_mca_dso"; then +- DSO_all=0 +- msg=default + elif test "$enable_mca_dso" = "no"; then + DSO_all=0 + msg=none +@@ -184,15 +184,19 @@ of type-component pairs. For example, --enable-mca-no-build=pml-ob1]) + DSO_all=1 + msg=all + else +- DSO_all=0 +- ifs_save="$IFS" +- IFS="${IFS}$PATH_SEPARATOR," +- msg= +- for item in $enable_mca_dso; do +- AS_VAR_SET([AS_TR_SH([DSO_$item])], [1]) +- msg="$item $msg" +- done +- IFS="$ifs_save" ++ msg= ++ if test -z "$enable_mca_dso"; then ++ enable_mca_dso="accelerator-cuda,accelerator-rocm,accelerator-ze,btl-smcuda,rcache-gpusm,rcache-rgpusm" ++ msg="(default)" ++ fi ++ DSO_all=0 ++ ifs_save="$IFS" ++ IFS="${IFS}$PATH_SEPARATOR," ++ for item in $enable_mca_dso; do ++ AS_VAR_SET([AS_TR_SH([DSO_$item])], [1]) ++ msg="$item $msg" ++ done ++ IFS="$ifs_save" + fi + AC_MSG_RESULT([$msg]) + unset msg +-- +2.35.3 + diff --git a/var/spack/repos/builtin/packages/openmpi/accelerator-cuda-fix-bug-in-makefile.patch b/var/spack/repos/builtin/packages/openmpi/accelerator-cuda-fix-bug-in-makefile.patch new file mode 100644 index 0000000000..f0681b6ad5 --- /dev/null +++ b/var/spack/repos/builtin/packages/openmpi/accelerator-cuda-fix-bug-in-makefile.patch @@ -0,0 +1,33 @@ +From be28fa6421094fcd0c544a6d457c6d748670959a Mon Sep 17 00:00:00 2001 +From: Howard Pritchard <howardp@lanl.gov> +Date: Mon, 13 Nov 2023 08:12:28 -0700 +Subject: [PATCH] accelerator/cuda: fix bug in makefile.am + +that prevents correct linkage of libcuda.so if it is in +a non standard location. 
+ +Related to https://github.com/spack/spack/pull/40913 + +Signed-off-by: Howard Pritchard <howardp@lanl.gov> + +diff --git a/opal/mca/accelerator/cuda/Makefile.am b/opal/mca/accelerator/cuda/Makefile.am +index 5646890b..2c533ece 100644 +--- a/opal/mca/accelerator/cuda/Makefile.am ++++ b/opal/mca/accelerator/cuda/Makefile.am +@@ -34,11 +34,11 @@ mcacomponentdir = $(opallibdir) + mcacomponent_LTLIBRARIES = $(component_install) + + mca_accelerator_cuda_la_SOURCES = $(sources) +-mca_accelerator_cuda_la_LDFLAGS = -module -avoid-version ++mca_accelerator_cuda_la_LDFLAGS = -module -avoid-version $(accelerator_cuda_LDFLAGS) + mca_accelerator_cuda_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \ + $(accelerator_cuda_LIBS) + + noinst_LTLIBRARIES = $(component_noinst) + libmca_accelerator_cuda_la_SOURCES =$(sources) +-libmca_accelerator_cuda_la_LDFLAGS = -module -avoid-version ++libmca_accelerator_cuda_la_LDFLAGS = -module -avoid-version $(accelerator_cuda_LDFLAGS) + libmca_accelerator_cuda_la_LIBADD = $(accelerator_cuda_LIBS) +-- +2.35.3 + diff --git a/var/spack/repos/builtin/packages/openmpi/btlsmcuda-fix-problem-with-makefile.patch b/var/spack/repos/builtin/packages/openmpi/btlsmcuda-fix-problem-with-makefile.patch new file mode 100644 index 0000000000..44b4d2766d --- /dev/null +++ b/var/spack/repos/builtin/packages/openmpi/btlsmcuda-fix-problem-with-makefile.patch @@ -0,0 +1,73 @@ +From 27672784304d4c944e2e3c7d526dfd77f021a113 Mon Sep 17 00:00:00 2001 +From: Howard Pritchard <howardp@lanl.gov> +Date: Thu, 16 Nov 2023 07:05:01 -0700 +Subject: [PATCH] btlsmcuda: fix problem with makefile + +when libcuda.so is in a non-standard location. 
+ +also fix rcache/gpusm and rcache/rgpsum + +Similar fix to that in #12065 + +Signed-off-by: Howard Pritchard <howardp@lanl.gov> + +diff --git a/opal/mca/btl/smcuda/Makefile.am b/opal/mca/btl/smcuda/Makefile.am +index f1a89df8..8ee37add 100644 +--- a/opal/mca/btl/smcuda/Makefile.am ++++ b/opal/mca/btl/smcuda/Makefile.am +@@ -51,7 +51,7 @@ endif + mcacomponentdir = $(opallibdir) + mcacomponent_LTLIBRARIES = $(component_install) + mca_btl_smcuda_la_SOURCES = $(libmca_btl_smcuda_la_sources) +-mca_btl_smcuda_la_LDFLAGS = -module -avoid-version ++mca_btl_smcuda_la_LDFLAGS = -module -avoid-version $(btl_smcuda_LDFLAGS) + mca_btl_smcuda_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \ + $(OPAL_TOP_BUILDDIR)/opal/mca/common/sm/lib@OPAL_LIB_NAME@mca_common_sm.la \ + $(btl_smcuda_LIBS) +@@ -59,6 +59,6 @@ mca_btl_smcuda_la_CPPFLAGS = $(btl_smcuda_CPPFLAGS) + + noinst_LTLIBRARIES = $(component_noinst) + libmca_btl_smcuda_la_SOURCES = $(libmca_btl_smcuda_la_sources) +-libmca_btl_smcuda_la_LDFLAGS = -module -avoid-version ++libmca_btl_smcuda_la_LDFLAGS = -module -avoid-version $(btl_smcuda_LDFLAGS) + libmca_btl_smcuda_la_CPPFLAGS = $(btl_smcuda_CPPFLAGS) + libmca_btl_smcuda_la_LIBADD = $(btl_smcuda_LIBS) +diff --git a/opal/mca/rcache/gpusm/Makefile.am b/opal/mca/rcache/gpusm/Makefile.am +index 5645e5ea..1ff63b35 100644 +--- a/opal/mca/rcache/gpusm/Makefile.am ++++ b/opal/mca/rcache/gpusm/Makefile.am +@@ -48,11 +48,11 @@ endif + mcacomponentdir = $(opallibdir) + mcacomponent_LTLIBRARIES = $(component_install) + mca_rcache_gpusm_la_SOURCES = $(sources) +-mca_rcache_gpusm_la_LDFLAGS = -module -avoid-version ++mca_rcache_gpusm_la_LDFLAGS = -module -avoid-version $(rcache_gpusm_LDFLAGS) + mca_rcache_gpusm_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \ + $(rcache_gpusm_LIBS) + + noinst_LTLIBRARIES = $(component_noinst) + libmca_rcache_gpusm_la_SOURCES = $(sources) +-libmca_rcache_gpusm_la_LDFLAGS = -module -avoid-version ++libmca_rcache_gpusm_la_LDFLAGS = -module 
-avoid-version $(rcache_gpusm_LDFLAGS) + libmca_rcache_gpusm_la_LIBADD = $(rcache_gpusm_LIBS) +diff --git a/opal/mca/rcache/rgpusm/Makefile.am b/opal/mca/rcache/rgpusm/Makefile.am +index 6d2fdbc3..dde81411 100644 +--- a/opal/mca/rcache/rgpusm/Makefile.am ++++ b/opal/mca/rcache/rgpusm/Makefile.am +@@ -46,11 +46,11 @@ endif + mcacomponentdir = $(opallibdir) + mcacomponent_LTLIBRARIES = $(component_install) + mca_rcache_rgpusm_la_SOURCES = $(sources) +-mca_rcache_rgpusm_la_LDFLAGS = -module -avoid-version ++mca_rcache_rgpusm_la_LDFLAGS = -module -avoid-version $(rcache_rgpusm_LDFLAGS) + mca_rcache_rgpusm_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la \ + $(rcache_rgpusm_LIBS) + + noinst_LTLIBRARIES = $(component_noinst) + libmca_rcache_rgpusm_la_SOURCES = $(sources) +-libmca_rcache_rgpusm_la_LDFLAGS = -module -avoid-version ++libmca_rcache_rgpusm_la_LDFLAGS = -module -avoid-version $(rcache_rgpusm_LDFLAGS) + libmca_rcache_rgpusm_la_LIBADD = $(rcache_rgpusm_LIBS) +-- +2.35.3 + diff --git a/var/spack/repos/builtin/packages/openmpi/fix-for-dlopen-missing-symbol-problem.patch b/var/spack/repos/builtin/packages/openmpi/fix-for-dlopen-missing-symbol-problem.patch new file mode 100644 index 0000000000..0a846b0326 --- /dev/null +++ b/var/spack/repos/builtin/packages/openmpi/fix-for-dlopen-missing-symbol-problem.patch @@ -0,0 +1,32 @@ +From 50731f03c1ae9d375bfc2771fc402d54fd22e276 Mon Sep 17 00:00:00 2001 +From: Howard Pritchard <howardp@lanl.gov> +Date: Sat, 4 Nov 2023 13:24:15 -0600 +Subject: [PATCH] spack:fix for dlopen missing symbol problem + +related to https://github.com/spack/spack/pull/40725 + +Signed-off-by: Howard Pritchard <howardp@lanl.gov> + +diff --git a/opal/mca/dl/dlopen/configure.m4 b/opal/mca/dl/dlopen/configure.m4 +index 07fda82001..4ae625b1fb 100644 +--- a/opal/mca/dl/dlopen/configure.m4 ++++ b/opal/mca/dl/dlopen/configure.m4 +@@ -27,7 +27,7 @@ AC_DEFUN([MCA_opal_dl_dlopen_CONFIG],[ + AC_CONFIG_FILES([opal/mca/dl/dlopen/Makefile]) + + 
OAC_CHECK_PACKAGE([dlopen], +- [dl_dlopen], ++ [opal_dl_dlopen], + [dlfcn.h], + [dl], + [dlopen], +@@ -38,5 +38,5 @@ AC_DEFUN([MCA_opal_dl_dlopen_CONFIG],[ + [$1], + [$2]) + +- AC_SUBST(dl_dlopen_LIBS) ++ AC_SUBST(opal_dl_dlopen_LIBS) + ]) +-- +2.39.3 + diff --git a/var/spack/repos/builtin/packages/openmpi/package.py b/var/spack/repos/builtin/packages/openmpi/package.py index 38bee519c8..38a64ac27d 100644 --- a/var/spack/repos/builtin/packages/openmpi/package.py +++ b/var/spack/repos/builtin/packages/openmpi/package.py @@ -44,11 +44,17 @@ class Openmpi(AutotoolsPackage, CudaPackage): # Current version( - "4.1.6", sha256="f740994485516deb63b5311af122c265179f5328a0d857a567b85db00b11e415" - ) # libmpi.so.40.30.6 + "5.0.1", sha256="e357043e65fd1b956a47d0dae6156a90cf0e378df759364936c1781f1a25ef80" + ) # libmpi.so.40.40.1 # Still supported version( + "5.0.0", sha256="9d845ca94bc1aeb445f83d98d238cd08f6ec7ad0f73b0f79ec1668dbfdacd613" + ) # libmpi.so.40.40.0 + version( + "4.1.6", sha256="f740994485516deb63b5311af122c265179f5328a0d857a567b85db00b11e415" + ) # libmpi.so.40.30.6 + version( "4.1.5", sha256="a640986bc257389dd379886fdae6264c8cfa56bc98b71ce3ae3dfbd8ce61dbe3" ) # libmpi.so.40.30.5 version( @@ -405,6 +411,14 @@ class Openmpi(AutotoolsPackage, CudaPackage): # To fix performance regressions introduced while fixing a bug in older # gcc versions on x86_64, Refs. open-mpi/ompi#8603 patch("opal_assembly_arch.patch", when="@4.0.0:4.0.5,4.1.0") + # To fix an error in Open MPI configury related to finding dl lib. + # This is specific to the 5.0.0 release. 
+ patch("fix-for-dlopen-missing-symbol-problem.patch", when="@5.0.0") + # Patches to accelerator CUDA component to link in libcuda + # when in non-standard location + patch("accelerator-cuda-fix-bug-in-makefile.patch", when="@5.0.0") + patch("btlsmcuda-fix-problem-with-makefile.patch", when="@5.0.0") + patch("accelerator-build-components-as-dso-s-by-default.patch", when="@5.0.0:5.0.1") variant( "fabrics", @@ -439,7 +453,7 @@ class Openmpi(AutotoolsPackage, CudaPackage): # Additional support options variant("atomics", default=False, description="Enable built-in atomics") variant("java", default=False, when="@1.7.4:", description="Build Java support") - variant("static", default=True, description="Build static libraries") + variant("static", default=False, description="Build static libraries") variant("sqlite3", default=False, when="@1.7.3:1", description="Build SQLite3 support") variant("vt", default=True, description="Build VampirTrace support") variant( @@ -472,7 +486,8 @@ class Openmpi(AutotoolsPackage, CudaPackage): description="Build deprecated support for the Singularity container", ) variant("lustre", default=False, description="Lustre filesystem library support") - variant("romio", default=True, description="Enable ROMIO support") + variant("romio", default=True, when="@:4", description="Enable ROMIO support") + variant("romio", default=False, when="@5:", description="Enable ROMIO support") variant("rsh", default=True, description="Enable rsh (openssh) process lifecycle management") variant( "orterunprefix", @@ -511,10 +526,9 @@ class Openmpi(AutotoolsPackage, CudaPackage): if sys.platform != "darwin": depends_on("numactl") - depends_on("autoconf @2.69:", type="build", when="@main") - depends_on("automake @1.13.4:", type="build", when="@main") - depends_on("libtool @2.4.2:", type="build", when="@main") - depends_on("m4", type="build", when="@main") + depends_on("autoconf @2.69:", type="build", when="@5.0.0:,main") + depends_on("automake @1.13.4:", 
type="build", when="@5.0.0:,main") + depends_on("libtool @2.4.2:", type="build", when="@5.0.0:,main") depends_on("perl", type="build") depends_on("pkgconfig", type="build") @@ -572,6 +586,8 @@ class Openmpi(AutotoolsPackage, CudaPackage): depends_on("openssh", type="run", when="+rsh") + depends_on("cuda", type=("build", "link", "run"), when="@5: +cuda") + conflicts("+cxx_exceptions", when="%nvhpc", msg="nvc does not ignore -fexceptions, but errors") # CUDA support was added in 1.7, and since the variant is part of the @@ -914,6 +930,11 @@ class Openmpi(AutotoolsPackage, CudaPackage): perl = which("perl") perl("autogen.pl") + @when("@5.0.0:5.0.1") + def autoreconf(self, spec, prefix): + perl = which("perl") + perl("autogen.pl", "--force") + def configure_args(self): spec = self.spec config_args = ["--enable-shared", "--disable-silent-rules", "--disable-sphinx"] @@ -1084,6 +1105,23 @@ class Openmpi(AutotoolsPackage, CudaPackage): if wrapper_ldflags: config_args.append("--with-wrapper-ldflags={0}".format(" ".join(wrapper_ldflags))) + # + # the Spack path padding feature causes issues with Open MPI's lex based parsing system + # used by the compiler wrappers. Crank up lex buffer to 1MB to handle this. + # see https://spack.readthedocs.io/en/latest/binary_caches.html#relocation + # + + if spec.satisfies("@5.0.0:"): + config_args.append("CFLAGS=-DYY_BUF_SIZE=1048576") + + # + # disable romio for 5.0.0 or newer if using Intel OneAPI owing to a problem + # building ZE related components of the romio packaged with this release + # + + # if spec.satisfies("@5.0.0:") and spec.satisfies("%oneapi"): + # config_args.append("--disable-io-romio") + return config_args @run_after("install", when="+wrapper-rpath") |