From 260a4c49042ac86c52ae9888d0228703aa9eed77 Mon Sep 17 00:00:00 2001 From: sknigh Date: Tue, 26 Nov 2019 10:20:17 -0800 Subject: Days since OpenMPI+UCX trashed my cluster: 0 (#13818) --- .../openmpi/fix-ucx-1.7.0-api-instability.patch | 254 +++++++++++++++++++++ .../repos/builtin/packages/openmpi/package.py | 1 + 2 files changed, 255 insertions(+) create mode 100644 var/spack/repos/builtin/packages/openmpi/fix-ucx-1.7.0-api-instability.patch (limited to 'var') diff --git a/var/spack/repos/builtin/packages/openmpi/fix-ucx-1.7.0-api-instability.patch b/var/spack/repos/builtin/packages/openmpi/fix-ucx-1.7.0-api-instability.patch new file mode 100644 index 0000000000..59d9b06d50 --- /dev/null +++ b/var/spack/repos/builtin/packages/openmpi/fix-ucx-1.7.0-api-instability.patch @@ -0,0 +1,254 @@ +diff --git a/opal/mca/btl/uct/btl_uct.h b/opal/mca/btl/uct/btl_uct.h +index 3875679443..0e4ec9a949 100644 +--- a/opal/mca/btl/uct/btl_uct.h ++++ b/opal/mca/btl/uct/btl_uct.h +@@ -12,6 +12,7 @@ + * All rights reserved. + * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights + * reserved. ++ * Copyright (c) 2019 Google, LLC. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow +@@ -85,6 +86,10 @@ struct mca_btl_uct_module_t { + /** array containing the am_tl and rdma_tl */ + mca_btl_uct_tl_t *comm_tls[2]; + ++#if UCT_API >= UCT_VERSION(1, 7) ++ uct_component_h uct_component; ++#endif ++ + /** registration cache */ + mca_rcache_base_module_t *rcache; + +diff --git a/opal/mca/btl/uct/btl_uct_amo.c b/opal/mca/btl/uct/btl_uct_amo.c +index f7d0232688..72398ce736 100644 +--- a/opal/mca/btl/uct/btl_uct_amo.c ++++ b/opal/mca/btl/uct/btl_uct_amo.c +@@ -110,7 +110,7 @@ int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end + mca_btl_uct_uct_completion_release (comp); + } + +- uct_rkey_release (&rkey); ++ mca_btl_uct_rkey_release (uct_btl, &rkey); + + return rc; + } +@@ -184,7 +184,7 @@ int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e + mca_btl_uct_uct_completion_release (comp); + } + +- uct_rkey_release (&rkey); ++ mca_btl_uct_rkey_release (uct_btl, &rkey); + + return rc; + } +diff --git a/opal/mca/btl/uct/btl_uct_component.c b/opal/mca/btl/uct/btl_uct_component.c +index f968cb9c31..bff160736b 100644 +--- a/opal/mca/btl/uct/btl_uct_component.c ++++ b/opal/mca/btl/uct/btl_uct_component.c +@@ -314,7 +314,12 @@ ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsig + return UCS_OK; + } + ++#if UCT_API >= UCT_VERSION(1, 7) ++static int mca_btl_uct_component_process_uct_md (uct_component_h component, uct_md_resource_desc_t *md_desc, ++ char **allowed_ifaces) ++#else + static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc, char **allowed_ifaces) ++#endif + { + mca_rcache_base_resources_t rcache_resources; + uct_tl_resource_desc_t *tl_desc; +@@ -348,8 +353,14 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc + + md = OBJ_NEW(mca_btl_uct_md_t); + ++ ++#if UCT_API >= UCT_VERSION(1, 7) ++ uct_md_config_read (component, NULL, NULL, &uct_config); ++ uct_md_open (component, md_desc->md_name, uct_config, &md->uct_md); ++#else + uct_md_config_read (md_desc->md_name, NULL, NULL, &uct_config); + uct_md_open (md_desc->md_name, uct_config, &md->uct_md); ++#endif + uct_config_release (uct_config); + + uct_md_query (md->uct_md, &md_attr); +@@ -375,6 +386,10 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc + return OPAL_ERR_NOT_AVAILABLE; + } + ++#if UCT_API >= UCT_VERSION(1, 7) ++ module->uct_component = component; ++#endif ++ + mca_btl_uct_component.modules[mca_btl_uct_component.module_count++] = module; + + /* NTH: a registration cache shouldn't be necessary when using UCT but there are measurable +@@ -400,6 +415,42 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc + return OPAL_SUCCESS; + } + ++#if UCT_API >= UCT_VERSION(1, 7) ++static int mca_btl_uct_component_process_uct_component (uct_component_h component, char **allowed_ifaces) ++{ ++ uct_component_attr_t attr = {.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME | ++ UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT}; ++ ucs_status_t ucs_status; ++ int rc; ++ ++ ucs_status = uct_component_query (component, &attr); ++ if (UCS_OK != ucs_status) { ++ return OPAL_ERROR; ++ } ++ ++ BTL_VERBOSE(("processing uct component %s", attr.name)); ++ ++ attr.md_resources = calloc (attr.md_resource_count, sizeof (*attr.md_resources)); ++ attr.field_mask |= UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES; ++ ucs_status = uct_component_query (component, &attr); ++ if (UCS_OK != ucs_status) { ++ return OPAL_ERROR; ++ } ++ ++ for (int i = 0 ; i < attr.md_resource_count ; ++i) { ++ rc = mca_btl_uct_component_process_uct_md (component, attr.md_resources + i, ++ allowed_ifaces); ++ if (OPAL_SUCCESS != rc) { ++ break; ++ } ++ } ++ ++ free (attr.md_resources); ++ ++ return OPAL_SUCCESS; ++} ++#endif /* UCT_API >= UCT_VERSION(1, 7) */ ++ + /* + * UCT component initialization: + * (1) read interface list from kernel and compare against component parameters +@@ -415,6 +466,7 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules, + struct mca_btl_base_module_t **base_modules; + uct_md_resource_desc_t *resources; + unsigned resource_count; ++ ucs_status_t ucs_status; + char **allowed_ifaces; + int rc; + +@@ -431,10 +483,32 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules, + return NULL; + } + +- uct_query_md_resources (&resources, &resource_count); +- + mca_btl_uct_component.module_count = 0; + ++#if UCT_API >= UCT_VERSION(1, 7) ++ uct_component_h *components; ++ unsigned num_components; ++ ++ ucs_status = uct_query_components(&components, &num_components); ++ if (UCS_OK != ucs_status) { ++ BTL_ERROR(("could not query UCT components")); ++ return NULL; ++ } ++ ++ /* generate all suitable btl modules */ ++ for (unsigned i = 0 ; i < num_components ; ++i) { ++ rc = mca_btl_uct_component_process_uct_component (components[i], allowed_ifaces); ++ if (OPAL_SUCCESS != rc) { ++ break; ++ } ++ } ++ ++ uct_release_component_list (components); ++ ++#else /* UCT 1.6 and older */ ++ ++ uct_query_md_resources (&resources, &resource_count); ++ + /* generate all suitable btl modules */ + for (unsigned i = 0 ; i < resource_count ; ++i) { + rc = mca_btl_uct_component_process_uct_md (resources + i, allowed_ifaces); +@@ -443,9 +517,11 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules, + } + } + +- opal_argv_free (allowed_ifaces); + uct_release_md_resource_list (resources); + ++#endif /* UCT_API >= UCT_VERSION(1, 7) */ ++ ++ opal_argv_free (allowed_ifaces); + mca_btl_uct_modex_send (); + + /* pass module array back to caller */ +diff --git a/opal/mca/btl/uct/btl_uct_rdma.c b/opal/mca/btl/uct/btl_uct_rdma.c +index 2d2d1c3f04..9ee9530f26 100644 +--- a/opal/mca/btl/uct/btl_uct_rdma.c ++++ b/opal/mca/btl/uct/btl_uct_rdma.c +@@ -132,7 +132,7 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi + + BTL_VERBOSE(("get issued. status = %d", ucs_status)); + +- uct_rkey_release (&rkey); ++ mca_btl_uct_rkey_release (uct_btl, &rkey); + + return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY; + } +@@ -237,7 +237,7 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi + mca_btl_uct_uct_completion_release (comp); + } + +- uct_rkey_release (&rkey); ++ mca_btl_uct_rkey_release (uct_btl, &rkey); + + return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY; + } +diff --git a/opal/mca/btl/uct/btl_uct_rdma.h b/opal/mca/btl/uct/btl_uct_rdma.h +index e9b0d6b19d..ab790371af 100644 +--- a/opal/mca/btl/uct/btl_uct_rdma.h ++++ b/opal/mca/btl/uct/btl_uct_rdma.h +@@ -55,8 +55,22 @@ static inline int mca_btl_uct_get_rkey (mca_btl_uct_module_t *module, + return rc; + } + ++#if UCT_API >= UCT_VERSION(1, 7) ++ ucs_status = uct_rkey_unpack (module->uct_component, (void *) remote_handle, rkey); ++#else + ucs_status = uct_rkey_unpack ((void *) remote_handle, rkey); ++#endif + return (UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERROR; + } + ++static inline void mca_btl_uct_rkey_release (mca_btl_uct_module_t *uct_btl, uct_rkey_bundle_t *rkey) ++{ ++#if UCT_API >= UCT_VERSION(1, 7) ++ uct_rkey_release (uct_btl->uct_component, rkey); ++#else ++ (void) uct_btl; ++ uct_rkey_release (rkey); ++#endif ++} ++ + #endif /* !defined(BTL_UCT_RDMA_H) */ +diff --git a/opal/mca/btl/uct/btl_uct_tl.c b/opal/mca/btl/uct/btl_uct_tl.c +index a711a41ce9..e69c769b41 100644 +--- a/opal/mca/btl/uct/btl_uct_tl.c ++++ b/opal/mca/btl/uct/btl_uct_tl.c +@@ -516,7 +516,13 @@ static int mca_btl_uct_evaluate_tl (mca_btl_uct_module_t *module, mca_btl_uct_tl + * come up with a better estimate. */ + + /* UCT bandwidth is in bytes/sec, BTL is in MB/sec */ ++#if UCT_API >= UCT_VERSION(1, 7) ++ module->super.btl_bandwidth = (uint32_t) ((MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.dedicated + ++ MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.shared / ++ (opal_process_info.num_local_peers + 1)) / 1048576.0); ++#else + module->super.btl_bandwidth = (uint32_t) (MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth / 1048576.0); ++#endif + /* TODO -- figure out how to translate UCT latency to us */ + module->super.btl_latency = 1; + } diff --git a/var/spack/repos/builtin/packages/openmpi/package.py b/var/spack/repos/builtin/packages/openmpi/package.py index d4107dff31..bb97627200 100644 --- a/var/spack/repos/builtin/packages/openmpi/package.py +++ b/var/spack/repos/builtin/packages/openmpi/package.py @@ -189,6 +189,7 @@ class Openmpi(AutotoolsPackage): patch('llnl-platforms.patch', when="@1.6.5") patch('configure.patch', when="@1.10.1") patch('fix_multidef_pmi_class.patch', when="@2.0.0:2.0.1") + patch('fix-ucx-1.7.0-api-instability.patch', when='@4.0.0:4.0.3') # Vader Bug: https://github.com/open-mpi/ompi/issues/5375 # Haven't release fix for 2.1.x -- cgit v1.2.3-60-g2f50