diff options
Diffstat (limited to 'var/spack/repos/builtin/packages/mfem')
-rw-r--r-- | var/spack/repos/builtin/packages/mfem/mfem-4.7-sundials-7.patch | 1129 | ||||
-rw-r--r-- | var/spack/repos/builtin/packages/mfem/mfem-4.7.patch | 102 | ||||
-rw-r--r-- | var/spack/repos/builtin/packages/mfem/package.py | 328 | ||||
-rwxr-xr-x | var/spack/repos/builtin/packages/mfem/test_builds.sh | 120 |
4 files changed, 1531 insertions, 148 deletions
diff --git a/var/spack/repos/builtin/packages/mfem/mfem-4.7-sundials-7.patch b/var/spack/repos/builtin/packages/mfem/mfem-4.7-sundials-7.patch new file mode 100644 index 0000000000..59fc95a3dd --- /dev/null +++ b/var/spack/repos/builtin/packages/mfem/mfem-4.7-sundials-7.patch @@ -0,0 +1,1129 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 0be4f5d65d..1f8e13a8ec 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -337,7 +337,10 @@ if (MFEM_USE_SUNDIALS) + if (MFEM_USE_HIP) + list(APPEND SUNDIALS_COMPONENTS NVector_Hip) + endif() +- find_package(SUNDIALS REQUIRED ${SUNDIALS_COMPONENTS}) ++ # The Core component was added in SUNDIALS v7, so we treat it as optional in ++ # order to support older versions. ++ find_package(SUNDIALS REQUIRED ${SUNDIALS_COMPONENTS} ++ OPTIONAL_COMPONENTS Core) + endif() + + # SuperLU_DIST can only be enabled in parallel +diff --git a/config/cmake/modules/FindSUNDIALS.cmake b/config/cmake/modules/FindSUNDIALS.cmake +index 9a624a9c51..3617df7b24 100644 +--- a/config/cmake/modules/FindSUNDIALS.cmake ++++ b/config/cmake/modules/FindSUNDIALS.cmake +@@ -31,4 +31,5 @@ mfem_find_package(SUNDIALS SUNDIALS SUNDIALS_DIR + ADD_COMPONENT CVODE "include" cvode/cvode.h "lib" sundials_cvode + ADD_COMPONENT CVODES "include" cvodes/cvodes.h "lib" sundials_cvodes + ADD_COMPONENT ARKODE "include" arkode/arkode.h "lib" sundials_arkode +- ADD_COMPONENT KINSOL "include" kinsol/kinsol.h "lib" sundials_kinsol) ++ ADD_COMPONENT KINSOL "include" kinsol/kinsol.h "lib" sundials_kinsol ++ ADD_COMPONENT Core "include" sundials/sundials_core.h "lib" sundials_core) +diff --git a/config/defaults.mk b/config/defaults.mk +index f107f360de..d89344b9e8 100644 +--- a/config/defaults.mk ++++ b/config/defaults.mk +@@ -284,6 +284,13 @@ endif + ifeq ($(MFEM_USE_HIP),YES) + SUNDIALS_LIB += -lsundials_nvechip + endif ++SUNDIALS_CORE_PAT = $(subst\ ++ @MFEM_DIR@,$(MFEM_DIR),$(SUNDIALS_DIR))/lib*/libsundials_core.* ++ifeq ($(MFEM_USE_SUNDIALS),YES) ++ ifneq ($(wildcard $(SUNDIALS_CORE_PAT)),) ++ SUNDIALS_LIB += -lsundials_core ++ endif ++endif + # If SUNDIALS was built with KLU: + # MFEM_USE_SUITESPARSE = YES + +diff --git a/linalg/sundials.cpp b/linalg/sundials.cpp +index 1f4c141477..c8982387aa 100644 +--- a/linalg/sundials.cpp ++++ b/linalg/sundials.cpp +@@ -95,7 +95,7 @@ MFEM_DEPRECATED void* CVodeCreate(int lmm, SUNContext) + + /// (DEPRECATED) Wrapper function for backwards compatibility with SUNDIALS + /// version < 6 +-MFEM_DEPRECATED void* ARKStepCreate(ARKRhsFn fe, ARKRhsFn fi, realtype t0, ++MFEM_DEPRECATED void* ARKStepCreate(ARKRhsFn fe, ARKRhsFn fi, sunrealtype t0, + N_Vector y0, SUNContext) + { + return ARKStepCreate(fe, fi, t0, y0); +@@ -127,7 +127,7 @@ MFEM_DEPRECATED N_Vector N_VNewEmpty_Parallel(MPI_Comm comm, + /// (DEPRECATED) Wrapper function for backwards compatibility with SUNDIALS + /// version < 6 + MFEM_DEPRECATED N_Vector SUN_Hip_OR_Cuda(N_VNewWithMemHelp)(sunindextype length, +- booleantype use_managed_mem, ++ sunbooleantype use_managed_mem, + SUNMemoryHelper helper, + SUNContext) + { +@@ -157,6 +157,16 @@ MFEM_DEPRECATED N_Vector N_VMake_MPIPlusX(MPI_Comm comm, N_Vector local_vector, + + #endif // SUNDIALS_VERSION_MAJOR < 6 + ++#if MFEM_SUNDIALS_VERSION < 70100 ++#define MFEM_ARKode(FUNC) ARKStep##FUNC ++#else ++#define MFEM_ARKode(FUNC) ARKode##FUNC ++#endif ++ ++// Macro STR(): expand the argument and add double quotes ++#define STR1(s) #s ++#define STR(s) STR1(s) ++ + + namespace mfem + { +@@ -187,11 +197,21 @@ SundialsMemHelper &Sundials::GetMemHelper() + Sundials::Sundials() + { + #ifdef MFEM_USE_MPI +- MPI_Comm communicator = MPI_COMM_WORLD; ++ int mpi_initialized = 0; ++ MPI_Initialized(&mpi_initialized); ++ MPI_Comm communicator = mpi_initialized ? MPI_COMM_WORLD : MPI_COMM_NULL; ++#if SUNDIALS_VERSION_MAJOR < 7 + int return_val = SUNContext_Create((void*) &communicator, &context); + #else ++ int return_val = SUNContext_Create(communicator, &context); ++#endif ++#else // #ifdef MFEM_USE_MPI ++#if SUNDIALS_VERSION_MAJOR < 7 + int return_val = SUNContext_Create(nullptr, &context); ++#else ++ int return_val = SUNContext_Create((SUNComm)(0), &context); + #endif ++#endif // #ifdef MFEM_USE_MPI + MFEM_VERIFY(return_val == 0, "Call to SUNContext_Create failed"); + SundialsMemHelper actual_helper(context); + memHelper = std::move(actual_helper); +@@ -250,7 +270,11 @@ int SundialsMemHelper::SundialsMemHelper_Alloc(SUNMemoryHelper helper, + #endif + ) + { ++#if (SUNDIALS_VERSION_MAJOR < 7) + SUNMemory sunmem = SUNMemoryNewEmpty(); ++#else ++ SUNMemory sunmem = SUNMemoryNewEmpty(helper->sunctx); ++#endif + + sunmem->ptr = NULL; + sunmem->own = SUNTRUE; +@@ -631,7 +655,7 @@ static int LSFree(SUNLinearSolver LS) + // --------------------------------------------------------------------------- + // CVODE interface + // --------------------------------------------------------------------------- +-int CVODESolver::RHS(realtype t, const N_Vector y, N_Vector ydot, ++int CVODESolver::RHS(sunrealtype t, const N_Vector y, N_Vector ydot, + void *user_data) + { + // At this point the up-to-date data for N_Vector y and ydot is on the device. +@@ -648,7 +672,8 @@ int CVODESolver::RHS(realtype t, const N_Vector y, N_Vector ydot, + return (0); + } + +-int CVODESolver::root(realtype t, N_Vector y, realtype *gout, void *user_data) ++int CVODESolver::root(sunrealtype t, N_Vector y, sunrealtype *gout, ++ void *user_data) + { + CVODESolver *self = static_cast<CVODESolver*>(user_data); + +@@ -668,8 +693,9 @@ void CVODESolver::SetRootFinder(int components, RootFunction func) + MFEM_VERIFY(flag == CV_SUCCESS, "error in SetRootFinder()"); + } + +-int CVODESolver::LinSysSetup(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, +- booleantype jok, booleantype *jcur, realtype gamma, ++int CVODESolver::LinSysSetup(sunrealtype t, N_Vector y, N_Vector fy, ++ SUNMatrix A, sunbooleantype jok, ++ sunbooleantype *jcur, sunrealtype gamma, + void*, N_Vector, N_Vector, N_Vector) + { + // Get data from N_Vectors +@@ -683,7 +709,7 @@ int CVODESolver::LinSysSetup(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, + } + + int CVODESolver::LinSysSolve(SUNLinearSolver LS, SUNMatrix, N_Vector x, +- N_Vector b, realtype tol) ++ N_Vector b, sunrealtype tol) + { + SundialsNVector mfem_x(x); + const SundialsNVector mfem_b(b); +@@ -859,7 +885,7 @@ void CVODESolver::UseSundialsLinearSolver() + if (LSA != NULL) { SUNLinSolFree(LSA); LSA = NULL; } + + // Create linear solver +- LSA = SUNLinSol_SPGMR(*Y, PREC_NONE, 0, Sundials::GetContext()); ++ LSA = SUNLinSol_SPGMR(*Y, SUN_PREC_NONE, 0, Sundials::GetContext()); + MFEM_VERIFY(LSA, "error in SUNLinSol_SPGMR()"); + + // Attach linear solver +@@ -1150,7 +1176,7 @@ void CVODESSolver::UseSundialsLinearSolverB() + if (LSB != NULL) { SUNLinSolFree(LSB); LSB = NULL; } + + // Set default linear solver (Newton is the default Nonlinear Solver) +- LSB = SUNLinSol_SPGMR(*yB, PREC_NONE, 0, Sundials::GetContext()); ++ LSB = SUNLinSol_SPGMR(*yB, SUN_PREC_NONE, 0, Sundials::GetContext()); + MFEM_VERIFY(LSB, "error in SUNLinSol_SPGMR()"); + + /* Attach the matrix and linear solver */ +@@ -1158,11 +1184,11 @@ void CVODESSolver::UseSundialsLinearSolverB() + MFEM_VERIFY(flag == CV_SUCCESS, "error in CVodeSetLinearSolverB()"); + } + +-int CVODESSolver::LinSysSetupB(realtype t, N_Vector y, N_Vector yB, ++int CVODESSolver::LinSysSetupB(sunrealtype t, N_Vector y, N_Vector yB, + N_Vector fyB, SUNMatrix AB, +- booleantype jokB, booleantype *jcurB, +- realtype gammaB, void *user_data, N_Vector tmp1, +- N_Vector tmp2, N_Vector tmp3) ++ sunbooleantype jokB, sunbooleantype *jcurB, ++ sunrealtype gammaB, void *user_data, ++ N_Vector tmp1, N_Vector tmp2, N_Vector tmp3) + { + // Get data from N_Vectors + const SundialsNVector mfem_y(y); +@@ -1178,7 +1204,7 @@ int CVODESSolver::LinSysSetupB(realtype t, N_Vector y, N_Vector yB, + } + + int CVODESSolver::LinSysSolveB(SUNLinearSolver LS, SUNMatrix AB, N_Vector yB, +- N_Vector Rb, realtype tol) ++ N_Vector Rb, sunrealtype tol) + { + SundialsNVector mfem_yB(yB); + const SundialsNVector mfem_Rb(Rb); +@@ -1216,7 +1242,7 @@ void CVODESSolver::SetWFTolerances(EWTFunction func) + + // CVODESSolver static functions + +-int CVODESSolver::RHSQ(realtype t, const N_Vector y, N_Vector qdot, ++int CVODESSolver::RHSQ(sunrealtype t, const N_Vector y, N_Vector qdot, + void *user_data) + { + CVODESSolver *self = static_cast<CVODESSolver*>(user_data); +@@ -1229,7 +1255,7 @@ int CVODESSolver::RHSQ(realtype t, const N_Vector y, N_Vector qdot, + return 0; + } + +-int CVODESSolver::RHSQB(realtype t, N_Vector y, N_Vector yB, N_Vector qBdot, ++int CVODESSolver::RHSQB(sunrealtype t, N_Vector y, N_Vector yB, N_Vector qBdot, + void *user_dataB) + { + CVODESSolver *self = static_cast<CVODESSolver*>(user_dataB); +@@ -1243,7 +1269,7 @@ int CVODESSolver::RHSQB(realtype t, N_Vector y, N_Vector yB, N_Vector qBdot, + return 0; + } + +-int CVODESSolver::RHSB(realtype t, N_Vector y, N_Vector yB, N_Vector yBdot, ++int CVODESSolver::RHSB(sunrealtype t, N_Vector y, N_Vector yB, N_Vector yBdot, + void *user_dataB) + { + CVODESSolver *self = static_cast<CVODESSolver*>(user_dataB); +@@ -1341,46 +1367,67 @@ CVODESSolver::~CVODESSolver() + // ARKStep interface + // --------------------------------------------------------------------------- + +-int ARKStepSolver::RHS1(realtype t, const N_Vector y, N_Vector ydot, ++int ARKStepSolver::RHS1(sunrealtype t, const N_Vector y, N_Vector result, + void *user_data) + { + // Get data from N_Vectors + const SundialsNVector mfem_y(y); +- SundialsNVector mfem_ydot(ydot); ++ SundialsNVector mfem_result(result); + ARKStepSolver *self = static_cast<ARKStepSolver*>(user_data); + +- // Compute f(t, y) in y' = f(t, y) or fe(t, y) in y' = fe(t, y) + fi(t, y) ++ // Compute either f(t, y) in one of ++ // 1. y' = f(t, y) ++ // 2. M y' = f(t, y) ++ // or fe(t, y) in one of ++ // 1. y' = fe(t, y) + fi(t, y) ++ // 2. M y' = fe(t, y) + fi(t, y) + self->f->SetTime(t); + if (self->rk_type == IMEX) + { + self->f->SetEvalMode(TimeDependentOperator::ADDITIVE_TERM_1); + } +- self->f->Mult(mfem_y, mfem_ydot); ++ if (self->f->isExplicit()) // ODE is in form 1 ++ { ++ self->f->Mult(mfem_y, mfem_result); ++ } ++ else // ODE is in form 2 ++ { ++ self->f->ExplicitMult(mfem_y, mfem_result); ++ } + + // Return success + return (0); + } + +-int ARKStepSolver::RHS2(realtype t, const N_Vector y, N_Vector ydot, ++int ARKStepSolver::RHS2(sunrealtype t, const N_Vector y, N_Vector result, + void *user_data) + { + // Get data from N_Vectors + const SundialsNVector mfem_y(y); +- SundialsNVector mfem_ydot(ydot); ++ SundialsNVector mfem_result(result); + ARKStepSolver *self = static_cast<ARKStepSolver*>(user_data); + +- // Compute fi(t, y) in y' = fe(t, y) + fi(t, y) ++ // Compute fi(t, y) in one of ++ // 1. y' = fe(t, y) + fi(t, y) (ODE is expressed in EXPLICIT form) ++ // 2. M y' = fe(t, y) + fi(y, t) (ODE is expressed in IMPLICIT form) + self->f->SetTime(t); + self->f->SetEvalMode(TimeDependentOperator::ADDITIVE_TERM_2); +- self->f->Mult(mfem_y, mfem_ydot); ++ if (self->f->isExplicit()) ++ { ++ self->f->Mult(mfem_y, mfem_result); ++ } ++ else ++ { ++ self->f->ExplicitMult(mfem_y, mfem_result); ++ } + + // Return success + return (0); + } + +-int ARKStepSolver::LinSysSetup(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, +- SUNMatrix, booleantype jok, booleantype *jcur, +- realtype gamma, ++int ARKStepSolver::LinSysSetup(sunrealtype t, N_Vector y, N_Vector fy, ++ SUNMatrix A, SUNMatrix, sunbooleantype jok, ++ sunbooleantype *jcur, sunrealtype gamma, + void*, N_Vector, N_Vector, N_Vector) + { + // Get data from N_Vectors +@@ -1398,7 +1445,7 @@ int ARKStepSolver::LinSysSetup(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, + } + + int ARKStepSolver::LinSysSolve(SUNLinearSolver LS, SUNMatrix, N_Vector x, +- N_Vector b, realtype tol) ++ N_Vector b, sunrealtype tol) + { + SundialsNVector mfem_x(x); + const SundialsNVector mfem_b(b); +@@ -1412,7 +1459,7 @@ int ARKStepSolver::LinSysSolve(SUNLinearSolver LS, SUNMatrix, N_Vector x, + return (self->f->SUNImplicitSolve(mfem_b, mfem_x, tol)); + } + +-int ARKStepSolver::MassSysSetup(realtype t, SUNMatrix M, ++int ARKStepSolver::MassSysSetup(sunrealtype t, SUNMatrix M, + void*, N_Vector, N_Vector, N_Vector) + { + ARKStepSolver *self = static_cast<ARKStepSolver*>(GET_CONTENT(M)); +@@ -1423,7 +1470,7 @@ int ARKStepSolver::MassSysSetup(realtype t, SUNMatrix M, + } + + int ARKStepSolver::MassSysSolve(SUNLinearSolver LS, SUNMatrix, N_Vector x, +- N_Vector b, realtype tol) ++ N_Vector b, sunrealtype tol) + { + SundialsNVector mfem_x(x); + const SundialsNVector mfem_b(b); +@@ -1443,7 +1490,7 @@ int ARKStepSolver::MassMult1(SUNMatrix M, N_Vector x, N_Vector v) + return (self->f->SUNMassMult(mfem_x, mfem_v)); + } + +-int ARKStepSolver::MassMult2(N_Vector x, N_Vector v, realtype t, ++int ARKStepSolver::MassMult2(N_Vector x, N_Vector v, sunrealtype t, + void* mtimes_data) + { + const SundialsNVector mfem_x(x); +@@ -1514,7 +1561,7 @@ void ARKStepSolver::Init(TimeDependentOperator &f_) + // Free existing solver memory and re-create with new vector size + if (resize) + { +- ARKStepFree(&sundials_mem); ++ MFEM_ARKode(Free)(&sundials_mem); + sundials_mem = NULL; + } + } +@@ -1552,12 +1599,15 @@ void ARKStepSolver::Init(TimeDependentOperator &f_) + MFEM_VERIFY(sundials_mem, "error in ARKStepCreate()"); + + // Attach the ARKStepSolver as user-defined data +- flag = ARKStepSetUserData(sundials_mem, this); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetUserData()"); ++ flag = MFEM_ARKode(SetUserData)(sundials_mem, this); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetUserData)) "()"); + + // Set default tolerances +- flag = ARKStepSStolerances(sundials_mem, default_rel_tol, default_abs_tol); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetSStolerances()"); ++ flag = MFEM_ARKode(SStolerances)(sundials_mem, default_rel_tol, ++ default_abs_tol); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SStolerances)) "()"); + + // If implicit, attach MFEM linear solver by default + if (use_implicit) { UseMFEMLinearSolver(); } +@@ -1567,7 +1617,7 @@ void ARKStepSolver::Init(TimeDependentOperator &f_) + reinit = true; + } + +-void ARKStepSolver::Step(Vector &x, double &t, double &dt) ++void ARKStepSolver::Step(Vector &x, real_t &t, real_t &dt) + { + Y->MakeRef(x, 0, x.Size()); + MFEM_VERIFY(Y->Size() == x.Size(), "size mismatch"); +@@ -1596,15 +1646,16 @@ void ARKStepSolver::Step(Vector &x, double &t, double &dt) + + // Integrate the system + double tout = t + dt; +- flag = ARKStepEvolve(sundials_mem, tout, *Y, &t, step_mode); +- MFEM_VERIFY(flag >= 0, "error in ARKStepEvolve()"); ++ flag = MFEM_ARKode(Evolve)(sundials_mem, tout, *Y, &t, step_mode); ++ MFEM_VERIFY(flag >= 0, "error in " STR(MFEM_ARKode(Evolve)) "()"); + + // Make sure host is up to date + Y->HostRead(); + + // Return the last incremental step size +- flag = ARKStepGetLastStep(sundials_mem, &dt); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepGetLastStep()"); ++ flag = MFEM_ARKode(GetLastStep)(sundials_mem, &dt); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(GetLastStep)) "()"); + } + + void ARKStepSolver::UseMFEMLinearSolver() +@@ -1630,12 +1681,14 @@ void ARKStepSolver::UseMFEMLinearSolver() + A->ops->destroy = MatDestroy; + + // Attach the linear solver and matrix +- flag = ARKStepSetLinearSolver(sundials_mem, LSA, A); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetLinearSolver()"); ++ flag = MFEM_ARKode(SetLinearSolver)(sundials_mem, LSA, A); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetLinearSolver)) "()"); + + // Set the linear system evaluation function +- flag = ARKStepSetLinSysFn(sundials_mem, ARKStepSolver::LinSysSetup); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetLinSysFn()"); ++ flag = MFEM_ARKode(SetLinSysFn)(sundials_mem, ARKStepSolver::LinSysSetup); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetLinSysFn)) "()"); + } + + void ARKStepSolver::UseSundialsLinearSolver() +@@ -1645,12 +1698,13 @@ void ARKStepSolver::UseSundialsLinearSolver() + if (LSA != NULL) { SUNLinSolFree(LSA); LSA = NULL; } + + // Create linear solver +- LSA = SUNLinSol_SPGMR(*Y, PREC_NONE, 0, Sundials::GetContext()); ++ LSA = SUNLinSol_SPGMR(*Y, SUN_PREC_NONE, 0, Sundials::GetContext()); + MFEM_VERIFY(LSA, "error in SUNLinSol_SPGMR()"); + + // Attach linear solver +- flag = ARKStepSetLinearSolver(sundials_mem, LSA, NULL); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetLinearSolver()"); ++ flag = MFEM_ARKode(SetLinearSolver)(sundials_mem, LSA, NULL); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetLinearSolver)) "()"); + } + + void ARKStepSolver::UseMFEMMassLinearSolver(int tdep) +@@ -1666,7 +1720,7 @@ void ARKStepSolver::UseMFEMMassLinearSolver(int tdep) + LSM->content = this; + LSM->ops->gettype = LSGetType; + LSM->ops->solve = ARKStepSolver::MassSysSolve; +- LSA->ops->free = LSFree; ++ LSM->ops->free = LSFree; + + M = SUNMatNewEmpty(Sundials::GetContext()); + MFEM_VERIFY(M, "error in SUNMatNewEmpty()"); +@@ -1677,12 +1731,17 @@ void ARKStepSolver::UseMFEMMassLinearSolver(int tdep) + M->ops->destroy = MatDestroy; + + // Attach the linear solver and matrix +- flag = ARKStepSetMassLinearSolver(sundials_mem, LSM, M, tdep); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetLinearSolver()"); ++ flag = MFEM_ARKode(SetMassLinearSolver)(sundials_mem, LSM, M, tdep); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetMassLinearSolver)) "()"); + + // Set the linear system function +- flag = ARKStepSetMassFn(sundials_mem, ARKStepSolver::MassSysSetup); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetMassFn()"); ++ flag = MFEM_ARKode(SetMassFn)(sundials_mem, ARKStepSolver::MassSysSetup); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetMassFn)) "()"); ++ ++ // Check that the ODE is not expressed in EXPLICIT form ++ MFEM_VERIFY(!f->isExplicit(), "ODE operator is expressed in EXPLICIT form") + } + + void ARKStepSolver::UseSundialsMassLinearSolver(int tdep) +@@ -1692,17 +1751,22 @@ void ARKStepSolver::UseSundialsMassLinearSolver(int tdep) + if (LSM != NULL) { SUNLinSolFree(LSM); LSM = NULL; } + + // Create linear solver +- LSM = SUNLinSol_SPGMR(*Y, PREC_NONE, 0, Sundials::GetContext()); ++ LSM = SUNLinSol_SPGMR(*Y, SUN_PREC_NONE, 0, Sundials::GetContext()); + MFEM_VERIFY(LSM, "error in SUNLinSol_SPGMR()"); + + // Attach linear solver +- flag = ARKStepSetMassLinearSolver(sundials_mem, LSM, NULL, tdep); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetMassLinearSolver()"); ++ flag = MFEM_ARKode(SetMassLinearSolver)(sundials_mem, LSM, NULL, tdep); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetMassLinearSolver)) "()"); + + // Attach matrix multiplication function +- flag = ARKStepSetMassTimes(sundials_mem, NULL, ARKStepSolver::MassMult2, +- this); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetMassTimes()"); ++ flag = MFEM_ARKode(SetMassTimes)(sundials_mem, NULL, ++ ARKStepSolver::MassMult2, this); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetMassTimes)) "()"); ++ ++ // Check that the ODE is not expressed in EXPLICIT form ++ MFEM_VERIFY(!f->isExplicit(), "ODE operator is expressed in EXPLICIT form") + } + + void ARKStepSolver::SetStepMode(int itask) +@@ -1712,20 +1776,23 @@ void ARKStepSolver::SetStepMode(int itask) + + void ARKStepSolver::SetSStolerances(double reltol, double abstol) + { +- flag = ARKStepSStolerances(sundials_mem, reltol, abstol); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSStolerances()"); ++ flag = MFEM_ARKode(SStolerances)(sundials_mem, reltol, abstol); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SStolerances)) "()"); + } + + void ARKStepSolver::SetMaxStep(double dt_max) + { +- flag = ARKStepSetMaxStep(sundials_mem, dt_max); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetMaxStep()"); ++ flag = MFEM_ARKode(SetMaxStep)(sundials_mem, dt_max); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetMaxStep)) "()"); + } + + void ARKStepSolver::SetOrder(int order) + { +- flag = ARKStepSetOrder(sundials_mem, order); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetOrder()"); ++ flag = MFEM_ARKode(SetOrder)(sundials_mem, order); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetOrder)) "()"); + } + + void ARKStepSolver::SetERKTableNum(ARKODE_ERKTableID table_id) +@@ -1749,8 +1816,9 @@ void ARKStepSolver::SetIMEXTableNum(ARKODE_ERKTableID etable_id, + + void ARKStepSolver::SetFixedStep(double dt) + { +- flag = ARKStepSetFixedStep(sundials_mem, dt); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepSetFixedStep()"); ++ flag = MFEM_ARKode(SetFixedStep)(sundials_mem, dt); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(SetFixedStep)) "()"); + } + + void ARKStepSolver::PrintInfo() const +@@ -1772,18 +1840,19 @@ void ARKStepSolver::PrintInfo() const + &netfails); + MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepGetTimestepperStats()"); + +- flag = ARKStepGetStepStats(sundials_mem, +- &nsteps, +- &hinused, +- &hlast, +- &hcur, +- &tcur); ++ flag = MFEM_ARKode(GetStepStats)(sundials_mem, ++ &nsteps, ++ &hinused, ++ &hlast, ++ &hcur, ++ &tcur); + + // Get nonlinear solver stats +- flag = ARKStepGetNonlinSolvStats(sundials_mem, +- &nniters, +- &nncfails); +- MFEM_VERIFY(flag == ARK_SUCCESS, "error in ARKStepGetNonlinSolvStats()"); ++ flag = MFEM_ARKode(GetNonlinSolvStats)(sundials_mem, ++ &nniters, ++ &nncfails); ++ MFEM_VERIFY(flag == ARK_SUCCESS, ++ "error in " STR(MFEM_ARKode(GetNonlinSolvStats)) "()"); + + mfem::out << + "ARKStep:\n" +@@ -1811,7 +1880,7 @@ ARKStepSolver::~ARKStepSolver() + SUNMatDestroy(A); + SUNLinSolFree(LSA); + SUNNonlinSolFree(NLS); +- ARKStepFree(&sundials_mem); ++ MFEM_ARKode(Free)(&sundials_mem); + } + + // --------------------------------------------------------------------------- +@@ -1834,7 +1903,7 @@ int KINSolver::Mult(const N_Vector u, N_Vector fu, void *user_data) + + // Wrapper for computing Jacobian-vector products + int KINSolver::GradientMult(N_Vector v, N_Vector Jv, N_Vector u, +- booleantype *new_u, void *user_data) ++ sunbooleantype *new_u, void *user_data) + { + const SundialsNVector mfem_v(v); + SundialsNVector mfem_Jv(Jv); +@@ -1874,7 +1943,7 @@ int KINSolver::LinSysSetup(N_Vector u, N_Vector, SUNMatrix J, + + // Wrapper for solving linear systems J u = b + int KINSolver::LinSysSolve(SUNLinearSolver LS, SUNMatrix, N_Vector u, +- N_Vector b, realtype) ++ N_Vector b, sunrealtype) + { + SundialsNVector mfem_u(u), mfem_b(b); + KINSolver *self = static_cast<KINSolver*>(GET_CONTENT(LS)); +@@ -1926,28 +1995,36 @@ int KINSolver::PrecSolve(N_Vector uu, + + KINSolver::KINSolver(int strategy, bool oper_grad) + : global_strategy(strategy), use_oper_grad(oper_grad), y_scale(NULL), +- f_scale(NULL), jacobian(NULL), maa(0) ++ f_scale(NULL), jacobian(NULL) + { + Y = new SundialsNVector(); + y_scale = new SundialsNVector(); + f_scale = new SundialsNVector(); + + // Default abs_tol and print_level ++#if MFEM_SUNDIALS_VERSION < 70000 + abs_tol = pow(UNIT_ROUNDOFF, 1.0/3.0); ++#else ++ abs_tol = pow(SUN_UNIT_ROUNDOFF, 1.0/3.0); ++#endif + print_level = 0; + } + + #ifdef MFEM_USE_MPI + KINSolver::KINSolver(MPI_Comm comm, int strategy, bool oper_grad) + : global_strategy(strategy), use_oper_grad(oper_grad), y_scale(NULL), +- f_scale(NULL), jacobian(NULL), maa(0) ++ f_scale(NULL), jacobian(NULL) + { + Y = new SundialsNVector(comm); + y_scale = new SundialsNVector(comm); + f_scale = new SundialsNVector(comm); + + // Default abs_tol and print_level ++#if MFEM_SUNDIALS_VERSION < 70000 + abs_tol = pow(UNIT_ROUNDOFF, 1.0/3.0); ++#else ++ abs_tol = pow(SUN_UNIT_ROUNDOFF, 1.0/3.0); ++#endif + print_level = 0; + } + #endif +@@ -2019,11 +2096,22 @@ void KINSolver::SetOperator(const Operator &op) + sundials_mem = KINCreate(Sundials::GetContext()); + MFEM_VERIFY(sundials_mem, "Error in KINCreate()."); + +- // Set number of acceleration vectors +- if (maa > 0) ++ // Enable Anderson Acceleration ++ if (aa_n > 0) + { +- flag = KINSetMAA(sundials_mem, maa); ++ flag = KINSetMAA(sundials_mem, aa_n); + MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetMAA()"); ++ ++ flag = KINSetDelayAA(sundials_mem, aa_delay); ++ MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetDelayAA()"); ++ ++ flag = KINSetDampingAA(sundials_mem, aa_damping); ++ MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetDampingAA()"); ++ ++#if SUNDIALS_VERSION_MAJOR >= 6 ++ flag = KINSetOrthAA(sundials_mem, aa_orth); ++ MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetOrthAA()"); ++#endif + } + + // Initialize KINSOL +@@ -2034,6 +2122,9 @@ void KINSolver::SetOperator(const Operator &op) + flag = KINSetUserData(sundials_mem, this); + MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetUserData()"); + ++ flag = KINSetDamping(sundials_mem, fp_damping); ++ MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetDamping()"); ++ + // Set the linear solver + if (prec || jfnk) + { +@@ -2045,7 +2136,7 @@ void KINSolver::SetOperator(const Operator &op) + if (A != NULL) { SUNMatDestroy(A); A = NULL; } + if (LSA != NULL) { SUNLinSolFree(LSA); LSA = NULL; } + +- LSA = SUNLinSol_SPGMR(*Y, PREC_NONE, 0, Sundials::GetContext()); ++ LSA = SUNLinSol_SPGMR(*Y, SUN_PREC_NONE, 0, Sundials::GetContext()); + MFEM_VERIFY(LSA, "error in SUNLinSol_SPGMR()"); + + flag = KINSetLinearSolver(sundials_mem, LSA, NULL); +@@ -2114,12 +2205,12 @@ void KINSolver::SetJFNKSolver(Solver &solver) + if (LSA != NULL) { SUNLinSolFree(LSA); LSA = NULL; } + + // Setup FGMRES +- LSA = SUNLinSol_SPFGMR(*Y, prec ? PREC_RIGHT : PREC_NONE, maxli, ++ LSA = SUNLinSol_SPFGMR(*Y, prec ? SUN_PREC_RIGHT : SUN_PREC_NONE, maxli, + Sundials::GetContext()); + MFEM_VERIFY(LSA, "error in SUNLinSol_SPFGMR()"); + + flag = SUNLinSol_SPFGMRSetMaxRestarts(LSA, maxlrs); +- MFEM_VERIFY(flag == SUNLS_SUCCESS, "error in SUNLinSol_SPFGMR()"); ++ MFEM_VERIFY(flag == SUN_SUCCESS, "error in SUNLinSol_SPFGMR()"); + + flag = KINSetLinearSolver(sundials_mem, LSA, NULL); + MFEM_VERIFY(flag == KIN_SUCCESS, "error in KINSetLinearSolver()"); +@@ -2145,15 +2236,52 @@ void KINSolver::SetMaxSetupCalls(int max_calls) + MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetMaxSetupCalls()"); + } + +-void KINSolver::SetMAA(int m_aa) ++void KINSolver::EnableAndersonAcc(int n, int orth, int delay, double damping) + { +- // Store internally as maa must be set before calling KINInit() to +- // set the maximum acceleration space size. +- maa = m_aa; +- if (sundials_mem) ++ if (sundials_mem != nullptr) + { +- flag = KINSetMAA(sundials_mem, maa); ++ if (aa_n < n) ++ { ++ MFEM_ABORT("Subsequent calls to EnableAndersonAcc() must set" ++ " the subspace size to less or equal to the initially requested size." ++ " If SetOperator() has already been called, the subspace size can't be" ++ " increased."); ++ } ++ ++ flag = KINSetMAA(sundials_mem, n); + MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetMAA()"); ++ ++ flag = KINSetDelayAA(sundials_mem, delay); ++ MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetDelayAA()"); ++ ++ flag = KINSetDampingAA(sundials_mem, damping); ++ MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetDampingAA()"); ++ ++#if SUNDIALS_VERSION_MAJOR >= 6 ++ flag = KINSetOrthAA(sundials_mem, orth); ++ MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetOrthAA()"); ++#else ++ if (orth != KIN_ORTH_MGS) ++ { ++ MFEM_WARNING("SUNDIALS < v6 does not support setting the Anderson" ++ " acceleration orthogonalization routine!"); ++ } ++#endif ++ } ++ ++ aa_n = n; ++ aa_delay = delay; ++ aa_damping = damping; ++ aa_orth = orth; ++} ++ ++void KINSolver::SetDamping(double damping) ++{ ++ fp_damping = damping; ++ if (sundials_mem) ++ { ++ flag = KINSetDamping(sundials_mem, fp_damping); ++ MFEM_ASSERT(flag == KIN_SUCCESS, "error in KINSetDamping()"); + } + } + +@@ -2239,18 +2367,21 @@ void KINSolver::Mult(Vector &x, + + if (rank == 0) + { ++#if MFEM_SUNDIALS_VERSION < 70000 + flag = KINSetPrintLevel(sundials_mem, print_level); + MFEM_VERIFY(flag == KIN_SUCCESS, "KINSetPrintLevel() failed!"); ++#endif ++ // NOTE: there is no KINSetPrintLevel in SUNDIALS v7! + + #ifdef SUNDIALS_BUILD_WITH_MONITORING + if (jfnk && print_level) + { + flag = SUNLinSolSetInfoFile_SPFGMR(LSA, stdout); +- MFEM_VERIFY(flag == SUNLS_SUCCESS, ++ MFEM_VERIFY(flag == SUN_SUCCESS, + "error in SUNLinSolSetInfoFile_SPFGMR()"); + + flag = SUNLinSolSetPrintLevel_SPFGMR(LSA, 1); +- MFEM_VERIFY(flag == SUNLS_SUCCESS, ++ MFEM_VERIFY(flag == SUN_SUCCESS, + "error in SUNLinSolSetPrintLevel_SPFGMR()"); + } + #endif +diff --git a/linalg/sundials.hpp b/linalg/sundials.hpp +index f34b4deaf7..08a908c24c 100644 +--- a/linalg/sundials.hpp ++++ b/linalg/sundials.hpp +@@ -54,6 +54,10 @@ + + #include <functional> + ++#define MFEM_SUNDIALS_VERSION \ ++ (SUNDIALS_VERSION_MAJOR*10000 + SUNDIALS_VERSION_MINOR*100 + \ ++ SUNDIALS_VERSION_PATCH) ++ + #if (SUNDIALS_VERSION_MAJOR < 6) + + /// (DEPRECATED) Map SUNDIALS version >= 6 datatypes and constants to +@@ -68,7 +72,30 @@ constexpr ARKODE_ERKTableID ARKODE_FEHLBERG_13_7_8 = FEHLBERG_13_7_8; + /// arbitrary type for more compact backwards compatibility + using SUNContext = void*; + +-#endif // SUNDIALS_VERSION_MAJOR < 6 ++/// 'sunrealtype' was first introduced in v6.0.0 ++typedef realtype sunrealtype; ++/// 'sunbooleantype' was first introduced in v6.0.0 ++typedef booleantype sunbooleantype; ++ ++/// New constant names introduced in v6.0.0 ++enum { SUN_PREC_NONE, SUN_PREC_LEFT, SUN_PREC_RIGHT, SUN_PREC_BOTH }; ++ ++// KIN_ORTH_MGS was introduced in SUNDIALS v6; here, we define it just so that ++// it can be used as the default option in the second parameter of ++// KINSolver::EnableAndersonAcc -- the actual value of the parameter will be ++// ignored when using SUNDIALS < v6. ++#define KIN_ORTH_MGS 0 ++ ++#endif // #if SUNDIALS_VERSION_MAJOR < 6 ++ ++#if (SUNDIALS_VERSION_MAJOR < 7) ++ ++/** @brief The enum constant SUN_SUCCESS was added in v7 as a replacement of ++ various *_SUCCESS macros that were removed in v7. */ ++enum { SUN_SUCCESS = 0 }; ++ ++#endif // #if SUNDIALS_VERSION_MAJOR < 7 ++ + + namespace mfem + { +@@ -238,7 +265,14 @@ public: + + #ifdef MFEM_USE_MPI + /// Returns the MPI communicator for the internal N_Vector x. +- inline MPI_Comm GetComm() const { return *static_cast<MPI_Comm*>(N_VGetCommunicator(x)); } ++ inline MPI_Comm GetComm() const ++ { ++#if SUNDIALS_VERSION_MAJOR < 7 ++ return *static_cast<MPI_Comm*>(N_VGetCommunicator(x)); ++#else ++ return N_VGetCommunicator(x); ++#endif ++ } + + /// Returns the MPI global length for the internal N_Vector x. + inline long GlobalSize() const { return N_VGetLength(x); } +@@ -390,24 +424,26 @@ protected: + int root_components; /// Number of components in gout + + /// Wrapper to compute the ODE rhs function. +- static int RHS(realtype t, const N_Vector y, N_Vector ydot, void *user_data); ++ static int RHS(sunrealtype t, const N_Vector y, N_Vector ydot, ++ void *user_data); + + /// Setup the linear system $ A x = b $. +- static int LinSysSetup(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, +- booleantype jok, booleantype *jcur, +- realtype gamma, void *user_data, N_Vector tmp1, ++ static int LinSysSetup(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix A, ++ sunbooleantype jok, sunbooleantype *jcur, ++ sunrealtype gamma, void *user_data, N_Vector tmp1, + N_Vector tmp2, N_Vector tmp3); + + /// Solve the linear system $ A x = b $. + static int LinSysSolve(SUNLinearSolver LS, SUNMatrix A, N_Vector x, +- N_Vector b, realtype tol); ++ N_Vector b, sunrealtype tol); + + /// Prototype to define root finding for CVODE +- static int root(realtype t, N_Vector y, realtype *gout, void *user_data); ++ static int root(sunrealtype t, N_Vector y, sunrealtype *gout, ++ void *user_data); + + /// Typedef for root finding functions +- typedef std::function<int(realtype t, Vector y, Vector gout, CVODESolver *)> +- RootFunction; ++ typedef std::function<int(sunrealtype t, Vector y, Vector gout, ++ CVODESolver *)> RootFunction; + + /// A class member to facilitate pointing to a user-specified root function + RootFunction root_func; +@@ -415,7 +451,8 @@ protected: + /// Typedef declaration for error weight functions + typedef std::function<int(Vector y, Vector w, CVODESolver*)> EWTFunction; + +- /// A class member to facilitate pointing to a user-specified error weight function ++ /** @brief A class member to facilitate pointing to a user-specified error ++ weight function */ + EWTFunction ewt_func; + + public: +@@ -449,7 +486,7 @@ public: + @note If this method is called a second time with a different problem + size, then any non-default user-set options will be lost and will need + to be set again. */ +- void Init(TimeDependentOperator &f_); ++ void Init(TimeDependentOperator &f_) override; + + /// Integrate the ODE with CVODE using the specified step mode. + /** @param[in,out] x On output, the solution vector at the requested output +@@ -460,7 +497,7 @@ public: + @note On input, the values of @a t and @a dt are used to compute desired + output time for the integration, tout = @a t + @a dt. + */ +- virtual void Step(Vector &x, double &t, double &dt); ++ void Step(Vector &x, double &t, double &dt) override; + + /** @brief Attach the linear system setup and solve methods from the + TimeDependentOperator i.e., SUNImplicitSetup() and SUNImplicitSolve() to +@@ -525,14 +562,15 @@ protected: + int indexB; ///< backward problem index + + /// Wrapper to compute the ODE RHS Quadrature function. +- static int RHSQ(realtype t, const N_Vector y, N_Vector qdot, void *user_data); ++ static int RHSQ(sunrealtype t, const N_Vector y, N_Vector qdot, ++ void *user_data); + + /// Wrapper to compute the ODE RHS backward function. +- static int RHSB(realtype t, N_Vector y, ++ static int RHSB(sunrealtype t, N_Vector y, + N_Vector yB, N_Vector yBdot, void *user_dataB); + + /// Wrapper to compute the ODE RHS Backwards Quadrature function. +- static int RHSQB(realtype t, N_Vector y, N_Vector yB, ++ static int RHSQB(sunrealtype t, N_Vector y, N_Vector yB, + N_Vector qBdot, void *user_dataB); + + /// Error control function +@@ -586,7 +624,7 @@ public: + + @note On input, the values of t and dt are used to compute desired + output time for the integration, tout = t + dt. */ +- virtual void Step(Vector &x, double &t, double &dt); ++ void Step(Vector &x, double &t, double &dt) override; + + /// Solve one adjoint time step + virtual void StepB(Vector &w, double &t, double &dt); +@@ -648,15 +686,15 @@ public: + void SetSVtolerancesB(double reltol, Vector abstol); + + /// Setup the linear system A x = b +- static int LinSysSetupB(realtype t, N_Vector y, N_Vector yB, N_Vector fyB, ++ static int LinSysSetupB(sunrealtype t, N_Vector y, N_Vector yB, N_Vector fyB, + SUNMatrix A, +- booleantype jok, booleantype *jcur, +- realtype gamma, void *user_data, N_Vector tmp1, ++ sunbooleantype jok, sunbooleantype *jcur, ++ sunrealtype gamma, void *user_data, N_Vector tmp1, + N_Vector tmp2, N_Vector tmp3); + + /// Solve the linear system A x = b + static int LinSysSolveB(SUNLinearSolver LS, SUNMatrix A, N_Vector x, +- N_Vector b, realtype tol); ++ N_Vector b, sunrealtype tol); + + + /// Destroy the associated CVODES memory and SUNDIALS objects. +@@ -689,33 +727,35 @@ protected: + RHS1 is explicit RHS and RHS2 the implicit RHS for IMEX integration. When + purely implicit or explicit only RHS1 is used. */ + ///@{ +- static int RHS1(realtype t, const N_Vector y, N_Vector ydot, void *user_data); +- static int RHS2(realtype t, const N_Vector y, N_Vector ydot, void *user_data); ++ static int RHS1(sunrealtype t, const N_Vector y, N_Vector ydot, ++ void *user_data); ++ static int RHS2(sunrealtype t, const N_Vector y, N_Vector ydot, ++ void *user_data); + ///@} + + /// Setup the linear system $ A x = b $. +- static int LinSysSetup(realtype t, N_Vector y, N_Vector fy, SUNMatrix A, +- SUNMatrix M, booleantype jok, booleantype *jcur, +- realtype gamma, void *user_data, N_Vector tmp1, ++ static int LinSysSetup(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix A, ++ SUNMatrix M, sunbooleantype jok, sunbooleantype *jcur, ++ sunrealtype gamma, void *user_data, N_Vector tmp1, + N_Vector tmp2, N_Vector tmp3); + + /// Solve the linear system $ A x = b $. + static int LinSysSolve(SUNLinearSolver LS, SUNMatrix A, N_Vector x, +- N_Vector b, realtype tol); ++ N_Vector b, sunrealtype tol); + + /// Setup the linear system $ M x = b $. +- static int MassSysSetup(realtype t, SUNMatrix M, void *user_data, ++ static int MassSysSetup(sunrealtype t, SUNMatrix M, void *user_data, + N_Vector tmp1, N_Vector tmp2, N_Vector tmp3); + + /// Solve the linear system $ M x = b $. + static int MassSysSolve(SUNLinearSolver LS, SUNMatrix M, N_Vector x, +- N_Vector b, realtype tol); ++ N_Vector b, sunrealtype tol); + + /// Compute the matrix-vector product $ v = M x $. + static int MassMult1(SUNMatrix M, N_Vector x, N_Vector v); + + /// Compute the matrix-vector product $v = M_t x $ at time t. +- static int MassMult2(N_Vector x, N_Vector v, realtype t, ++ static int MassMult2(N_Vector x, N_Vector v, sunrealtype t, + void* mtimes_data); + + public: +@@ -751,7 +791,7 @@ public: + @note If this method is called a second time with a different problem + size, then any non-default user-set options will be lost and will need + to be set again. */ +- void Init(TimeDependentOperator &f_); ++ void Init(TimeDependentOperator &f_) override; + + /// Integrate the ODE with ARKode using the specified step mode. + /** +@@ -763,7 +803,7 @@ public: + @note On input, the values of @a t and @a dt are used to compute desired + output time for the integration, tout = @a t + @a dt. + */ +- virtual void Step(Vector &x, double &t, double &dt); ++ void Step(Vector &x, real_t &t, real_t &dt) override; + + /** @brief Attach the linear system setup and solve methods from the + TimeDependentOperator i.e., SUNImplicitSetup() and SUNImplicitSolve() to +@@ -850,18 +890,22 @@ protected: + bool use_oper_grad; ///< use the Jv prod function + mutable SundialsNVector *y_scale, *f_scale; ///< scaling vectors + const Operator *jacobian; ///< stores oper->GetGradient() +- int maa; ///< number of acceleration vectors +- bool jfnk = false; ///< enable JFNK +- Vector wrk; ///< Work vector needed for the JFNK PC +- int maxli = 5; ///< Maximum linear iterations +- int maxlrs = 0; ///< Maximum linear solver restarts ++ int aa_n = 0; ///< number of acceleration vectors ++ int aa_delay; ///< Anderson Acceleration delay ++ double aa_damping; ///< Anderson Acceleration damping ++ int aa_orth; ///< Anderson Acceleration orthogonalization routine ++ double fp_damping = 1.0; ///< Fixed Point or Picard damping parameter ++ bool jfnk = false; ///< enable JFNK ++ Vector wrk; ///< Work vector needed for the JFNK PC ++ int maxli = 5; ///< Maximum linear iterations ++ int maxlrs = 0; ///< Maximum linear solver restarts + + /// Wrapper to compute the nonlinear residual $ F(u) = 0 $. + static int Mult(const N_Vector u, N_Vector fu, void *user_data); + + /// Wrapper to compute the Jacobian-vector product $ J(u) v = Jv $. + static int GradientMult(N_Vector v, N_Vector Jv, N_Vector u, +- booleantype *new_u, void *user_data); ++ sunbooleantype *new_u, void *user_data); + + /// Setup the linear system $ J u = b $. + static int LinSysSetup(N_Vector u, N_Vector fu, SUNMatrix J, +@@ -869,7 +913,7 @@ protected: + + /// Solve the linear system $ J u = b $. + static int LinSysSolve(SUNLinearSolver LS, SUNMatrix J, N_Vector u, +- N_Vector b, realtype tol); ++ N_Vector b, sunrealtype tol); + + /// Setup the preconditioner. + static int PrecSetup(N_Vector uu, +@@ -916,7 +960,7 @@ public: + /** @note If this method is called a second time with a different problem + size, then non-default KINSOL-specific options will be lost and will need + to be set again. */ +- virtual void SetOperator(const Operator &op); ++ void SetOperator(const Operator &op) override; + + /// Set the linear solver for inverting the Jacobian. + /** @note This function assumes that Operator::GetGradient(const Vector &) +@@ -924,10 +968,10 @@ public: + SetOperator(const Operator &). + + This method must be called after SetOperator(). */ +- virtual void SetSolver(Solver &solver); ++ void SetSolver(Solver &solver) override; + + /// Equivalent to SetSolver(solver). +- virtual void SetPreconditioner(Solver &solver) { SetSolver(solver); } ++ void SetPreconditioner(Solver &solver) override { SetSolver(solver); } + + /// Set KINSOL's scaled step tolerance. + /** The default tolerance is $ U^\frac{2}{3} $ , where +@@ -940,13 +984,22 @@ public: + @note This method must be called after SetOperator(). */ + void SetMaxSetupCalls(int max_calls); + +- /// Set the number of acceleration vectors to use with KIN_FP or KIN_PICARD. +- /** The default is 0. +- @ note This method must be called before SetOperator() to set the +- maximum size of the acceleration space. The value of @a maa can be +- altered after SetOperator() is called but it can't be higher than initial +- maximum. */ +- void SetMAA(int maa); ++ /// Enable Anderson Acceleration for KIN_FP or KIN_PICARD. ++ /** @note Has to be called once before SetOperator() in order to set up the ++ maximum subspace size. Subsequent calls need @a n less or equal to the ++ initial subspace size. ++ @param[in] n Anderson Acceleration subspace size ++ @param[in] orth Anderson Acceleration orthogonalization routine ++ @param[in] delay Anderson Acceleration delay ++ @param[in] damping Anderson Acceleration damping parameter valid from 0 < ++ d <= 1.0. Default is 1.0 (no damping) */ ++ void EnableAndersonAcc(int n, int orth = KIN_ORTH_MGS, int delay = 0, ++ double damping = 1.0); ++ ++ /// Specifies the value of the damping parameter in the fixed point or Picard ++ /// iteration. ++ /** param[in] damping fixed point iteration or Picard damping parameter */ ++ void SetDamping(double damping); + + /// Set the Jacobian Free Newton Krylov flag. The default is false. + /** This flag indicates to use JFNK as the linear solver for KINSOL. This +@@ -967,10 +1020,10 @@ public: + void SetLSMaxRestarts(int m) { maxlrs = m; } + + /// Set the print level for the KINSetPrintLevel function. +- virtual void SetPrintLevel(int print_lvl) { print_level = print_lvl; } ++ void SetPrintLevel(int print_lvl) override { print_level = print_lvl; } + + /// This method is not supported and will throw an error. +- virtual void SetPrintLevel(PrintLevel); ++ void SetPrintLevel(PrintLevel) override; + + /// Solve the nonlinear system $ F(x) = 0 $. + /** This method computes the x_scale and fx_scale vectors and calls the +@@ -981,7 +1034,7 @@ public: + @param[in,out] x On input, initial guess, if @a #iterative_mode = true, + otherwise the initial guess is zero; on output, the + solution */ +- virtual void Mult(const Vector &b, Vector &x) const; ++ void Mult(const Vector &b, Vector &x) const override; + + /// Solve the nonlinear system $ F(x) = 0 $. + /** Calls KINSol() to solve the nonlinear system. Before calling KINSol(), diff --git a/var/spack/repos/builtin/packages/mfem/mfem-4.7.patch b/var/spack/repos/builtin/packages/mfem/mfem-4.7.patch new file mode 100644 index 0000000000..6e0d3c7ef5 --- /dev/null +++ b/var/spack/repos/builtin/packages/mfem/mfem-4.7.patch @@ -0,0 +1,102 @@ +diff --git a/examples/hiop/ex9p.cpp b/examples/hiop/ex9p.cpp +index 4facbb3c0b..f783b97a3b 100644 +--- a/examples/hiop/ex9p.cpp ++++ b/examples/hiop/ex9p.cpp +@@ -96,6 +96,7 @@ public: + { + Vector w_glob(width); + pfes.Dof_TrueDof_Matrix()->MultTranspose(w, w_glob); ++ w_glob.HostReadWrite(); // read+write -> can use w_glob(i) (non-const) + for (int i = 0; i < width; i++) { grad(0, i) = w_glob(i); } + } + +diff --git a/linalg/sparsemat.cpp b/linalg/sparsemat.cpp +index 0b5334d2a6..efe471d416 100644 +--- a/linalg/sparsemat.cpp ++++ b/linalg/sparsemat.cpp +@@ -1267,24 +1267,32 @@ real_t SparseMatrix::InnerProduct(const Vector &x, const Vector &y) const + + void SparseMatrix::GetRowSums(Vector &x) const + { +- for (int i = 0; i < height; i++) ++ if (Finalized()) + { +- real_t a = 0.0; +- if (A) ++ auto d_I = ReadI(); ++ auto d_A = ReadData(); ++ auto d_x = x.Write(); ++ mfem::forall(height, [=] MFEM_HOST_DEVICE (int i) + { +- for (int j = I[i], end = I[i+1]; j < end; j++) ++ real_t sum = 0.0; ++ for (int j = d_I[i], end = d_I[i+1]; j < end; j++) + { +- a += A[j]; ++ sum += d_A[j]; + } +- } +- else ++ d_x[i] = sum; ++ }); ++ } ++ else ++ { ++ for (int i = 0; i < height; i++) + { ++ real_t a = 0.0; + for (RowNode *np = Rows[i]; np != NULL; np = np->Prev) + { + a += np->Value; + } ++ x(i) = a; + } +- x(i) = a; + } + } + +diff --git a/linalg/sparsemat.hpp b/linalg/sparsemat.hpp +index 7042279663..dc2d773bc4 100644 +--- a/linalg/sparsemat.hpp ++++ b/linalg/sparsemat.hpp +@@ -216,7 +216,7 @@ public: + void ClearCuSparse() { ClearGPUSparse(); } + + /// Check if the SparseMatrix is empty. +- bool Empty() const { return (A == NULL) && (Rows == NULL); } ++ bool Empty() const { return A.Empty() && (Rows == NULL); } + + /// Return the array #I. + inline int *GetI() { return I; } +diff --git a/tests/unit/general/test_umpire_mem.cpp b/tests/unit/general/test_umpire_mem.cpp +index 84457669ec..d4a7b85093 100644 +--- a/tests/unit/general/test_umpire_mem.cpp ++++ b/tests/unit/general/test_umpire_mem.cpp +@@ -18,12 +18,13 @@ + #include <unistd.h> + #include <stdio.h> + #include "umpire/Umpire.hpp" ++#include <umpire/strategy/QuickPool.hpp> + + #ifdef MFEM_USE_CUDA + #include <cuda.h> + constexpr const char * device_name = "cuda"; + #elif defined(MFEM_USE_HIP) +-constexpr const char * device_name = "raja-hip"; ++constexpr const char * device_name = "hip"; + #endif + + using namespace mfem; +@@ -45,10 +46,12 @@ static bool is_pinned_host(void * h_p) + unsigned flags; + #ifdef MFEM_USE_CUDA + auto err = cudaHostGetFlags(&flags, h_p); ++ cudaGetLastError(); // also resets last error + if (err == cudaSuccess) { return true; } + else if (err == cudaErrorInvalidValue) { return false; } + #elif defined(MFEM_USE_HIP) + auto err = hipHostGetFlags(&flags, h_p); ++ hipGetLastError(); // also resets last error + if (err == hipSuccess) { return true; } + else if (err == hipErrorInvalidValue) { return false; } + #endif diff --git a/var/spack/repos/builtin/packages/mfem/package.py b/var/spack/repos/builtin/packages/mfem/package.py index 618b397181..93692ef580 100644 --- a/var/spack/repos/builtin/packages/mfem/package.py +++ b/var/spack/repos/builtin/packages/mfem/package.py @@ -6,7 +6,6 @@ import os import shutil import sys -from platform import machine from spack.package import * @@ -52,6 +51,13 @@ class Mfem(Package, CudaPackage, ROCmPackage): version("develop", branch="master") version( + "4.7.0", + sha256="5e889493f5f79848f7b2d16afaae307c59880ac2a7ff2315551c60ca54717751", + url="https://bit.ly/mfem-4-7", + extension="tar.gz", + ) + + version( "4.6.0", sha256="5fa9465b5bec56bfb777a4d2826fba48d85fbace4aed8b64a2fd4059bf075b15", url="https://bit.ly/mfem-4-6", @@ -100,9 +106,6 @@ class Mfem(Package, CudaPackage, ROCmPackage): extension="tar.gz", ) - # Tagged development version used by xSDK - version("4.0.1-xsdk", commit="c55c80d17b82d80de04b849dd526e17044f8c99a") - version( "4.0.0", sha256="df5bdac798ea84a263979f6fbf79de9013e1c55562f95f98644c3edcacfbc727", @@ -155,14 +158,16 @@ class Mfem(Package, CudaPackage, ROCmPackage): extension="tar.gz", ) + depends_on("cxx", type="build") # generated + variant("static", default=True, description="Build static library") variant("shared", default=False, description="Build shared library") variant("mpi", default=True, sticky=True, description="Enable MPI parallelism") - # Can we make the default value for 'metis' to depend on the 'mpi' value? + # Can we make the default value for "metis" to depend on the "mpi" value? variant("metis", default=True, sticky=True, description="Enable METIS support") variant("openmp", default=False, description="Enable OpenMP parallelism") - # Note: '+cuda' and 'cuda_arch' variants are added by the CudaPackage - # Note: '+rocm' and 'amdgpu_target' variants are added by the ROCmPackage + # Note: "+cuda" and "cuda_arch" variants are added by the CudaPackage + # Note: "+rocm" and "amdgpu_target" variants are added by the ROCmPackage variant("occa", default=False, description="Enable OCCA backend") variant("raja", default=False, description="Enable RAJA backend") variant("libceed", default=False, description="Enable libCEED backend") @@ -184,6 +189,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): variant("strumpack", default=False, description="Enable support for STRUMPACK") variant("suite-sparse", default=False, description="Enable serial, sparse direct solvers") variant("petsc", default=False, description="Enable PETSc solvers, preconditioners, etc.") + variant("mumps", default=False, description="Enable MUMPS solver.") variant("slepc", default=False, description="Enable SLEPc integration") variant("sundials", default=False, description="Enable Sundials time integrators") variant("pumi", default=False, description="Enable functionality based on PUMI") @@ -211,6 +217,21 @@ class Mfem(Package, CudaPackage, ROCmPackage): variant("examples", default=False, description="Build and install examples") variant("miniapps", default=False, description="Build and install miniapps") variant("exceptions", default=False, description="Enable the use of exceptions") + variant( + "precision", + default="double", + values=("single", "double"), + multi=False, + description="Floating point precision", + when="@4.7.0:", + ) + variant( + "cxxstd", + default="auto", + values=("auto", conditional("98", when="@:3"), "11", "14", "17"), + multi=False, + description="C++ language standard", + ) conflicts("+shared", when="@:3.3.2") conflicts("~static~shared") @@ -254,30 +275,28 @@ class Mfem(Package, CudaPackage, ROCmPackage): conflicts("+slepc", when="~petsc") conflicts("+pumi", when="~mpi") conflicts("timer=mpi", when="~mpi") + conflicts("+mumps", when="~mpi") # See https://github.com/mfem/mfem/issues/2957 conflicts("^mpich@4:", when="@:4.3+mpi") depends_on("mpi", when="+mpi") depends_on("hipsparse", when="@4.4.0:+rocm") - depends_on("hypre@2.10.0:2.13", when="@:3.3+mpi") - depends_on("hypre@:2.20.0", when="@3.4:4.2+mpi") - depends_on("hypre@:2.23.0", when="@4.3.0+mpi") - depends_on("hypre", when="+mpi") - # Propagate 'cuda_arch' to 'hypre' without propagating the '+cuda' - # variant because we want to allow 'mfem+cuda ^hypre~cuda': + + with when("+mpi"): + depends_on("hypre") + depends_on("hypre@2.10.0:2.13", when="@:3.3") + depends_on("hypre@:2.20.0", when="@3.4:4.2") + depends_on("hypre@:2.23.0", when="@4.3.0") + + # If hypre is built with +cuda, propagate cuda_arch + requires("^hypre@2.22.1:", when="+mpi+cuda ^hypre+cuda") for sm_ in CudaPackage.cuda_arch_values: - depends_on( - "hypre@2.22.1:+cuda cuda_arch={0}".format(sm_), - when="+mpi+cuda cuda_arch={0} ^hypre+cuda".format(sm_), - ) - # Propagate 'amdgpu_target' to 'hypre' without propagating the '+rocm' - # variant because we want to allow 'mfem+rocm ^hypre~rocm': + requires(f"^hypre cuda_arch={sm_}", when=f"+mpi+cuda cuda_arch={sm_} ^hypre+cuda") + # If hypre is built with +rocm, propagate amdgpu_target + requires("^hypre@2.23.0: ", when="+mpi+rocm ^hypre+rocm") for gfx in ROCmPackage.amdgpu_targets: - depends_on( - "hypre@2.23.0:+rocm amdgpu_target={0}".format(gfx), - when="+mpi+rocm amdgpu_target={0} ^hypre+rocm".format(gfx), - ) + requires(f"^hypre amdgpu_target={gfx}", when=f"+mpi+rocm amdgpu_target={gfx} ^hypre+rocm") depends_on("metis", when="+metis") depends_on("blas", when="+lapack") @@ -287,10 +306,13 @@ class Mfem(Package, CudaPackage, ROCmPackage): depends_on("sundials@2.7.0+mpi+hypre", when="@:3.3.0+sundials+mpi") depends_on("sundials@2.7.0:", when="@3.3.2:+sundials~mpi") depends_on("sundials@2.7.0:+mpi+hypre", when="@3.3.2:+sundials+mpi") - depends_on("sundials@5.0.0:5", when="@4.0.1-xsdk:4.4+sundials~mpi") - depends_on("sundials@5.0.0:5+mpi+hypre", when="@4.0.1-xsdk:4.4+sundials+mpi") - depends_on("sundials@5.0.0:", when="@4.5.0:+sundials~mpi") - depends_on("sundials@5.0.0:+mpi+hypre", when="@4.5.0:+sundials+mpi") + depends_on("sundials@5.0.0:5", when="@4.1.0:4.4+sundials~mpi") + depends_on("sundials@5.0.0:5+mpi+hypre", when="@4.1.0:4.4+sundials+mpi") + depends_on("sundials@5.0.0:6.7.0", when="@4.5.0:4.6+sundials~mpi") + depends_on("sundials@5.0.0:6.7.0+mpi+hypre", when="@4.5.0:4.6+sundials+mpi") + depends_on("sundials@5.0.0:", when="@4.7.0:+sundials~mpi") + depends_on("sundials@5.0.0:+mpi+hypre", when="@4.7.0:+sundials+mpi") + conflicts("cxxstd=11", when="^sundials@6.4.0:") for sm_ in CudaPackage.cuda_arch_values: depends_on( "sundials@5.4.0:+cuda cuda_arch={0}".format(sm_), @@ -311,19 +333,17 @@ class Mfem(Package, CudaPackage, ROCmPackage): depends_on("gslib@1.0.7:", when="@4.3.0:+gslib") depends_on("suite-sparse", when="+suite-sparse") depends_on("superlu-dist", when="+superlu-dist") - # Propagate 'cuda_arch' to 'superlu-dist' without propagating the '+cuda' - # variant so we can build 'mfem+cuda+superlu-dist ^superlu-dist~cuda': + # If superlu-dist is built with +cuda, propagate cuda_arch for sm_ in CudaPackage.cuda_arch_values: - depends_on( - "superlu-dist+cuda cuda_arch={0}".format(sm_), - when="+superlu-dist+cuda cuda_arch={0} ^superlu-dist+cuda".format(sm_), + requires( + f"^superlu-dist cuda_arch={sm_}", + when=f"+superlu-dist+cuda cuda_arch={sm_} ^superlu-dist+cuda", ) - # Propagate 'amdgpu_target' to 'superlu-dist' without propagating the '+rocm' - # variant so we can build 'mfem+rocm+superlu-dist ^superlu-dist~rocm': + # If superlu-dist is built with +rocm, propagate amdgpu_target for gfx in ROCmPackage.amdgpu_targets: - depends_on( - "superlu-dist+rocm amdgpu_target={0}".format(gfx), - when="+superlu-dist+rocm amdgpu_target={0} ^superlu-dist+rocm".format(gfx), + requires( + f"^superlu-dist+rocm amdgpu_target={gfx}", + when=f"+superlu-dist+rocm amdgpu_target={gfx} ^superlu-dist+rocm", ) depends_on("strumpack@3.0.0:", when="+strumpack~shared") depends_on("strumpack@3.0.0:+shared", when="+strumpack+shared") @@ -340,30 +360,23 @@ class Mfem(Package, CudaPackage, ROCmPackage): # The PETSc tests in MFEM will fail if PETSc is not configured with # MUMPS (and SuiteSparse in older versions). On the other hand, PETSc built # with MUMPS is not strictly required, so we do not require it here. - depends_on("petsc@3.8:+mpi+double+hypre", when="+petsc") + depends_on("petsc@3.8:+mpi+hypre", when="+petsc") + # rocPRIM is a dependency when using petsc+rocm and requires C++14 or newer: + conflicts("cxxstd=11", when="^rocprim@5.5.0:") depends_on("slepc@3.8.0:", when="+slepc") - # Propagate 'cuda_arch' to 'petsc'/'slepc' without propagating the '+cuda' - # variant because we want to allow 'mfem+cuda+petsc ^petsc~cuda': + # If petsc is built with +cuda, propagate cuda_arch to petsc and slepc for sm_ in CudaPackage.cuda_arch_values: - depends_on( - "petsc+cuda cuda_arch={0}".format(sm_), - when="+cuda+petsc cuda_arch={0} ^petsc+cuda".format(sm_), - ) - depends_on( - "slepc+cuda cuda_arch={0}".format(sm_), - when="+cuda+slepc cuda_arch={0} ^petsc+cuda".format(sm_), - ) - # Propagate 'amdgpu_target' to 'petsc'/'slepc' without propagating the - # '+rocm' variant because we want to allow 'mfem+rocm+petsc ^petsc~rocm': + requires(f"^petsc cuda_arch={sm_}", when=f"+cuda+petsc cuda_arch={sm_} ^petsc+cuda") + depends_on(f"slepc+cuda cuda_arch={sm_}", when=f"+cuda+slepc cuda_arch={sm_} ^petsc+cuda") + # If petsc is built with +rocm, propagate amdgpu_target to petsc and slepc for gfx in ROCmPackage.amdgpu_targets: - depends_on( - "petsc+rocm amdgpu_target={0}".format(gfx), - when="+rocm+petsc amdgpu_target={0} ^petsc+rocm".format(gfx), + requires( + f"^petsc amdgpu_target={gfx}", when=f"+rocm+petsc amdgpu_target={gfx} ^petsc+rocm" ) depends_on( - "slepc+rocm amdgpu_target={0}".format(gfx), - when="+rocm+slepc amdgpu_target={0} ^petsc+rocm".format(gfx), + f"slepc+rocm amdgpu_target={gfx}", when=f"+rocm+slepc amdgpu_target={gfx} ^petsc+rocm" ) + depends_on("mumps@5.1.1:", when="+mumps") depends_on("mpfr", when="+mpfr") depends_on("netcdf-c@4.1.3:", when="+netcdf") depends_on("unwind", when="+libunwind") @@ -373,6 +386,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): depends_on("conduit+mpi", when="+conduit+mpi") depends_on("libfms@0.2.0:", when="+fms") depends_on("ginkgo@1.4.0:", when="+ginkgo") + conflicts("cxxstd=11", when="^ginkgo") for sm_ in CudaPackage.cuda_arch_values: depends_on( "ginkgo+cuda cuda_arch={0}".format(sm_), when="+ginkgo+cuda cuda_arch={0}".format(sm_) @@ -407,13 +421,14 @@ class Mfem(Package, CudaPackage, ROCmPackage): depends_on("occa@1.0.8:", when="@:4.1+occa") depends_on("occa@1.1.0", when="@4.2.0:+occa") depends_on("occa+cuda", when="+occa+cuda") - # TODO: propagate '+rocm' variant to occa when it is supported + # TODO: propagate "+rocm" variant to occa when it is supported depends_on("raja@0.7.0:0.9.0", when="@4.0.0+raja") depends_on("raja@0.10.0:0.12.1", when="@4.0.1:4.2.0+raja") depends_on("raja@0.13.0", when="@4.3.0+raja") depends_on("raja@0.14.0:2022.03", when="@4.4.0:4.5.0+raja") depends_on("raja@2022.10.3:", when="@4.5.2:+raja") + conflicts("cxxstd=11", when="^raja@2022.03.0:") for sm_ in CudaPackage.cuda_arch_values: depends_on( "raja+cuda cuda_arch={0}".format(sm_), when="+raja+cuda cuda_arch={0}".format(sm_) @@ -441,6 +456,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): depends_on("umpire@2.0.0:2.1.0", when="@:4.3.0+umpire") depends_on("umpire@3.0.0:", when="@4.4.0:+umpire") + conflicts("cxxstd=11", when="^umpire@2022.03.0:") for sm_ in CudaPackage.cuda_arch_values: depends_on( "umpire+cuda cuda_arch={0}".format(sm_), when="+umpire+cuda cuda_arch={0}".format(sm_) @@ -460,6 +476,20 @@ class Mfem(Package, CudaPackage, ROCmPackage): "amgx~mpi cuda_arch={0}".format(sm_), when="+amgx~mpi cuda_arch={0}".format(sm_) ) + for using_double_cond in ["@:4.6", "precision=double"]: + with when(using_double_cond): + # May need to enforce precision consistency on other packages in the + # future. + depends_on("hypre precision=double", when="+mpi") + depends_on("petsc+double", when="+petsc") + depends_on("mumps+double", when="+mumps") + with when("precision=single"): + # May need to enforce precision consistency on other packages in the + # future. + depends_on("hypre precision=single", when="+mpi") + depends_on("petsc~double", when="+petsc") + depends_on("mumps+float", when="+mumps") + patch("mfem_ppc_build.patch", when="@3.2:3.3.0 arch=ppc64le") patch("mfem-3.4.patch", when="@3.4.0") patch("mfem-3.3-3.4-petsc-3.9.patch", when="@3.3.0:3.4.0 +petsc ^petsc@3.9.0:") @@ -480,6 +510,8 @@ class Mfem(Package, CudaPackage, ROCmPackage): when="@4.6.0 +gslib+shared+miniapps", sha256="2a31682d876626529e2778a216d403648b83b90997873659a505d982d0e65beb", ) + patch("mfem-4.7.patch", when="@4.7.0") + patch("mfem-4.7-sundials-7.patch", when="@4.7.0+sundials ^sundials@7:") phases = ["configure", "build", "install"] @@ -500,56 +532,16 @@ class Mfem(Package, CudaPackage, ROCmPackage): # likely to be up to date in supporting *all* of MFEM's # configuration options. So, don't use CMake # - def configure(self, spec, prefix): + def get_make_config_options(self, spec, prefix): def yes_no(varstr): return "YES" if varstr in self.spec else "NO" - # See also find_system_libraries in lib/spack/llnl/util/filesystem.py - # where the same list of paths is used. - sys_lib_paths = [ - "/lib64", - "/lib", - "/usr/lib64", - "/usr/lib", - "/usr/local/lib64", - "/usr/local/lib", - "/usr/lib/x86_64-linux-gnu", - ] - - def is_sys_lib_path(dir): - return dir in sys_lib_paths - - xcompiler = "" - xlinker = "-Wl," - if "+cuda" in spec: - xcompiler = "-Xcompiler=" - xlinker = "-Xlinker=" - cuda_arch = None if "~cuda" in spec else spec.variants["cuda_arch"].value + xcompiler = "" if "~cuda" in spec else "-Xcompiler=" # We need to add rpaths explicitly to allow proper export of link flags - # from within MFEM. - - # Similar to spec[pkg].libs.ld_flags but prepends rpath flags too. - # Also does not add system library paths as defined by 'sys_lib_paths' - # above -- this is done to avoid issues like this: - # https://github.com/mfem/mfem/issues/1088. - def ld_flags_from_library_list(libs_list): - flags = [ - "%s-rpath,%s" % (xlinker, dir) - for dir in libs_list.directories - if not is_sys_lib_path(dir) - ] - flags += ["-L%s" % dir for dir in libs_list.directories if not is_sys_lib_path(dir)] - flags += [libs_list.link_flags] - return " ".join(flags) - - def ld_flags_from_dirs(pkg_dirs_list, pkg_libs_list): - flags = [ - "%s-rpath,%s" % (xlinker, dir) for dir in pkg_dirs_list if not is_sys_lib_path(dir) - ] - flags += ["-L%s" % dir for dir in pkg_dirs_list if not is_sys_lib_path(dir)] - flags += ["-l%s" % lib for lib in pkg_libs_list] - return " ".join(flags) + # from within MFEM. We use the following two functions to do that. + ld_flags_from_library_list = self.ld_flags_from_library_list + ld_flags_from_dirs = self.ld_flags_from_dirs def find_optional_library(name, prefix): for shared in [True, False]: @@ -592,8 +584,8 @@ class Mfem(Package, CudaPackage, ROCmPackage): "PREFIX=%s" % prefix, "MFEM_USE_MEMALLOC=YES", "MFEM_DEBUG=%s" % yes_no("+debug"), - # NOTE: env['CXX'] is the spack c++ compiler wrapper. The real - # compiler is defined by env['SPACK_CXX']. + # NOTE: env["CXX"] is the spack c++ compiler wrapper. The real + # compiler is defined by env["SPACK_CXX"]. "CXX=%s" % env["CXX"], "MFEM_USE_LIBUNWIND=%s" % yes_no("+libunwind"), "%s=%s" % (zlib_var, yes_no("+zlib")), @@ -628,7 +620,10 @@ class Mfem(Package, CudaPackage, ROCmPackage): "MFEM_MPIEXEC=%s" % mfem_mpiexec, "MFEM_MPIEXEC_NP=%s" % mfem_mpiexec_np, "MFEM_USE_EXCEPTIONS=%s" % yes_no("+exceptions"), + "MFEM_USE_MUMPS=%s" % yes_no("+mumps"), ] + if spec.satisfies("@4.7.0:"): + options += ["MFEM_PRECISION=%s" % spec.variants["precision"].value] # Determine C++ standard to use: cxxstd = None @@ -642,6 +637,14 @@ class Mfem(Package, CudaPackage, ROCmPackage): cxxstd = "14" if self.spec.satisfies("^ginkgo"): cxxstd = "14" + # When rocPRIM is used (e.g. by PETSc + ROCm) we need C++14: + if self.spec.satisfies("^rocprim@5.5.0:"): + cxxstd = "14" + cxxstd_req = spec.variants["cxxstd"].value + if cxxstd_req != "auto": + # Constraints for valid standard level should be imposed during + # concretization based on 'conflicts' or other directives. + cxxstd = cxxstd_req cxxstd_flag = None if cxxstd: if "+cuda" in spec: @@ -649,6 +652,8 @@ class Mfem(Package, CudaPackage, ROCmPackage): else: cxxstd_flag = getattr(self.compiler, "cxx" + cxxstd + "_flag") + cuda_arch = None if "~cuda" in spec else spec.variants["cuda_arch"].value + cxxflags = spec.compiler_flags["cxxflags"].copy() if cxxflags: @@ -697,8 +702,10 @@ class Mfem(Package, CudaPackage, ROCmPackage): if "+mpi" in spec: options += ["MPICXX=%s" % spec["mpi"].mpicxx] hypre = spec["hypre"] - # The hypre package always links with 'blas' and 'lapack'. - all_hypre_libs = hypre.libs + hypre["lapack"].libs + hypre["blas"].libs + all_hypre_libs = hypre.libs + if "+lapack" in hypre: + all_hypre_libs += hypre["lapack"].libs + hypre["blas"].libs + hypre_gpu_libs = "" if "+cuda" in hypre: hypre_gpu_libs = " -lcusparse -lcurand -lcublas" @@ -767,7 +774,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): elif "^spectrum-mpi" in strumpack: sp_lib += [ld_flags_from_dirs([mpi.prefix.lib], ["mpi_ibm_mpifh"])] if "+openmp" in strumpack: - # The '+openmp' in the spec means strumpack will TRY to find + # The "+openmp" in the spec means strumpack will TRY to find # OpenMP; if not found, we should not add any flags -- how do # we figure out if strumpack found OpenMP? if not self.spec.satisfies("%apple-clang"): @@ -802,7 +809,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): ) sp_lib += [ld_flags_from_library_list(zfp_lib)] if "+cuda" in strumpack: - # assuming also ('+cuda' in spec) + # assuming also ("+cuda" in spec) sp_lib += ["-lcusolver", "-lcublas"] options += [ "STRUMPACK_OPT=%s" % " ".join(sp_opt), @@ -917,7 +924,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): headers = find_headers("libunwind", libunwind.prefix.include) headers.add_macro("-g") libs = find_optional_library("libunwind", libunwind.prefix) - # When mfem uses libunwind, it also needs 'libdl'. + # When mfem uses libunwind, it also needs "libdl". libs += LibraryList(find_system_libraries("libdl")) options += [ "LIBUNWIND_OPT=%s" % headers.cpp_flags, @@ -976,14 +983,35 @@ class Mfem(Package, CudaPackage, ROCmPackage): if "^rocthrust" in spec and not spec["hip"].external: # petsc+rocm needs the rocthrust header path hip_headers += spec["rocthrust"].headers + if "^rocprim" in spec and not spec["hip"].external: + # rocthrust [via petsc+rocm] has a dependency on rocprim + hip_headers += spec["rocprim"].headers if "^hipblas" in spec and not spec["hip"].external: # superlu-dist+rocm needs the hipblas header path hip_headers += spec["hipblas"].headers if "%cce" in spec: # We assume the proper Cray CCE module (cce) is loaded: - craylibs_path = env["CRAYLIBS_" + machine().upper()] - craylibs = ["libmodules", "libfi", "libcraymath", "libf", "libu", "libcsup"] + proc = str(spec.target.family) + craylibs_var = "CRAYLIBS_" + proc.upper() + craylibs_path = env.get(craylibs_var, None) + if not craylibs_path: + raise InstallError( + f"The environment variable {craylibs_var} is not defined.\n" + "\tMake sure the 'cce' module is in the compiler spec." + ) + craylibs = [ + "libmodules", + "libfi", + "libcraymath", + "libf", + "libu", + "libcsup", + "libpgas-shmem", + ] hip_libs += find_libraries(craylibs, craylibs_path) + craylibs_path2 = join_path(craylibs_path, "../../../cce-clang", proc, "lib") + hip_libs += find_libraries("libunwind", craylibs_path2) + if hip_headers: options += ["HIP_OPT=%s" % hip_headers.cpp_flags] if hip_libs: @@ -1027,9 +1055,17 @@ class Mfem(Package, CudaPackage, ROCmPackage): ] if "+umpire" in spec: + umpire = spec["umpire"] + umpire_opts = umpire.headers + umpire_libs = umpire.libs + if "^camp" in umpire: + umpire_opts += umpire["camp"].headers + if "^fmt" in umpire: + umpire_opts += umpire["fmt"].headers + umpire_libs += umpire["fmt"].libs options += [ - "UMPIRE_OPT=-I%s" % spec["umpire"].prefix.include, - "UMPIRE_LIB=%s" % ld_flags_from_library_list(spec["umpire"].libs), + "UMPIRE_OPT=%s" % umpire_opts.cpp_flags, + "UMPIRE_LIB=%s" % ld_flags_from_library_list(umpire_libs), ] timer_ids = {"std": "0", "posix": "2", "mac": "4", "mpi": "6"} @@ -1098,7 +1134,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): hiop_libs = hiop.libs hiop_hdrs += spec["lapack"].headers + spec["blas"].headers hiop_libs += spec["lapack"].libs + spec["blas"].libs - hiop_opt_libs = ["magma", "umpire"] + hiop_opt_libs = ["magma", "umpire", "hipblas", "hiprand"] for opt_lib in hiop_opt_libs: if "^" + opt_lib in hiop: hiop_hdrs += hiop[opt_lib].headers @@ -1114,11 +1150,28 @@ class Mfem(Package, CudaPackage, ROCmPackage): camp = raja["camp"] hiop_hdrs += camp.headers hiop_libs += find_optional_library("libcamp", camp.prefix) + if hiop.satisfies("@0.6:+cuda"): + hiop_libs += LibraryList(["cublas", "curand"]) options += [ "HIOP_OPT=%s" % hiop_hdrs.cpp_flags, "HIOP_LIB=%s" % ld_flags_from_library_list(hiop_libs), ] + if "+mumps" in spec: + mumps = spec["mumps"] + mumps_opt = ["-I%s" % mumps.prefix.include] + if "+openmp" in mumps: + if not self.spec.satisfies("%apple-clang"): + mumps_opt += [xcompiler + self.compiler.openmp_flag] + options += [ + "MUMPS_OPT=%s" % " ".join(mumps_opt), + "MUMPS_LIB=%s" % ld_flags_from_library_list(mumps.libs), + ] + + return options + + def configure(self, spec, prefix): + options = self.get_make_config_options(spec, prefix) make("config", *options, parallel=False) make("info", parallel=False) @@ -1180,7 +1233,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): # Clean the 'examples' directory -- at least one example is always built # and we do not want to cache executables. make("examples/clean", parallel=False) - self.cache_extra_test_sources([self.examples_src_dir, self.examples_data_dir]) + cache_extra_test_sources(self, [self.examples_src_dir, self.examples_data_dir]) def test_ex10(self): """build and run ex10(p)""" @@ -1257,7 +1310,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): @property def config_mk(self): """Export the location of the config.mk file. - This property can be accessed using spec['mfem'].package.config_mk + This property can be accessed using spec["mfem"].package.config_mk """ dirs = [self.prefix, self.prefix.share.mfem] for d in dirs: @@ -1269,7 +1322,7 @@ class Mfem(Package, CudaPackage, ROCmPackage): @property def test_mk(self): """Export the location of the test.mk file. - This property can be accessed using spec['mfem'].package.test_mk. + This property can be accessed using spec["mfem"].package.test_mk. In version 3.3.2 and newer, the location of test.mk is also defined inside config.mk, variable MFEM_TEST_MK. """ @@ -1279,3 +1332,46 @@ class Mfem(Package, CudaPackage, ROCmPackage): if os.access(f, os.R_OK): return FileList(f) return FileList(find(self.prefix, "test.mk", recursive=True)) + + # See also find_system_libraries in lib/spack/llnl/util/filesystem.py + # where the similar list of paths is used. + sys_lib_paths = [ + "/lib64", + "/lib", + "/usr/lib64", + "/usr/lib", + "/usr/local/lib64", + "/usr/local/lib", + "/usr/lib/x86_64-linux-gnu", + ] + + def is_sys_lib_path(self, dir): + return dir in self.sys_lib_paths + + @property + def xlinker(self): + return "-Wl," if "~cuda" in self.spec else "-Xlinker=" + + # Similar to spec[pkg].libs.ld_flags but prepends rpath flags too. + # Also does not add system library paths as defined by 'sys_lib_paths' + # above -- this is done to avoid issues like this: + # https://github.com/mfem/mfem/issues/1088. + def ld_flags_from_library_list(self, libs_list): + flags = [ + "%s-rpath,%s" % (self.xlinker, dir) + for dir in libs_list.directories + if not self.is_sys_lib_path(dir) + ] + flags += ["-L%s" % dir for dir in libs_list.directories if not self.is_sys_lib_path(dir)] + flags += [libs_list.link_flags] + return " ".join(flags) + + def ld_flags_from_dirs(self, pkg_dirs_list, pkg_libs_list): + flags = [ + "%s-rpath,%s" % (self.xlinker, dir) + for dir in pkg_dirs_list + if not self.is_sys_lib_path(dir) + ] + flags += ["-L%s" % dir for dir in pkg_dirs_list if not self.is_sys_lib_path(dir)] + flags += ["-l%s" % lib for lib in pkg_libs_list] + return " ".join(flags) diff --git a/var/spack/repos/builtin/packages/mfem/test_builds.sh b/var/spack/repos/builtin/packages/mfem/test_builds.sh index cb658dd59c..be0d27bc0f 100755 --- a/var/spack/repos/builtin/packages/mfem/test_builds.sh +++ b/var/spack/repos/builtin/packages/mfem/test_builds.sh @@ -14,9 +14,9 @@ rocm_arch="gfx908" spack_jobs='' # spack_jobs='-j 128' -mfem='mfem@4.6.0'${compiler} +mfem='mfem@4.7.0'${compiler} # mfem_dev='mfem@develop'${compiler} -mfem_dev='mfem@4.6.0'${compiler} +mfem_dev='mfem@4.7.0'${compiler} backends='+occa+raja+libceed' backends_specs='^occa~cuda ^raja~openmp' @@ -31,44 +31,60 @@ petsc_spec_rocm='^petsc+rocm+mumps' strumpack_spec='^strumpack~slate~openmp~cuda' strumpack_cuda_spec='^strumpack+cuda~slate~openmp' strumpack_rocm_spec='^strumpack+rocm~slate~openmp~cuda' -# superlu specs with cuda and rocm -superlu_cuda_spec='^superlu-dist+cuda' -superlu_rocm_spec='^superlu-dist+rocm' +# superlu specs with cpu, cuda and rocm +# - v8.2.1 on CPU and GPU stalls in ex11p; works when superlu::PARMETIS is +# replaced with superlu::METIS_AT_PLUS_A, at least on CPU +superlu_spec='^superlu-dist@8.1.2' +superlu_cuda_spec='^superlu-dist@8.1.2+cuda' +superlu_rocm_spec='^superlu-dist@8.1.2+rocm' +# FMS spec +fms_spec='^libfms+conduit' builds=( # preferred version: ${mfem} ${mfem}'~mpi~metis~zlib' - ${mfem}"$backends"'+superlu-dist+strumpack+suite-sparse+petsc+slepc+gslib \ - +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - '"$backends_specs $strumpack_spec $petsc_spec $conduit_spec" + # TODO: add back "+fms $fms_spec" when the FMS unit test is fixed + ${mfem}"$backends"'+superlu-dist+strumpack+mumps+suite-sparse+petsc+slepc \ + +gslib+sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo \ + +hiop \ + '"$backends_specs $superlu_spec $strumpack_spec $petsc_spec"' \ + '"$conduit_spec" + # TODO: add back "+fms $fms_spec" when the FMS unit test is fixed ${mfem}'~mpi \ '"$backends"'+suite-sparse+sundials+gslib+mpfr+netcdf \ +zlib+gnutls+libunwind+conduit+ginkgo+hiop \ '"$backends_specs $conduit_spec"' ^sundials~mpi' + ${mfem}' precision=single +mumps+petsc '"$petsc_spec" # develop version, shared builds: ${mfem_dev}'+shared~static' ${mfem_dev}'+shared~static~mpi~metis~zlib' # NOTE: Shared build with +gslib works on mac but not on linux # TODO: add back '+gslib' when the above NOTE is addressed. + # TODO: add back "+fms $fms_spec" when the FMS unit test is fixed ${mfem_dev}'+shared~static \ - '"$backends"'+superlu-dist+strumpack+suite-sparse+petsc+slepc \ + '"$backends"'+superlu-dist+strumpack+mumps+suite-sparse+petsc+slepc \ +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - '"$backends_specs $strumpack_spec $petsc_spec $conduit_spec" + '"$backends_specs $superlu_spec $strumpack_spec $petsc_spec"' \ + '"$conduit_spec" # NOTE: Shared build with +gslib works on mac but not on linux # TODO: add back '+gslib' when the above NOTE is addressed. + # TODO: add back "+fms $fms_spec" when the FMS unit test is fixed ${mfem_dev}'+shared~static~mpi \ '"$backends"'+suite-sparse+sundials+mpfr+netcdf \ +zlib+gnutls+libunwind+conduit+ginkgo+hiop \ '"$backends_specs $conduit_spec"' ^sundials~mpi' + ${mfem_dev}'+shared~static precision=single +mumps+petsc '"$petsc_spec" ) builds2=( # preferred version ${mfem}"$backends $backends_specs" - ${mfem}'+superlu-dist' + ${mfem}' precision=single' + ${mfem}'+superlu-dist'" $superlu_spec" ${mfem}'+strumpack'" $strumpack_spec" + ${mfem}'+mumps' ${mfem}'+suite-sparse~mpi' ${mfem}'+suite-sparse' ${mfem}'+sundials~mpi ^sundials~mpi' @@ -81,6 +97,8 @@ builds2=( ${mfem}'+gnutls' ${mfem}'+conduit~mpi'" $conduit_spec" ${mfem}'+conduit'" $conduit_spec" + # TODO: uncomment next line when the FMS unit test is fixed + # ${mfem}'+fms'" $fms_spec" ${mfem}'+umpire' ${mfem}'+petsc'" $petsc_spec" ${mfem}'+petsc+slepc'" $petsc_spec" @@ -93,8 +111,10 @@ builds2=( # # develop version ${mfem_dev}"$backends $backends_specs" - ${mfem_dev}'+superlu-dist' + ${mfem_dev}' precision=single' + ${mfem_dev}'+superlu-dist'" $superlu_spec" ${mfem_dev}'+strumpack'" $strumpack_spec" + ${mfem_dev}'+mumps' ${mfem_dev}'+suite-sparse~mpi' ${mfem_dev}'+suite-sparse' ${mfem_dev}'+sundials~mpi ^sundials~mpi' @@ -107,6 +127,8 @@ builds2=( ${mfem_dev}'+gnutls' ${mfem_dev}'+conduit~mpi'" $conduit_spec" ${mfem_dev}'+conduit'" $conduit_spec" + # TODO: uncomment next line when the FMS unit test is fixed + # ${mfem_dev}'+fms'" $fms_spec" ${mfem_dev}'+umpire' ${mfem_dev}'+petsc'" $petsc_spec" ${mfem_dev}'+petsc+slepc'" $petsc_spec" @@ -134,25 +156,37 @@ builds_cuda=( # hypre without cuda: # NOTE: PETSc tests may need PETSC_OPTIONS="-use_gpu_aware_mpi 0" # TODO: restore '+libceed' when the libCEED CUDA unit tests take less time. - # TODO: remove "^hiop+shared" when the default static build is fixed. ${mfem}'+cuda+openmp+raja+occa cuda_arch='"${cuda_arch}"' \ +superlu-dist+strumpack+suite-sparse+gslib+petsc+slepc \ - +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - ^raja+cuda+openmp ^hiop+shared'" $strumpack_cuda_spec"' \ + +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo \ + ^raja+cuda+openmp'" $strumpack_cuda_spec"' \ '"$superlu_cuda_spec $petsc_spec_cuda $conduit_spec" + ${mfem}'+cuda cuda_arch='"${cuda_arch}"' +raja+umpire' + + # hiop needs older versions of raja, umpire, etc + # TODO: combine this spec with the above spec when the combined spec works. + ${mfem}'+cuda cuda_arch='"${cuda_arch}"' +hiop' + # hypre with cuda: # TODO: restore '+libceed' when the libCEED CUDA unit tests take less time. # TODO: add back "+petsc+slepc $petsc_spec_cuda" when it works. # NOTE: PETSc tests may need PETSC_OPTIONS="-use_gpu_aware_mpi 0" # TODO: add back "+sundials" when it's supported with '^hypre+cuda'. - # TODO: remove "^hiop+shared" when the default static build is fixed. ${mfem}'+cuda+openmp+raja+occa cuda_arch='"${cuda_arch}"' \ +superlu-dist+strumpack+suite-sparse+gslib \ - +pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - ^raja+cuda+openmp ^hiop+shared ^hypre+cuda \ + +pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo \ + ^raja+cuda+openmp ^hypre+cuda \ '" $strumpack_cuda_spec $superlu_cuda_spec $conduit_spec" + ${mfem}'+cuda cuda_arch='"${cuda_arch}"' +raja+umpire ^hypre+cuda' + + # hiop needs older versions of raja, umpire, etc + # TODO: combine this spec with the above spec when the combined spec works. + ${mfem}'+cuda cuda_arch='"${cuda_arch}"' +hiop ^hypre+cuda' + + ${mfem}' precision=single +cuda cuda_arch='"${cuda_arch}"' ^hypre+cuda' + # # same builds as above with ${mfem_dev} # @@ -171,24 +205,36 @@ builds_cuda=( # hypre without cuda: # NOTE: PETSc tests may need PETSC_OPTIONS="-use_gpu_aware_mpi 0" # TODO: restore '+libceed' when the libCEED CUDA unit tests take less time. - # TODO: remove "^hiop+shared" when the default static build is fixed. ${mfem_dev}'+cuda+openmp+raja+occa cuda_arch='"${cuda_arch}"' \ +superlu-dist+strumpack+suite-sparse+gslib+petsc+slepc \ - +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - ^raja+cuda+openmp ^hiop+shared'" $strumpack_cuda_spec"' \ + +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo \ + ^raja+cuda+openmp'" $strumpack_cuda_spec"' \ '"$superlu_cuda_spec $petsc_spec_cuda $conduit_spec" + ${mfem_dev}'+cuda cuda_arch='"${cuda_arch}"' +raja+umpire' + + # hiop needs older versions of raja, umpire, etc + # TODO: combine this spec with the above spec when the combined spec works. + ${mfem_dev}'+cuda cuda_arch='"${cuda_arch}"' +hiop' + # hypre with cuda: # TODO: restore '+libceed' when the libCEED CUDA unit tests take less time. # TODO: add back "+petsc+slepc $petsc_spec_cuda" when it works. # NOTE: PETSc tests may need PETSC_OPTIONS="-use_gpu_aware_mpi 0" # TODO: add back "+sundials" when it's supported with '^hypre+cuda'. - # TODO: remove "^hiop+shared" when the default static build is fixed. ${mfem_dev}'+cuda+openmp+raja+occa cuda_arch='"${cuda_arch}"' \ +superlu-dist+strumpack+suite-sparse+gslib \ - +pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo+hiop \ - ^raja+cuda+openmp ^hiop+shared ^hypre+cuda \ + +pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo \ + ^raja+cuda+openmp ^hypre+cuda \ '"$strumpack_cuda_spec $superlu_cuda_spec $conduit_spec" + + ${mfem_dev}'+cuda cuda_arch='"${cuda_arch}"' +raja+umpire ^hypre+cuda' + + # hiop needs older versions of raja, umpire, etc + # TODO: combine this spec with the above spec when the combined spec works. + ${mfem_dev}'+cuda cuda_arch='"${cuda_arch}"' +hiop ^hypre+cuda' + + ${mfem_dev}' precision=single +cuda cuda_arch='"${cuda_arch}"' ^hypre+cuda' ) @@ -204,27 +250,35 @@ builds_rocm=( ^raja+rocm~openmp ^occa~cuda~openmp ^hypre+rocm' # hypre without rocm: - # TODO: add back '+hiop' when it is no longer linked with tcmalloc* through - # its magma dependency. - # TODO: add back '+ginkgo' when the Ginkgo example works. ${mfem}'+rocm+openmp+raja+occa+libceed amdgpu_target='"${rocm_arch}"' \ +superlu-dist+strumpack+suite-sparse+gslib+petsc+slepc \ - +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit \ + +sundials+pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo \ ^raja+rocm~openmp ^occa~cuda'" $strumpack_rocm_spec"' \ '"$superlu_rocm_spec $petsc_spec_rocm $conduit_spec" + ${mfem}'+rocm amdgpu_target='"${rocm_arch}"' +raja+umpire' + + # hiop needs older versions of raja, umpire, etc + # TODO: combine this spec with the above spec when the combined spec works. + ${mfem}'+rocm amdgpu_target='"${rocm_arch}"' +hiop' + # hypre with rocm: # TODO: add back "+petsc+slepc $petsc_spec_rocm" when it works. - # TODO: add back '+hiop' when it is no longer linked with tcmalloc* through - # its magma dependency. - # TODO: add back '+ginkgo' when the Ginkgo example works. # TODO: add back "+sundials" when it's supported with '^hypre+rocm'. ${mfem}'+rocm+openmp+raja+occa+libceed amdgpu_target='"${rocm_arch}"' \ +superlu-dist+strumpack+suite-sparse+gslib \ - +pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit \ + +pumi+mpfr+netcdf+zlib+gnutls+libunwind+conduit+ginkgo \ ^raja+rocm~openmp ^occa~cuda ^hypre+rocm \ '"$strumpack_rocm_spec $superlu_rocm_spec $conduit_spec" + ${mfem}'+rocm amdgpu_target='"${rocm_arch}"' +raja+umpire ^hypre+rocm' + + # hiop needs older versions of raja, umpire, etc + # TODO: combine this spec with the above spec when the combined spec works. + ${mfem}'+rocm amdgpu_target='"${rocm_arch}"' +hiop ^hypre+rocm' + + ${mfem}' precision=single +rocm amdgpu_target='"${rocm_arch}"' ^hypre+rocm' + # # same builds as above with ${mfem_dev} # @@ -244,6 +298,8 @@ run_builds=("${builds[@]}" "${builds2[@]}") # PETSc CUDA tests on Lassen need this: # export PETSC_OPTIONS="-use_gpu_aware_mpi 0" +# STRUMPACK forces "^openblas threads=openmp" when using openblas: +export OMP_NUM_THREADS=1 # spack files to clean in "$mfem_src_dir" when using 'dev-build' clean_files=( |