From 80f92cfddeb5aa835ec5853596127bbf5cbc9318 Mon Sep 17 00:00:00 2001
From: liuyangzhuan <liuyangzhuan@gmail.com>
Date: Fri, 28 Jan 2022 01:31:56 -0800
Subject: gptune: add variants (mpispawn, hypre) (#27733)

Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>
---
 var/spack/repos/builtin/packages/gptune/package.py | 201 ++++++----
 .../repos/builtin/packages/hypre/ij_gptune.patch   | 440 +++++++++++++++++++++
 var/spack/repos/builtin/packages/hypre/package.py  |  10 +
 .../superlu-dist/CMAKE_INSTALL_LIBDIR.patch        |  59 +++
 .../repos/builtin/packages/superlu-dist/package.py |   2 +
 5 files changed, 626 insertions(+), 86 deletions(-)
 create mode 100644 var/spack/repos/builtin/packages/hypre/ij_gptune.patch
 create mode 100644 var/spack/repos/builtin/packages/superlu-dist/CMAKE_INSTALL_LIBDIR.patch

(limited to 'var')

diff --git a/var/spack/repos/builtin/packages/gptune/package.py b/var/spack/repos/builtin/packages/gptune/package.py
index eeebd6f40d..d96c86b074 100644
--- a/var/spack/repos/builtin/packages/gptune/package.py
+++ b/var/spack/repos/builtin/packages/gptune/package.py
@@ -18,7 +18,9 @@ class Gptune(CMakePackage):
 
     version('master', branch='master')
 
-    variant('app', default=False, description='Build all HPC application examples')
+    variant('superlu', default=False, description='Build the SuperLU_DIST example')
+    variant('hypre', default=False, description='Build the Hypre example')
+    variant('mpispawn', default=True, description='MPI spawning-based interface')
 
     depends_on('mpi', type=('build', 'link', 'run'))
     depends_on('cmake@3.3:', type='build')
@@ -48,9 +50,14 @@ class Gptune(CMakePackage):
     depends_on('pygmo', type=('build', 'run'))
     depends_on('openturns', type=('build', 'run'))
 
-    depends_on('superlu-dist@develop', when='+app', type=('build', 'run'))
+    depends_on('superlu-dist@develop', when='+superlu', type=('build', 'run'))
+    depends_on('hypre+gptune@2.19.0', when='+hypre', type=('build', 'run'))
 
-    conflicts('openmpi@:3')
+    depends_on('openmpi@4:', when='+mpispawn', type=('build', 'run'))
+    conflicts('mpich', when='+mpispawn')
+    conflicts('spectrum-mpi', when='+mpispawn')
+    conflicts('cray-mpich', when='+mpispawn')
+    conflicts('gcc@:7')
 
     def cmake_args(self):
         spec = self.spec
@@ -59,6 +66,7 @@ class Gptune(CMakePackage):
             fc_flags.append('-fallow-argument-mismatch')
 
         args = [
+            '-DGPTUNE_INSTALL_PATH=%s' % site_packages_dir,
             '-DTPL_BLAS_LIBRARIES=%s' % spec['blas'].libs.joined(";"),
             '-DTPL_LAPACK_LIBRARIES=%s' % spec['lapack'].libs.joined(";"),
             '-DTPL_SCALAPACK_LIBRARIES=%s' % spec['scalapack'].
@@ -81,96 +89,117 @@ class Gptune(CMakePackage):
     def cache_test_sources(self):
         """Copy the example source files after the package is installed to an
         install test subdirectory for use during `spack test run`."""
-        self.cache_extra_test_sources([self.examples_src_dir, self.src_dir])
+        self.cache_extra_test_sources([self.examples_src_dir])
+
+    def setup_run_environment(self, env):
+        env.set('GPTUNE_INSTALL_PATH', site_packages_dir)
 
     def test(self):
         spec = self.spec
         comp_name = self.compiler.name
         comp_version = str(self.compiler.version).replace('.', ',')
-        test_dir = join_path(self.install_test_root, self.examples_src_dir)
+        test_dir = join_path(self.test_suite.current_test_cache_dir,
+                             self.examples_src_dir)
 
-        if '+app' in spec:
-            superludriver = join_path(spec['superlu-dist'].prefix.bin, 'pddrive_spawn')
+        if '+superlu' in spec:
+            superludriver = join_path(spec['superlu-dist'].prefix.lib,
+                                      'EXAMPLE/pddrive_spawn')
             op = ['-r', superludriver, '.']
             # copy superlu-dist executables to the correct place
-            with working_dir(join_path(test_dir, 'SuperLU_DIST'), create=False):
-                self.run_test('rm', options=['-rf', 'superlu_dist'], work_dir='.')
-                self.run_test('git', options=['clone', 'https://github.com/xiaoyeli/superlu_dist.git'], work_dir='.')
-                self.run_test('mkdir', options=['-p',
-                                                'build'], work_dir='./superlu_dist')
-                self.run_test('mkdir', options=['-p', 'EXAMPLE'],
-                              work_dir='./superlu_dist/build')
-                self.run_test('cp', options=op, work_dir='./superlu_dist/build/EXAMPLE')
-
-        with working_dir(self.install_test_root, create=False):
-            cdir = join_path(self.prefix, 'gptuneclcm')
-            self.run_test('cp', options=['-r', cdir, '.'], work_dir='.')
-            self.run_test('rm', options=['-rf', 'build'], work_dir='.')
-            self.run_test('mv', options=['gptuneclcm', 'build'], work_dir='.')
-
-            with open('{0}/run_env.sh'.format(self.install_test_root), 'w') as envfile:
-                envfile.write('if [[ $NERSC_HOST = "cori" ]]; then\n')
-                envfile.write('    export machine=cori\n')
-                envfile.write('elif [[ $(uname -s) = "Darwin" ]]; then\n')
-                envfile.write('    export machine=mac\n')
-                envfile.write('elif [[ $(dnsdomainname) = ' +
-                              '"summit.olcf.ornl.gov" ]]; then\n')
-                envfile.write('    export machine=summit\n')
-                envfile.write('elif [[ $(cat /etc/os-release | grep "PRETTY_NAME") ==' +
-                              ' *"Ubuntu"* || $(cat /etc/os-release | grep' +
-                              ' "PRETTY_NAME") == *"Debian"* ]]; then\n')
-                envfile.write('    export machine=unknownlinux\n')
-                envfile.write('fi\n')
-                envfile.write('export GPTUNEROOT=$PWD\n')
-                envfile.write('export MPIRUN={0}\n'.format
-                              (which(spec['mpi'].prefix.bin + '/mpirun')))
-                envfile.write('export proc=$(spack arch)\n')
-                envfile.write('export mpi={0}\n'.format(spec['mpi'].name))
-                envfile.write('export compiler={0}\n'.format(comp_name))
-                envfile.write('export nodes={0} \n'.format(self.nodes))
-                envfile.write('export cores={0} \n'.format(self.cores))
-                envfile.write('export ModuleEnv=$machine-$proc-$mpi-$compiler \n')
-                envfile.write('software_json=$(echo ",\\\"software_configuration\\\":' +
-                              '{\\\"' + spec['blas'].name +
-                              '\\\":{\\\"version_split\\\":' +
-                              ' [' + str(spec['blas'].versions).replace('.', ',') +
-                              ']},\\\"' + spec['mpi'].name +
-                              '\\\":{\\\"version_split\\\": [' +
-                              str(spec['mpi'].versions).replace('.', ',') + ']},\\\"' +
-                              spec['scalapack'].name +
-                              '\\\":{\\\"version_split\\\": [' +
-                              str(spec['scalapack'].versions).replace('.', ',') +
-                              ']},\\\"' +
-                              str(comp_name) + '\\\":{\\\"version_split\\\": [' +
-                              str(comp_version) + ']}}") \n')
-                envfile.write('loadable_software_json=$(echo ",\\\"loadable_software_' +
-                              'configurations\\\":{\\\"' + spec['blas'].name +
-                              '\\\":{\\\"version_split\\\": [' +
-                              str(spec['blas'].versions).replace('.', ',') +
-                              ']},\\\"' + spec['mpi'].name +
-                              '\\\":{\\\"version_split\\\": [' +
-                              str(spec['mpi'].versions).replace('.', ',') + ']},\\\"' +
-                              spec['scalapack'].name +
-                              '\\\":{\\\"version_split\\\": [' +
-                              str(spec['scalapack'].versions).replace('.', ',') +
-                              ']},\\\"' + str(comp_name) +
-                              '\\\":{\\\"version_split\\\": ['
-                              + str(comp_version) + ']}}") \n')
-                envfile.write('machine_json=$(echo ",\\\"machine_configuration\\\":' +
-                              '{\\\"machine_name\\\":\\\"$machine\\\",\\\"$proc\\\":' +
-                              '{\\\"nodes\\\":$nodes,\\\"cores\\\":$cores}}") \n')
-                envfile.write('loadable_machine_json=$(echo ",\\\"loadable_machine_' +
-                              'configurations\\\":{\\\"$machine\\\":{\\\"$proc\\\":' +
-                              '{\\\"nodes\\\":$nodes,\\\"cores\\\":$cores}}}") \n')
-
-        if '+app' in spec:
-            apps = ['GPTune-Demo', 'SuperLU_DIST', 'SuperLU_DIST_RCI',
-                    'Scalapack-PDGEQRF', 'Scalapack-PDGEQRF_RCI']
-        else:
-            apps = ['GPTune-Demo', 'Scalapack-PDGEQRF', 'Scalapack-PDGEQRF_RCI']
+            wd = join_path(test_dir, 'SuperLU_DIST')
+            self.run_test('rm', options=['-rf', 'superlu_dist'], work_dir=wd)
+            self.run_test('git', options=['clone', 'https://github.com/xiaoyeli/superlu_dist.git'], work_dir=wd)
+            self.run_test('mkdir', options=['-p',
+                                            'build'], work_dir=wd + '/superlu_dist')
+            self.run_test('mkdir', options=['-p', 'EXAMPLE'],
+                          work_dir=wd + '/superlu_dist/build')
+            self.run_test('cp', options=op, work_dir=wd + '/superlu_dist/build/EXAMPLE')
+
+        if '+hypre' in spec:
+            hypredriver = join_path(spec['hypre'].prefix.bin, 'ij')
+            op = ['-r', hypredriver, '.']
+            # copy the hypre 'ij' driver executable to the correct place
+            wd = join_path(test_dir, 'Hypre')
+            self.run_test('rm', options=['-rf', 'hypre'], work_dir=wd)
+            self.run_test('git', options=['clone', 'https://github.com/hypre-space/hypre.git'], work_dir=wd)
+            self.run_test('cp', options=op, work_dir=wd + '/hypre/src/test/')
+
+        wd = self.test_suite.current_test_cache_dir
+        with open('{0}/run_env.sh'.format(wd), 'w') as envfile:
+            envfile.write('if [[ $NERSC_HOST = "cori" ]]; then\n')
+            envfile.write('    export machine=cori\n')
+            envfile.write('elif [[ $(uname -s) = "Darwin" ]]; then\n')
+            envfile.write('    export machine=mac\n')
+            envfile.write('elif [[ $(dnsdomainname) = ' +
+                          '"summit.olcf.ornl.gov" ]]; then\n')
+            envfile.write('    export machine=summit\n')
+            envfile.write('elif [[ $(cat /etc/os-release | grep "PRETTY_NAME") ==' +
+                          ' *"Ubuntu"* || $(cat /etc/os-release | grep' +
+                          ' "PRETTY_NAME") == *"Debian"* ]]; then\n')
+            envfile.write('    export machine=unknownlinux\n')
+            envfile.write('fi\n')
+            envfile.write('export GPTUNEROOT=$PWD\n')
+            envfile.write('export MPIRUN={0}\n'.format
+                          (which(spec['mpi'].prefix.bin + '/mpirun')))
+            envfile.write('export PYTHONPATH={0}:$PYTHONPATH\n'.format
+                          (site_packages_dir + '/gptune'))
+            envfile.write('export proc=$(spack arch)\n')
+            envfile.write('export mpi={0}\n'.format(spec['mpi'].name))
+            envfile.write('export compiler={0}\n'.format(comp_name))
+            envfile.write('export nodes={0} \n'.format(self.nodes))
+            envfile.write('export cores={0} \n'.format(self.cores))
+            envfile.write('export ModuleEnv=$machine-$proc-$mpi-$compiler \n')
+            envfile.write('software_json=$(echo ",\\\"software_configuration\\\":' +
+                          '{\\\"' + spec['blas'].name +
+                          '\\\":{\\\"version_split\\\":' +
+                          ' [' + str(spec['blas'].versions).replace('.', ',') +
+                          ']},\\\"' + spec['mpi'].name +
+                          '\\\":{\\\"version_split\\\": [' +
+                          str(spec['mpi'].versions).replace('.', ',') + ']},\\\"' +
+                          spec['scalapack'].name +
+                          '\\\":{\\\"version_split\\\": [' +
+                          str(spec['scalapack'].versions).replace('.', ',') +
+                          ']},\\\"' +
+                          str(comp_name) + '\\\":{\\\"version_split\\\": [' +
+                          str(comp_version) + ']}}") \n')
+            envfile.write('loadable_software_json=$(echo ",\\\"loadable_software_' +
+                          'configurations\\\":{\\\"' + spec['blas'].name +
+                          '\\\":{\\\"version_split\\\": [' +
+                          str(spec['blas'].versions).replace('.', ',') +
+                          ']},\\\"' + spec['mpi'].name +
+                          '\\\":{\\\"version_split\\\": [' +
+                          str(spec['mpi'].versions).replace('.', ',') + ']},\\\"' +
+                          spec['scalapack'].name +
+                          '\\\":{\\\"version_split\\\": [' +
+                          str(spec['scalapack'].versions).replace('.', ',') +
+                          ']},\\\"' + str(comp_name) +
+                          '\\\":{\\\"version_split\\\": ['
+                          + str(comp_version) + ']}}") \n')
+            envfile.write('machine_json=$(echo ",\\\"machine_configuration\\\":' +
+                          '{\\\"machine_name\\\":\\\"$machine\\\",\\\"$proc\\\":' +
+                          '{\\\"nodes\\\":$nodes,\\\"cores\\\":$cores}}") \n')
+            envfile.write('loadable_machine_json=$(echo ",\\\"loadable_machine_' +
+                          'configurations\\\":{\\\"$machine\\\":{\\\"$proc\\\":' +
+                          '{\\\"nodes\\\":$nodes,\\\"cores\\\":$cores}}}") \n')
+
+        # copy the environment configuration files to non-cache directories
+        op = ['run_env.sh', site_packages_dir + '/gptune/.']
+        self.run_test('cp', options=op, work_dir=wd)
+        op = ['run_env.sh', self.install_test_root + '/.']
+        self.run_test('cp', options=op, work_dir=wd)
+
+        apps = ['Scalapack-PDGEQRF_RCI']
+        if '+mpispawn' in spec:
+            apps = apps + ['GPTune-Demo', 'Scalapack-PDGEQRF']
+        if '+superlu' in spec:
+            apps = apps + ['SuperLU_DIST_RCI']
+            if '+mpispawn' in spec:
+                apps = apps + ['SuperLU_DIST']
+        if '+hypre' in spec:
+            if '+mpispawn' in spec:
+                apps = apps + ['Hypre']
 
         for app in apps:
-            with working_dir(join_path(test_dir, app), create=False):
-                # PDGEQRF with GPTune
-                self.run_test('bash', options=['run_examples.sh'], work_dir='.',
-                              purpose='gptune smoke test for {0}'.format(app))
+            wd = join_path(test_dir, app)
+            self.run_test('bash', options=['run_examples.sh'], work_dir=wd,
+                          purpose='gptune smoke test for {0}'.format(app))
diff --git a/var/spack/repos/builtin/packages/hypre/ij_gptune.patch b/var/spack/repos/builtin/packages/hypre/ij_gptune.patch
new file mode 100644
index 0000000000..36dbf44afb
--- /dev/null
+++ b/var/spack/repos/builtin/packages/hypre/ij_gptune.patch
@@ -0,0 +1,440 @@
+diff --git a/src/test/ij.c b/src/test/ij.c
+old mode 100644
+new mode 100755
+index fcda91898..c43dcf477
+--- a/src/test/ij.c
++++ b/src/test/ij.c
+@@ -85,10 +85,81 @@ extern HYPRE_Int hypre_FlexGMRESModifyPCDefault(void *precond_data, HYPRE_Int it
+ #endif
+ #define SECOND_TIME 0
+ 
++
++HYPRE_Int
++hypre_fPrintTiming(FILE* fp, const char     *heading,
++                   MPI_Comm        comm  )
++{
++   HYPRE_Int  ierr = 0;
++
++   HYPRE_Real  local_wall_time;
++   HYPRE_Real  local_cpu_time;
++   HYPRE_Real  wall_time;
++   HYPRE_Real  cpu_time;
++   HYPRE_Real  wall_mflops;
++   HYPRE_Real  cpu_mflops;
++
++   HYPRE_Int     i;
++   HYPRE_Int     myrank;
++
++   if (hypre_global_timing == NULL)
++      return ierr;
++
++   hypre_MPI_Comm_rank(comm, &myrank );
++
++   /* print heading */
++   if (myrank == 0)
++   {
++      hypre_fprintf(fp, "=============================================\n");
++      hypre_fprintf(fp, "%s:\n", heading);
++      hypre_fprintf(fp, "=============================================\n");
++   }
++
++   for (i = 0; i < (hypre_global_timing -> size); i++)
++   {
++      if (hypre_TimingNumRegs(i) > 0)
++      {
++         local_wall_time = hypre_TimingWallTime(i);
++         local_cpu_time  = hypre_TimingCPUTime(i);
++         hypre_MPI_Allreduce(&local_wall_time, &wall_time, 1,
++                       hypre_MPI_REAL, hypre_MPI_MAX, comm);
++         hypre_MPI_Allreduce(&local_cpu_time, &cpu_time, 1,
++                       hypre_MPI_REAL, hypre_MPI_MAX, comm);
++
++         if (myrank == 0)
++         {
++            hypre_fprintf(fp, "%s:\n", hypre_TimingName(i));
++
++            /* print wall clock info */
++            hypre_fprintf(fp, "  wall clock time = %f seconds\n", wall_time);
++            if (wall_time)
++               wall_mflops = hypre_TimingFLOPS(i) / wall_time / 1.0E6;
++            else
++               wall_mflops = 0.0;
++            hypre_fprintf(fp, "  wall MFLOPS     = %f\n", wall_mflops);
++
++            /* print CPU clock info */
++            hypre_fprintf(fp, "  cpu clock time  = %f seconds\n", cpu_time);
++            if (cpu_time)
++               cpu_mflops = hypre_TimingFLOPS(i) / cpu_time / 1.0E6;
++            else
++               cpu_mflops = 0.0;
++            hypre_fprintf(fp, "  cpu MFLOPS      = %f\n\n", cpu_mflops);
++         }
++      }
++   }
++
++   return ierr;
++}
++
++
++
++
+ hypre_int
+ main( hypre_int argc,
+       char *argv[] )
+ {
+   FILE *fplog = stdout; /* default timing log target when -logfile is not given */
+    HYPRE_Int           arg_index;
+    HYPRE_Int           print_usage;
+    HYPRE_Int           sparsity_known = 0;
+@@ -414,7 +485,9 @@ main( hypre_int argc,
+    size_t mempool_max_cached_bytes = 2000LL * 1024 * 1024;
+ 
+    /* Initialize MPI */
+-   hypre_MPI_Init(&argc, &argv);
++   hypre_MPI_Init(&argc, &argv);MPI_Comm parent; MPI_Comm_get_parent(&parent);
++      
++
+ 
+    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
+    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
+@@ -481,6 +554,11 @@ main( hypre_int argc,
+          build_matrix_type      = 3;
+          build_matrix_arg_index = arg_index;
+       }
++      else if ( strcmp(argv[arg_index], "-logfile") == 0 )
++      {
++         arg_index++;
++         fplog = fopen(argv[arg_index++], "w");
++      }      
+       else if ( strcmp(argv[arg_index], "-27pt") == 0 )
+       {
+          arg_index++;
+@@ -2068,7 +2146,7 @@ main( hypre_int argc,
+    HYPRE_Init();
+ 
+    hypre_EndTiming(time_index);
+-   hypre_PrintTiming("Hypre init times", hypre_MPI_COMM_WORLD);
++   hypre_fPrintTiming(fplog, "Hypre init times", hypre_MPI_COMM_WORLD);
+    hypre_FinalizeTiming(time_index);
+    hypre_ClearTiming();
+ 
+@@ -2182,7 +2260,7 @@ main( hypre_int argc,
+       local_num_cols = (HYPRE_Int)(last_local_col - first_local_col + 1);
+    }
+    hypre_EndTiming(time_index);
+-   hypre_PrintTiming("Generate Matrix", hypre_MPI_COMM_WORLD);
++   hypre_fPrintTiming(fplog, "Generate Matrix", hypre_MPI_COMM_WORLD);
+    hypre_FinalizeTiming(time_index);
+    hypre_ClearTiming();
+ 
+@@ -2404,7 +2482,7 @@ main( hypre_int argc,
+       ierr += HYPRE_IJMatrixAssemble( ij_A );
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("IJ Matrix Setup", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "IJ Matrix Setup", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -3032,7 +3110,7 @@ main( hypre_int argc,
+    }
+ 
+    hypre_EndTiming(time_index);
+-   hypre_PrintTiming("IJ Vector Setup", hypre_MPI_COMM_WORLD);
++   hypre_fPrintTiming(fplog, "IJ Vector Setup", hypre_MPI_COMM_WORLD);
+    hypre_FinalizeTiming(time_index);
+    hypre_ClearTiming();
+ 
+@@ -3124,7 +3202,7 @@ main( hypre_int argc,
+ #endif
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("MatVec Test", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "MatVec Test", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -3190,7 +3268,7 @@ main( hypre_int argc,
+       HYPRE_ParCSRHybridSetup(amg_solver, parcsr_A, b, x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -3200,7 +3278,7 @@ main( hypre_int argc,
+       HYPRE_ParCSRHybridSolve(amg_solver, parcsr_A, b, x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -3422,7 +3500,7 @@ main( hypre_int argc,
+ #endif
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -3448,7 +3526,7 @@ main( hypre_int argc,
+ #endif
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -3638,7 +3716,7 @@ main( hypre_int argc,
+       HYPRE_BoomerAMGSetup(amg_solver, parcsr_A, b, x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -3648,7 +3726,7 @@ main( hypre_int argc,
+       HYPRE_BoomerAMGSolve(amg_solver, parcsr_A, b, x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -4112,7 +4190,7 @@ main( hypre_int argc,
+       HYPRE_PCGSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
+                      (HYPRE_Vector)b, (HYPRE_Vector)x);
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -4123,7 +4201,7 @@ main( hypre_int argc,
+                      (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -4140,7 +4218,7 @@ main( hypre_int argc,
+                      (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -4151,7 +4229,7 @@ main( hypre_int argc,
+                      (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ #endif
+@@ -4550,7 +4628,7 @@ main( hypre_int argc,
+           *                     (HYPRE_Vector)b, (HYPRE_Vector)x); */
+ 
+          hypre_EndTiming(time_index);
+-         hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++         hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+          hypre_FinalizeTiming(time_index);
+          hypre_ClearTiming();
+ 
+@@ -4610,7 +4688,7 @@ main( hypre_int argc,
+          HYPRE_LOBPCGSolve(lobpcg_solver, constraints, eigenvectors, eigenvalues );
+ 
+          hypre_EndTiming(time_index);
+-         hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++         hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+          hypre_FinalizeTiming(time_index);
+          hypre_ClearTiming();
+ 
+@@ -4942,7 +5020,7 @@ main( hypre_int argc,
+                                (HYPRE_Vector)x);
+ 
+          hypre_EndTiming(time_index);
+-         hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++         hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+          hypre_FinalizeTiming(time_index);
+          hypre_ClearTiming();
+ 
+@@ -4980,7 +5058,7 @@ main( hypre_int argc,
+          HYPRE_LOBPCGSolve(pcg_solver, constraints, eigenvectors, eigenvalues);
+ 
+          hypre_EndTiming(time_index);
+-         hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++         hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+          hypre_FinalizeTiming(time_index);
+          hypre_ClearTiming();
+ 
+@@ -5490,7 +5568,7 @@ main( hypre_int argc,
+          (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -5501,7 +5579,7 @@ main( hypre_int argc,
+          (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -5723,7 +5801,7 @@ main( hypre_int argc,
+          (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -5734,7 +5812,7 @@ main( hypre_int argc,
+          (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -6060,7 +6138,7 @@ main( hypre_int argc,
+          (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -6071,7 +6149,7 @@ main( hypre_int argc,
+          (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -6397,7 +6475,7 @@ main( hypre_int argc,
+                           (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -6408,7 +6486,7 @@ main( hypre_int argc,
+                           (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -6723,7 +6801,7 @@ main( hypre_int argc,
+                          (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -6734,7 +6812,7 @@ main( hypre_int argc,
+                          (HYPRE_Vector)b, (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -6930,7 +7008,7 @@ main( hypre_int argc,
+                       (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -6941,7 +7019,7 @@ main( hypre_int argc,
+                       (HYPRE_Vector)x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -7073,7 +7151,7 @@ main( hypre_int argc,
+       HYPRE_MGRSetup(mgr_solver, parcsr_A, b, x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -7084,7 +7162,7 @@ main( hypre_int argc,
+       HYPRE_MGRSolve(mgr_solver, parcsr_A, b, x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -7170,7 +7248,7 @@ main( hypre_int argc,
+       HYPRE_ILUSetup(ilu_solver, parcsr_A, b, x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Setup phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -7181,7 +7259,7 @@ main( hypre_int argc,
+       HYPRE_ILUSolve(ilu_solver, parcsr_A, b, x);
+ 
+       hypre_EndTiming(time_index);
+-      hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
++      hypre_fPrintTiming(fplog, "Solve phase times", hypre_MPI_COMM_WORLD);
+       hypre_FinalizeTiming(time_index);
+       hypre_ClearTiming();
+ 
+@@ -7286,7 +7364,13 @@ main( hypre_int argc,
+    /* Finalize Hypre */
+    HYPRE_Finalize();
+ 
++
++   fflush(fplog);
++   fclose(fplog);
++
+    /* Finalize MPI */
++   if(parent!=MPI_COMM_NULL)
++   MPI_Comm_disconnect(&parent);
+    hypre_MPI_Finalize();
+ 
+    /* when using cuda-memcheck --leak-check full, uncomment this */
diff --git a/var/spack/repos/builtin/packages/hypre/package.py b/var/spack/repos/builtin/packages/hypre/package.py
index dff8419df8..d4dc0d6a39 100644
--- a/var/spack/repos/builtin/packages/hypre/package.py
+++ b/var/spack/repos/builtin/packages/hypre/package.py
@@ -68,6 +68,11 @@ class Hypre(AutotoolsPackage, CudaPackage):
     variant('unified-memory', default=False, description='Use unified memory')
     variant('fortran', default=True,
             description='Enables fortran bindings')
+    variant('gptune', default=False,
+            description='Add the GPTune hookup code')
+
+    # Patch to add the GPTune hookup code
+    patch('ij_gptune.patch', when='+gptune@2.19.0')
 
     # Patch to add ppc64le in config.guess
     patch('ibm-ppc64le.patch', when='@:2.11.1')
@@ -88,6 +93,7 @@ class Hypre(AutotoolsPackage, CudaPackage):
 
     conflicts('+cuda', when='+int64')
     conflicts('+unified-memory', when='~cuda')
+    conflicts('+gptune', when='~mpi')
 
     # Patch to build shared libraries on Darwin does not apply to
     # versions before 2.13.0
@@ -228,6 +234,10 @@ class Hypre(AutotoolsPackage, CudaPackage):
                 sstruct('-in', 'test/sstruct.in.default', '-solver', '40',
                         '-rhsone')
             make("install")
+            if '+gptune' in self.spec:
+                make("test")
+                self.run_test('mkdir', options=['-p', self.prefix.bin])
+                self.run_test('cp', options=['test/ij', self.prefix.bin + '/.'])
 
     extra_install_tests = join_path('src', 'examples')
 
diff --git a/var/spack/repos/builtin/packages/superlu-dist/CMAKE_INSTALL_LIBDIR.patch b/var/spack/repos/builtin/packages/superlu-dist/CMAKE_INSTALL_LIBDIR.patch
new file mode 100644
index 0000000000..d3d8311863
--- /dev/null
+++ b/var/spack/repos/builtin/packages/superlu-dist/CMAKE_INSTALL_LIBDIR.patch
@@ -0,0 +1,59 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 2b2fdf2..f89cf4c 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -456,15 +456,6 @@ endif()
+ #target_compile_features(SuperLU_DIST PUBLIC cxx_std_11)
+ 
+ # Generate various configure files with proper definitions
+-# configure_file(${CMAKE_SOURCE_DIR}/make.inc.in ${CMAKE_BINARY_DIR}/make.inc)
+-configure_file(${SuperLU_DIST_SOURCE_DIR}/make.inc.in ${SuperLU_DIST_SOURCE_DIR}/make.inc)
+-
+-configure_file(${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h.in ${SuperLU_DIST_BINARY_DIR}/SRC/superlu_dist_config.h)
+-configure_file(${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h.in ${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h)
+-
+-# Following is to configure a file for FORTRAN code
+-configure_file(${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h.in ${SuperLU_DIST_BINARY_DIR}/FORTRAN/superlu_dist_config.h)
+-
+ 
+ # Add pkg-config support
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR})
+@@ -476,4 +467,13 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/superlu_dist.pc.in ${CMAKE_CURRENT_BI
+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/superlu_dist.pc
+ 	DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+ 
++# configure_file(${CMAKE_SOURCE_DIR}/make.inc.in ${CMAKE_BINARY_DIR}/make.inc)
++configure_file(${SuperLU_DIST_SOURCE_DIR}/make.inc.in ${SuperLU_DIST_SOURCE_DIR}/make.inc)
++
++configure_file(${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h.in ${SuperLU_DIST_BINARY_DIR}/SRC/superlu_dist_config.h)
++configure_file(${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h.in ${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h)
++
++# Following is to configure a file for FORTRAN code
++configure_file(${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h.in ${SuperLU_DIST_BINARY_DIR}/FORTRAN/superlu_dist_config.h)
++
+ #message("MPI_Fortran_LINK_FLAGS '${MPI_Fortran_LINK_FLAGS}'")
+diff --git a/make.inc.in b/make.inc.in
+index 0beb461..860c0bf 100644
+--- a/make.inc.in
++++ b/make.inc.in
+@@ -18,7 +18,8 @@
+ #
+ SuperLUroot = ${CMAKE_INSTALL_PREFIX}
+ #DSUPERLULIB = $(SuperLUroot)/SRC/${PROJECT_NAME_LIB_EXPORT}
+-DSUPERLULIB = $(SuperLUroot)/@CMAKE_INSTALL_LIBDIR@/${PROJECT_NAME_LIB_EXPORT}
++#DSUPERLULIB = $(SuperLUroot)/@CMAKE_INSTALL_LIBDIR@/${PROJECT_NAME_LIB_EXPORT}
++DSUPERLULIB = @pkgconfig_libdir@/${PROJECT_NAME_LIB_EXPORT}
+ INCLUDEDIR  = $(SuperLUroot)/@CMAKE_INSTALL_INCLUDEDIR@
+ 
+ XSDK_INDEX_SIZE = @XSDK_INDEX_SIZE@
+@@ -29,7 +30,8 @@ HAVE_CUDA       = @HAVE_CUDA@
+ 
+ XSDK_ENABLE_Fortran = @XSDK_ENABLE_Fortran@
+ ifeq ($(XSDK_ENABLE_Fortran),ON)
+-  DFORTRANLIB = $(SuperLUroot)/@CMAKE_INSTALL_LIBDIR@/${PROJECT_NAME_LIB_FORTRAN}
++#  DFORTRANLIB = $(SuperLUroot)/@CMAKE_INSTALL_LIBDIR@/${PROJECT_NAME_LIB_FORTRAN}
++  DFORTRANLIB = @pkgconfig_libdir@/${PROJECT_NAME_LIB_FORTRAN}
+   LIBS = $(DFORTRANLIB) $(DSUPERLULIB) ${BLAS_LIB_EXPORT} -lm
+   LIBS += ${EXTRA_FLIB_EXPORT}
+ else
+
diff --git a/var/spack/repos/builtin/packages/superlu-dist/package.py b/var/spack/repos/builtin/packages/superlu-dist/package.py
index 9677446893..7af879c45f 100644
--- a/var/spack/repos/builtin/packages/superlu-dist/package.py
+++ b/var/spack/repos/builtin/packages/superlu-dist/package.py
@@ -20,6 +20,7 @@ class SuperluDist(CMakePackage, CudaPackage, ROCmPackage):
 
     version('develop', branch='master')
     version('amd', branch='amd')
+    version('7.2.0', sha256='20b60bd8a3d88031c9ce6511ae9700b7a8dcf12e2fd704e74b1af762b3468b8c')
     version('7.1.1', sha256='558053b3d4a56eb661c4f04d4fcab6604018ce5db97115394c161b56c9c278ff')
     version('7.1.0', sha256='edbea877562be95fb22c7de1ff484f18685bec4baa8e4f703c414d3c035d4a66')
     version('6.4.0', sha256='cb9c0b2ba4c28e5ed5817718ba19ae1dd63ccd30bc44c8b8252b54f5f04a44cc')
@@ -58,6 +59,7 @@ class SuperluDist(CMakePackage, CudaPackage, ROCmPackage):
     patch('xl-611.patch', when='@:6.1.1 %xl')
     patch('xl-611.patch', when='@:6.1.1 %xl_r')
     patch('superlu-cray-ftn-case.patch', when='@7.1.1 %cce')
+    patch('CMAKE_INSTALL_LIBDIR.patch', when='@7.0.0:7.2.0')
 
     def cmake_args(self):
         spec = self.spec
-- 
cgit v1.2.3-70-g09d2