diff options
author | AMD Toolchain Support <73240730+amd-toolchain-support@users.noreply.github.com> | 2024-03-04 22:13:27 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-04 08:43:27 -0800 |
commit | e08e66ad8967083b5309b2b4af0ac5ac225f67de (patch) | |
tree | 63b0e080ce343f148960b54f4a59673dc9e83f39 | |
parent | 054371025893a0ac54529f77cbca7c6485af71bf (diff) | |
download | spack-e08e66ad8967083b5309b2b4af0ac5ac225f67de.tar.gz spack-e08e66ad8967083b5309b2b4af0ac5ac225f67de.tar.bz2 spack-e08e66ad8967083b5309b2b4af0ac5ac225f67de.tar.xz spack-e08e66ad8967083b5309b2b4af0ac5ac225f67de.zip |
AOCL: add v4.2.0 (#42920)
* AOCL: add v4.2.0
Co-authored-by: Phil Tooley <phil.tooley@amd.com> and
vijay kallesh <Vijay-teekinavar.Kallesh@amd.com>
* Review comments for spack community PR #42920
---------
Co-authored-by: Phil Tooley <phil.tooley@amd.com> and vijay kallesh <Vijay-teekinavar.Kallesh@amd.com>
14 files changed, 2050 insertions, 92 deletions
diff --git a/var/spack/repos/builtin/packages/amd-aocl/package.py b/var/spack/repos/builtin/packages/amd-aocl/package.py index b37da1b9cb..9026dd74e2 100644 --- a/var/spack/repos/builtin/packages/amd-aocl/package.py +++ b/var/spack/repos/builtin/packages/amd-aocl/package.py @@ -24,6 +24,7 @@ class AmdAocl(BundlePackage): maintainers("amd-toolchain-support") + version("4.2", preferred=True) version("4.1") version("4.0") version("3.2") @@ -43,11 +44,15 @@ class AmdAocl(BundlePackage): depends_on("amdfftw ~openmp") depends_on("amdlibflame threads=none") - for vers in ("2.2", "3.0", "3.1", "3.2", "4.0", "4.1"): - with when(f"@{vers}"): - depends_on(f"amdblis@{vers}") - depends_on(f"amdfftw@{vers}") - depends_on(f"amdlibflame@{vers}") - depends_on(f"amdlibm@{vers}") - depends_on(f"amdscalapack@{vers}") - depends_on(f"aocl-sparse@{vers}") + for vers in ["2.2", "3.0", "3.1", "3.2", "4.0", "4.1", "4.2"]: + with when(f"@={vers}"): + depends_on(f"amdblis@={vers}") + depends_on(f"amdfftw@={vers}") + depends_on(f"amdlibflame@={vers}") + depends_on(f"amdlibm@={vers}") + depends_on(f"amdscalapack@={vers}") + depends_on(f"aocl-sparse@={vers}") + if Version(vers) >= Version("4.2"): + depends_on(f"aocl-compression@={vers}") + depends_on(f"aocl-crypto@={vers}") + depends_on(f"aocl-libmem@={vers}") diff --git a/var/spack/repos/builtin/packages/amdblis/blis-4.0_gcc_clang_znver4.patch b/var/spack/repos/builtin/packages/amdblis/blis-4.0_gcc_clang_znver4.patch new file mode 100644 index 0000000000..31f71bf8c5 --- /dev/null +++ b/var/spack/repos/builtin/packages/amdblis/blis-4.0_gcc_clang_znver4.patch @@ -0,0 +1,55 @@ +diff -Naur a/config/zen4/make_defs.mk b/config/zen4/make_defs.mk +--- a/config/zen4/make_defs.mk 2022-11-12 13:05:45.000000000 +0000 ++++ b/config/zen4/make_defs.mk 2023-05-12 14:40:10.848359434 +0000 +@@ -73,6 +73,15 @@ + + + # gcc 11.0 or later: ++ifeq ($(shell test $(GCC_VERSION) -ge 13; echo $$?),0) ++ifneq ($(DEBUG_TYPE),noopt) ++CKOPTFLAGS := -O2 -fgcse-after-reload 
-fipa-cp-clone -floop-interchange -floop-unroll-and-jam -fpeel-loops -fpredictive-commoning -fsplit-loops -fsplit-paths -ftree-loop-distribution -funswitch-loops -fvect-cost-model=dynamic -fversion-loops-for-strides -fomit-frame-pointer ++endif ++ ++CKVECFLAGS += -march=znver4 -mfpmath=sse ++CRVECFLAGS += -march=znver4 ++ ++else + ifeq ($(shell test $(GCC_VERSION) -ge 11; echo $$?),0) + # Update CKOPTFLAGS for gcc 11+ to use O3 optimization without + # -ftree-partial-pre flag. This flag results in suboptimal code +@@ -100,6 +109,7 @@ + endif # GCC 8 + endif # GCC 9 + endif # GCC 11 ++endif # GCC 13 + else + ifeq ($(CC_VENDOR),clang) + +@@ -132,6 +142,16 @@ + #if compiling with clang + VENDOR_STRING := $(strip $(shell ${CC_VENDOR} --version | egrep -o '[0-9]+\.[0-9]+\.?[0-9]*')) + CC_MAJOR := $(shell (echo ${VENDOR_STRING} | cut -d. -f1)) ++#clang 16 or later: ++ifeq ($(shell test $(CC_MAJOR) -ge 16; echo $$?),0) ++CKVECFLAGS += -march=znver4 ++CRVECFLAGS += -march=znver4 ++else ++#clang 12 or later: ++ifeq ($(shell test $(CC_MAJOR) -ge 12; echo $$?),0) ++CKVECFLAGS += -march=znver3 -mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni -mavx512bf16 -mfpmath=sse -falign-loops=64 ++CRVECFLAGS += -march=znver3 ++else + #clang 9.0 or later: + ifeq ($(shell test $(CC_MAJOR) -ge 9; echo $$?),0) + CKVECFLAGS += -march=znver2 +@@ -139,7 +159,9 @@ + else + CKVECFLAGS += -march=znver1 + CRVECFLAGS += -march=znver1 +-endif # ge 9 ++endif # clang 9 ++endif # clang 12 ++endif # clang 16 + endif # aocc 2 + endif # aocc 3 + endif # aocc 4 diff --git a/var/spack/repos/builtin/packages/amdblis/package.py b/var/spack/repos/builtin/packages/amdblis/package.py index 5492f54b4b..bf78a22b44 100644 --- a/var/spack/repos/builtin/packages/amdblis/package.py +++ b/var/spack/repos/builtin/packages/amdblis/package.py @@ -23,7 +23,7 @@ class Amdblis(BlisBase): LICENSING INFORMATION: By downloading, installing and using this software, you agree to the terms and conditions of the AMD AOCL-BLIS 
license agreement. You may obtain a copy of this license agreement from - https://www.amd.com/en/developer/aocl/dense/eula/blas-4-1-eula.html + https://www.amd.com/en/developer/aocl/dense/eula/blas-4-2-eula.html https://www.amd.com/en/developer/aocl/dense/eula/blas-eula.html """ @@ -38,6 +38,11 @@ class Amdblis(BlisBase): license("BSD-3-Clause") + version( + "4.2", + sha256="0e1baf850ba0e6f99e79f64bbb0a59fcb838ddb5028e24527f52b407c3c62963", + preferred=True, + ) version("4.1", sha256="a05c6c7d359232580d1d599696053ad0beeedf50f3b88d5d22ee7d34375ab577") version("4.0", sha256="cddd31176834a932753ac0fc4c76332868feab3e9ac607fa197d8b44c1e74a41") version("3.2", sha256="5a400ee4fc324e224e12f73cc37b915a00f92b400443b15ce3350278ad46fff6") @@ -55,15 +60,15 @@ class Amdblis(BlisBase): args = super().configure_args() if not ( - spec.satisfies(r"%aocc@3.2:4.1") + spec.satisfies(r"%aocc@3.2:4.2") or spec.satisfies(r"%gcc@12.2:13.1") - or spec.satisfies(r"%clang@15:16") + or spec.satisfies(r"%clang@15:17") ): tty.warn( - "AOCL has been tested to work with the following compilers\ - versions - gcc@12.2:13.1, aocc@3.2:4.1, and clang@15:16\ - see the following aocl userguide for details: \ - https://www.amd.com/content/dam/amd/en/documents/developer/version-4-1-documents/aocl/aocl-4-1-user-guide.pdf" + "AOCL has been tested to work with the following compilers " + "versions - gcc@12.2:13.1, aocc@3.2:4.2, and clang@15:17 " + "see the following aocl userguide for details: " + "https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf" ) if spec.satisfies("+ilp64"): diff --git a/var/spack/repos/builtin/packages/amdfftw/amdfftw40-clang.patch b/var/spack/repos/builtin/packages/amdfftw/amdfftw40-clang.patch new file mode 100644 index 0000000000..b475027a33 --- /dev/null +++ b/var/spack/repos/builtin/packages/amdfftw/amdfftw40-clang.patch @@ -0,0 +1,11 @@ +diff -Nur amd-fftw-4.0/kernel/cpy2d-pair.c amd-fftw-4.0-new/kernel/cpy2d-pair.c +--- 
amd-fftw-4.0/kernel/cpy2d-pair.c 2022-11-11 16:52:26.000000000 +0530 ++++ amd-fftw-4.0-new/kernel/cpy2d-pair.c 2023-05-12 00:09:10.408511128 +0530 +@@ -21,6 +21,7 @@ + + /* out of place copy routines for pairs of isomorphic 2D arrays */ + #include "kernel/ifftw.h" ++#include <string.h> + + #ifdef AMD_OPT_ALL + #include "immintrin.h" diff --git a/var/spack/repos/builtin/packages/amdfftw/package.py b/var/spack/repos/builtin/packages/amdfftw/package.py index 93ca30265d..56dcd09145 100644 --- a/var/spack/repos/builtin/packages/amdfftw/package.py +++ b/var/spack/repos/builtin/packages/amdfftw/package.py @@ -28,7 +28,7 @@ class Amdfftw(FftwBase): LICENSING INFORMATION: By downloading, installing and using this software, you agree to the terms and conditions of the AMD AOCL-FFTW license agreement. You may obtain a copy of this license agreement from - https://www.amd.com/en/developer/aocl/fftw/eula/fftw-libraries-4-1-eula.html + https://www.amd.com/en/developer/aocl/fftw/eula/fftw-libraries-4-2-eula.html https://www.amd.com/en/developer/aocl/fftw/eula/fftw-libraries-eula.html """ @@ -41,6 +41,11 @@ class Amdfftw(FftwBase): license("GPL-2.0-only") + version( + "4.2", + sha256="391ef7d933e696762e3547a35b58ab18d22a6cf3e199c74889bcf25a1d1fc89b", + preferred=True, + ) version("4.1", sha256="f1cfecfcc0729f96a5bd61c6b26f3fa43bb0662d3fff370d4f73490c60cf4e59") version("4.0", sha256="5f02cb05f224bd86bd88ec6272b294c26dba3b1d22c7fb298745fd7b9d2271c0") version("3.2", sha256="31cab17a93e03b5b606e88dd6116a1055b8f49542d7d0890dbfcca057087b8d0") @@ -103,7 +108,7 @@ class Amdfftw(FftwBase): depends_on("texinfo") - provides("fftw-api@3", when="@2:") + provides("fftw-api@3") conflicts( "precision=quad", @@ -167,15 +172,15 @@ class Amdfftw(FftwBase): options.append("F77={0}".format(os.path.basename(spack_fc))) if not ( - spec.satisfies(r"%aocc@3.2:4.1") + spec.satisfies(r"%aocc@3.2:4.2") or spec.satisfies(r"%gcc@12.2:13.1") - or spec.satisfies(r"%clang@15:16") + or 
spec.satisfies(r"%clang@15:17") ): tty.warn( - "AOCL has been tested to work with the following compilers\ - versions - gcc@12.2:13.1, aocc@3.2:4.1, and clang@15:16\ - see the following aocl userguide for details: \ - https://www.amd.com/content/dam/amd/en/documents/developer/version-4-1-documents/aocl/aocl-4-1-user-guide.pdf" + "AOCL has been tested to work with the following compilers " + "versions - gcc@12.2:13.1, aocc@3.2:4.2, and clang@15:17 " + "see the following aocl userguide for details: " + "https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf" ) if "+debug" in spec: diff --git a/var/spack/repos/builtin/packages/amdlibflame/package.py b/var/spack/repos/builtin/packages/amdlibflame/package.py index 8dd7aa57f2..d63d7c57e8 100644 --- a/var/spack/repos/builtin/packages/amdlibflame/package.py +++ b/var/spack/repos/builtin/packages/amdlibflame/package.py @@ -3,15 +3,16 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) # ----------------------------------------------------------------------------\ -import os from llnl.util import tty +import spack.build_systems.autotools +import spack.build_systems.cmake from spack.package import * from spack.pkg.builtin.libflame import LibflameBase -class Amdlibflame(LibflameBase): +class Amdlibflame(CMakePackage, LibflameBase): """libFLAME (AMD Optimized version) is a portable library for dense matrix computations, providing much of the functionality present in Linear Algebra Package (LAPACK). It includes a @@ -36,7 +37,7 @@ class Amdlibflame(LibflameBase): LICENSING INFORMATION: By downloading, installing and using this software, you agree to the terms and conditions of the AMD AOCL-libFLAME license agreement. 
You may obtain a copy of this license agreement from - https://www.amd.com/en/developer/aocl/dense/eula-libflame/libflame-4-1-eula.html + https://www.amd.com/en/developer/aocl/dense/eula-libflame/libflame-4-2-eula.html https://www.amd.com/en/developer/aocl/dense/eula-libflame/libflame-eula.html """ @@ -48,7 +49,11 @@ class Amdlibflame(LibflameBase): maintainers("amd-toolchain-support") license("BSD-3-Clause") - + version( + "4.2", + sha256="93a433c169528ffba74a99df0ba3ce3d5b1fab9bf06ce8d2fd72ee84768ed84c", + preferred=True, + ) version("4.1", sha256="8aed69c60d11cc17e058cabcb8a931cee4f343064ade3e73d3392b7214624b61") version("4.0", sha256="bcb05763aa1df1e88f0da5e43ff86d956826cbea1d9c5ff591d78a3e091c66a4") version("3.2", sha256="6b5337fb668b82d0ed0a4ab4b5af4e2f72e4cedbeeb4a8b6eb9a3ef057fb749a") @@ -57,9 +62,33 @@ class Amdlibflame(LibflameBase): version("3.0", sha256="d94e08b688539748571e6d4c1ec1ce42732eac18bd75de989234983c33f01ced") version("2.2", sha256="12b9c1f92d2c2fa637305aaa15cf706652406f210eaa5cbc17aaea9fcfa576dc") - variant("ilp64", default=False, description="Build with ILP64 support") + variant("ilp64", default=False, when="@3.0.1: ", description="Build with ILP64 support") + variant( + "enable-aocl-blas", + default=False, + when="@4.1.0:", + description="Enables tight coupling with AOCL-BLAS library in order to use AOCL-BLAS\ + internal routines", + ) + variant( + "vectorization", + default="auto", + when="@4.2:", + values=("auto", "avx2", "avx512", "none"), + multi=False, + description="Use hardware vectorization support", + ) + + # Build system + build_system( + conditional("cmake", when="@4.2:"), conditional("autotools", when="@:4.1"), default="cmake" + ) + + # Required dependencies + with when("build_system=cmake"): + generator("make") + depends_on("cmake@3.15.0:", type="build") - conflicts("+ilp64", when="@:3.0.0", msg="ILP64 is supported from 3.0.1 onwards") conflicts("threads=pthreads", msg="pthread is not supported") conflicts("threads=openmp", 
when="@:3", msg="openmp is not supported by amdlibflame < 4.0") requires("target=x86_64:", msg="AMD libflame available only on x86_64") @@ -72,7 +101,9 @@ class Amdlibflame(LibflameBase): depends_on("python+pythoncmd", type="build") depends_on("gmake@4:", when="@3.0.1,3.1:", type="build") - depends_on("aocl-utils", type=("build"), when="@4.1: ") + for vers in ["4.1", "4.2"]: + with when(f"@{vers}"): + depends_on(f"aocl-utils@{vers}") @property def lapack_libs(self): @@ -94,62 +125,101 @@ class Amdlibflame(LibflameBase): flags.append("-Wno-error=incompatible-function-pointer-types") flags.append("-Wno-implicit-function-declaration") flags.append("-Wno-sometimes-uninitialized") + if name == "ldflags": + if self.spec.satisfies("^aocl-utils~shared"): + flags.append("-lstdc++") return (flags, None, None) + +class CMakeBuilder(spack.build_systems.cmake.CMakeBuilder): + def cmake_args(self): + spec = self.spec + args = [self.define("LIBAOCLUTILS_INCLUDE_PATH", spec["aocl-utils"].prefix.include)] + aocl_utils_lib_path = spec["aocl-utils"].libs + args.append("-DLIBAOCLUTILS_LIBRARY_PATH={0}".format(aocl_utils_lib_path)) + # From 3.2 version, amd optimized flags are encapsulated under: + # ENABLE_AMD_AOCC_FLAGS for AOCC compiler + # ENABLE_AMD_FLAGS for all other compilers + if spec.satisfies("@3.2:"): + if spec.satisfies("%aocc"): + args.append(self.define("ENABLE_AMD_AOCC_FLAGS", True)) + else: + args.append(self.define("ENABLE_AMD_FLAGS", True)) + + if spec.satisfies("@3.0.1: +ilp64"): + args.append(self.define("ENABLE_ILP64", True)) + + if spec.satisfies("@4.1.0: +enable-aocl-blas"): + args.append(self.define("ENABLE_AOCL_BLAS", True)) + args.append("-DAOCL_ROOT:PATH={0}".format(spec["blas"].prefix)) + + if spec.variants["vectorization"].value == "auto": + if spec.satisfies("target=avx512"): + args.append("-DLF_ISA_CONFIG=avx512") + elif spec.satisfies("target=avx2"): + args.append("-DLF_ISA_CONFIG=avx2") + else: + args.append("-DLF_ISA_CONFIG=none") + else: + 
args.append(self.define("LF_ISA_CONFIG", spec.variants["vectorization"].value)) + + return args + + +class AutotoolsBuilder(spack.build_systems.autotools.AutotoolsBuilder): def configure_args(self): """configure_args function""" - args = super().configure_args() + args = self.pkg.configure_args() + spec = self.spec if not ( - self.spec.satisfies(r"%aocc@3.2:4.1") - or self.spec.satisfies(r"%gcc@12.2:13.1") - or self.spec.satisfies(r"%clang@15:16") + spec.satisfies(r"%aocc@3.2:4.2") + or spec.satisfies(r"%gcc@12.2:13.1") + or spec.satisfies(r"%clang@15:17") ): tty.warn( - "AOCL has been tested to work with the following compilers\ - versions - gcc@12.2:13.1, aocc@3.2:4.1, and clang@15:16\ - see the following aocl userguide for details: \ - https://www.amd.com/content/dam/amd/en/documents/developer/version-4-1-documents/aocl/aocl-4-1-user-guide.pdf" + "AOCL has been tested to work with the following compilers " + "versions - gcc@12.2:13.1, aocc@3.2:4.2, and clang@15:17 " + "see the following aocl userguide for details: " + "https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf" ) # From 3.2 version, amd optimized flags are encapsulated under: # enable-amd-aocc-flags for AOCC compiler # enable-amd-flags for all other compilers - if "@3.2:" in self.spec: - if "%aocc" in self.spec: + if spec.satisfies("@3.2: "): + if spec.satisfies("%aocc"): args.append("--enable-amd-aocc-flags") else: args.append("--enable-amd-flags") - if "@:3.1" in self.spec: + if spec.satisfies("@:3.1"): args.append("--enable-external-lapack-interfaces") - if "@3.1" in self.spec: + if spec.satisfies("@3.1"): args.append("--enable-blas-ext-gemmt") - if "@3.1 %aocc" in self.spec: + if spec.satisfies("@3.1 %aocc"): args.append("--enable-void-return-complex") - if "@3.0:3.1 %aocc" in self.spec: + if spec.satisfies("@3.0:3.1 %aocc"): """To enabled Fortran to C calling convention for complex types when compiling with aocc flang""" 
args.append("--enable-f2c-dotc") - if "@3.0.1: +ilp64" in self.spec: + if spec.satisfies("@3.0.1: +ilp64"): args.append("--enable-ilp64") - if "@4.1:" in self.spec: - args.append("CFLAGS=-I{0}".format(self.spec["aocl-utils"].prefix.include)) - aocl_utils_lib_path = os.path.join( - self.spec["aocl-utils"].prefix.lib, "libaoclutils.a" - ) + if spec.satisfies("@4.1:"): + args.append("CFLAGS=-I{0}".format(spec["aocl-utils"].prefix.include)) + aocl_utils_lib_path = spec["aocl-utils"].libs args.append("LIBAOCLUTILS_LIBRARY_PATH={0}".format(aocl_utils_lib_path)) return args @when("@4.1:") - def build(self, spec, prefix): - aocl_utils_lib_path = os.path.join(self.spec["aocl-utils"].prefix.lib, "libaoclutils.a") + def build(self, pkg, spec, prefix): + aocl_utils_lib_path = spec["aocl-utils"].libs make("all", "LIBAOCLUTILS_LIBRARY_PATH={0}".format(aocl_utils_lib_path)) @run_after("build") @@ -162,7 +232,11 @@ class Amdlibflame(LibflameBase): else: make("check", "LIBBLAS = {0}".format(blas_flags), parallel=False) - def install(self, spec, prefix): + def install(self, pkg, spec, prefix): """make install function""" # make install in parallel fails with message 'File already exists' make("install", parallel=False) + + def setup_dependent_run_environment(self, env, dependent_spec): + if self.spec.external: + env.prepend_path("LD_LIBRARY_PATH", self.prefix.lib) diff --git a/var/spack/repos/builtin/packages/amdlibm/package.py b/var/spack/repos/builtin/packages/amdlibm/package.py index 05b3788336..6f47bdc381 100644 --- a/var/spack/repos/builtin/packages/amdlibm/package.py +++ b/var/spack/repos/builtin/packages/amdlibm/package.py @@ -21,7 +21,7 @@ class Amdlibm(SConsPackage): LICENSING INFORMATION: By downloading, installing and using this software, you agree to the terms and conditions of the AMD AOCL-FFTW license agreement. 
You may obtain a copy of this license agreement from - https://www.amd.com/en/developer/aocl/libm/eula/libm-4-1-eula.html + https://www.amd.com/en/developer/aocl/libm/eula/libm-4-2-eula.html https://www.amd.com/en/developer/aocl/libm/libm-eula.html """ @@ -33,6 +33,11 @@ class Amdlibm(SConsPackage): license("BSD-3-Clause") + version( + "4.2", + sha256="58847b942e998b3f52eb41ae26403c7392d244fcafa707cbf23165aac24edd9e", + preferred=True, + ) version("4.1", sha256="5bbbbc6bc721d9a775822eab60fbc11eb245e77d9f105b4fcb26a54d01456122") version("4.0", sha256="038c1eab544be77598eccda791b26553d3b9e2ee4ab3f5ad85fdd2a77d015a7d") version("3.2", sha256="c75b287c38a3ce997066af1f5c8d2b19fc460d5e56678ea81f3ac33eb79ec890") @@ -40,13 +45,15 @@ class Amdlibm(SConsPackage): version("3.0", sha256="eb26b5e174f43ce083928d0d8748a6d6d74853333bba37d50057aac2bef7c7aa") version("2.2", commit="4033e022da428125747e118ccd6fdd9cee21c470") - variant("verbose", default=False, description="Building with verbosity") + variant("verbose", default=False, description="Building with verbosity", when="@:4.1") # Mandatory dependencies depends_on("python@3.6.1:", type=("build", "run")) depends_on("scons@3.1.2:", type=("build")) - depends_on("aocl-utils", type=("build"), when="@4.1: ") depends_on("mpfr", type=("link")) + for vers in ["4.1", "4.2"]: + with when(f"@{vers}"): + depends_on(f"aocl-utils@{vers}") patch("0001-libm-ose-Scripts-cleanup-pyc-files.patch", when="@2.2") patch("0002-libm-ose-prevent-log-v3.c-from-building.patch", when="@2.2") @@ -64,15 +71,15 @@ class Amdlibm(SConsPackage): args.append("--aocl_utils_install_path={0}".format(self.spec["aocl-utils"].prefix)) if not ( - self.spec.satisfies(r"%aocc@3.2:4.1") + self.spec.satisfies(r"%aocc@3.2:4.2") or self.spec.satisfies(r"%gcc@12.2:13.1") or self.spec.satisfies(r"%clang@15:16") ): tty.warn( "AOCL has been tested to work with the following compilers\ - versions - gcc@12.2:13.1, aocc@3.2:4.1, and clang@15:16\ + versions - gcc@12.2:13.1, 
aocc@3.2:4.2, and clang@15:16\ see the following aocl userguide for details: \ - https://www.amd.com/content/dam/amd/en/documents/developer/version-4-1-documents/aocl/aocl-4-1-user-guide.pdf" + https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf" ) # we are circumventing the use of @@ -85,10 +92,8 @@ class Amdlibm(SConsPackage): args.append("{0}CC={1}".format(var_prefix, self.compiler.cc)) args.append("{0}CXX={1}".format(var_prefix, self.compiler.cxx)) - if "+verbose" in self.spec: - args.append("--verbose=1") - else: - args.append("--verbose=0") + # Always build verbose + args.append("--verbose=1") return args diff --git a/var/spack/repos/builtin/packages/amdscalapack/clang-hollerith.patch b/var/spack/repos/builtin/packages/amdscalapack/clang-hollerith.patch new file mode 100644 index 0000000000..c442a0a22b --- /dev/null +++ b/var/spack/repos/builtin/packages/amdscalapack/clang-hollerith.patch @@ -0,0 +1,1459 @@ +diff -urN aocl-scalapack-4.0/PBLAS/TESTING/pcblastst.f aocl-scalapack-4.0.a/PBLAS/TESTING/pcblastst.f +--- aocl-scalapack-4.0/PBLAS/TESTING/pcblastst.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/PBLAS/TESTING/pcblastst.f 2023-05-13 12:32:33.101012202 -0500 +@@ -3530,7 +3530,7 @@ + * + IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN + * +- DO 40 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -3561,7 +3561,7 @@ + * + II = IIA + ICURROW = IAROW +- 40 CONTINUE ++ 40 END DO + * + JJ = JJ + JB + * +@@ -3574,7 +3574,7 @@ + DO 90 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) + IF( MYCOL.EQ.ICURCOL .OR. 
COLREP ) THEN +- DO 80 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 )*DESCA( MB_ ) + IB +@@ -3606,7 +3606,7 @@ + * + II = IIA + ICURROW = IAROW +- 80 CONTINUE ++ 80 END DO + * + JJ = JJ + JB + END IF +@@ -9586,7 +9586,7 @@ + $ JB = ( (-JB) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB + JB = MIN( JB, N ) + JN = JA+JB-1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -9656,7 +9656,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -9668,7 +9668,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ )-IA+1 + IF( IB.LE.0 ) + $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -9738,7 +9738,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/PBLAS/TESTING/pdblastst.f aocl-scalapack-4.0.a/PBLAS/TESTING/pdblastst.f +--- aocl-scalapack-4.0/PBLAS/TESTING/pdblastst.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/PBLAS/TESTING/pdblastst.f 2023-05-13 12:33:15.052873951 -0500 +@@ -3524,7 +3524,7 @@ + * + IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN + * +- DO 40 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -3555,7 +3555,7 @@ + * + II = IIA + ICURROW = IAROW +- 40 CONTINUE ++ 40 END DO + * + JJ = JJ + JB + * +@@ -3568,7 +3568,7 @@ + DO 90 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) + IF( MYCOL.EQ.ICURCOL .OR. 
COLREP ) THEN +- DO 80 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 )*DESCA( MB_ ) + IB +@@ -3600,7 +3600,7 @@ + * + II = IIA + ICURROW = IAROW +- 80 CONTINUE ++ 80 END DO + * + JJ = JJ + JB + END IF +@@ -8918,7 +8918,7 @@ + $ JB = ( (-JB) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB + JB = MIN( JB, N ) + JN = JA+JB-1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -8982,7 +8982,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -8994,7 +8994,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ )-IA+1 + IF( IB.LE.0 ) + $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -9058,7 +9058,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/PBLAS/TESTING/psblastst.f aocl-scalapack-4.0.a/PBLAS/TESTING/psblastst.f +--- aocl-scalapack-4.0/PBLAS/TESTING/psblastst.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/PBLAS/TESTING/psblastst.f 2023-05-13 12:33:56.413724041 -0500 +@@ -3524,7 +3524,7 @@ + * + IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN + * +- DO 40 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -3555,7 +3555,7 @@ + * + II = IIA + ICURROW = IAROW +- 40 CONTINUE ++ 40 END DO + * + JJ = JJ + JB + * +@@ -3568,7 +3568,7 @@ + DO 90 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) + IF( MYCOL.EQ.ICURCOL .OR. 
COLREP ) THEN +- DO 80 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 )*DESCA( MB_ ) + IB +@@ -3600,7 +3600,7 @@ + * + II = IIA + ICURROW = IAROW +- 80 CONTINUE ++ 80 END DO + * + JJ = JJ + JB + END IF +@@ -8920,7 +8920,7 @@ + $ JB = ( (-JB) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB + JB = MIN( JB, N ) + JN = JA+JB-1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -8984,7 +8984,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -8996,7 +8996,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ )-IA+1 + IF( IB.LE.0 ) + $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -9060,7 +9060,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/PBLAS/TESTING/pzblastst.f aocl-scalapack-4.0.a/PBLAS/TESTING/pzblastst.f +--- aocl-scalapack-4.0/PBLAS/TESTING/pzblastst.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/PBLAS/TESTING/pzblastst.f 2023-05-13 12:34:31.725450989 -0500 +@@ -3530,7 +3530,7 @@ + * + IF( MYCOL.EQ.ICURCOL .OR. COLREP ) THEN + * +- DO 40 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -3561,7 +3561,7 @@ + * + II = IIA + ICURROW = IAROW +- 40 CONTINUE ++ 40 END DO + * + JJ = JJ + JB + * +@@ -3574,7 +3574,7 @@ + DO 90 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) + IF( MYCOL.EQ.ICURCOL .OR. 
COLREP ) THEN +- DO 80 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( ( -IB ) / DESCA( MB_ ) + 1 )*DESCA( MB_ ) + IB +@@ -3606,7 +3606,7 @@ + * + II = IIA + ICURROW = IAROW +- 80 CONTINUE ++ 80 END DO + * + JJ = JJ + JB + END IF +@@ -9588,7 +9588,7 @@ + $ JB = ( (-JB) / DESCA( NB_ ) + 1 ) * DESCA( NB_ ) + JB + JB = MIN( JB, N ) + JN = JA+JB-1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ ) - IA + 1 + IF( IB.LE.0 ) + $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -9658,7 +9658,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -9670,7 +9670,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IB = DESCA( IMB_ )-IA+1 + IF( IB.LE.0 ) + $ IB = ( (-IB) / DESCA( MB_ ) + 1 ) * DESCA( MB_ ) + IB +@@ -9740,7 +9740,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/SRC/CMakeLists.txt aocl-scalapack-4.0.a/SRC/CMakeLists.txt +--- aocl-scalapack-4.0/SRC/CMakeLists.txt 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/SRC/CMakeLists.txt 2023-05-13 13:04:13.525220791 -0500 +@@ -99,8 +99,8 @@ + pdgelq2.f pdgelqf.f pdgels.f pdgeql2.f pdgeqlf.f pdgeqpf.f pdgeqr2.f + pdgeqrf.f pdgerfs.f pdgerq2.f pdgerqf.f + pdgesv.f pdgesvd.f pdgesvx.f pdgetf2.f pdgetrf.f pdgetri.f pdgetrs.f +- pdggqrf.f pdggrqf.f pdlabrd.f pdgetf2K.f pdupdate.f pdgetrf0.f +- pdgetrfla.f pdgetf2_comm.f icopypv.f ++ pdggqrf.f pdggrqf.f pdlabrd.f pdgetrf0.f ++ pdgetf2_comm.f icopypv.f + pdlacon.f pdlacp2.f pdlacpy.f pdlahrd.f pdlange.f pdlanhs.f pdlansy.f + pdlantr.f pdlapiv.f pdlapv2.f pdlaqge.f pdlaqsy.f pdlarf.f pdlarfb.f + pdlarfg.f pdlarft.f pdlase2.f pdlaset.f pdlascl.f pdlassq.f pdlaswp.f +diff -urN aocl-scalapack-4.0/SRC/icopypv.f aocl-scalapack-4.0.a/SRC/icopypv.f +--- aocl-scalapack-4.0/SRC/icopypv.f 
2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/SRC/icopypv.f 2023-05-13 13:00:35.204701618 -0500 +@@ -75,7 +75,7 @@ + * .. Parameters .. + INTEGER I, IACOL, IAROW, ICTXT, + $ J, MN, MYCOL, MYROW, NPCOL, NPROW +- PARAMETER ( CTXT_ = 2 ) ++ INTEGER,PARAMETER :: CTXT_ = 2 + * .. + * .. External Subroutines .. + EXTERNAL BLACS_ABORT, BLACS_GRIDINFO, CHK1MAT, IGEBR2D, +diff -urN aocl-scalapack-4.0/TESTING/LIN/pcdbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pcdbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pcdbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pcdbdriver.f 2023-05-13 13:09:30.282786496 -0500 +@@ -552,7 +552,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -876,7 +876,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pcdtdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pcdtdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pcdtdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pcdtdriver.f 2023-05-13 13:09:43.763067570 -0500 +@@ -545,7 +545,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -877,7 +877,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pcgbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pcgbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pcgbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pcgbdriver.f 2023-05-13 13:09:57.636356839 -0500 +@@ -564,7 +564,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -888,7 +888,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pclltdriver.f 
aocl-scalapack-4.0.a/TESTING/LIN/pclltdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pclltdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pclltdriver.f 2023-05-13 13:10:07.068553508 -0500 +@@ -487,7 +487,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -843,7 +843,7 @@ + * + END IF + 10 CONTINUE +- 20 CONTINUE ++ 20 END DO + * + IF( CHECK .AND. SRESID.GT.THRESH ) THEN + * +diff -urN aocl-scalapack-4.0/TESTING/LIN/pclsdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pclsdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pclsdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pclsdriver.f 2023-05-13 13:10:34.924134319 -0500 +@@ -374,7 +374,7 @@ + * + * Loop over the different values for NRHS + * +- DO 40 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -1038,7 +1038,7 @@ + $ TMFLOPS, PASSED + END IF + 30 CONTINUE +- 40 CONTINUE ++ 40 END DO + 50 CONTINUE + 60 CONTINUE + 70 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pcludriver.f aocl-scalapack-4.0.a/TESTING/LIN/pcludriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pcludriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pcludriver.f 2023-05-13 13:10:48.332413896 -0500 +@@ -614,7 +614,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -990,7 +990,7 @@ + $ PASSED + END IF + 10 CONTINUE +- 20 CONTINUE ++ 20 END DO + * + IF( CHECK.AND.( SRESID.GT.THRESH ) ) THEN + * +diff -urN aocl-scalapack-4.0/TESTING/LIN/pcpbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pcpbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pcpbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pcpbdriver.f 2023-05-13 13:10:59.963656414 -0500 +@@ -531,7 +531,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ 
-834,7 +834,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pcptdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pcptdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pcptdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pcptdriver.f 2023-05-13 13:19:00.445674845 -0500 +@@ -505,10 +505,10 @@ + * For SPD Tridiagonal complex matrices, diagonal is stored + * as a real. Thus, compact D into half the space + * +- DO 10 H=1, NUMROC(N,NB,MYCOL,0,NPCOL)/2 ++ DO H=1, NUMROC(N,NB,MYCOL,0,NPCOL)/2 + MEM( IPA+INT_TEMP+H-1 ) = MEM( IPA+INT_TEMP+2*H-2 ) + $ +MEM( IPA+INT_TEMP+2*H-1 )*( 0.0E+0, 1.0E+0 ) +- 10 CONTINUE ++ 10 END DO + IF( 2*(NUMROC(N,NB,MYCOL,0,NPCOL)/2).NE. + $ NUMROC(N,NB,MYCOL,0,NPCOL) ) THEN + H=NUMROC(N,NB,MYCOL,0,NPCOL)/2+1 +@@ -550,7 +550,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -860,7 +860,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pddbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pddbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pddbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pddbdriver.f 2023-05-13 13:08:04.798008071 -0500 +@@ -544,7 +544,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -860,7 +860,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pddtdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pddtdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pddtdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pddtdriver.f 2023-05-13 13:11:20.509084801 -0500 +@@ -542,7 +542,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) 
= 0 + * +@@ -866,7 +866,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pdgbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pdgbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pdgbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pdgbdriver.f 2023-05-13 13:11:33.069346692 -0500 +@@ -562,7 +562,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -878,7 +878,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pdlltdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pdlltdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pdlltdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pdlltdriver.f 2023-05-13 13:11:44.660588379 -0500 +@@ -487,7 +487,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -843,7 +843,7 @@ + * + END IF + 10 CONTINUE +- 20 CONTINUE ++ 20 END DO + * + IF( CHECK .AND. 
SRESID.GT.THRESH ) THEN + * +diff -urN aocl-scalapack-4.0/TESTING/LIN/pdlsdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pdlsdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pdlsdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pdlsdriver.f 2023-05-13 13:11:56.287830817 -0500 +@@ -371,7 +371,7 @@ + * + * Loop over the different values for NRHS + * +- DO 40 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -1033,7 +1033,7 @@ + $ TMFLOPS, PASSED + END IF + 30 CONTINUE +- 40 CONTINUE ++ 40 END DO + 50 CONTINUE + 60 CONTINUE + 70 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pdludriver.f aocl-scalapack-4.0.a/TESTING/LIN/pdludriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pdludriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pdludriver.f 2023-05-13 13:12:07.670068146 -0500 +@@ -615,7 +615,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -991,7 +991,7 @@ + $ PASSED + END IF + 10 CONTINUE +- 20 CONTINUE ++ 20 END DO + * + IF( CHECK.AND.( SRESID.GT.THRESH ) ) THEN + * +diff -urN aocl-scalapack-4.0/TESTING/LIN/pdpbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pdpbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pdpbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pdpbdriver.f 2023-05-13 13:12:17.111265004 -0500 +@@ -528,7 +528,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -827,7 +827,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pdptdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pdptdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pdptdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pdptdriver.f 2023-05-13 13:12:27.001471220 -0500 +@@ -533,7 +533,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH 
= 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -839,7 +839,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/psdbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/psdbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/psdbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/psdbdriver.f 2023-05-13 13:12:36.062660154 -0500 +@@ -543,7 +543,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -859,7 +859,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/psdtdriver.f aocl-scalapack-4.0.a/TESTING/LIN/psdtdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/psdtdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/psdtdriver.f 2023-05-13 13:12:44.822842810 -0500 +@@ -541,7 +541,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -865,7 +865,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/psgbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/psgbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/psgbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/psgbdriver.f 2023-05-13 13:12:54.198038291 -0500 +@@ -561,7 +561,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -877,7 +877,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pslltdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pslltdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pslltdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pslltdriver.f 2023-05-13 13:13:05.375271347 -0500 +@@ -487,7 +487,7 @@ + * 
+ * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -843,7 +843,7 @@ + * + END IF + 10 CONTINUE +- 20 CONTINUE ++ 20 END DO + * + IF( CHECK .AND. SRESID.GT.THRESH ) THEN + * +diff -urN aocl-scalapack-4.0/TESTING/LIN/pslsdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pslsdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pslsdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pslsdriver.f 2023-05-13 13:13:16.854510696 -0500 +@@ -370,7 +370,7 @@ + * + * Loop over the different values for NRHS + * +- DO 40 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -1032,7 +1032,7 @@ + $ TMFLOPS, PASSED + END IF + 30 CONTINUE +- 40 CONTINUE ++ 40 END DO + 50 CONTINUE + 60 CONTINUE + 70 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/psludriver.f aocl-scalapack-4.0.a/TESTING/LIN/psludriver.f +--- aocl-scalapack-4.0/TESTING/LIN/psludriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/psludriver.f 2023-05-13 13:13:50.751217470 -0500 +@@ -604,7 +604,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -980,7 +980,7 @@ + $ PASSED + END IF + 10 CONTINUE +- 20 CONTINUE ++ 20 END DO + * + IF( CHECK.AND.( SRESID.GT.THRESH ) ) THEN + * +diff -urN aocl-scalapack-4.0/TESTING/LIN/pspbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pspbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pspbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pspbdriver.f 2023-05-13 13:14:00.485420435 -0500 +@@ -527,7 +527,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -826,7 +826,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/psptdriver.f aocl-scalapack-4.0.a/TESTING/LIN/psptdriver.f +--- 
aocl-scalapack-4.0/TESTING/LIN/psptdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/psptdriver.f 2023-05-13 13:14:10.085620608 -0500 +@@ -532,7 +532,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -838,7 +838,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pzdbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pzdbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pzdbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pzdbdriver.f 2023-05-13 13:14:20.446836646 -0500 +@@ -547,7 +547,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -871,7 +871,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pzdtdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pzdtdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pzdtdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pzdtdriver.f 2023-05-13 13:14:30.888054354 -0500 +@@ -545,7 +545,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -877,7 +877,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pzgbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pzgbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pzgbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pzgbdriver.f 2023-05-13 13:14:42.399294375 -0500 +@@ -565,7 +565,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -889,7 +889,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN 
aocl-scalapack-4.0/TESTING/LIN/pzlltdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pzlltdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pzlltdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pzlltdriver.f 2023-05-13 13:14:52.103496713 -0500 +@@ -485,7 +485,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -841,7 +841,7 @@ + * + END IF + 10 CONTINUE +- 20 CONTINUE ++ 20 END DO + * + IF( CHECK .AND. SRESID.GT.THRESH ) THEN + * +diff -urN aocl-scalapack-4.0/TESTING/LIN/pzlsdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pzlsdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pzlsdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pzlsdriver.f 2023-05-13 13:15:03.839741423 -0500 +@@ -373,7 +373,7 @@ + * + * Loop over the different values for NRHS + * +- DO 40 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -1037,7 +1037,7 @@ + $ TMFLOPS, PASSED + END IF + 30 CONTINUE +- 40 CONTINUE ++ 40 END DO + 50 CONTINUE + 60 CONTINUE + 70 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pzludriver.f aocl-scalapack-4.0.a/TESTING/LIN/pzludriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pzludriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pzludriver.f 2023-05-13 13:15:16.529006006 -0500 +@@ -612,7 +612,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + NRHS = NRVAL( HH ) + * +@@ -988,7 +988,7 @@ + $ PASSED + END IF + 10 CONTINUE +- 20 CONTINUE ++ 20 END DO + * + IF( CHECK.AND.( SRESID.GT.THRESH ) ) THEN + * +diff -urN aocl-scalapack-4.0/TESTING/LIN/pzpbdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pzpbdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pzpbdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pzpbdriver.f 2023-05-13 13:15:26.328210325 -0500 +@@ -531,7 +531,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR 
++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -834,7 +834,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TESTING/LIN/pzptdriver.f aocl-scalapack-4.0.a/TESTING/LIN/pzptdriver.f +--- aocl-scalapack-4.0/TESTING/LIN/pzptdriver.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TESTING/LIN/pzptdriver.f 2023-05-13 13:19:09.725868346 -0500 +@@ -504,10 +504,10 @@ + * For SPD Tridiagonal complex matrices, diagonal is stored + * as a real. Thus, compact D into half the space + * +- DO 10 H=1, NUMROC(N,NB,MYCOL,0,NPCOL)/2 ++ DO H=1, NUMROC(N,NB,MYCOL,0,NPCOL)/2 + MEM( IPA+INT_TEMP+H-1 ) = MEM( IPA+INT_TEMP+2*H-2 ) + $ +MEM( IPA+INT_TEMP+2*H-1 )*( 0.0D+0, 1.0D+0 ) +- 10 CONTINUE ++ 10 END DO + IF( 2*(NUMROC(N,NB,MYCOL,0,NPCOL)/2).NE. + $ NUMROC(N,NB,MYCOL,0,NPCOL) ) THEN + H=NUMROC(N,NB,MYCOL,0,NPCOL)/2+1 +@@ -549,7 +549,7 @@ + * + * Loop over the different values for NRHS + * +- DO 20 HH = 1, NNR ++ DO HH = 1, NNR + * + IERR( 1 ) = 0 + * +@@ -859,7 +859,7 @@ + $ TMFLOPS2, PASSED + * + END IF +- 20 CONTINUE ++ 20 END DO + * + * + 30 CONTINUE +diff -urN aocl-scalapack-4.0/TOOLS/pclaprnt.f aocl-scalapack-4.0.a/TOOLS/pclaprnt.f +--- aocl-scalapack-4.0/TOOLS/pclaprnt.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pclaprnt.f 2023-05-13 12:34:57.291977317 -0500 +@@ -162,7 +162,7 @@ + * + JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) + JB = JN-JA+1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. 
ICURCOL.EQ.ICPRNT ) THEN +@@ -228,7 +228,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -239,7 +239,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN +@@ -305,7 +305,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pclaread.f aocl-scalapack-4.0.a/TOOLS/pclaread.f +--- aocl-scalapack-4.0/TOOLS/pclaread.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pclaread.f 2023-05-13 12:35:16.251367625 -0500 +@@ -95,7 +95,7 @@ + * + DO 50 J = 1, N, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), N-J+1 ) +- DO 40 H = 0, JB-1 ++ DO H = 0, JB-1 + * + * Loop over block of rows + * +@@ -128,7 +128,7 @@ + * + II = 1 + ICURROW = DESCA( RSRC_ ) +- 40 CONTINUE ++ 40 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pclawrite.f aocl-scalapack-4.0.a/TOOLS/pclawrite.f +--- aocl-scalapack-4.0/TOOLS/pclawrite.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pclawrite.f 2023-05-13 12:35:35.428762427 -0500 +@@ -77,7 +77,7 @@ + * + JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) + JB = JN-JA+1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN +@@ -137,7 +137,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -148,7 +148,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRWRIT .AND. 
ICURCOL.EQ.ICWRIT ) THEN +@@ -210,7 +210,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pdlaprnt.f aocl-scalapack-4.0.a/TOOLS/pdlaprnt.f +--- aocl-scalapack-4.0/TOOLS/pdlaprnt.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pdlaprnt.f 2023-05-13 12:35:56.637199031 -0500 +@@ -162,7 +162,7 @@ + * + JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) + JB = JN-JA+1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN +@@ -222,7 +222,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -233,7 +233,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. 
ICURCOL.EQ.ICPRNT ) THEN +@@ -293,7 +293,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pdlaread.f aocl-scalapack-4.0.a/TOOLS/pdlaread.f +--- aocl-scalapack-4.0/TOOLS/pdlaread.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pdlaread.f 2023-05-13 12:36:07.683426435 -0500 +@@ -94,7 +94,7 @@ + * + DO 50 J = 1, N, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), N-J+1 ) +- DO 40 H = 0, JB-1 ++ DO H = 0, JB-1 + * + * Loop over block of rows + * +@@ -125,7 +125,7 @@ + * + II = 1 + ICURROW = DESCA( RSRC_ ) +- 40 CONTINUE ++ 40 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pdlawrite.f aocl-scalapack-4.0.a/TOOLS/pdlawrite.f +--- aocl-scalapack-4.0/TOOLS/pdlawrite.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pdlawrite.f 2023-05-13 12:36:25.980803115 -0500 +@@ -77,7 +77,7 @@ + * + JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) + JB = JN-JA+1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN +@@ -133,7 +133,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -144,7 +144,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRWRIT .AND. 
ICURCOL.EQ.ICWRIT ) THEN +@@ -200,7 +200,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pilaprnt.f aocl-scalapack-4.0.a/TOOLS/pilaprnt.f +--- aocl-scalapack-4.0/TOOLS/pilaprnt.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pilaprnt.f 2023-05-13 12:22:11.627246250 -0500 +@@ -162,7 +162,7 @@ + * + JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) + JB = JN-JA+1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN +@@ -222,7 +222,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -233,7 +233,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN +@@ -293,7 +293,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pslaprnt.f aocl-scalapack-4.0.a/TOOLS/pslaprnt.f +--- aocl-scalapack-4.0/TOOLS/pslaprnt.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pslaprnt.f 2023-05-13 12:36:44.549185377 -0500 +@@ -162,7 +162,7 @@ + * + JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) + JB = JN-JA+1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. 
ICURCOL.EQ.ICPRNT ) THEN +@@ -222,7 +222,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -233,7 +233,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN +@@ -293,7 +293,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pslaread.f aocl-scalapack-4.0.a/TOOLS/pslaread.f +--- aocl-scalapack-4.0/TOOLS/pslaread.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pslaread.f 2023-05-13 12:36:55.734415640 -0500 +@@ -94,7 +94,7 @@ + * + DO 50 J = 1, N, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), N-J+1 ) +- DO 40 H = 0, JB-1 ++ DO H = 0, JB-1 + * + * Loop over block of rows + * +@@ -125,7 +125,7 @@ + * + II = 1 + ICURROW = DESCA( RSRC_ ) +- 40 CONTINUE ++ 40 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pslawrite.f aocl-scalapack-4.0.a/TOOLS/pslawrite.f +--- aocl-scalapack-4.0/TOOLS/pslawrite.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pslawrite.f 2023-05-13 12:37:18.124876586 -0500 +@@ -77,7 +77,7 @@ + * + JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) + JB = JN-JA+1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN +@@ -133,7 +133,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -144,7 +144,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRWRIT .AND. 
ICURCOL.EQ.ICWRIT ) THEN +@@ -200,7 +200,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pzlaprnt.f aocl-scalapack-4.0.a/TOOLS/pzlaprnt.f +--- aocl-scalapack-4.0/TOOLS/pzlaprnt.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pzlaprnt.f 2023-05-13 12:37:37.150268256 -0500 +@@ -162,7 +162,7 @@ + * + JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) + JB = JN-JA+1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. ICURCOL.EQ.ICPRNT ) THEN +@@ -228,7 +228,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -239,7 +239,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRPRNT .AND. 
ICURCOL.EQ.ICPRNT ) THEN +@@ -305,7 +305,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pzlaread.f aocl-scalapack-4.0.a/TOOLS/pzlaread.f +--- aocl-scalapack-4.0/TOOLS/pzlaread.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pzlaread.f 2023-05-13 12:37:46.765466199 -0500 +@@ -95,7 +95,7 @@ + * + DO 50 J = 1, N, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), N-J+1 ) +- DO 40 H = 0, JB-1 ++ DO H = 0, JB-1 + * + * Loop over block of rows + * +@@ -128,7 +128,7 @@ + * + II = 1 + ICURROW = DESCA( RSRC_ ) +- 40 CONTINUE ++ 40 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +diff -urN aocl-scalapack-4.0/TOOLS/pzlawrite.f aocl-scalapack-4.0.a/TOOLS/pzlawrite.f +--- aocl-scalapack-4.0/TOOLS/pzlawrite.f 2022-11-13 09:31:10.000000000 -0600 ++++ aocl-scalapack-4.0.a/TOOLS/pzlawrite.f 2023-05-13 12:38:12.069987133 -0500 +@@ -77,7 +77,7 @@ + * + JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) + JB = JN-JA+1 +- DO 60 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRWRIT .AND. ICURCOL.EQ.ICWRIT ) THEN +@@ -137,7 +137,7 @@ + * + II = IIA + ICURROW = IAROW +- 60 CONTINUE ++ 60 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB +@@ -148,7 +148,7 @@ + * + DO 130 J = JN+1, JA+N-1, DESCA( NB_ ) + JB = MIN( DESCA( NB_ ), JA+N-J ) +- DO 120 H = 0, JB-1 ++ DO H = 0, JB-1 + IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) + IB = IN-IA+1 + IF( ICURROW.EQ.IRWRIT .AND. 
ICURCOL.EQ.ICWRIT ) THEN +@@ -210,7 +210,7 @@ + * + II = IIA + ICURROW = IAROW +- 120 CONTINUE ++ 120 END DO + * + IF( MYCOL.EQ.ICURCOL ) + $ JJ = JJ + JB diff --git a/var/spack/repos/builtin/packages/amdscalapack/package.py b/var/spack/repos/builtin/packages/amdscalapack/package.py index 3ccf226089..aaa489a057 100644 --- a/var/spack/repos/builtin/packages/amdscalapack/package.py +++ b/var/spack/repos/builtin/packages/amdscalapack/package.py @@ -22,7 +22,7 @@ class Amdscalapack(ScalapackBase): LICENSING INFORMATION: By downloading, installing and using this software, you agree to the terms and conditions of the AMD AOCL-ScaLAPACK license agreement. You may obtain a copy of this license agreement from - https://www.amd.com/en/developer/aocl/scalapack/eula/scalapack-libraries-4-1-eula.html + https://www.amd.com/en/developer/aocl/scalapack/eula/scalapack-libraries-4-2-eula.html https://www.amd.com/en/developer/aocl/scalapack/eula/scalapack-libraries-eula.html """ @@ -33,7 +33,11 @@ class Amdscalapack(ScalapackBase): maintainers("amd-toolchain-support") license("BSD-3-Clause-Open-MPI") - + version( + "4.2", + sha256="c6e9a846c05cdc05252b0b5f264164329812800bf13f9d97c77114dc138e6ccb", + preferred=True, + ) version("4.1", sha256="b2e51c3604e5869d1faaef2e52c92071fcb3de1345aebb2ea172206622067ad9") version("4.0", sha256="f02913b5984597b22cdb9a36198ed61039a1bf130308e778dc31b2a7eb88b33b") version("3.2", sha256="9e00979bb1be39d627bdacb01774bc043029840d542fafc934d16fec3e3b0892") @@ -46,6 +50,13 @@ class Amdscalapack(ScalapackBase): conflicts("+ilp64", when="@:3.0", msg="ILP64 is supported from 3.1 onwards") requires("target=x86_64:", msg="AMD scalapack available only on x86_64") + patch("clang-hollerith.patch", when="%clang@16:") + + def patch(self): + # Flang-New gets confused and thinks it finds Hollerith constants + if self.spec.satisfies("%clang@16:"): + filter_file("-cpp", "", "CMakeLists.txt") + def url_for_version(self, version): vers = 
"https://github.com/amd/{0}/archive/{1}.tar.gz" if version >= Version("3.1"): @@ -59,15 +70,15 @@ class Amdscalapack(ScalapackBase): spec = self.spec if not ( - spec.satisfies(r"%aocc@3.2:4.1") + spec.satisfies(r"%aocc@3.2:4.2") or spec.satisfies(r"%gcc@12.2:13.1") - or spec.satisfies(r"%clang@15:16") + or spec.satisfies(r"%clang@15:17") ): tty.warn( - "AOCL has been tested to work with the following compilers\ - versions - gcc@12.2:13.1, aocc@3.2:4.1, and clang@15:16\ - see the following aocl userguide for details: \ - https://www.amd.com/content/dam/amd/en/documents/developer/version-4-1-documents/aocl/aocl-4-1-user-guide.pdf" + "AOCL has been tested to work with the following compilers " + "versions - gcc@12.2:13.1, aocc@3.2:4.2, and clang@15:17 " + "see the following aocl userguide for details: " + "https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf" ) if spec.satisfies("%gcc@10:"): @@ -109,3 +120,7 @@ class Amdscalapack(ScalapackBase): ) return args + + def setup_dependent_run_environment(self, env, dependent_spec): + if self.spec.external: + env.prepend_path("LD_LIBRARY_PATH", self.prefix.lib) diff --git a/var/spack/repos/builtin/packages/aocl-compression/package.py b/var/spack/repos/builtin/packages/aocl-compression/package.py new file mode 100644 index 0000000000..133c92b8f4 --- /dev/null +++ b/var/spack/repos/builtin/packages/aocl-compression/package.py @@ -0,0 +1,109 @@ +# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +# ---------------------------------------------------------------------------- +from llnl.util import tty + +from spack.package import * + + +class AoclCompression(CMakePackage): + """ + AOCL-Compression is a software framework of various lossless compression + and decompression methods tuned and optimized for AMD Zen based CPUs. 
+ This framework offers a single set of unified APIs for all the supported + compression and decompression methods which facilitate the applications + to easily integrate and use them. + AOCL-Compression supports lz4, zlib/deflate, lzma, zstd, bzip2, snappy, + and lz4hc based compression and decompression methods along with their + native APIs. + The library offers openMP based multi-threaded implementation of lz4, zlib, + zstd and snappy compression methods. It supports the dynamic dispatcher + feature that executes the most optimal function variant implemented using + Function Multi-versioning thereby offering a single optimized library + portable across different x86 CPU architectures. + AOCL-Compression framework is developed in C for UNIX® and Windows® based + systems. A test suite is provided for the validation and performance + benchmarking of the supported compression and decompression methods. + This suite also supports the benchmarking of IPP compression methods, such + as, lz4, lz4hc, zlib and bzip2. The library build framework offers CTest + based testing of the test cases implemented using GTest and the library + test suite. + + LICENSING INFORMATION: By downloading, installing and using this software, + you agree to the terms and conditions of the AMD AOCL-Compression license + agreement. 
def cmake_args(self):
    """Assemble the CMake options for the AOCL-Compression build.

    A warning is printed (the build still proceeds) when the compiler is
    none of aocc@4.1:4.2, gcc@12.2:13.1 or clang@16:17.

    Returns:
        list: the ``AOCL_ENABLE_THREADS`` definition derived from the
        ``openmp`` variant, two fixed LZ4 definitions, and one extra
        definition for every variant that is switched off in the spec.
    """
    spec = self.spec

    validated_compilers = (r"%aocc@4.1:4.2", r"%gcc@12.2:13.1", r"%clang@16:17")
    if not any(spec.satisfies(compiler) for compiler in validated_compilers):
        tty.warn(
            "AOCL has been tested to work with the following compilers "
            "versions - gcc@12.2:13.1, aocc@4.1:4.2, and clang@16:17 "
            "see the following aocl userguide for details: "
            "https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf"
        )

    options = [
        self.define_from_variant("AOCL_ENABLE_THREADS", "openmp"),
        "-DLZ4_FRAME_FORMAT_SUPPORT=ON",
        "-DAOCL_LZ4HC_DISABLE_PATTERN_ANALYSIS=ON",
    ]

    # (spec query, definition added when the query matches), in emission order.
    disabled_variant_flags = (
        ("~shared", "-DBUILD_STATIC_LIBS=ON"),
        ("~zlib", "-DAOCL_EXCLUDE_ZLIB=ON"),
        ("~bzip2", "-DAOCL_EXCLUDE_BZIP2=ON"),
        ("~snappy", "-DAOCL_EXCLUDE_SNAPPY=ON"),
        ("~zstd", "-DAOCL_EXCLUDE_ZSTD=ON"),
        ("~lzma", "-DAOCL_EXCLUDE_LZMA=ON"),
        ("~lz4", "-DAOCL_EXCLUDE_LZ4=ON"),
        ("~lz4hc", "-DAOCL_EXCLUDE_LZ4HC=ON"),
    )
    options.extend(flag for query, flag in disabled_variant_flags if query in spec)

    return options
def cmake_args(self):
    """Build the CMake argument list for AOCL-Crypto.

    A warning is printed (the build still proceeds) when the compiler is
    none of aocc@4.1:4.2, gcc@12.2:13.1 or clang@16:17.

    Returns:
        list: compiler definitions taken from ``spack_cc`` / ``spack_cxx``,
        the ``ALCP_ENABLE_EXAMPLES`` definition derived from the
        ``examples`` variant, and the install prefixes of the ``openssl``
        and ``aocl-utils`` dependencies.
    """
    spec = self.spec

    validated_compilers = (r"%aocc@4.1:4.2", r"%gcc@12.2:13.1", r"%clang@16:17")
    if not any(spec.satisfies(compiler) for compiler in validated_compilers):
        tty.warn(
            "AOCL has been tested to work with the following compilers "
            "versions - gcc@12.2:13.1, aocc@4.1:4.2, and clang@16:17 "
            "see the following aocl userguide for details: "
            "https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf"
        )

    return [
        f"-DCMAKE_C_COMPILER={spack_cc}",
        f"-DCMAKE_CXX_COMPILER={spack_cxx}",
        self.define_from_variant("ALCP_ENABLE_EXAMPLES", "examples"),
        f"-DOPENSSL_INSTALL_DIR={spec['openssl'].prefix}",
        "-DENABLE_AOCL_UTILS=ON",
        f"-DAOCL_UTILS_INSTALL_DIR={spec['aocl-utils'].prefix}",
    ]
def cmake_args(self):
    """Return the CMake arguments used to configure AOCL-LibMem.

    A warning is printed (the build still proceeds) when the compiler is
    none of aocc@4.1:4.2, gcc@12.2:13.1 or clang@16:17.

    ``ALMEM_ARCH`` follows the ``vectorization`` variant. An explicit
    ``avx2`` or ``avx512`` is passed through unchanged. For ``auto`` the
    value is derived from the spec's target: ``avx512`` when the target
    reports it, otherwise ``avx2`` when the target reports that, otherwise
    ``none``.

    Returns:
        list: the ``ENABLE_LOGGING``, ``ENABLE_TUNABLES`` and
        ``BUILD_SHARED_LIBS`` definitions derived from the ``logging``,
        ``tunables`` and ``shared`` variants, followed by ``ALMEM_ARCH``.
    """
    spec = self.spec

    if not (
        spec.satisfies(r"%aocc@4.1:4.2")
        or spec.satisfies(r"%gcc@12.2:13.1")
        or spec.satisfies(r"%clang@16:17")
    ):
        tty.warn(
            "AOCL has been tested to work with the following compilers "
            "versions - gcc@12.2:13.1, aocc@4.1:4.2, and clang@16:17 "
            "see the following aocl userguide for details: "
            "https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf"
        )

    args = [
        self.define_from_variant("ENABLE_LOGGING", "logging"),
        self.define_from_variant("ENABLE_TUNABLES", "tunables"),
        self.define_from_variant("BUILD_SHARED_LIBS", "shared"),
    ]

    # The variant is spelled "vectorization"; the previous check against
    # "vectorisation=auto" named a variant that does not exist.
    vectorization = spec.variants["vectorization"].value
    if vectorization == "auto":
        # Prefer the widest vector extension the target advertises.
        if "avx512" in spec.target:
            vectorization = "avx512"
        elif "avx2" in spec.target:
            vectorization = "avx2"
        else:
            vectorization = "none"

    # Always emit a -D cache definition: a bare "-ALMEM_ARCH=..." would be
    # read by CMake as its -A (generator platform) option instead.
    args.append(self.define("ALMEM_ARCH", vectorization))
    return args
+ if Version(vers) >= Version("4.2"): + depends_on(f"aocl-utils@={vers}") depends_on("boost", when="+benchmarks") depends_on("boost", when="@2.2") - depends_on("cmake@3.11:", type="build") + depends_on("cmake@3.15:", type="build") @property def build_directory(self): @@ -78,15 +87,15 @@ class AoclSparse(CMakePackage): spec = self.spec if not ( - spec.satisfies(r"%aocc@3.2:4.1") + spec.satisfies(r"%aocc@3.2:4.2") or spec.satisfies(r"%gcc@12.2:13.1") - or spec.satisfies(r"%clang@15:16") + or spec.satisfies(r"%clang@15:17") ): tty.warn( - "AOCL has been tested to work with the following compilers\ - versions - gcc@12.2:13.1, aocc@3.2:4.1, and clang@15:16\ - see the following aocl userguide for details: \ - https://www.amd.com/content/dam/amd/en/documents/developer/version-4-1-documents/aocl/aocl-4-1-user-guide.pdf" + "AOCL has been tested to work with the following compilers " + "versions - gcc@12.2:13.1, aocc@3.2:4.2, and clang@15:17 " + "see the following aocl userguide for details: " + "https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf" ) args = [] @@ -100,15 +109,21 @@ class AoclSparse(CMakePackage): args.append(self.define_from_variant("BUILD_ILP64", "ilp64")) if self.spec.satisfies("@4.0:"): - args.append("-DAOCL_BLIS_LIB=" + str(self.spec["amdblis"].libs)) + args.append(f"-DAOCL_BLIS_LIB={self.spec['amdblis'].libs}") args.append( "-DAOCL_BLIS_INCLUDE_DIR={0}/blis".format(self.spec["amdblis"].prefix.include) ) - args.append("-DAOCL_LIBFLAME=" + str(self.spec["amdlibflame"].libs)) + args.append(f"-DAOCL_LIBFLAME={self.spec['amdlibflame'].libs}") args.append( "-DAOCL_LIBFLAME_INCLUDE_DIR={0}".format(self.spec["amdlibflame"].prefix.include) ) + if "@4.2:" in self.spec: + args.append(f"-DAOCL_UTILS_LIB={self.spec['aocl-utils'].libs}") + args.append( + "-DAOCL_UTILS_INCLUDE_DIR={0}".format(self.spec["aocl-utils"].prefix.include) + ) + return args @run_after("build") diff --git 
def cmake_args(self):
    """Build the CMake argument list for AOCL-Utils.

    A warning is printed (the build still proceeds) when the compiler is
    none of aocc@3.2:4.2, gcc@12.2:13.1 or clang@15:17.

    Returns:
        list: one definition per (CMake option, variant) pair, in the
        order ``ALCI_DOCS``, ``ALCI_TESTS``, ``BUILD_SHARED_LIBS``,
        ``ALCI_EXAMPLES``.
    """
    spec = self.spec

    validated_compilers = (r"%aocc@3.2:4.2", r"%gcc@12.2:13.1", r"%clang@15:17")
    if not any(spec.satisfies(compiler) for compiler in validated_compilers):
        tty.warn(
            "AOCL has been tested to work with the following compilers "
            "versions - gcc@12.2:13.1, aocc@3.2:4.2, and clang@15:17 "
            "see the following aocl userguide for details: "
            "https://www.amd.com/content/dam/amd/en/documents/developer/version-4-2-documents/aocl/aocl-4-2-user-guide.pdf"
        )

    # (CMake option, Spack variant) pairs, in emission order.
    option_variants = (
        ("ALCI_DOCS", "doc"),
        ("ALCI_TESTS", "tests"),
        ("BUILD_SHARED_LIBS", "shared"),
        ("ALCI_EXAMPLES", "examples"),
    )
    return [self.define_from_variant(option, variant) for option, variant in option_variants]