author    Kaan <61908449+kaanolgu@users.noreply.github.com>  2024-11-01 00:42:40 +0000
committer GitHub <noreply@github.com>  2024-10-31 18:42:40 -0600
commit    504cc808d642a969d0e13242b505e6f8430a594f (patch)
tree      c633282df82e427b42705ecbc60280e632413f37 /var
parent    8076134c918ff8866bfd23b787ae6609c1bdc2f7 (diff)
Babelstream v5.0 Spack Package Updates (#41019)
- Merged sycl2020usm and sycl2020acc into sycl2020 and introduced the submodel=acc/usm variant
- Renamed implementation to option
- Renamed impl (Fortran implementation options) to foption
- sycl_compiler_implementation and thrust_backend
- Merged stddata, stdindices, stdranges into a single std model with a std_submodel variant
- Made std_use_tbb a boolean; also changed the model-filtering algorithm to make sure it only picks model names
- Modified comments to clear confusion with the cuda_arch cc_ and sm_ prefix appends
- Deleted a duplicate cuda_arch definition from +omp
- Moved CMAKE_CXX_COMPILER to be a shared arg between all models except tbb and thrust
- Replaced sys.exit with InstallError and created a dictionary to simplify things and eliminate excess code lines doing the same checks
- Replaced the -mcpu flags with -march since -mcpu is now deprecated
- Replaced platform.machine with spec.target
- Removed raja_backend, introduced openmp_flag, removed -march flags, cleared debugging print(), removed excess `if ... in self.spec.variants` checks
- [FIX] Issue where Thrust couldn't find the correct compiler (it requires nvcc)
- [FIX] Fortran unsupported check to match the full string
- [FIX] RAJA cuda_arch to be sm_ rather than cc_
- dir= option is no longer needed for kokkos
- dir is no longer needed
- [omp] Added clang support for NVIDIA offload
- SYCL2020 offload to NVIDIA GPU
- Changed model dependency to be languages rather than build system
- Removed hardcoded arch flags, replacing them with archspec
- Removed cpu_arch from the acc model

--------

Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>
Co-authored-by: Greg Becker <becker33@llnl.gov>
Co-authored-by: Kaan Olgu <kaan.olgu@bristol.ac.uk>
Co-authored-by: Todd Gamblin <tgamblin@llnl.gov>
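For illustration only (not part of the commit), the reworked variants would be
exercised with specs along these lines; exact spec syntax depends on your Spack setup:

    spack install babelstream +sycl2020 sycl2020_submodel=usm
    spack install babelstream +thrust thrust_submodel=rocm
    spack install babelstream languages=fortran foption=OpenMP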
Diffstat (limited to 'var')
-rw-r--r--  var/spack/repos/builtin/packages/babelstream/package.py  990
1 file changed, 723 insertions(+), 267 deletions(-)
diff --git a/var/spack/repos/builtin/packages/babelstream/package.py b/var/spack/repos/builtin/packages/babelstream/package.py
index ec85b2d356..b09fcc5f6e 100644
--- a/var/spack/repos/builtin/packages/babelstream/package.py
+++ b/var/spack/repos/builtin/packages/babelstream/package.py
@@ -3,65 +3,107 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-import re # To get the variant name after (+)
-
+import spack.build_systems.cmake
+import spack.build_systems.makefile
from spack.package import *
-def find_model_flag(str):
- res = re.findall(r"\+(\w+)", str)
- if not res:
- return ""
- return res
-
-
-class Babelstream(CMakePackage, CudaPackage, ROCmPackage):
+class Babelstream(CMakePackage, CudaPackage, ROCmPackage, MakefilePackage):
"""Measure memory transfer rates to/from global device memory on GPUs.
This benchmark is similar in spirit, and based on, the STREAM benchmark for CPUs."""
homepage = "https://github.com/UoB-HPC/BabelStream"
- url = "https://github.com/UoB-HPC/BabelStream/archive/refs/tags/v4.0.tar.gz"
+ url = "https://github.com/UoB-HPC/BabelStream/archive/refs/tags/v5.0.tar.gz"
git = "https://github.com/UoB-HPC/BabelStream.git"
+ version("5.0", sha256="1a418203fbfd95595bdc66047e2e39d8f1bba95a49725c9ecb907caf1af2521f")
version("4.0", sha256="a9cd39277fb15d977d468435eb9b894f79f468233f0131509aa540ffda4f5953")
+ version("3.4", sha256="e34ee9d5ccdead019e3ea478333bcb7886117d600e5da8579a626f6ee34209cf")
+ version("3.3", sha256="4c89c805b277d52776feeb7a8eef7985a0d9295ce3e0bb2333bf715f724723cf")
+ version("3.2", sha256="20309b27ddd09ea37406bcc6f46fd32e9372bf3d145757e55938d19d69cdc49d")
+ version("3.1", sha256="be69e6085e8966e12aa2df897eea6254b172e5adfa03de0adbb89bc3065f4fbe")
+ version("3.0", sha256="776219c72e0fdc36f134e6975b68c7ab25f38206f8f8af84a6f9630648c24800")
+ version("1.0", sha256="3cfb9e45601f1f249878355c72baa6e6a61f6c811f8716d60b83c7fb544e1d5c")
version("main", branch="main")
- version("develop", branch="develop")
-
- depends_on("cxx", type="build") # generated
-
- maintainers("tomdeakin", "kaanolgu", "tom91136", "robj0nes")
-
+ maintainers("tomdeakin", "kaanolgu", "tom91136")
+ # Previous maintainers: "robj0nes"
+ depends_on("cxx", type="build", when="languages=cxx")
+ depends_on("fortran", type="build", when="languages=fortran")
# Languages
- # Also supported variants are cuda and rocm (for HIP)
- variant("sycl", default=False, description="Enable SYCL support")
- variant("sycl2020", default=False, description="Enable SYCL support")
- variant("omp", default=False, description="Enable OpenMP support")
- variant("ocl", default=False, description="Enable OpenCL support")
- variant("tbb", default=False, description="Enable TBB support")
- variant("acc", default=False, description="Enable OpenACC support")
- variant("thrust", default=False, description="Enable THRUST support")
- variant("raja", default=False, description="Enable RAJA support")
- variant("stddata", default=False, description="Enable STD-data support")
- variant("stdindices", default=False, description="Enable STD-indices support")
- variant("stdranges", default=False, description="Enable STD-ranges support")
+ # in the future it could be possible to add other languages too
+ variant(
+ "languages",
+ default="cxx",
+ values=("cxx", "fortran"),
+ description="Languages Babelstream Spack Package Support",
+ )
+ # Build System
+ build_system(
+ conditional("cmake", when="languages=cxx"),
+ conditional("makefile", when="languages=fortran"),
+ default="cmake",
+ )
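+    # C++ models are built with CMake; the Fortran models use the Makefile in src/fortran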
+ with when("languages=cxx"):
+ # Also supported variants are cuda and rocm (for HIP)
+ # not included here because they are supplied via respective packages
+ variant("sycl", default=False, description="Enable SYCL support")
+ variant("sycl2020", default=False, description="Enable SYCL support")
+ variant("omp", default=False, description="Enable OpenMP support")
+ variant("ocl", default=False, description="Enable OpenCL support")
+ variant("tbb", default=False, description="Enable TBB support")
+ variant("acc", default=False, description="Enable OpenACC support")
+ variant("hip", default=False, description="Enable HIP support")
+ variant("thrust", default=False, description="Enable THRUST support")
+ variant("raja", default=False, description="Enable RAJA support")
+ variant("std", default=False, description="Enable STD support")
# Some models need to have the programming model abstraction downloaded -
# this variant enables a path to be provided.
variant("dir", values=str, default="none", description="Enable Directory support")
+ variant(
+ "sycl2020_submodel",
+ values=("usm", "acc"),
+ when="+sycl2020",
+ default="usm",
+ description="SYCL2020 -> choose between usm and acc methods",
+ )
+ variant(
+ "std_submodel",
+ values=("data", "indices", "ranges"),
+ when="+std",
+ default="data",
+ description="STD -> choose between data, indices and ranges models",
+ )
- # Kokkos conflict and variant
- conflicts(
- "dir=none", when="+kokkos", msg="KOKKKOS requires architecture to be specfied by dir="
+ variant(
+ "sycl2020_offload",
+ values=("nvidia", "intel"),
+ default="intel",
+ when="+sycl2020",
+ description="Offloading to NVIDIA GPU or not",
)
- variant("kokkos", default=False, description="Enable KOKKOS support")
- # ACC conflict
- variant("cpu_arch", values=str, default="none", description="Enable CPU Target for ACC")
- variant("acc_target", values=str, default="none", description="Enable CPU Target for ACC")
+ variant(
+ "thrust_submodel",
+ values=("cuda", "rocm"),
+ default="cuda",
+ when="+thrust",
+ description="Which THRUST implementation to use, supported options include option= \
+ - CUDA (via https://github.com/NVIDIA/thrust)\
+ - ROCM (via https://github.com/ROCmSoftwarePlatform/rocThrust)",
+ )
+ variant(
+ "thrust_backend",
+ values=("cuda", "omp", "tbb"),
+ default="cuda",
+ when="+thrust",
+ description="Which THRUST implementation to use, supported options include option",
+ )
+
+ # Kokkos variant
+ variant("kokkos", default=False, description="Enable KOKKOS support")
# STD conflicts
- conflicts("+stddata", when="%gcc@:10.1.0", msg="STD-data requires newer version of GCC")
- conflicts("+stdindices", when="%gcc@:10.1.0", msg="STD-indices requires newer version of GCC")
- conflicts("+stdranges", when="%gcc@:10.1.0", msg="STD-ranges requires newer version of GCC")
+ conflicts("+std", when="%gcc@:10.1.0", msg="STD requires newer version of GCC")
# CUDA conflict
conflicts(
@@ -69,349 +111,763 @@ class Babelstream(CMakePackage, CudaPackage, ROCmPackage):
when="+cuda",
msg="CUDA requires architecture to be specfied by cuda_arch=",
)
- variant("mem", values=str, default="DEFAULT", description="Enable MEM Target for CUDA")
- # Raja Conflict
variant(
- "offload", values=str, default="none", description="Enable RAJA Target [CPU or NVIDIA]"
- )
- conflicts(
- "offload=none",
- when="+raja",
- msg="RAJA requires architecture to be specfied by acc_target=[CPU,NVIDIA]",
+ "cuda_memory_mode",
+ values=("default", "managed", "pagefault"),
+ default="default",
+ when="+cuda",
+ description="Enable MEM Target for CUDA",
)
- # download raja from https://github.com/LLNL/RAJA
+ # OMP offload
+ variant("omp_offload", default=False, when="+omp", description="Enable OpenMP Target")
+ variant(
+ "omp_flags",
+ values=str,
+ default="none",
+ when="+omp",
+ description="If OFFLOAD is enabled, this *overrides* the default offload flags",
+ )
conflicts(
- "dir=none",
+ "omp_flags=none",
+ when="+omp_offload",
+ msg="OpenMP requires offload flags to be specfied by omp_flags=",
+ )
+ # Raja offload
+ variant(
+ "raja_offload",
+ values=("cpu", "nvidia"),
+ default="cpu",
when="+raja",
- msg="RAJA implementation requires architecture to be specfied by dir=",
+ description="Enable RAJA Target [CPU or NVIDIA] / Offload with custom settings for OpenMP",
+ )
+ # std-* offload
+ variant(
+ "std_offload",
+ values=("nvhpc", "none"),
+ default="none",
+ when="+std",
+ description="Enable offloading support (via the non-standard `-stdpar`)\
+ for the new NVHPC SDK",
+ )
+ variant(
+ "std_onedpl_backend",
+ values=("openmp", "tbb", "dpcpp", "none"),
+ default="none",
+ when="+std",
+ description="Implements policies using OpenMP,TBB or dpc++",
+ )
+ variant(
+ "std_use_tbb",
+ values=(True, False),
+ default=False,
+ when="+std",
+ description="No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB\
+ via FetchContent_Declare, see top level CMakeLists.txt for details",
+ )
+ variant(
+ "std_use_onedpl",
+ values=(True, False),
+ default=False,
+ when="+std",
+ description="Link oneDPL which implements C++17 executor policies\
+ (via execution_policy_tag) for different backends",
+ )
+ # hip memory mode
+ variant(
+ "hip_mem_mode",
+ values=("default", "managed", "pagefault"),
+ default="default",
+ when="+hip",
+ description="Enable MEM Target for HIP",
+ )
+ # tbb use vector
+ variant(
+ "tbb_use_vector",
+ values=(True, False),
+ default=False,
+ when="+tbb",
+ description="Whether to use std::vector<T> for storage or use aligned_alloc. \
+        C++ vectors are *zero* initialised, whereas aligned_alloc is \
+ uninitialised before first use.",
)
# Thrust Conflict
- # conflicts("~cuda", when="+thrust", msg="Thrust requires +cuda variant")
depends_on("thrust", when="+thrust")
- depends_on("rocthrust", when="+thrust implementation=rocm")
-
+ depends_on("cuda", when="thrust_submodel=cuda")
+ depends_on("cuda", when="+raja raja_offload=nvidia")
+ depends_on("hip", when="+hip")
+ depends_on("rocthrust", when="thrust_submodel=rocm")
+ depends_on("intel-tbb", when="+std +std_use_tbb")
+ depends_on("intel-oneapi-dpl", when="+std +std_use_onedpl")
+ depends_on("intel-tbb", when="+std +std_use_onedpl")
# TBB Dependency
- depends_on("intel-oneapi-tbb", when="+tbb")
- partitioner_vals = ["auto", "affinity", "static", "simple"]
+ depends_on("intel-tbb", when="+tbb")
+
variant(
- "partitioner",
- values=partitioner_vals,
+ "tbb_partitioner",
+ values=("auto", "affinity", "static", "simple"),
default="auto",
+ when="+tbb",
description="Partitioner specifies how a loop template should partition its work among threads.\
Possible values are:\
AUTO - Optimize range subdivision based on work-stealing events.\
AFFINITY - Proportional splitting that optimizes for cache affinity.\
STATIC - Distribute work uniformly with no additional load balancing.\
SIMPLE - Recursively split its range until it cannot be further subdivided.\
- See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners for more details.",
+ See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners",
)
- # Kokkos Dependency
- depends_on("kokkos@3.7.1", when="+kokkos")
+ # Kokkos & RAJA Dependency
+ cuda_archs = CudaPackage.cuda_arch_values
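+    # forward each supported cuda_arch to the Kokkos/RAJA dependencies so the chosen arch propagates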
+ for sm_ in cuda_archs:
+ depends_on(
+ "kokkos +cuda +wrapper cuda_arch={0}".format(sm_),
+ when="kokkos_backend=cuda cuda_arch={0}".format(sm_),
+ )
+ depends_on(
+ "raja +cuda cuda_arch={0}".format(sm_),
+ when="raja_offload=nvidia cuda_arch={0}".format(sm_),
+ )
+ depends_on("kokkos +openmp", when="kokkos_backend=omp")
+ depends_on("raja +openmp", when="raja_offload=cpu")
# OpenCL Dependency
-
- backends = {
- "ocl": [
- ("amd", "rocm-opencl", "enable ROCM backend"),
- ("cuda", "cuda", "enable Cuda backend"),
- ("intel", "intel-oneapi-compilers", "enable Intel backend"),
- ("pocl", "pocl@1.5", "enable POCL backend"),
- ],
- "kokkos": [
- ("cuda", "cuda", "enable Cuda backend"),
- ("omp", "none", "enable Cuda backend"),
- ],
- }
- backend_vals = ["none"]
- for lang in backends:
- for item in backends[lang]:
- backend, dpdncy, descr = item
- backend_vals.append(backend.lower())
-
- variant("backend", values=backend_vals, default="none", description="Enable backend support")
-
- for lang in backends:
- for item in backends[lang]:
- backend, dpdncy, descr = item
- if dpdncy.lower() != "none":
- depends_on("%s" % dpdncy.lower(), when="backend=%s" % backend.lower())
- # this flag could be used in all required languages
- variant("flags", values=str, default="none", description="Additional CXX flags to be provided")
-
- # comp_impl_vals=["ONEAPI-DPCPP","DPCPP","HIPSYCL","COMPUTECPP"]
variant(
- "implementation",
- values=str,
+ "ocl_backend",
+ values=("amd", "cuda", "intel", "pocl", "none"),
default="none",
- description="Compile using the specified SYCL compiler option",
+ when="+ocl",
+ description="Enable Backend Target for OpenCL",
)
-
- conflicts(
- "implementation=none",
- when="+sycl",
- msg="SYCL requires compiler implementation to be specified by option=",
+ variant(
+ "kokkos_backend",
+ values=("cuda", "omp", "none"),
+ default="none",
+ when="+kokkos",
+ description="Enable Backend Target for kokkos",
)
conflicts(
- "implementation=none",
- when="+thrust",
- msg="Which Thrust implementation to use, supported options include:\
- - CUDA (via https://github.com/NVIDIA/thrust)\
- - ROCM (via https://github.com/ROCm/rocThrust)",
+ "ocl_backend=none",
+ when="+ocl",
+ msg="OpenCL implementation requires backend to be specfied by ocl_backend=",
)
+ # depends_on("rocm-opencl@6.0.2", when="+ocl ocl_backend=amd")
+ depends_on("cuda", when="+ocl ocl_backend=cuda")
+ depends_on("cuda", when="+sycl2020 sycl2020_offload=nvidia")
+ depends_on("intel-oneapi-compilers", when="+ocl ocl_backend=intel")
+ depends_on("pocl@1.5", when="+ocl ocl_backend=pocl")
+
+ variant(
+ "cuda_extra_flags",
+ values=str,
+ default="none",
+ description="Additional CUDA Compiler flags to be provided",
+ )
+
+ # CMake specific dependency
+ with when("build_system=cmake"):
+ depends_on("cmake@3.14.0:", type="build")
# This applies to all
- depends_on("cmake@3.14.0:", type="build")
depends_on("opencl-c-headers", when="+ocl")
+ # Fortran related configurations
+ with when("languages=fortran"):
+ implementation_vals = [
+ "DoConcurrent",
+ "Array",
+ "OpenMP",
+ "OpenMPWorkshare",
+ "OpenMPTarget",
+ "OpenMPTargetLoop",
+ "OpenMPTaskloop",
+ "OpenACC",
+ "OpenACCArray",
+ "CUDA",
+ "CUDAKernel",
+ "Sequential",
+ ]
+ variant(
+ "foption",
+ values=implementation_vals,
+ default="Sequential",
+ description="Implementation",
+ )
+ # The fortran Makefile is inside the src/fortran so we need to address this
+ build_directory = "src/fortran"
+ build_name = ""
+ variant(
+ "fortran_flags",
+ values=str,
+ default="none",
+ description="Additional Fortran flags to be provided",
+ )
+
+
+class CMakeBuilder(spack.build_systems.cmake.CMakeBuilder):
def cmake_args(self):
- # convert spec to string to work on it
- spec_string = str(self.spec)
-
- # take only the first portion of the spec until space
- spec_string_truncate = spec_string.split(" ", 1)[0]
- model_list = find_model_flag(spec_string_truncate) # Prints out ['cuda', 'thrust']
-
- if len(model_list) > 1:
- ignore_list = ["cuda"] # if +acc is provided ignore the cuda model
- model = list(set(model_list) - set(ignore_list))
- # We choose 'thrust' from the list of ['cuda', 'thrust']
- args = ["-DMODEL=" + model[0]]
+ model_list = [
+ "sycl",
+ "sycl2020",
+ "omp",
+ "cuda",
+ "ocl",
+ "tbb",
+ "acc",
+ "hip",
+ "thrust",
+ "raja",
+ "std",
+ "kokkos",
+ ]
+ # for +acc and +thrust the CudaPackage appends +cuda variant too so we need
+ # to filter cuda from list e.g. we choose 'thrust'
+ # from the list of ['cuda', 'thrust']
+ model_names = [name for name in model_list if f"+{name}" in self.spec]
+ print("model names : ", model_names)
+ if len(model_names) > 1:
+ model_names = [elem for elem in model_names if (elem != "cuda" and elem != "rocm")]
+ if "std" in model_names[0]:
+ args = ["-DMODEL=" + "std-" + self.spec.variants["std_submodel"].value]
+ elif "sycl2020" in model_names[0]: # this is for nvidia offload
+ args = ["-DMODEL=" + "sycl2020-" + self.spec.variants["sycl2020_submodel"].value]
+ else:
+ args = ["-DMODEL=" + model_names[0]]
else:
- # if it is +stddata,indices etc. we need to pass it
- # as std-data to the CMake compiler
- # do some alterations here
- if "std" in model_list[0]:
- args = ["-DMODEL=" + "std-" + model_list[0].split("d", 1)[1]]
+ # do some alterations here to append sub models too
+ if "std" in model_names[0]:
+ args = ["-DMODEL=" + "std-" + self.spec.variants["std_submodel"].value]
+ elif "sycl2020" in model_names[0]:
+ args = ["-DMODEL=" + "sycl2020-" + self.spec.variants["sycl2020_submodel"].value]
+ elif "rocm" in model_names[0]:
+ args = ["-DMODEL=hip"]
else:
- args = ["-DMODEL=" + model_list[0]]
+ args = ["-DMODEL=" + model_names[0]]
+ if model_names[0] != "tbb" and model_names[0] != "thrust":
+ args.append("-DCMAKE_CXX_COMPILER=" + spack_cxx)
# ===================================
# ACC
# ===================================
- if ("+acc" in self.spec) and ("~cuda" in self.spec):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- if "cuda_arch" in self.spec.variants:
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- # the architecture value is only number so append sm_ to the name
- cuda_arch = "cc" + cuda_arch_list[0]
- args.append("-DTARGET_DEVICE=gpu")
- args.append("-DCUDA_ARCH=" + cuda_arch)
- elif "cpu_arch" in self.spec.variants:
- cpu_arch_list = self.spec.variants["cpu_arch"].value
- # the architecture value is only number so append sm_ to the name
- cpu_arch = cpu_arch_list[0]
- args.append("-DTARGET_DEVICE=multicore")
- args.append("-DTARGET_PROCESSOR=" + cpu_arch)
-
+ """
+ register_flag_optional(TARGET_DEVICE
+ "[PGI/NVHPC only] This sets the `-target` flag, possible values are:
+ gpu - Globally set the target device to an NVIDIA GPU
+ multicore - Globally set the target device to the host CPU
+ Refer to `nvc++ --help` for the full list"
+ register_flag_optional(CUDA_ARCH
+ "[PGI/NVHPC only] Only applicable if `TARGET_DEVICE` is set to `gpu`.
+ Nvidia architecture in ccXY format, for example, sm_70 becomes cc70,
+ will be passed in via `-gpu=` (e.g `cc70`)
+ Possible values are:
+ cc35 - Compile for compute capability 3.5
+ cc50 - Compile for compute capability 5.0
+ cc60 - Compile for compute capability 6.0
+ cc62 - Compile for compute capability 6.2
+ cc70 - Compile for compute capability 7.0
+ cc72 - Compile for compute capability 7.2
+ cc75 - Compile for compute capability 7.5
+ cc80 - Compile for compute capability 8.0
+ ccall - Compile for all supported compute capabilities
+ Refer to `nvc++ --help` for the full list"
+ "")
+
+register_flag_optional(TARGET_PROCESSOR
+ "[PGI/NVHPC only] This sets the `-tp` (target processor) flag, possible values are:
+ px - Generic x86 Processor
+ bulldozer - AMD Bulldozer processor
+ piledriver - AMD Piledriver processor
+ zen - AMD Zen architecture (Epyc, Ryzen)
+ zen2 - AMD Zen 2 architecture (Ryzen 2)
+ sandybridge - Intel SandyBridge processor
+ haswell - Intel Haswell processor
+ knl - Intel Knights Landing processor
+ skylake - Intel Skylake Xeon processor
+ host - Link native version of HPC SDK cpu math library
+ native - Alias for -tp host
+ Refer to `nvc++ --help` for the full list"
+ "")
+ """
+ if self.spec.satisfies("+acc~kokkos~raja"):
+            if self.spec.compiler.name in ("nvhpc", "pgi"):
+ target_device = "gpu" if "cuda_arch" in self.spec.variants else "multicore"
+ if "cuda_arch" in self.spec.variants:
+ cuda_arch_list = self.spec.variants["cuda_arch"].value
+                    # the architecture value is only a number, so prepend "cc"
+ cuda_arch = "cc" + cuda_arch_list[0]
+ # args.append(
+ # "-DCXX_EXTRA_FLAGS=" + "-target=" + target_device + "-gpu=" + cuda_arch
+ # )
+ args.append("-DCUDA_ARCH=" + cuda_arch)
+ else:
+                    # use the target from the spec rather than a user-supplied cpu_arch
+                    target_processor = str(self.spec.target)
+ args.append("-DTARGET_PROCESSOR=" + target_processor)
+ # args.append(
+ # "-DCXX_EXTRA_FLAGS="
+ # + "-target="
+ # + target_device
+ # + "-tp="
+ # + target_processor
+ # )
+ args.append("-DTARGET_DEVICE=" + target_device)
# ===================================
# STDdata,STDindices,STDranges
# ===================================
- std_list = ["+stddata", "+stdindices", "+stdranges"]
- if spec_string.startswith(tuple(std_list)):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+
+ if "+std" in self.spec:
+ if self.spec.satisfies("+std_use_tbb"):
+ args.append("-DCXX_EXTRA_FLAGS=-ltbb")
+ if self.spec.satisfies("+std_use_onedpl"):
+ # args.append("-DCXX_EXTRA_FLAGS=-ltbb")
+ # args.append("-DCXX_EXTRA_FLAGS=-loneDPL")
+ args.append(
+ "-DUSE_ONEDPL=" + self.spec.variants["std_onedpl_backend"].value.upper()
+ )
+ if self.spec.variants["std_offload"].value != "none":
+                # the architecture value is only a number, so prepend "cc"
+ cuda_arch = "cc" + self.spec.variants["cuda_arch"].value[0]
+ args.append("-DNVHPC_OFFLOAD=" + cuda_arch)
# ===================================
# CUDA
# ===================================
-
- if ("+cuda" in self.spec) and ("~kokkos" in self.spec) and ("~acc" in self.spec):
+ if self.spec.satisfies("+cuda~kokkos~acc~omp~thrust~raja"):
# Set up the cuda macros needed by the build
cuda_arch_list = self.spec.variants["cuda_arch"].value
+ # "-DCUDA_ARCH" requires sm_
# the architecture value is only number so append sm_ to the name
cuda_arch = "sm_" + cuda_arch_list[0]
args.append("-DCUDA_ARCH=" + cuda_arch)
cuda_dir = self.spec["cuda"].prefix
cuda_comp = cuda_dir + "/bin/nvcc"
args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
- args.append("-DMEM=" + self.spec.variants["mem"].value)
- if self.spec.variants["flags"].value != "none":
- args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
+ args.append("-DMEM=" + self.spec.variants["cuda_memory_mode"].value.upper())
+ if self.spec.variants["cuda_extra_flags"].value != "none":
+ args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["cuda_extra_flags"].value)
# ===================================
# OMP
# ===================================
# `~kokkos` option is there to prevent +kokkos +omp setting to use omp directly from here
# Same applies for raja
- if ("+omp" in self.spec) and ("~kokkos" in self.spec) and ("~raja" in self.spec):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- if "cuda_arch" in self.spec.variants:
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- # the architecture value is only number so append sm_ to the name
- cuda_arch = "sm_" + cuda_arch_list[0]
- args.append("-DOFFLOAD= " + "NVIDIA:" + cuda_arch)
- elif "amdgpu_target" in self.spec.variants:
- rocm_arch = self.spec.variants["amdgpu_target"].value
- # the architecture value is only number so append sm_ to the name
- args.append("-DOFFLOAD=" + " AMD:" + rocm_arch)
+ if self.spec.satisfies("+omp~kokkos~raja"):
+ args.append("-DCMAKE_C_COMPILER=" + spack_cc)
+ if self.spec.satisfies("~omp_offload"):
+ args.append("-DOFFLOAD=" + "OFF")
+ # Check if the omp_flags variant is not set to "none"
+ args.append(
+ "-DCMAKE_CXX_FLAGS="
+ + self.pkg.compiler.openmp_flag
+ + " "
+ + (
+ self.spec.variants["omp_flags"].value
+ if self.spec.variants["omp_flags"].value != "none"
+ else ""
+ )
+ )
else:
- args.append("-DOFFLOAD=" + "INTEL")
+ offload_args = ""
+ args.append("-DOFFLOAD=ON")
+ if "cuda_arch" in self.spec.variants:
+ if self.spec.satisfies("%nvhpc"):
+ cuda_arch = "cc" + self.spec.variants["cuda_arch"].value[0]
+ offload_args = " -mp=gpu;" + "-gpu=" + cuda_arch + " "
+ if self.spec.satisfies("%clang"):
+ cuda_arch = "sm_" + self.spec.variants["cuda_arch"].value[0]
+ offload_args = "-fopenmp;--offload-arch=" + cuda_arch
+ elif ("amdgpu_target" in self.spec.variants) and (
+ self.spec.variants["amdgpu_target"].value != "none"
+ ):
+ offload_args = (
+ ";--offload-arch=" + self.spec.variants["amdgpu_target"].value[0]
+ )
+
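+                # join the OpenMP flag, the arch-specific offload args, and any user
+                # omp_flags with ';' so CMake receives OFFLOAD_FLAGS as a list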
+ args.append(
+ "-DOFFLOAD_FLAGS="
+ + self.pkg.compiler.openmp_flag
+ + ";"
+ + offload_args
+ + ";"
+ + self.spec.variants["omp_flags"].value
+ )
# ===================================
- # SYCL
+ # SYCL
# ===================================
- if self.spec.satisfies("+sycl"):
- args.append("-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper())
- if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP":
- args.append(
- "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper()
- )
- if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP":
- args.append("-DOpenCL_LIBRARY=")
+ if "+sycl" in self.spec:
+ if self.spec.satisfies("%oneapi"):
+            # the -fsycl flag is required to set up sycl/sycl.hpp; it does not
+            # seem to be picked up from the CMake file
+ args.append("-DSYCL_COMPILER=ONEAPI-ICPX")
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
+ elif self.spec.satisfies("%clang"):
+ # this requires the clang inside oneapi installation
+ args.append("-DSYCL_COMPILER=ONEAPI-Clang")
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
+ else:
+ args.append("-DSYCL_COMPILER=HIPSYCL")
+ args.append("-DSYCL_COMPILER_DIR=" + self.spec.variants["dir"].value)
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
# ===================================
- # SYCL 2020
+ # SYCL 2020
# ===================================
- if self.spec.satisfies("+sycl2020"):
+ if "+sycl2020" in self.spec:
if self.spec.satisfies("%oneapi"):
# the -fsycl flag is required to set up sycl/sycl.hpp; it does not
# seem to be picked up from the CMake file
- args.append("-DCXX_EXTRA_FLAGS= -fsycl -O3")
- # this is required to enable -DCMAKE_CXX_COMPILER=icpx flag from CMake
args.append("-DSYCL_COMPILER=ONEAPI-ICPX")
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
+ elif self.spec.satisfies("%clang"):
+ # this requires the clang inside oneapi installation
+ args.append("-DSYCL_COMPILER=ONEAPI-Clang")
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
else:
+ args.append("-DSYCL_COMPILER=HIPSYCL")
+ args.append("-DSYCL_COMPILER_DIR=" + self.spec.variants["dir"].value)
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
+ # if self.spec.variants["flags"].value != "none":
+ if self.spec.variants["sycl2020_offload"].value == "nvidia":
+ cuda_dir = self.spec["cuda"].prefix
+ cuda_arch = "sm_" + self.spec.variants["cuda_arch"].value[0]
args.append(
- "-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper()
- )
- if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP":
- args.append(
- "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper()
+ "-DCXX_EXTRA_FLAGS="
+ + "-fsycl;-fsycl-targets=nvptx64-nvidia-cuda;"
+ + self.spec.target.optimization_flags(
+ self.spec.compiler.name, str(self.spec.compiler.version)
)
- if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP":
- args.append("-DOpenCL_LIBRARY=")
+ + " --cuda-path="
+ + cuda_dir
+ )
# ===================================
# HIP(ROCM)
# ===================================
- if self.spec.satisfies("+rocm"):
- hip_comp = self.spec["rocm"].prefix + "/bin/hipcc"
+ if "+hip" in self.spec:
+ hip_comp = self.spec["hip"].prefix + "/bin/hipcc"
+ offload_arch = str(self.spec.variants["amdgpu_target"].value[0])
+
args.append("-DCMAKE_CXX_COMPILER=" + hip_comp)
- args.append(
- "-DCXX_EXTRA_FLAGS= --offload-arch="
- + self.spec.variants["amdgpu_target"].value
- + " "
- + self.spec.variants["flags"].value
- + " -O3"
- )
+ args.append(f"-DCXX_EXTRA_FLAGS=--offload-arch={offload_arch} -O3")
+ if str(self.spec.variants["hip_mem_mode"].value) != "none":
+ args.append("-DMEM=" + self.spec.variants["hip_mem_mode"].value.upper())
# ===================================
# TBB
# ===================================
- if self.spec.satisfies("+tbb"):
- args.append("-DONE_TBB_DIR=" + self.spec["tbb"].prefix + "/tbb/latest/")
- args.append("-DPARTITIONER=" + self.spec.variants["partitioner"].value.upper())
+ if "+tbb" in self.spec:
+ args.append("-DONE_TBB_DIR=" + self.spec["intel-tbb"].prefix + "/tbb/latest/")
+ args.append("-DCXX_EXTRA_FLAGS=-ltbb")
+ args.append("-DPARTITIONER=" + self.spec.variants["tbb_partitioner"].value.upper())
+ if self.spec.satisfies("+tbb_use_vector"):
+ args.append("-DUSE_VECTOR=ON")
# ===================================
# OpenCL (ocl)
# ===================================
- if self.spec.satisfies("+ocl"):
- if "backend" in self.spec.variants:
- if "cuda" in self.spec.variants["backend"].value:
- cuda_dir = self.spec["cuda"].prefix
- args.append("-DOpenCL_LIBRARY=" + cuda_dir + "/lib64/libOpenCL.so")
- elif "amd" in self.spec.variants["backend"].value:
- rocm_dir = self.spec["rocm-opencl"].prefix
- args.append("-DOpenCL_LIBRARY=" + rocm_dir + "/lib64/libOpenCL.so")
- elif "intel" in self.spec.variants["backend"].value:
- intel_lib = (
- self.spec["intel-oneapi-compilers"].prefix
- + "/compiler/2023.0.0/linux/lib/libOpenCL.so"
- )
- args.append("-DOpenCL_LIBRARY=" + intel_lib)
- elif "pocl" in self.spec.variants["backend"].value:
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- pocl_lib = self.spec["pocl"].prefix + "/lib64/libOpenCL.so"
- args.append("-DOpenCL_LIBRARY=" + pocl_lib)
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+
+ if "+ocl" in self.spec:
+ if "cuda" in self.spec.variants["ocl_backend"].value:
+ cuda_dir = self.spec["cuda"].prefix
+ args.append("-DOpenCL_LIBRARY=" + cuda_dir + "/lib64/libOpenCL.so")
+ elif "amd" in self.spec.variants["ocl_backend"].value:
+ rocm_dir = self.spec["rocm-opencl"].prefix
+ args.append("-DOpenCL_LIBRARY=" + rocm_dir + "/lib64/libOpenCL.so")
+ elif "intel" in self.spec.variants["ocl_backend"].value:
+ intel_lib = (
+ self.spec["intel-oneapi-compilers"].prefix
+ + "/compiler/"
+ + str(self.spec["intel-oneapi-compilers"].version)
+ + "/linux/lib/libOpenCL.so"
+ )
+ args.append("-DOpenCL_LIBRARY=" + intel_lib)
+ elif "pocl" in self.spec.variants["ocl_backend"].value:
+ pocl_lib = self.spec["pocl"].prefix + "/lib64/libOpenCL.so"
+ args.append("-DOpenCL_LIBRARY=" + pocl_lib)
# ===================================
# RAJA
# ===================================
- if self.spec.satisfies("+raja"):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- args.append("-DRAJA_IN_TREE=" + self.spec.variants["dir"].value)
- if "offload" in self.spec.variants:
- if "nvidia" in self.spec.variants["offload"].value:
- cuda_dir = self.spec["cuda"].prefix
- cuda_comp = cuda_dir + "/bin/nvcc"
- args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
- args.append("-DTARGET=NVIDIA")
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- cuda_arch = "sm_" + cuda_arch_list[0]
- args.append("-DCUDA_ARCH=" + cuda_arch)
- args.append("DCUDA_TOOLKIT_ROOT_DIR=" + self.spec["cuda"].prefix)
- if self.spec.variants["flags"].value != "none":
- args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
- # if("cpu" in self.spec.variants['offload'].value):
+ if "+raja" in self.spec:
+ args.append("-DCMAKE_C_COMPILER=" + spack_cc)
+ args.append("-DRAJA_IN_PACKAGE=" + self.spec["raja"].prefix)
+ if "nvidia" in self.spec.variants["raja_offload"].value:
+ cuda_comp = self.spec["cuda"].prefix + "/bin/nvcc"
+ args.append("-DTARGET=NVIDIA")
+ cuda_arch = "sm_" + self.spec.variants["cuda_arch"].value[0]
+ args.append("-DCUDA_ARCH=" + cuda_arch)
- if "omp" in self.spec.variants["backend"].value:
- args.append("-DENABLE_OPENMP=ON")
- if "cuda" in self.spec.variants["backend"].value:
args.append("-DENABLE_CUDA=ON")
+ args.append("-DCUDA_TOOLKIT_ROOT_DIR=" + self.spec["cuda"].prefix)
+ if self.spec.variants["cuda_extra_flags"].value != "none":
+ args.append(
+ "-DCMAKE_CUDA_FLAGS=" + self.spec.variants["cuda_extra_flags"].value
+ )
# ===================================
# THRUST
# ===================================
- if self.spec.satisfies("+thrust"):
- if "cuda" in self.spec.variants["implementation"].value:
- args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper())
+
+ if "+thrust" in self.spec:
+ if "cuda" in self.spec.variants["thrust_submodel"].value:
+ args.append("-DTHRUST_IMPL=" + self.spec.variants["thrust_submodel"].value.upper())
+
args.append("-SDK_DIR=" + self.spec["thrust"].prefix + "/include")
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- # the architecture value is only number so append sm_ to the name
- cuda_arch = "sm_" + cuda_arch_list[0]
- args.append("-DCUDA_ARCH=" + cuda_arch)
+ # this model uses CMAKE_CUDA_ARCHITECTURES which only requires number of cuda_arch
+ # no need to append sm_ or cc_
+ args.append("-DCUDA_ARCH=" + self.spec.variants["cuda_arch"].value[0])
cuda_dir = self.spec["cuda"].prefix
cuda_comp = cuda_dir + "/bin/nvcc"
args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
- args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper())
- if self.spec.variants["flags"].value != "none":
- args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
-
- if "rocm" in self.spec.variants["implementation"].value:
- args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper())
+ # args.append("-DCMAKE_CUDA_COMPILER=" + spack_cxx)
+ # args.append("-DCMAKE_CUDA_FLAGS=-ccbin " + spack_cc)
+ args.append("-DBACKEND=" + self.spec.variants["thrust_backend"].value.upper())
+ if self.spec.variants["cuda_extra_flags"].value != "none":
+ args.append(
+ "-DCUDA_EXTRA_FLAGS=" + self.spec.variants["cuda_extra_flags"].value
+ )
+ if "rocm" in self.spec.variants["thrust_submodel"].value:
+ args.append("-DCMAKE_CXX_COMPILER=" + self.spec["hip"].hipcc)
+ args.append("-DTHRUST_IMPL=" + self.spec.variants["thrust_submodel"].value.upper())
args.append("-SDK_DIR=" + self.spec["rocthrust"].prefix)
- args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper())
# ===================================
# kokkos
# ===================================
# kokkos implementation is versatile and it could use cuda or omp architectures as backend
- # The usage should be spack install babelstream +kokkos +cuda [or +omp]
- if self.spec.satisfies("+kokkos"):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- args.append("-DKOKKOS_IN_TREE=" + self.spec.variants["dir"].value)
- # args.append("-DKOKKOS_IN_PACKAGE=" + self.spec["kokkos"].prefix)
- if "backend" in self.spec.variants:
- if "cuda" in self.spec.variants["backend"].value:
- args.append("-DKokkos_ENABLE_CUDA=ON")
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- cuda_arch = cuda_arch_list[0]
- # arhitecture kepler optimisations
- if cuda_arch in ("30", "32", "35", "37"):
- args.append("-D" + "Kokkos_ARCH_KEPLER" + cuda_arch + "=ON")
- # arhitecture maxwell optimisations
- if cuda_arch in ("50", "52", "53"):
- args.append("-D" + "Kokkos_ARCH_MAXWELL" + cuda_arch + "=ON")
- # arhitecture pascal optimisations
- if cuda_arch in ("60", "61"):
- args.append("-D" + "Kokkos_ARCH_PASCAL" + cuda_arch + "=ON")
- # architecture volta optimisations
- if cuda_arch in ("70", "72"):
- args.append("-D" + "Kokkos_ARCH_VOLTA" + cuda_arch + "=ON")
- if cuda_arch == "75":
- args.append("-DKokkos_ARCH_TURING75=ON")
- if "omp" in self.spec.variants["backend"].value:
- args.append("-DKokkos_ENABLE_OPENMP=ON")
+
+    # The usage should be: spack install babelstream +kokkos kokkos_backend=[cuda or omp]
+ if "+kokkos" in self.spec:
+ args.append("-DCMAKE_C_COMPILER=" + spack_cc)
+ args.append("-DKOKKOS_IN_PACKAGE=" + self.spec["kokkos"].prefix)
+ if "cuda" in self.spec.variants["kokkos_backend"].value:
+ # args.append("-DCMAKE_CXX_COMPILER=" + self.spec["cuda"].nvcc)
+ args.append("-DCMAKE_CXX_COMPILER=" + spack_cxx)
+ args.append("-DKokkos_ENABLE_CUDA=ON")
+ int_cuda_arch = int(self.spec.variants["cuda_arch"].value[0])
+                # architecture kepler optimisations
+ if int_cuda_arch in (30, 32, 35, 37):
+ args.append("-D" + "Kokkos_ARCH_KEPLER" + str(int_cuda_arch) + "=ON")
+                # architecture maxwell optimisations
+ if int_cuda_arch in (50, 52, 53):
+ args.append("-D" + "Kokkos_ARCH_MAXWELL" + str(int_cuda_arch) + "=ON")
+                # architecture pascal optimisations
+ if int_cuda_arch in (60, 61):
+ args.append("-D" + "Kokkos_ARCH_PASCAL" + str(int_cuda_arch) + "=ON")
+ # architecture volta optimisations
+ if int_cuda_arch in (70, 72):
+ args.append("-D" + "Kokkos_ARCH_VOLTA" + str(int_cuda_arch) + "=ON")
+ if int_cuda_arch == 75:
+ args.append("-DKokkos_ARCH_TURING75=ON")
+ if int_cuda_arch == 80:
+ args.append("-DKokkos_ARCH_AMPERE80=ON")
+ if "omp" in self.spec.variants["kokkos_backend"].value:
+ args.append("-DKokkos_ENABLE_OPENMP=ON")
# not in ["kokkos", "raja", "acc", "hip"] then compiler forced true
if set(model_list).intersection(["kokkos", "raja", "acc", "hip"]) is True:
args.append("-DCMAKE_CXX_COMPILER_FORCED=True")
return args
+
+
+class MakefileBuilder(spack.build_systems.makefile.MakefileBuilder):
+ build_directory = "src/fortran"
+
+ # Generate Compiler Specific includes
+ def edit(self, pkg, spec, prefix):
+ config = {
+ "FC": pkg.compiler.fc,
+ "FCFLAGS": "",
+ "ARCH": spec.target.family,
+ "DOCONCURRENT_FLAG": "",
+ "ARRAY_FLAG": "",
+ "OPENMP_FLAG": "",
+ "OPENACC_FLAG": "",
+ "CUDA_FLAG": "",
+ "SEQUENTIAL_FLAG": "",
+ }
+ # Dictionary mapping compiler names to unsupported options
+ unsupported_options = {
+ "arm": ["CUDA", "CUDAKernel", "OpenACC", "OpenACCArray"],
+ "aocc": ["CUDA", "CUDAKernel"],
+ "cce": ["CUDA", "CUDAKernel"],
+ "gcc": ["CUDA", "CUDAKernel"],
+ "nvhpc": ["OpenMPTaskloop"],
+ "oneapi": ["CUDA", "CUDAKernel", "OpenACC", "OpenACCArray"],
+ "fj": ["CUDA", "CUDAKernel", "OpenACC"],
+ }
+
+ # Check if spec.compiler.name is in the unsupported_options dictionary
+ unsupported_value = self.spec.variants["foption"].value
+ compiler_name = spec.compiler.name
+        unsupported = unsupported_value in unsupported_options.get(compiler_name, [])
+ if unsupported:
+ raise InstallError(
+ f"{unsupported_value} is not supported by the {compiler_name} compiler"
+ )
+ # ===================================
+ # ARM
+ # ===================================
+ if spec.compiler.name == "arm":
+ fortran_flags = (
+ "-std=f2018 " + pkg.compiler.opt_flags[4] + " -Wall -Wno-unused-variable"
+ )
+            fortran_flags += " " + self.spec.target.optimization_flags(
+ self.spec.compiler.name, str(self.spec.compiler.version)
+ )
+
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["ARRAY_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["OPENACC_FLAG"] = "-fopenacc"
+
+ # ===================================
+ # AMD
+ # ===================================
+ if spec.compiler.name == "aocc":
+ fortran_flags = (
+ "-std=f2018 " + pkg.compiler.opt_flags[3] + " -Wall -Wno-unused-variable"
+ )
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["ARRAY_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["OPENACC_FLAG"] = "-fopenacc"
+
+ # ===================================
+ # CRAY
+ # ===================================
+ if spec.compiler.name == "cce":
+ fortran_flags = "-e F -O3"
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = "-h thread_do_concurrent -DCRAY_THREAD_DOCONCURRENT"
+ config["ARRAY_FLAG"] = "-h autothread"
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag
+ config["OPENACC_FLAG"] = "-h acc" # for cpu only -h omp
+
+ # ===================================
+ # GCC
+ # ===================================
+ if spec.compiler.name == "gcc":
+ fortran_flags = "-std=f2018 -O3 "
+ fortran_flags += "-Wall -Wno-unused-dummy-argument -Wno-unused-variable "
+ fortran_flags += self.spec.target.optimization_flags(
+ self.spec.compiler.name, str(self.spec.compiler.version)
+ )
+
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = "-ftree-parallelize-loops=4"
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag
+ config["OPENACC_FLAG"] = "-fopenacc"
+
+ # ===================================
+ # NVHPC
+ # ===================================
+ if spec.compiler.name == "nvhpc":
+ fortran_flags = pkg.compiler.opt_flags[4] # for -O3
+ # FCFLAGS := -O3 -Minform=inform -Minfo=all
+ fortran_flags += " -Minform=warn "
+ TARGET = "gpu" # target = "multicore"
+ config["TARGET"] = TARGET
+ if "cuda_arch" in self.spec.variants:
+ cuda_arch_list = self.spec.variants["cuda_arch"].value
+                # the architecture value is only a number, so prepend "cc"
+ cuda_arch = "cc" + cuda_arch_list[0]
+ GPUFLAG = " -gpu=" + cuda_arch
+ fortran_flags += "-tp=" + str(spec.target)
+ # this is to allow apples-to-apples comparison with DC in non-DC GPU impls
+ # set exactly one of these pairs!
+ # MANAGED = "-DUSE_MANAGED -gpu=managed"
+ # DEVICE=""
+ # ------------
+ DEVICE = "-DUSE_DEVICE -cuda -gpu=nomanaged"
+ MANAGED = ""
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = GPUFLAG + " -stdpar=" + TARGET + " " + DEVICE
+ config["ARRAY_FLAG"] = GPUFLAG + " -stdpar=" + TARGET + " " + MANAGED
+ config["OPENMP_FLAG"] = GPUFLAG + " -mp=" + TARGET + " " + MANAGED
+ config["OPENACC_FLAG"] = GPUFLAG + " -acc=" + TARGET + " " + MANAGED
+ config["CUDA_FLAG"] = GPUFLAG + " -cuda -acc=gpu" + " " + MANAGED
+
+ # ===================================
+ # ONEAPI
+ # ===================================
+ if spec.compiler.name == "oneapi":
+ fortran_flags = "-std18 -Ofast -xHOST -qopt-zmm-usage=low"
+ if config["FC"] == "ifort":
+ fortran_flags += "-qopt-streaming-stores=always"
+
+ config["DOCONCURRENT_FLAG"] = "-qopenmp" + (
+ "-parallel" if config["FC"] == "ifort" else ""
+ )
+ config["ARRAY_FLAG"] = "-qopenmp" + ("-parallel" if config["FC"] == "ifort" else "")
+ config["OPENMP_FLAG"] = "-qopenmp" + (
+ "-fopenmp-targets=spir64 -DUSE_FLOAT=1" if config["FC"] == "ifx" else ""
+ )
+ config["FCFLAGS"] = fortran_flags
+
+ # ===================================
+ # FJ
+ # ===================================
+ if spec.compiler.name == "fj":
+ fortran_flags = "-X08 -Kfast -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 "
+ fortran_flags += "-Kprefetch_sequential=soft "
+ fortran_flags += "-Kprefetch_line=8 -Kprefetch_line_L2=16 -Koptmsg=2 "
+ # FJ Fortran system_clock is low resolution
+ fortran_flags += "-Keval -DUSE_OMP_GET_WTIME=1 "
+
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = "-Kparallel,reduction -DNOTSHARED"
+ config["ARRAY_FLAG"] = "-Kparallel,reduction"
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag
+
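+        # Write make.inc.<compiler> with the resolved flags; the Fortran Makefile is
+        # expected to include it via the COMPILER environment variable set below.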
+ with open(self.build_directory + "/make.inc." + spec.compiler.name, "w+") as inc:
+ for key in config:
+ inc.write("{0} = {1}\n".format(key, config[key]))
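+        # For illustration (values depend on the compiler branch above), a generated
+        # make.inc.gcc might contain lines such as:
+        #   FCFLAGS = -std=f2018 -O3 -Wall -Wno-unused-dummy-argument ...
+        #   DOCONCURRENT_FLAG = -ftree-parallelize-loops=4
+        #   OPENMP_FLAG = -fopenmp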
+
+ def setup_build_environment(self, env):
+        # The Fortran Makefile selects make.inc.<compiler> and the implementation
+        # to build from these environment variables.
+        env.set("COMPILER", self.spec.compiler.name)
+        env.set("IMPLEMENTATION", self.spec.variants["foption"].value)
+
+ @property
+ def build_name(self):
+ compiler_prefix = self.spec.compiler.name
+ implementation_prefix = self.spec.variants["foption"].value
+ return "{}.{}.{}".format("BabelStream", compiler_prefix, implementation_prefix)
+
+ def install(self, pkg, spec, prefix):
+ mkdir(prefix.bin)
+ install(self.build_directory + "/" + self.build_name, prefix.bin)
+        # also install the build tree so the generated make.inc can be inspected
+ install_tree(self.build_directory, prefix.lib)