author    Kaan <61908449+kaanolgu@users.noreply.github.com>  2024-11-01 00:42:40 +0000
committer GitHub <noreply@github.com>  2024-10-31 18:42:40 -0600
commit    504cc808d642a969d0e13242b505e6f8430a594f (patch)
tree      c633282df82e427b42705ecbc60280e632413f37 /var
parent    8076134c918ff8866bfd23b787ae6609c1bdc2f7 (diff)
Babelstream v5.0 Spack Package Updates (#41019)
- Merged sycl2020usm and sycl2020acc into sycl2020 and introduced the submodel=acc/usm variant
- Renamed implementation to option
- Renamed impl (Fortran implementation options) to foption
- sycl_compiler_implementation and thrust_backend
- Merged stddata, stdindices, stdranges into a single std model with a std_submodel variant
- Made std_use_tbb a boolean; also changed the model-filtering algorithm to make sure it only picks model names
- Modified comments to clear confusion with the cuda_arch cc_ and sm_ prefix appends
- Deleted a duplicate cuda_arch definition from +omp
- Moved CMAKE_CXX_COMPILER to be a shared arg between all models except tbb and thrust
- Replaced sys.exit with InstallError and created a dictionary to simplify things and eliminate excess code lines doing the same checks
- Replaced the -mcpu flags with -march since -mcpu is now deprecated
- Replaced platform.machine with spec.target
- Removed raja_backend, introduced openmp_flag, removed -march flags, cleared debugging print(), removed excess `if ... in self.spec.variants` checks
- [FIX] Issue where Thrust couldn't find the correct compiler (it requires nvcc)
- [FIX] Fortran unsupported check to match the full string
- [FIX] RAJA cuda_arch to be sm_ rather than cc_
- dir= option is no longer needed for kokkos
- dir is no longer needed
- [omp] Added clang support for NVIDIA offload
- SYCL2020 offload to NVIDIA GPU
- Changed model dependency to be languages rather than build system
- Removed hardcoded arch flags, replacing them with archspec
- Removed cpu_arch from the acc model

--------

Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>
Co-authored-by: Greg Becker <becker33@llnl.gov>
Co-authored-by: Kaan Olgu <kaan.olgu@bristol.ac.uk>
Co-authored-by: Todd Gamblin <tgamblin@llnl.gov>
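For illustration only (not part of the commit), the reworked variants would be
exercised with specs along these lines; exact spec syntax depends on your Spack setup:

    spack install babelstream +sycl2020 sycl2020_submodel=usm
    spack install babelstream +thrust thrust_submodel=rocm
    spack install babelstream languages=fortran foption=OpenMP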
Diffstat (limited to 'var')
-rw-r--r--  var/spack/repos/builtin/packages/babelstream/package.py  990
1 file changed, 723 insertions(+), 267 deletions(-)
diff --git a/var/spack/repos/builtin/packages/babelstream/package.py b/var/spack/repos/builtin/packages/babelstream/package.py
index ec85b2d356..b09fcc5f6e 100644
--- a/var/spack/repos/builtin/packages/babelstream/package.py
+++ b/var/spack/repos/builtin/packages/babelstream/package.py
@@ -3,65 +3,107 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-import re # To get the variant name after (+)
-
+import spack.build_systems.cmake
+import spack.build_systems.makefile
from spack.package import *
-def find_model_flag(str):
- res = re.findall(r"\+(\w+)", str)
- if not res:
- return ""
- return res
-
-
-class Babelstream(CMakePackage, CudaPackage, ROCmPackage):
+class Babelstream(CMakePackage, CudaPackage, ROCmPackage, MakefilePackage):
"""Measure memory transfer rates to/from global device memory on GPUs.
This benchmark is similar in spirit, and based on, the STREAM benchmark for CPUs."""
homepage = "https://github.com/UoB-HPC/BabelStream"
- url = "https://github.com/UoB-HPC/BabelStream/archive/refs/tags/v4.0.tar.gz"
+ url = "https://github.com/UoB-HPC/BabelStream/archive/refs/tags/v5.0.tar.gz"
git = "https://github.com/UoB-HPC/BabelStream.git"
+ version("5.0", sha256="1a418203fbfd95595bdc66047e2e39d8f1bba95a49725c9ecb907caf1af2521f")
version("4.0", sha256="a9cd39277fb15d977d468435eb9b894f79f468233f0131509aa540ffda4f5953")
+ version("3.4", sha256="e34ee9d5ccdead019e3ea478333bcb7886117d600e5da8579a626f6ee34209cf")
+ version("3.3", sha256="4c89c805b277d52776feeb7a8eef7985a0d9295ce3e0bb2333bf715f724723cf")
+ version("3.2", sha256="20309b27ddd09ea37406bcc6f46fd32e9372bf3d145757e55938d19d69cdc49d")
+ version("3.1", sha256="be69e6085e8966e12aa2df897eea6254b172e5adfa03de0adbb89bc3065f4fbe")
+ version("3.0", sha256="776219c72e0fdc36f134e6975b68c7ab25f38206f8f8af84a6f9630648c24800")
+ version("1.0", sha256="3cfb9e45601f1f249878355c72baa6e6a61f6c811f8716d60b83c7fb544e1d5c")
version("main", branch="main")
- version("develop", branch="develop")
-
- depends_on("cxx", type="build") # generated
-
- maintainers("tomdeakin", "kaanolgu", "tom91136", "robj0nes")
-
+ maintainers("tomdeakin", "kaanolgu", "tom91136")
+ # Previous maintainers: "robj0nes"
+ depends_on("cxx", type="build", when="languages=cxx")
+ depends_on("fortran", type="build", when="languages=fortran")
# Languages
- # Also supported variants are cuda and rocm (for HIP)
- variant("sycl", default=False, description="Enable SYCL support")
- variant("sycl2020", default=False, description="Enable SYCL support")
- variant("omp", default=False, description="Enable OpenMP support")
- variant("ocl", default=False, description="Enable OpenCL support")
- variant("tbb", default=False, description="Enable TBB support")
- variant("acc", default=False, description="Enable OpenACC support")
- variant("thrust", default=False, description="Enable THRUST support")
- variant("raja", default=False, description="Enable RAJA support")
- variant("stddata", default=False, description="Enable STD-data support")
- variant("stdindices", default=False, description="Enable STD-indices support")
- variant("stdranges", default=False, description="Enable STD-ranges support")
+ # in the future it could be possible to add other languages too
+ variant(
+ "languages",
+ default="cxx",
+ values=("cxx", "fortran"),
+ description="Languages Babelstream Spack Package Support",
+ )
+ # Build System
+ build_system(
+ conditional("cmake", when="languages=cxx"),
+ conditional("makefile", when="languages=fortran"),
+ default="cmake",
+ )
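+    # C++ models are built with CMake; the Fortran models use the Makefile in src/fortran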
+ with when("languages=cxx"):
+ # Also supported variants are cuda and rocm (for HIP)
+ # not included here because they are supplied via respective packages
+ variant("sycl", default=False, description="Enable SYCL support")
+ variant("sycl2020", default=False, description="Enable SYCL support")
+ variant("omp", default=False, description="Enable OpenMP support")
+ variant("ocl", default=False, description="Enable OpenCL support")
+ variant("tbb", default=False, description="Enable TBB support")
+ variant("acc", default=False, description="Enable OpenACC support")
+ variant("hip", default=False, description="Enable HIP support")
+ variant("thrust", default=False, description="Enable THRUST support")
+ variant("raja", default=False, description="Enable RAJA support")
+ variant("std", default=False, description="Enable STD support")
# Some models need to have the programming model abstraction downloaded -
# this variant enables a path to be provided.
variant("dir", values=str, default="none", description="Enable Directory support")
+ variant(
+ "sycl2020_submodel",
+ values=("usm", "acc"),
+ when="+sycl2020",
+ default="usm",
+ description="SYCL2020 -> choose between usm and acc methods",
+ )
+ variant(
+ "std_submodel",
+ values=("data", "indices", "ranges"),
+ when="+std",
+ default="data",
+ description="STD -> choose between data, indices and ranges models",
+ )
- # Kokkos conflict and variant
- conflicts(
- "dir=none", when="+kokkos", msg="KOKKKOS requires architecture to be specfied by dir="
+ variant(
+ "sycl2020_offload",
+ values=("nvidia", "intel"),
+ default="intel",
+ when="+sycl2020",
+ description="Offloading to NVIDIA GPU or not",
)
- variant("kokkos", default=False, description="Enable KOKKOS support")
- # ACC conflict
- variant("cpu_arch", values=str, default="none", description="Enable CPU Target for ACC")
- variant("acc_target", values=str, default="none", description="Enable CPU Target for ACC")
+ variant(
+ "thrust_submodel",
+ values=("cuda", "rocm"),
+ default="cuda",
+ when="+thrust",
+ description="Which THRUST implementation to use, supported options include option= \
+ - CUDA (via https://github.com/NVIDIA/thrust)\
+ - ROCM (via https://github.com/ROCmSoftwarePlatform/rocThrust)",
+ )
+ variant(
+ "thrust_backend",
+ values=("cuda", "omp", "tbb"),
+ default="cuda",
+ when="+thrust",
+ description="Which THRUST implementation to use, supported options include option",
+ )
+
+ # Kokkos variant
+ variant("kokkos", default=False, description="Enable KOKKOS support")
# STD conflicts
- conflicts("+stddata", when="%gcc@:10.1.0", msg="STD-data requires newer version of GCC")
- conflicts("+stdindices", when="%gcc@:10.1.0", msg="STD-indices requires newer version of GCC")
- conflicts("+stdranges", when="%gcc@:10.1.0", msg="STD-ranges requires newer version of GCC")
+ conflicts("+std", when="%gcc@:10.1.0", msg="STD requires newer version of GCC")
# CUDA conflict
conflicts(
@@ -69,349 +111,763 @@ class Babelstream(CMakePackage, CudaPackage, ROCmPackage):
when="+cuda",
msg="CUDA requires architecture to be specfied by cuda_arch=",
)
- variant("mem", values=str, default="DEFAULT", description="Enable MEM Target for CUDA")
- # Raja Conflict
variant(
- "offload", values=str, default="none", description="Enable RAJA Target [CPU or NVIDIA]"
- )
- conflicts(
- "offload=none",
- when="+raja",
- msg="RAJA requires architecture to be specfied by acc_target=[CPU,NVIDIA]",
+ "cuda_memory_mode",
+ values=("default", "managed", "pagefault"),
+ default="default",
+ when="+cuda",
+ description="Enable MEM Target for CUDA",
)
- # download raja from https://github.com/LLNL/RAJA
+ # OMP offload
+ variant("omp_offload", default=False, when="+omp", description="Enable OpenMP Target")
+ variant(
+ "omp_flags",
+ values=str,
+ default="none",
+ when="+omp",
+ description="If OFFLOAD is enabled, this *overrides* the default offload flags",
+ )
conflicts(
- "dir=none",
+ "omp_flags=none",
+ when="+omp_offload",
+ msg="OpenMP requires offload flags to be specfied by omp_flags=",
+ )
+ # Raja offload
+ variant(
+ "raja_offload",
+ values=("cpu", "nvidia"),
+ default="cpu",
when="+raja",
- msg="RAJA implementation requires architecture to be specfied by dir=",
+ description="Enable RAJA Target [CPU or NVIDIA] / Offload with custom settings for OpenMP",
+ )
+ # std-* offload
+ variant(
+ "std_offload",
+ values=("nvhpc", "none"),
+ default="none",
+ when="+std",
+ description="Enable offloading support (via the non-standard `-stdpar`)\
+ for the new NVHPC SDK",
+ )
+ variant(
+ "std_onedpl_backend",
+ values=("openmp", "tbb", "dpcpp", "none"),
+ default="none",
+ when="+std",
+ description="Implements policies using OpenMP,TBB or dpc++",
+ )
+ variant(
+ "std_use_tbb",
+ values=(True, False),
+ default=False,
+ when="+std",
+ description="No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB\
+ via FetchContent_Declare, see top level CMakeLists.txt for details",
+ )
+ variant(
+ "std_use_onedpl",
+ values=(True, False),
+ default=False,
+ when="+std",
+ description="Link oneDPL which implements C++17 executor policies\
+ (via execution_policy_tag) for different backends",
+ )
+ # hip memory mode
+ variant(
+ "hip_mem_mode",
+ values=("default", "managed", "pagefault"),
+ default="default",
+ when="+hip",
+ description="Enable MEM Target for HIP",
+ )
+ # tbb use vector
+ variant(
+ "tbb_use_vector",
+ values=(True, False),
+ default=False,
+ when="+tbb",
+ description="Whether to use std::vector<T> for storage or use aligned_alloc. \
+        C++ vectors are *zero* initialised, whereas aligned_alloc is \
+ uninitialised before first use.",
)
# Thrust Conflict
- # conflicts("~cuda", when="+thrust", msg="Thrust requires +cuda variant")
depends_on("thrust", when="+thrust")
- depends_on("rocthrust", when="+thrust implementation=rocm")
-
+ depends_on("cuda", when="thrust_submodel=cuda")
+ depends_on("cuda", when="+raja raja_offload=nvidia")
+ depends_on("hip", when="+hip")
+ depends_on("rocthrust", when="thrust_submodel=rocm")
+ depends_on("intel-tbb", when="+std +std_use_tbb")
+ depends_on("intel-oneapi-dpl", when="+std +std_use_onedpl")
+ depends_on("intel-tbb", when="+std +std_use_onedpl")
# TBB Dependency
- depends_on("intel-oneapi-tbb", when="+tbb")
- partitioner_vals = ["auto", "affinity", "static", "simple"]
+ depends_on("intel-tbb", when="+tbb")
+
variant(
- "partitioner",
- values=partitioner_vals,
+ "tbb_partitioner",
+ values=("auto", "affinity", "static", "simple"),
default="auto",
+ when="+tbb",
description="Partitioner specifies how a loop template should partition its work among threads.\
Possible values are:\
AUTO - Optimize range subdivision based on work-stealing events.\
AFFINITY - Proportional splitting that optimizes for cache affinity.\
STATIC - Distribute work uniformly with no additional load balancing.\
SIMPLE - Recursively split its range until it cannot be further subdivided.\
- See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners for more details.",
+ See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners",
)
- # Kokkos Dependency
- depends_on("kokkos@3.7.1", when="+kokkos")
+ # Kokkos & RAJA Dependency
+ cuda_archs = CudaPackage.cuda_arch_values
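+    # forward each supported cuda_arch to the Kokkos/RAJA dependencies so the chosen arch propagates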
+ for sm_ in cuda_archs:
+ depends_on(
+ "kokkos +cuda +wrapper cuda_arch={0}".format(sm_),
+ when="kokkos_backend=cuda cuda_arch={0}".format(sm_),
+ )
+ depends_on(
+ "raja +cuda cuda_arch={0}".format(sm_),
+ when="raja_offload=nvidia cuda_arch={0}".format(sm_),
+ )
+ depends_on("kokkos +openmp", when="kokkos_backend=omp")
+ depends_on("raja +openmp", when="raja_offload=cpu")
# OpenCL Dependency
-
- backends = {
- "ocl": [
- ("amd", "rocm-opencl", "enable ROCM backend"),
- ("cuda", "cuda", "enable Cuda backend"),
- ("intel", "intel-oneapi-compilers", "enable Intel backend"),
- ("pocl", "pocl@1.5", "enable POCL backend"),
- ],
- "kokkos": [
- ("cuda", "cuda", "enable Cuda backend"),
- ("omp", "none", "enable Cuda backend"),
- ],
- }
- backend_vals = ["none"]
- for lang in backends:
- for item in backends[lang]:
- backend, dpdncy, descr = item
- backend_vals.append(backend.lower())
-
- variant("backend", values=backend_vals, default="none", description="Enable backend support")
-
- for lang in backends:
- for item in backends[lang]:
- backend, dpdncy, descr = item
- if dpdncy.lower() != "none":
- depends_on("%s" % dpdncy.lower(), when="backend=%s" % backend.lower())
- # this flag could be used in all required languages
- variant("flags", values=str, default="none", description="Additional CXX flags to be provided")
-
- # comp_impl_vals=["ONEAPI-DPCPP","DPCPP","HIPSYCL","COMPUTECPP"]
variant(
- "implementation",
- values=str,
+ "ocl_backend",
+ values=("amd", "cuda", "intel", "pocl", "none"),
default="none",
- description="Compile using the specified SYCL compiler option",
+ when="+ocl",
+ description="Enable Backend Target for OpenCL",
)
-
- conflicts(
- "implementation=none",
- when="+sycl",
- msg="SYCL requires compiler implementation to be specified by option=",
+ variant(
+ "kokkos_backend",
+ values=("cuda", "omp", "none"),
+ default="none",
+ when="+kokkos",
+ description="Enable Backend Target for kokkos",
)
conflicts(
- "implementation=none",
- when="+thrust",
- msg="Which Thrust implementation to use, supported options include:\
- - CUDA (via https://github.com/NVIDIA/thrust)\
- - ROCM (via https://github.com/ROCm/rocThrust)",
+ "ocl_backend=none",
+ when="+ocl",
+ msg="OpenCL implementation requires backend to be specfied by ocl_backend=",
)
+ # depends_on("rocm-opencl@6.0.2", when="+ocl ocl_backend=amd")
+ depends_on("cuda", when="+ocl ocl_backend=cuda")
+ depends_on("cuda", when="+sycl2020 sycl2020_offload=nvidia")
+ depends_on("intel-oneapi-compilers", when="+ocl ocl_backend=intel")
+ depends_on("pocl@1.5", when="+ocl ocl_backend=pocl")
+
+ variant(
+ "cuda_extra_flags",
+ values=str,
+ default="none",
+ description="Additional CUDA Compiler flags to be provided",
+ )
+
+ # CMake specific dependency
+ with when("build_system=cmake"):
+ depends_on("cmake@3.14.0:", type="build")
# This applies to all
- depends_on("cmake@3.14.0:", type="build")
depends_on("opencl-c-headers", when="+ocl")
+ # Fortran related configurations
+ with when("languages=fortran"):
+ implementation_vals = [
+ "DoConcurrent",
+ "Array",
+ "OpenMP",
+ "OpenMPWorkshare",
+ "OpenMPTarget",
+ "OpenMPTargetLoop",
+ "OpenMPTaskloop",
+ "OpenACC",
+ "OpenACCArray",
+ "CUDA",
+ "CUDAKernel",
+ "Sequential",
+ ]
+ variant(
+ "foption",
+ values=implementation_vals,
+ default="Sequential",
+ description="Implementation",
+ )
+ # The fortran Makefile is inside the src/fortran so we need to address this
+ build_directory = "src/fortran"
+ build_name = ""
+ variant(
+ "fortran_flags",
+ values=str,
+ default="none",
+ description="Additional Fortran flags to be provided",
+ )
+
+
+class CMakeBuilder(spack.build_systems.cmake.CMakeBuilder):
def cmake_args(self):
- # convert spec to string to work on it
- spec_string = str(self.spec)
-
- # take only the first portion of the spec until space
- spec_string_truncate = spec_string.split(" ", 1)[0]
- model_list = find_model_flag(spec_string_truncate) # Prints out ['cuda', 'thrust']
-
- if len(model_list) > 1:
- ignore_list = ["cuda"] # if +acc is provided ignore the cuda model
- model = list(set(model_list) - set(ignore_list))
- # We choose 'thrust' from the list of ['cuda', 'thrust']
- args = ["-DMODEL=" + model[0]]
+ model_list = [
+ "sycl",
+ "sycl2020",
+ "omp",
+ "cuda",
+ "ocl",
+ "tbb",
+ "acc",
+ "hip",
+ "thrust",
+ "raja",
+ "std",
+ "kokkos",
+ ]
+ # for +acc and +thrust the CudaPackage appends +cuda variant too so we need
+ # to filter cuda from list e.g. we choose 'thrust'
+ # from the list of ['cuda', 'thrust']
+ model_names = [name for name in model_list if f"+{name}" in self.spec]
+ print("model names : ", model_names)
+ if len(model_names) > 1:
+ model_names = [elem for elem in model_names if (elem != "cuda" and elem != "rocm")]
+ if "std" in model_names[0]:
+ args = ["-DMODEL=" + "std-" + self.spec.variants["std_submodel"].value]
+ elif "sycl2020" in model_names[0]: # this is for nvidia offload
+ args = ["-DMODEL=" + "sycl2020-" + self.spec.variants["sycl2020_submodel"].value]
+ else:
+ args = ["-DMODEL=" + model_names[0]]
else:
- # if it is +stddata,indices etc. we need to pass it
- # as std-data to the CMake compiler
- # do some alterations here
- if "std" in model_list[0]:
- args = ["-DMODEL=" + "std-" + model_list[0].split("d", 1)[1]]
+ # do some alterations here to append sub models too
+ if "std" in model_names[0]:
+ args = ["-DMODEL=" + "std-" + self.spec.variants["std_submodel"].value]
+ elif "sycl2020" in model_names[0]:
+ args = ["-DMODEL=" + "sycl2020-" + self.spec.variants["sycl2020_submodel"].value]
+ elif "rocm" in model_names[0]:
+ args = ["-DMODEL=hip"]
else:
- args = ["-DMODEL=" + model_list[0]]
+ args = ["-DMODEL=" + model_names[0]]
+ if model_names[0] != "tbb" and model_names[0] != "thrust":
+ args.append("-DCMAKE_CXX_COMPILER=" + spack_cxx)
# ===================================
# ACC
# ===================================
- if ("+acc" in self.spec) and ("~cuda" in self.spec):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- if "cuda_arch" in self.spec.variants:
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- # the architecture value is only number so append sm_ to the name
- cuda_arch = "cc" + cuda_arch_list[0]
- args.append("-DTARGET_DEVICE=gpu")
- args.append("-DCUDA_ARCH=" + cuda_arch)
- elif "cpu_arch" in self.spec.variants:
- cpu_arch_list = self.spec.variants["cpu_arch"].value
- # the architecture value is only number so append sm_ to the name
- cpu_arch = cpu_arch_list[0]
- args.append("-DTARGET_DEVICE=multicore")
- args.append("-DTARGET_PROCESSOR=" + cpu_arch)
-
+ """
+ register_flag_optional(TARGET_DEVICE
+ "[PGI/NVHPC only] This sets the `-target` flag, possible values are:
+ gpu - Globally set the target device to an NVIDIA GPU
+ multicore - Globally set the target device to the host CPU
+ Refer to `nvc++ --help` for the full list"
+ register_flag_optional(CUDA_ARCH
+ "[PGI/NVHPC only] Only applicable if `TARGET_DEVICE` is set to `gpu`.
+ Nvidia architecture in ccXY format, for example, sm_70 becomes cc70,
+ will be passed in via `-gpu=` (e.g `cc70`)
+ Possible values are:
+ cc35 - Compile for compute capability 3.5
+ cc50 - Compile for compute capability 5.0
+ cc60 - Compile for compute capability 6.0
+ cc62 - Compile for compute capability 6.2
+ cc70 - Compile for compute capability 7.0
+ cc72 - Compile for compute capability 7.2
+ cc75 - Compile for compute capability 7.5
+ cc80 - Compile for compute capability 8.0
+ ccall - Compile for all supported compute capabilities
+ Refer to `nvc++ --help` for the full list"
+ "")
+
+register_flag_optional(TARGET_PROCESSOR
+ "[PGI/NVHPC only] This sets the `-tp` (target processor) flag, possible values are:
+ px - Generic x86 Processor
+ bulldozer - AMD Bulldozer processor
+ piledriver - AMD Piledriver processor
+ zen - AMD Zen architecture (Epyc, Ryzen)
+ zen2 - AMD Zen 2 architecture (Ryzen 2)
+ sandybridge - Intel SandyBridge processor
+ haswell - Intel Haswell processor
+ knl - Intel Knights Landing processor
+ skylake - Intel Skylake Xeon processor
+ host - Link native version of HPC SDK cpu math library
+ native - Alias for -tp host
+ Refer to `nvc++ --help` for the full list"
+ "")
+ """
+ if self.spec.satisfies("+acc~kokkos~raja"):
+            if self.spec.compiler.name in ("nvhpc", "pgi"):
+ target_device = "gpu" if "cuda_arch" in self.spec.variants else "multicore"
+ if "cuda_arch" in self.spec.variants:
+ cuda_arch_list = self.spec.variants["cuda_arch"].value
+                    # the architecture value is only a number, so prepend "cc"
+ cuda_arch = "cc" + cuda_arch_list[0]
+ # args.append(
+ # "-DCXX_EXTRA_FLAGS=" + "-target=" + target_device + "-gpu=" + cuda_arch
+ # )
+ args.append("-DCUDA_ARCH=" + cuda_arch)
+ else:
+                    # use the target from the spec rather than a user-supplied cpu_arch
+                    target_processor = str(self.spec.target)
+ args.append("-DTARGET_PROCESSOR=" + target_processor)
+ # args.append(
+ # "-DCXX_EXTRA_FLAGS="
+ # + "-target="
+ # + target_device
+ # + "-tp="
+ # + target_processor
+ # )
+ args.append("-DTARGET_DEVICE=" + target_device)
# ===================================
# STDdata,STDindices,STDranges
# ===================================
- std_list = ["+stddata", "+stdindices", "+stdranges"]
- if spec_string.startswith(tuple(std_list)):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+
+ if "+std" in self.spec:
+ if self.spec.satisfies("+std_use_tbb"):
+ args.append("-DCXX_EXTRA_FLAGS=-ltbb")
+ if self.spec.satisfies("+std_use_onedpl"):
+ # args.append("-DCXX_EXTRA_FLAGS=-ltbb")
+ # args.append("-DCXX_EXTRA_FLAGS=-loneDPL")
+ args.append(
+ "-DUSE_ONEDPL=" + self.spec.variants["std_onedpl_backend"].value.upper()
+ )
+ if self.spec.variants["std_offload"].value != "none":
+                # the architecture value is only a number, so prepend "cc"
+ cuda_arch = "cc" + self.spec.variants["cuda_arch"].value[0]
+ args.append("-DNVHPC_OFFLOAD=" + cuda_arch)
# ===================================
# CUDA
# ===================================
-
- if ("+cuda" in self.spec) and ("~kokkos" in self.spec) and ("~acc" in self.spec):
+ if self.spec.satisfies("+cuda~kokkos~acc~omp~thrust~raja"):
# Set up the cuda macros needed by the build
cuda_arch_list = self.spec.variants["cuda_arch"].value
+ # "-DCUDA_ARCH" requires sm_
# the architecture value is only number so append sm_ to the name
cuda_arch = "sm_" + cuda_arch_list[0]
args.append("-DCUDA_ARCH=" + cuda_arch)
cuda_dir = self.spec["cuda"].prefix
cuda_comp = cuda_dir + "/bin/nvcc"
args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
- args.append("-DMEM=" + self.spec.variants["mem"].value)
- if self.spec.variants["flags"].value != "none":
- args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
+ args.append("-DMEM=" + self.spec.variants["cuda_memory_mode"].value.upper())
+ if self.spec.variants["cuda_extra_flags"].value != "none":
+ args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["cuda_extra_flags"].value)
# ===================================
# OMP
# ===================================
# `~kokkos` option is there to prevent +kokkos +omp setting to use omp directly from here
# Same applies for raja
- if ("+omp" in self.spec) and ("~kokkos" in self.spec) and ("~raja" in self.spec):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- if "cuda_arch" in self.spec.variants:
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- # the architecture value is only number so append sm_ to the name
- cuda_arch = "sm_" + cuda_arch_list[0]
- args.append("-DOFFLOAD= " + "NVIDIA:" + cuda_arch)
- elif "amdgpu_target" in self.spec.variants:
- rocm_arch = self.spec.variants["amdgpu_target"].value
- # the architecture value is only number so append sm_ to the name
- args.append("-DOFFLOAD=" + " AMD:" + rocm_arch)
+ if self.spec.satisfies("+omp~kokkos~raja"):
+ args.append("-DCMAKE_C_COMPILER=" + spack_cc)
+ if self.spec.satisfies("~omp_offload"):
+ args.append("-DOFFLOAD=" + "OFF")
+ # Check if the omp_flags variant is not set to "none"
+ args.append(
+ "-DCMAKE_CXX_FLAGS="
+ + self.pkg.compiler.openmp_flag
+ + " "
+ + (
+ self.spec.variants["omp_flags"].value
+ if self.spec.variants["omp_flags"].value != "none"
+ else ""
+ )
+ )
else:
- args.append("-DOFFLOAD=" + "INTEL")
+ offload_args = ""
+ args.append("-DOFFLOAD=ON")
+ if "cuda_arch" in self.spec.variants:
+ if self.spec.satisfies("%nvhpc"):
+ cuda_arch = "cc" + self.spec.variants["cuda_arch"].value[0]
+ offload_args = " -mp=gpu;" + "-gpu=" + cuda_arch + " "
+ if self.spec.satisfies("%clang"):
+ cuda_arch = "sm_" + self.spec.variants["cuda_arch"].value[0]
+ offload_args = "-fopenmp;--offload-arch=" + cuda_arch
+ elif ("amdgpu_target" in self.spec.variants) and (
+ self.spec.variants["amdgpu_target"].value != "none"
+ ):
+ offload_args = (
+ ";--offload-arch=" + self.spec.variants["amdgpu_target"].value[0]
+ )
+
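+                # join the OpenMP flag, the arch-specific offload args, and any user
+                # omp_flags with ';' so CMake receives OFFLOAD_FLAGS as a list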
+ args.append(
+ "-DOFFLOAD_FLAGS="
+ + self.pkg.compiler.openmp_flag
+ + ";"
+ + offload_args
+ + ";"
+ + self.spec.variants["omp_flags"].value
+ )
# ===================================
- # SYCL
+ # SYCL
# ===================================
- if self.spec.satisfies("+sycl"):
- args.append("-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper())
- if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP":
- args.append(
- "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper()
- )
- if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP":
- args.append("-DOpenCL_LIBRARY=")
+ if "+sycl" in self.spec:
+ if self.spec.satisfies("%oneapi"):
+            # the -fsycl flag is required to set up sycl/sycl.hpp; it does not
+            # seem to be picked up from the CMake file
+ args.append("-DSYCL_COMPILER=ONEAPI-ICPX")
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
+ elif self.spec.satisfies("%clang"):
+ # this requires the clang inside oneapi installation
+ args.append("-DSYCL_COMPILER=ONEAPI-Clang")
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
+ else:
+ args.append("-DSYCL_COMPILER=HIPSYCL")
+ args.append("-DSYCL_COMPILER_DIR=" + self.spec.variants["dir"].value)
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
# ===================================
- # SYCL 2020
+ # SYCL 2020
# ===================================
- if self.spec.satisfies("+sycl2020"):
+ if "+sycl2020" in self.spec:
if self.spec.satisfies("%oneapi"):
# the -fsycl flag is required to set up sycl/sycl.hpp; it does not
# seem to be picked up from the CMake file
- args.append("-DCXX_EXTRA_FLAGS= -fsycl -O3")
- # this is required to enable -DCMAKE_CXX_COMPILER=icpx flag from CMake
args.append("-DSYCL_COMPILER=ONEAPI-ICPX")
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
+ elif self.spec.satisfies("%clang"):
+ # this requires the clang inside oneapi installation
+ args.append("-DSYCL_COMPILER=ONEAPI-Clang")
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
else:
+ args.append("-DSYCL_COMPILER=HIPSYCL")
+ args.append("-DSYCL_COMPILER_DIR=" + self.spec.variants["dir"].value)
+ args.append("-DCXX_EXTRA_FLAGS= -fsycl")
+ # if self.spec.variants["flags"].value != "none":
+ if self.spec.variants["sycl2020_offload"].value == "nvidia":
+ cuda_dir = self.spec["cuda"].prefix
+ cuda_arch = "sm_" + self.spec.variants["cuda_arch"].value[0]
args.append(
- "-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper()
- )
- if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP":
- args.append(
- "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper()
+ "-DCXX_EXTRA_FLAGS="
+ + "-fsycl;-fsycl-targets=nvptx64-nvidia-cuda;"
+ + self.spec.target.optimization_flags(
+ self.spec.compiler.name, str(self.spec.compiler.version)
)
- if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP":
- args.append("-DOpenCL_LIBRARY=")
+ + " --cuda-path="
+ + cuda_dir
+ )
# ===================================
# HIP(ROCM)
# ===================================
- if self.spec.satisfies("+rocm"):
- hip_comp = self.spec["rocm"].prefix + "/bin/hipcc"
+ if "+hip" in self.spec:
+ hip_comp = self.spec["hip"].prefix + "/bin/hipcc"
+ offload_arch = str(self.spec.variants["amdgpu_target"].value[0])
+
args.append("-DCMAKE_CXX_COMPILER=" + hip_comp)
- args.append(
- "-DCXX_EXTRA_FLAGS= --offload-arch="
- + self.spec.variants["amdgpu_target"].value
- + " "
- + self.spec.variants["flags"].value
- + " -O3"
- )
+ args.append(f"-DCXX_EXTRA_FLAGS=--offload-arch={offload_arch} -O3")
+ if str(self.spec.variants["hip_mem_mode"].value) != "none":
+ args.append("-DMEM=" + self.spec.variants["hip_mem_mode"].value.upper())
# ===================================
# TBB
# ===================================
- if self.spec.satisfies("+tbb"):
- args.append("-DONE_TBB_DIR=" + self.spec["tbb"].prefix + "/tbb/latest/")
- args.append("-DPARTITIONER=" + self.spec.variants["partitioner"].value.upper())
+ if "+tbb" in self.spec:
+ args.append("-DONE_TBB_DIR=" + self.spec["intel-tbb"].prefix + "/tbb/latest/")
+ args.append("-DCXX_EXTRA_FLAGS=-ltbb")
+ args.append("-DPARTITIONER=" + self.spec.variants["tbb_partitioner"].value.upper())
+ if self.spec.satisfies("+tbb_use_vector"):
+ args.append("-DUSE_VECTOR=ON")
# ===================================
# OpenCL (ocl)
# ===================================
- if self.spec.satisfies("+ocl"):
- if "backend" in self.spec.variants:
- if "cuda" in self.spec.variants["backend"].value:
- cuda_dir = self.spec["cuda"].prefix
- args.append("-DOpenCL_LIBRARY=" + cuda_dir + "/lib64/libOpenCL.so")
- elif "amd" in self.spec.variants["backend"].value:
- rocm_dir = self.spec["rocm-opencl"].prefix
- args.append("-DOpenCL_LIBRARY=" + rocm_dir + "/lib64/libOpenCL.so")
- elif "intel" in self.spec.variants["backend"].value:
- intel_lib = (
- self.spec["intel-oneapi-compilers"].prefix
- + "/compiler/2023.0.0/linux/lib/libOpenCL.so"
- )
- args.append("-DOpenCL_LIBRARY=" + intel_lib)
- elif "pocl" in self.spec.variants["backend"].value:
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- pocl_lib = self.spec["pocl"].prefix + "/lib64/libOpenCL.so"
- args.append("-DOpenCL_LIBRARY=" + pocl_lib)
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+
+ if "+ocl" in self.spec:
+ if "cuda" in self.spec.variants["ocl_backend"].value:
+ cuda_dir = self.spec["cuda"].prefix
+ args.append("-DOpenCL_LIBRARY=" + cuda_dir + "/lib64/libOpenCL.so")
+ elif "amd" in self.spec.variants["ocl_backend"].value:
+ rocm_dir = self.spec["rocm-opencl"].prefix
+ args.append("-DOpenCL_LIBRARY=" + rocm_dir + "/lib64/libOpenCL.so")
+ elif "intel" in self.spec.variants["ocl_backend"].value:
+ intel_lib = (
+ self.spec["intel-oneapi-compilers"].prefix
+ + "/compiler/"
+ + str(self.spec["intel-oneapi-compilers"].version)
+ + "/linux/lib/libOpenCL.so"
+ )
+ args.append("-DOpenCL_LIBRARY=" + intel_lib)
+ elif "pocl" in self.spec.variants["ocl_backend"].value:
+ pocl_lib = self.spec["pocl"].prefix + "/lib64/libOpenCL.so"
+ args.append("-DOpenCL_LIBRARY=" + pocl_lib)
# ===================================
# RAJA
# ===================================
- if self.spec.satisfies("+raja"):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- args.append("-DRAJA_IN_TREE=" + self.spec.variants["dir"].value)
- if "offload" in self.spec.variants:
- if "nvidia" in self.spec.variants["offload"].value:
- cuda_dir = self.spec["cuda"].prefix
- cuda_comp = cuda_dir + "/bin/nvcc"
- args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
- args.append("-DTARGET=NVIDIA")
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- cuda_arch = "sm_" + cuda_arch_list[0]
- args.append("-DCUDA_ARCH=" + cuda_arch)
- args.append("DCUDA_TOOLKIT_ROOT_DIR=" + self.spec["cuda"].prefix)
- if self.spec.variants["flags"].value != "none":
- args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
- # if("cpu" in self.spec.variants['offload'].value):
+ if "+raja" in self.spec:
+ args.append("-DCMAKE_C_COMPILER=" + spack_cc)
+ args.append("-DRAJA_IN_PACKAGE=" + self.spec["raja"].prefix)
+ if "nvidia" in self.spec.variants["raja_offload"].value:
+ cuda_comp = self.spec["cuda"].prefix + "/bin/nvcc"
+ args.append("-DTARGET=NVIDIA")
+ cuda_arch = "sm_" + self.spec.variants["cuda_arch"].value[0]
+ args.append("-DCUDA_ARCH=" + cuda_arch)
- if "omp" in self.spec.variants["backend"].value:
- args.append("-DENABLE_OPENMP=ON")
- if "cuda" in self.spec.variants["backend"].value:
args.append("-DENABLE_CUDA=ON")
+ args.append("-DCUDA_TOOLKIT_ROOT_DIR=" + self.spec["cuda"].prefix)
+ if self.spec.variants["cuda_extra_flags"].value != "none":
+ args.append(
+ "-DCMAKE_CUDA_FLAGS=" + self.spec.variants["cuda_extra_flags"].value
+ )
# ===================================
# THRUST
# ===================================
- if self.spec.satisfies("+thrust"):
- if "cuda" in self.spec.variants["implementation"].value:
- args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper())
+
+ if "+thrust" in self.spec:
+ if "cuda" in self.spec.variants["thrust_submodel"].value:
+ args.append("-DTHRUST_IMPL=" + self.spec.variants["thrust_submodel"].value.upper())
+
args.append("-SDK_DIR=" + self.spec["thrust"].prefix + "/include")
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- # the architecture value is only number so append sm_ to the name
- cuda_arch = "sm_" + cuda_arch_list[0]
- args.append("-DCUDA_ARCH=" + cuda_arch)
+ # this model uses CMAKE_CUDA_ARCHITECTURES which only requires number of cuda_arch
+ # no need to append sm_ or cc_
+ args.append("-DCUDA_ARCH=" + self.spec.variants["cuda_arch"].value[0])
cuda_dir = self.spec["cuda"].prefix
cuda_comp = cuda_dir + "/bin/nvcc"
args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
- args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper())
- if self.spec.variants["flags"].value != "none":
- args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
-
- if "rocm" in self.spec.variants["implementation"].value:
- args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper())
+ # args.append("-DCMAKE_CUDA_COMPILER=" + spack_cxx)
+ # args.append("-DCMAKE_CUDA_FLAGS=-ccbin " + spack_cc)
+ args.append("-DBACKEND=" + self.spec.variants["thrust_backend"].value.upper())
+ if self.spec.variants["cuda_extra_flags"].value != "none":
+ args.append(
+ "-DCUDA_EXTRA_FLAGS=" + self.spec.variants["cuda_extra_flags"].value
+ )
+ if "rocm" in self.spec.variants["thrust_submodel"].value:
+ args.append("-DCMAKE_CXX_COMPILER=" + self.spec["hip"].hipcc)
+ args.append("-DTHRUST_IMPL=" + self.spec.variants["thrust_submodel"].value.upper())
args.append("-SDK_DIR=" + self.spec["rocthrust"].prefix)
- args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper())
# ===================================
# kokkos
# ===================================
# kokkos implementation is versatile and it could use cuda or omp architectures as backend
- # The usage should be spack install babelstream +kokkos +cuda [or +omp]
- if self.spec.satisfies("+kokkos"):
- args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
- args.append("-DKOKKOS_IN_TREE=" + self.spec.variants["dir"].value)
- # args.append("-DKOKKOS_IN_PACKAGE=" + self.spec["kokkos"].prefix)
- if "backend" in self.spec.variants:
- if "cuda" in self.spec.variants["backend"].value:
- args.append("-DKokkos_ENABLE_CUDA=ON")
- cuda_arch_list = self.spec.variants["cuda_arch"].value
- cuda_arch = cuda_arch_list[0]
- # arhitecture kepler optimisations
- if cuda_arch in ("30", "32", "35", "37"):
- args.append("-D" + "Kokkos_ARCH_KEPLER" + cuda_arch + "=ON")
- # arhitecture maxwell optimisations
- if cuda_arch in ("50", "52", "53"):
- args.append("-D" + "Kokkos_ARCH_MAXWELL" + cuda_arch + "=ON")
- # arhitecture pascal optimisations
- if cuda_arch in ("60", "61"):
- args.append("-D" + "Kokkos_ARCH_PASCAL" + cuda_arch + "=ON")
- # architecture volta optimisations
- if cuda_arch in ("70", "72"):
- args.append("-D" + "Kokkos_ARCH_VOLTA" + cuda_arch + "=ON")
- if cuda_arch == "75":
- args.append("-DKokkos_ARCH_TURING75=ON")
- if "omp" in self.spec.variants["backend"].value:
- args.append("-DKokkos_ENABLE_OPENMP=ON")
+
+    # The usage should be: spack install babelstream +kokkos kokkos_backend=[cuda or omp]
+ if "+kokkos" in self.spec:
+ args.append("-DCMAKE_C_COMPILER=" + spack_cc)
+ args.append("-DKOKKOS_IN_PACKAGE=" + self.spec["kokkos"].prefix)
+ if "cuda" in self.spec.variants["kokkos_backend"].value:
+ # args.append("-DCMAKE_CXX_COMPILER=" + self.spec["cuda"].nvcc)
+ args.append("-DCMAKE_CXX_COMPILER=" + spack_cxx)
+ args.append("-DKokkos_ENABLE_CUDA=ON")
+ int_cuda_arch = int(self.spec.variants["cuda_arch"].value[0])
+                # architecture kepler optimisations
+ if int_cuda_arch in (30, 32, 35, 37):
+ args.append("-D" + "Kokkos_ARCH_KEPLER" + str(int_cuda_arch) + "=ON")
+                # architecture maxwell optimisations
+ if int_cuda_arch in (50, 52, 53):
+ args.append("-D" + "Kokkos_ARCH_MAXWELL" + str(int_cuda_arch) + "=ON")
+                # architecture pascal optimisations
+ if int_cuda_arch in (60, 61):
+ args.append("-D" + "Kokkos_ARCH_PASCAL" + str(int_cuda_arch) + "=ON")
+ # architecture volta optimisations
+ if int_cuda_arch in (70, 72):
+ args.append("-D" + "Kokkos_ARCH_VOLTA" + str(int_cuda_arch) + "=ON")
+ if int_cuda_arch == 75:
+ args.append("-DKokkos_ARCH_TURING75=ON")
+ if int_cuda_arch == 80:
+ args.append("-DKokkos_ARCH_AMPERE80=ON")
+ if "omp" in self.spec.variants["kokkos_backend"].value:
+ args.append("-DKokkos_ENABLE_OPENMP=ON")
# not in ["kokkos", "raja", "acc", "hip"] then compiler forced true
if set(model_list).intersection(["kokkos", "raja", "acc", "hip"]) is True:
args.append("-DCMAKE_CXX_COMPILER_FORCED=True")
return args
+
+
+class MakefileBuilder(spack.build_systems.makefile.MakefileBuilder):
+ build_directory = "src/fortran"
+
+ # Generate Compiler Specific includes
+ def edit(self, pkg, spec, prefix):
+ config = {
+ "FC": pkg.compiler.fc,
+ "FCFLAGS": "",
+ "ARCH": spec.target.family,
+ "DOCONCURRENT_FLAG": "",
+ "ARRAY_FLAG": "",
+ "OPENMP_FLAG": "",
+ "OPENACC_FLAG": "",
+ "CUDA_FLAG": "",
+ "SEQUENTIAL_FLAG": "",
+ }
+ # Dictionary mapping compiler names to unsupported options
+ unsupported_options = {
+ "arm": ["CUDA", "CUDAKernel", "OpenACC", "OpenACCArray"],
+ "aocc": ["CUDA", "CUDAKernel"],
+ "cce": ["CUDA", "CUDAKernel"],
+ "gcc": ["CUDA", "CUDAKernel"],
+ "nvhpc": ["OpenMPTaskloop"],
+ "oneapi": ["CUDA", "CUDAKernel", "OpenACC", "OpenACCArray"],
+ "fj": ["CUDA", "CUDAKernel", "OpenACC"],
+ }
+
+ # Check if spec.compiler.name is in the unsupported_options dictionary
+ unsupported_value = self.spec.variants["foption"].value
+ compiler_name = spec.compiler.name
+        unsupported = unsupported_value in unsupported_options.get(compiler_name, [])
+ if unsupported:
+ raise InstallError(
+ f"{unsupported_value} is not supported by the {compiler_name} compiler"
+ )
+ # ===================================
+ # ARM
+ # ===================================
+ if spec.compiler.name == "arm":
+ fortran_flags = (
+ "-std=f2018 " + pkg.compiler.opt_flags[4] + " -Wall -Wno-unused-variable"
+ )
+            fortran_flags += " " + self.spec.target.optimization_flags(
+ self.spec.compiler.name, str(self.spec.compiler.version)
+ )
+
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["ARRAY_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["OPENACC_FLAG"] = "-fopenacc"
+
+ # ===================================
+ # AMD
+ # ===================================
+ if spec.compiler.name == "aocc":
+ fortran_flags = (
+ "-std=f2018 " + pkg.compiler.opt_flags[3] + " -Wall -Wno-unused-variable"
+ )
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["ARRAY_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag # libomp.so required
+ config["OPENACC_FLAG"] = "-fopenacc"
+
+ # ===================================
+ # CRAY
+ # ===================================
+ if spec.compiler.name == "cce":
+ fortran_flags = "-e F -O3"
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = "-h thread_do_concurrent -DCRAY_THREAD_DOCONCURRENT"
+ config["ARRAY_FLAG"] = "-h autothread"
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag
+ config["OPENACC_FLAG"] = "-h acc" # for cpu only -h omp
+
+ # ===================================
+ # GCC
+ # ===================================
+ if spec.compiler.name == "gcc":
+ fortran_flags = "-std=f2018 -O3 "
+ fortran_flags += "-Wall -Wno-unused-dummy-argument -Wno-unused-variable "
+ fortran_flags += self.spec.target.optimization_flags(
+ self.spec.compiler.name, str(self.spec.compiler.version)
+ )
+
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = "-ftree-parallelize-loops=4"
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag
+ config["OPENACC_FLAG"] = "-fopenacc"
+
+ # ===================================
+ # NVHPC
+ # ===================================
+ if spec.compiler.name == "nvhpc":
+ fortran_flags = pkg.compiler.opt_flags[4] # for -O3
+ # FCFLAGS := -O3 -Minform=inform -Minfo=all
+ fortran_flags += " -Minform=warn "
+ TARGET = "gpu" # target = "multicore"
+ config["TARGET"] = TARGET
+ if "cuda_arch" in self.spec.variants:
+ cuda_arch_list = self.spec.variants["cuda_arch"].value
+                # the architecture value is only a number, so prepend "cc"
+ cuda_arch = "cc" + cuda_arch_list[0]
+ GPUFLAG = " -gpu=" + cuda_arch
+ fortran_flags += "-tp=" + str(spec.target)
+ # this is to allow apples-to-apples comparison with DC in non-DC GPU impls
+ # set exactly one of these pairs!
+ # MANAGED = "-DUSE_MANAGED -gpu=managed"
+ # DEVICE=""
+ # ------------
+ DEVICE = "-DUSE_DEVICE -cuda -gpu=nomanaged"
+ MANAGED = ""
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = GPUFLAG + " -stdpar=" + TARGET + " " + DEVICE
+ config["ARRAY_FLAG"] = GPUFLAG + " -stdpar=" + TARGET + " " + MANAGED
+ config["OPENMP_FLAG"] = GPUFLAG + " -mp=" + TARGET + " " + MANAGED
+ config["OPENACC_FLAG"] = GPUFLAG + " -acc=" + TARGET + " " + MANAGED
+ config["CUDA_FLAG"] = GPUFLAG + " -cuda -acc=gpu" + " " + MANAGED
+
+ # ===================================
+ # ONEAPI
+ # ===================================
+ if spec.compiler.name == "oneapi":
+ fortran_flags = "-std18 -Ofast -xHOST -qopt-zmm-usage=low"
+ if config["FC"] == "ifort":
+ fortran_flags += "-qopt-streaming-stores=always"
+
+ config["DOCONCURRENT_FLAG"] = "-qopenmp" + (
+ "-parallel" if config["FC"] == "ifort" else ""
+ )
+ config["ARRAY_FLAG"] = "-qopenmp" + ("-parallel" if config["FC"] == "ifort" else "")
+ config["OPENMP_FLAG"] = "-qopenmp" + (
+ "-fopenmp-targets=spir64 -DUSE_FLOAT=1" if config["FC"] == "ifx" else ""
+ )
+ config["FCFLAGS"] = fortran_flags
+
+ # ===================================
+ # FJ
+ # ===================================
+ if spec.compiler.name == "fj":
+ fortran_flags = "-X08 -Kfast -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 "
+ fortran_flags += "-Kprefetch_sequential=soft "
+ fortran_flags += "-Kprefetch_line=8 -Kprefetch_line_L2=16 -Koptmsg=2 "
+ # FJ Fortran system_clock is low resolution
+ fortran_flags += "-Keval -DUSE_OMP_GET_WTIME=1 "
+
+ config["FCFLAGS"] = fortran_flags
+ config["DOCONCURRENT_FLAG"] = "-Kparallel,reduction -DNOTSHARED"
+ config["ARRAY_FLAG"] = "-Kparallel,reduction"
+ config["OPENMP_FLAG"] = pkg.compiler.openmp_flag
+
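+        # Write make.inc.<compiler> with the resolved flags; the Fortran Makefile is
+        # expected to include it via the COMPILER environment variable set below.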
+ with open(self.build_directory + "/make.inc." + spec.compiler.name, "w+") as inc:
+ for key in config:
+ inc.write("{0} = {1}\n".format(key, config[key]))
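+        # For illustration (values depend on the compiler branch above), a generated
+        # make.inc.gcc might contain lines such as:
+        #   FCFLAGS = -std=f2018 -O3 -Wall -Wno-unused-dummy-argument ...
+        #   DOCONCURRENT_FLAG = -ftree-parallelize-loops=4
+        #   OPENMP_FLAG = -fopenmp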
+
+ def setup_build_environment(self, env):
+        # The Fortran Makefile selects make.inc.<compiler> and the implementation
+        # to build from these environment variables.
+        env.set("COMPILER", self.spec.compiler.name)
+        env.set("IMPLEMENTATION", self.spec.variants["foption"].value)
+
+ @property
+ def build_name(self):
+ compiler_prefix = self.spec.compiler.name
+ implementation_prefix = self.spec.variants["foption"].value
+ return "{}.{}.{}".format("BabelStream", compiler_prefix, implementation_prefix)
+
+ def install(self, pkg, spec, prefix):
+ mkdir(prefix.bin)
+ install(self.build_directory + "/" + self.build_name, prefix.bin)
+        # also install the build tree so the generated make.inc can be inspected
+ install_tree(self.build_directory, prefix.lib)