diff options
author | Kaan <61908449+kaanolgu@users.noreply.github.com> | 2024-11-01 00:42:40 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-31 18:42:40 -0600 |
commit | 504cc808d642a969d0e13242b505e6f8430a594f (patch) | |
tree | c633282df82e427b42705ecbc60280e632413f37 /var | |
parent | 8076134c918ff8866bfd23b787ae6609c1bdc2f7 (diff) | |
download | spack-504cc808d642a969d0e13242b505e6f8430a594f.tar.gz spack-504cc808d642a969d0e13242b505e6f8430a594f.tar.bz2 spack-504cc808d642a969d0e13242b505e6f8430a594f.tar.xz spack-504cc808d642a969d0e13242b505e6f8430a594f.zip |
Babelstream v5.0 Spack Package Updates (#41019)
- Merged sycl2020usm and sycl2020acc into sycl2020, and introduced the submodel=acc/usm variant
- implementation is renamed to option
- impl (Fortran implementation options) renamed to foption
- sycl_compiler_implementation and thrust_backend
- Merged stddata, stdindices and stdranges into a single std model, introducing std_submodel
- Made std_use_tbb a boolean; also changed the model filtering algorithm to make sure that it only picks model names
- Modified comments to clear up confusion about the cc_ and sm_ prefixes added to cuda_arch values
- Deleted duplicate of cuda_arch definition from +omp
- CMAKE_CXX_COMPILER moved to be shared arg between all models except tbb and thrust
- Replaced sys.exit with InstallError and created a dictionary to simplify things and eliminate redundant code lines performing the same checks
- Replaced the -mcpu flags with -march, since -mcpu is now deprecated
- Replaced platform.machine with spec.target
- Removing raja_backend, introducing openmp_flag, removing -march flags, clearing debugging print() calls, removing excess `if ___ in self.spec.variants` checks
- [FIX] Issue where Thrust couldn't find correct compiler (it requires nvcc)
- [FIX] Fortran unsupported check to match the full string
- [FIX] RAJA cuda_arch to be with sm_ not cc_
- dir= option is no longer needed for kokkos
- dir is no longer needed
- [omp] Adding clang support for nvidia offload
- SYCL2020 offload to nvidia GPU
- changing model dependency to be languages rather than build system
- removing hardcoded arch flags and replacing with archspec
- removing cpu_arch from acc model
---------
Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>
Co-authored-by: Greg Becker <becker33@llnl.gov>
Co-authored-by: Kaan Olgu <kaan.olgu@bristol.ac.uk>
Co-authored-by: Todd Gamblin <tgamblin@llnl.gov>
Diffstat (limited to 'var')
-rw-r--r-- | var/spack/repos/builtin/packages/babelstream/package.py | 990 |
1 files changed, 723 insertions, 267 deletions
diff --git a/var/spack/repos/builtin/packages/babelstream/package.py b/var/spack/repos/builtin/packages/babelstream/package.py index ec85b2d356..b09fcc5f6e 100644 --- a/var/spack/repos/builtin/packages/babelstream/package.py +++ b/var/spack/repos/builtin/packages/babelstream/package.py @@ -3,65 +3,107 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -import re # To get the variant name after (+) - +import spack.build_systems.cmake +import spack.build_systems.makefile from spack.package import * -def find_model_flag(str): - res = re.findall(r"\+(\w+)", str) - if not res: - return "" - return res - - -class Babelstream(CMakePackage, CudaPackage, ROCmPackage): +class Babelstream(CMakePackage, CudaPackage, ROCmPackage, MakefilePackage): """Measure memory transfer rates to/from global device memory on GPUs. This benchmark is similar in spirit, and based on, the STREAM benchmark for CPUs.""" homepage = "https://github.com/UoB-HPC/BabelStream" - url = "https://github.com/UoB-HPC/BabelStream/archive/refs/tags/v4.0.tar.gz" + url = "https://github.com/UoB-HPC/BabelStream/archive/refs/tags/v5.0.tar.gz" git = "https://github.com/UoB-HPC/BabelStream.git" + version("5.0", sha256="1a418203fbfd95595bdc66047e2e39d8f1bba95a49725c9ecb907caf1af2521f") version("4.0", sha256="a9cd39277fb15d977d468435eb9b894f79f468233f0131509aa540ffda4f5953") + version("3.4", sha256="e34ee9d5ccdead019e3ea478333bcb7886117d600e5da8579a626f6ee34209cf") + version("3.3", sha256="4c89c805b277d52776feeb7a8eef7985a0d9295ce3e0bb2333bf715f724723cf") + version("3.2", sha256="20309b27ddd09ea37406bcc6f46fd32e9372bf3d145757e55938d19d69cdc49d") + version("3.1", sha256="be69e6085e8966e12aa2df897eea6254b172e5adfa03de0adbb89bc3065f4fbe") + version("3.0", sha256="776219c72e0fdc36f134e6975b68c7ab25f38206f8f8af84a6f9630648c24800") + version("1.0", sha256="3cfb9e45601f1f249878355c72baa6e6a61f6c811f8716d60b83c7fb544e1d5c") version("main", branch="main") - version("develop", branch="develop") - - depends_on("cxx", 
type="build") # generated - - maintainers("tomdeakin", "kaanolgu", "tom91136", "robj0nes") - + maintainers("tomdeakin", "kaanolgu", "tom91136") + # Previous maintainers: "robj0nes" + depends_on("cxx", type="build", when="languages=cxx") + depends_on("fortran", type="build", when="languages=fortran") # Languages - # Also supported variants are cuda and rocm (for HIP) - variant("sycl", default=False, description="Enable SYCL support") - variant("sycl2020", default=False, description="Enable SYCL support") - variant("omp", default=False, description="Enable OpenMP support") - variant("ocl", default=False, description="Enable OpenCL support") - variant("tbb", default=False, description="Enable TBB support") - variant("acc", default=False, description="Enable OpenACC support") - variant("thrust", default=False, description="Enable THRUST support") - variant("raja", default=False, description="Enable RAJA support") - variant("stddata", default=False, description="Enable STD-data support") - variant("stdindices", default=False, description="Enable STD-indices support") - variant("stdranges", default=False, description="Enable STD-ranges support") + # in the future it could be possible to add other languages too + variant( + "languages", + default="cxx", + values=("cxx", "fortran"), + description="Languages Babelstream Spack Package Support", + ) + # Build System + build_system( + conditional("cmake", when="languages=cxx"), + conditional("makefile", when="languages=fortran"), + default="cmake", + ) + with when("languages=cxx"): + # Also supported variants are cuda and rocm (for HIP) + # not included here because they are supplied via respective packages + variant("sycl", default=False, description="Enable SYCL support") + variant("sycl2020", default=False, description="Enable SYCL support") + variant("omp", default=False, description="Enable OpenMP support") + variant("ocl", default=False, description="Enable OpenCL support") + variant("tbb", default=False, 
description="Enable TBB support") + variant("acc", default=False, description="Enable OpenACC support") + variant("hip", default=False, description="Enable HIP support") + variant("thrust", default=False, description="Enable THRUST support") + variant("raja", default=False, description="Enable RAJA support") + variant("std", default=False, description="Enable STD support") # Some models need to have the programming model abstraction downloaded - # this variant enables a path to be provided. variant("dir", values=str, default="none", description="Enable Directory support") + variant( + "sycl2020_submodel", + values=("usm", "acc"), + when="+sycl2020", + default="usm", + description="SYCL2020 -> choose between usm and acc methods", + ) + variant( + "std_submodel", + values=("data", "indices", "ranges"), + when="+std", + default="data", + description="STD -> choose between data, indices and ranges models", + ) - # Kokkos conflict and variant - conflicts( - "dir=none", when="+kokkos", msg="KOKKKOS requires architecture to be specfied by dir=" + variant( + "sycl2020_offload", + values=("nvidia", "intel"), + default="intel", + when="+sycl2020", + description="Offloading to NVIDIA GPU or not", ) - variant("kokkos", default=False, description="Enable KOKKOS support") - # ACC conflict - variant("cpu_arch", values=str, default="none", description="Enable CPU Target for ACC") - variant("acc_target", values=str, default="none", description="Enable CPU Target for ACC") + variant( + "thrust_submodel", + values=("cuda", "rocm"), + default="cuda", + when="+thrust", + description="Which THRUST implementation to use, supported options include option= \ + - CUDA (via https://github.com/NVIDIA/thrust)\ + - ROCM (via https://github.com/ROCmSoftwarePlatform/rocThrust)", + ) + variant( + "thrust_backend", + values=("cuda", "omp", "tbb"), + default="cuda", + when="+thrust", + description="Which THRUST implementation to use, supported options include option", + ) + + # Kokkos variant + 
variant("kokkos", default=False, description="Enable KOKKOS support") # STD conflicts - conflicts("+stddata", when="%gcc@:10.1.0", msg="STD-data requires newer version of GCC") - conflicts("+stdindices", when="%gcc@:10.1.0", msg="STD-indices requires newer version of GCC") - conflicts("+stdranges", when="%gcc@:10.1.0", msg="STD-ranges requires newer version of GCC") + conflicts("+std", when="%gcc@:10.1.0", msg="STD requires newer version of GCC") # CUDA conflict conflicts( @@ -69,349 +111,763 @@ class Babelstream(CMakePackage, CudaPackage, ROCmPackage): when="+cuda", msg="CUDA requires architecture to be specfied by cuda_arch=", ) - variant("mem", values=str, default="DEFAULT", description="Enable MEM Target for CUDA") - # Raja Conflict variant( - "offload", values=str, default="none", description="Enable RAJA Target [CPU or NVIDIA]" - ) - conflicts( - "offload=none", - when="+raja", - msg="RAJA requires architecture to be specfied by acc_target=[CPU,NVIDIA]", + "cuda_memory_mode", + values=("default", "managed", "pagefault"), + default="default", + when="+cuda", + description="Enable MEM Target for CUDA", ) - # download raja from https://github.com/LLNL/RAJA + # OMP offload + variant("omp_offload", default=False, when="+omp", description="Enable OpenMP Target") + variant( + "omp_flags", + values=str, + default="none", + when="+omp", + description="If OFFLOAD is enabled, this *overrides* the default offload flags", + ) conflicts( - "dir=none", + "omp_flags=none", + when="+omp_offload", + msg="OpenMP requires offload flags to be specfied by omp_flags=", + ) + # Raja offload + variant( + "raja_offload", + values=("cpu", "nvidia"), + default="cpu", when="+raja", - msg="RAJA implementation requires architecture to be specfied by dir=", + description="Enable RAJA Target [CPU or NVIDIA] / Offload with custom settings for OpenMP", + ) + # std-* offload + variant( + "std_offload", + values=("nvhpc", "none"), + default="none", + when="+std", + description="Enable offloading 
support (via the non-standard `-stdpar`)\ + for the new NVHPC SDK", + ) + variant( + "std_onedpl_backend", + values=("openmp", "tbb", "dpcpp", "none"), + default="none", + when="+std", + description="Implements policies using OpenMP,TBB or dpc++", + ) + variant( + "std_use_tbb", + values=(True, False), + default=False, + when="+std", + description="No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB\ + via FetchContent_Declare, see top level CMakeLists.txt for details", + ) + variant( + "std_use_onedpl", + values=(True, False), + default=False, + when="+std", + description="Link oneDPL which implements C++17 executor policies\ + (via execution_policy_tag) for different backends", + ) + # hip memory mode + variant( + "hip_mem_mode", + values=("default", "managed", "pagefault"), + default="default", + when="+hip", + description="Enable MEM Target for HIP", + ) + # tbb use vector + variant( + "tbb_use_vector", + values=(True, False), + default=False, + when="+tbb", + description="Whether to use std::vector<T> for storage or use aligned_alloc. 
\ + C++ vectors are *zero* initialised where as aligned_alloc is \ + uninitialised before first use.", ) # Thrust Conflict - # conflicts("~cuda", when="+thrust", msg="Thrust requires +cuda variant") depends_on("thrust", when="+thrust") - depends_on("rocthrust", when="+thrust implementation=rocm") - + depends_on("cuda", when="thrust_submodel=cuda") + depends_on("cuda", when="+raja raja_offload=nvidia") + depends_on("hip", when="+hip") + depends_on("rocthrust", when="thrust_submodel=rocm") + depends_on("intel-tbb", when="+std +std_use_tbb") + depends_on("intel-oneapi-dpl", when="+std +std_use_onedpl") + depends_on("intel-tbb", when="+std +std_use_onedpl") # TBB Dependency - depends_on("intel-oneapi-tbb", when="+tbb") - partitioner_vals = ["auto", "affinity", "static", "simple"] + depends_on("intel-tbb", when="+tbb") + variant( - "partitioner", - values=partitioner_vals, + "tbb_partitioner", + values=("auto", "affinity", "static", "simple"), default="auto", + when="+tbb", description="Partitioner specifies how a loop template should partition its work among threads.\ Possible values are:\ AUTO - Optimize range subdivision based on work-stealing events.\ AFFINITY - Proportional splitting that optimizes for cache affinity.\ STATIC - Distribute work uniformly with no additional load balancing.\ SIMPLE - Recursively split its range until it cannot be further subdivided.\ - See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners for more details.", + See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners", ) - # Kokkos Dependency - depends_on("kokkos@3.7.1", when="+kokkos") + # Kokkos & RAJA Dependency + cuda_archs = CudaPackage.cuda_arch_values + for sm_ in cuda_archs: + depends_on( + "kokkos +cuda +wrapper cuda_arch={0}".format(sm_), + when="kokkos_backend=cuda cuda_arch={0}".format(sm_), + ) + depends_on( + "raja +cuda cuda_arch={0}".format(sm_), + when="raja_offload=nvidia 
cuda_arch={0}".format(sm_), + ) + depends_on("kokkos +openmp", when="kokkos_backend=omp") + depends_on("raja +openmp", when="raja_offload=cpu") # OpenCL Dependency - - backends = { - "ocl": [ - ("amd", "rocm-opencl", "enable ROCM backend"), - ("cuda", "cuda", "enable Cuda backend"), - ("intel", "intel-oneapi-compilers", "enable Intel backend"), - ("pocl", "pocl@1.5", "enable POCL backend"), - ], - "kokkos": [ - ("cuda", "cuda", "enable Cuda backend"), - ("omp", "none", "enable Cuda backend"), - ], - } - backend_vals = ["none"] - for lang in backends: - for item in backends[lang]: - backend, dpdncy, descr = item - backend_vals.append(backend.lower()) - - variant("backend", values=backend_vals, default="none", description="Enable backend support") - - for lang in backends: - for item in backends[lang]: - backend, dpdncy, descr = item - if dpdncy.lower() != "none": - depends_on("%s" % dpdncy.lower(), when="backend=%s" % backend.lower()) - # this flag could be used in all required languages - variant("flags", values=str, default="none", description="Additional CXX flags to be provided") - - # comp_impl_vals=["ONEAPI-DPCPP","DPCPP","HIPSYCL","COMPUTECPP"] variant( - "implementation", - values=str, + "ocl_backend", + values=("amd", "cuda", "intel", "pocl", "none"), default="none", - description="Compile using the specified SYCL compiler option", + when="+ocl", + description="Enable Backend Target for OpenCL", ) - - conflicts( - "implementation=none", - when="+sycl", - msg="SYCL requires compiler implementation to be specified by option=", + variant( + "kokkos_backend", + values=("cuda", "omp", "none"), + default="none", + when="+kokkos", + description="Enable Backend Target for kokkos", ) conflicts( - "implementation=none", - when="+thrust", - msg="Which Thrust implementation to use, supported options include:\ - - CUDA (via https://github.com/NVIDIA/thrust)\ - - ROCM (via https://github.com/ROCm/rocThrust)", + "ocl_backend=none", + when="+ocl", + msg="OpenCL 
implementation requires backend to be specfied by ocl_backend=", ) + # depends_on("rocm-opencl@6.0.2", when="+ocl ocl_backend=amd") + depends_on("cuda", when="+ocl ocl_backend=cuda") + depends_on("cuda", when="+sycl2020 sycl2020_offload=nvidia") + depends_on("intel-oneapi-compilers", when="+ocl ocl_backend=intel") + depends_on("pocl@1.5", when="+ocl ocl_backend=pocl") + + variant( + "cuda_extra_flags", + values=str, + default="none", + description="Additional CUDA Compiler flags to be provided", + ) + + # CMake specific dependency + with when("build_system=cmake"): + depends_on("cmake@3.14.0:", type="build") # This applies to all - depends_on("cmake@3.14.0:", type="build") depends_on("opencl-c-headers", when="+ocl") + # Fortran related configurations + with when("languages=fortran"): + implementation_vals = [ + "DoConcurrent", + "Array", + "OpenMP", + "OpenMPWorkshare", + "OpenMPTarget", + "OpenMPTargetLoop", + "OpenMPTaskloop", + "OpenACC", + "OpenACCArray", + "CUDA", + "CUDAKernel", + "Sequential", + ] + variant( + "foption", + values=implementation_vals, + default="Sequential", + description="Implementation", + ) + # The fortran Makefile is inside the src/fortran so we need to address this + build_directory = "src/fortran" + build_name = "" + variant( + "fortran_flags", + values=str, + default="none", + description="Additional Fortran flags to be provided", + ) + + +class CMakeBuilder(spack.build_systems.cmake.CMakeBuilder): def cmake_args(self): - # convert spec to string to work on it - spec_string = str(self.spec) - - # take only the first portion of the spec until space - spec_string_truncate = spec_string.split(" ", 1)[0] - model_list = find_model_flag(spec_string_truncate) # Prints out ['cuda', 'thrust'] - - if len(model_list) > 1: - ignore_list = ["cuda"] # if +acc is provided ignore the cuda model - model = list(set(model_list) - set(ignore_list)) - # We choose 'thrust' from the list of ['cuda', 'thrust'] - args = ["-DMODEL=" + model[0]] + model_list = [ 
+ "sycl", + "sycl2020", + "omp", + "cuda", + "ocl", + "tbb", + "acc", + "hip", + "thrust", + "raja", + "std", + "kokkos", + ] + # for +acc and +thrust the CudaPackage appends +cuda variant too so we need + # to filter cuda from list e.g. we choose 'thrust' + # from the list of ['cuda', 'thrust'] + model_names = [name for name in model_list if f"+{name}" in self.spec] + print("model names : ", model_names) + if len(model_names) > 1: + model_names = [elem for elem in model_names if (elem != "cuda" and elem != "rocm")] + if "std" in model_names[0]: + args = ["-DMODEL=" + "std-" + self.spec.variants["std_submodel"].value] + elif "sycl2020" in model_names[0]: # this is for nvidia offload + args = ["-DMODEL=" + "sycl2020-" + self.spec.variants["sycl2020_submodel"].value] + else: + args = ["-DMODEL=" + model_names[0]] else: - # if it is +stddata,indices etc. we need to pass it - # as std-data to the CMake compiler - # do some alterations here - if "std" in model_list[0]: - args = ["-DMODEL=" + "std-" + model_list[0].split("d", 1)[1]] + # do some alterations here to append sub models too + if "std" in model_names[0]: + args = ["-DMODEL=" + "std-" + self.spec.variants["std_submodel"].value] + elif "sycl2020" in model_names[0]: + args = ["-DMODEL=" + "sycl2020-" + self.spec.variants["sycl2020_submodel"].value] + print(args) + elif "rocm" in model_names[0]: + args = ["-DMODEL=hip"] else: - args = ["-DMODEL=" + model_list[0]] + args = ["-DMODEL=" + model_names[0]] + if model_names[0] != "tbb" and model_names[0] != "thrust": + args.append("-DCMAKE_CXX_COMPILER=" + spack_cxx) # =================================== # ACC # =================================== - if ("+acc" in self.spec) and ("~cuda" in self.spec): - args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx) - if "cuda_arch" in self.spec.variants: - cuda_arch_list = self.spec.variants["cuda_arch"].value - # the architecture value is only number so append sm_ to the name - cuda_arch = "cc" + cuda_arch_list[0] - 
args.append("-DTARGET_DEVICE=gpu") - args.append("-DCUDA_ARCH=" + cuda_arch) - elif "cpu_arch" in self.spec.variants: - cpu_arch_list = self.spec.variants["cpu_arch"].value - # the architecture value is only number so append sm_ to the name - cpu_arch = cpu_arch_list[0] - args.append("-DTARGET_DEVICE=multicore") - args.append("-DTARGET_PROCESSOR=" + cpu_arch) - + """ + register_flag_optional(TARGET_DEVICE + "[PGI/NVHPC only] This sets the `-target` flag, possible values are: + gpu - Globally set the target device to an NVIDIA GPU + multicore - Globally set the target device to the host CPU + Refer to `nvc++ --help` for the full list" + register_flag_optional(CUDA_ARCH + "[PGI/NVHPC only] Only applicable if `TARGET_DEVICE` is set to `gpu`. + Nvidia architecture in ccXY format, for example, sm_70 becomes cc70, + will be passed in via `-gpu=` (e.g `cc70`) + Possible values are: + cc35 - Compile for compute capability 3.5 + cc50 - Compile for compute capability 5.0 + cc60 - Compile for compute capability 6.0 + cc62 - Compile for compute capability 6.2 + cc70 - Compile for compute capability 7.0 + cc72 - Compile for compute capability 7.2 + cc75 - Compile for compute capability 7.5 + cc80 - Compile for compute capability 8.0 + ccall - Compile for all supported compute capabilities + Refer to `nvc++ --help` for the full list" + "") + +register_flag_optional(TARGET_PROCESSOR + "[PGI/NVHPC only] This sets the `-tp` (target processor) flag, possible values are: + px - Generic x86 Processor + bulldozer - AMD Bulldozer processor + piledriver - AMD Piledriver processor + zen - AMD Zen architecture (Epyc, Ryzen) + zen2 - AMD Zen 2 architecture (Ryzen 2) + sandybridge - Intel SandyBridge processor + haswell - Intel Haswell processor + knl - Intel Knights Landing processor + skylake - Intel Skylake Xeon processor + host - Link native version of HPC SDK cpu math library + native - Alias for -tp host + Refer to `nvc++ --help` for the full list" + "") + """ + if 
self.spec.satisfies("+acc~kokkos~raja"): + if (self.spec.compiler.name == "nvhpc") or (self.spec.compiler.name == "pgi"): + target_device = "gpu" if "cuda_arch" in self.spec.variants else "multicore" + if "cuda_arch" in self.spec.variants: + cuda_arch_list = self.spec.variants["cuda_arch"].value + # the architecture value is only number so append cc_ to the name + cuda_arch = "cc" + cuda_arch_list[0] + # args.append( + # "-DCXX_EXTRA_FLAGS=" + "-target=" + target_device + "-gpu=" + cuda_arch + # ) + args.append("-DCUDA_ARCH=" + cuda_arch) + else: + # get the cpu architecture value from user + target_processor = str( + self.spec.target + ) # self.spec.variants["cpu_arch"].value[0] + args.append("-DTARGET_PROCESSOR=" + target_processor) + # args.append( + # "-DCXX_EXTRA_FLAGS=" + # + "-target=" + # + target_device + # + "-tp=" + # + target_processor + # ) + args.append("-DTARGET_DEVICE=" + target_device) # =================================== # STDdata,STDindices,STDranges # =================================== - std_list = ["+stddata", "+stdindices", "+stdranges"] - if spec_string.startswith(tuple(std_list)): - args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx) + + if "+std" in self.spec: + if self.spec.satisfies("+std_use_tbb"): + args.append("-DCXX_EXTRA_FLAGS=-ltbb") + if self.spec.satisfies("+std_use_onedpl"): + # args.append("-DCXX_EXTRA_FLAGS=-ltbb") + # args.append("-DCXX_EXTRA_FLAGS=-loneDPL") + args.append( + "-DUSE_ONEDPL=" + self.spec.variants["std_onedpl_backend"].value.upper() + ) + if self.spec.variants["std_offload"].value != "none": + # the architecture value is only number so append cc_ to the name + cuda_arch = "cc" + self.spec.variants["cuda_arch"].value[0] + args.append("-DNVHPC_OFFLOAD=" + cuda_arch) # =================================== # CUDA # =================================== - - if ("+cuda" in self.spec) and ("~kokkos" in self.spec) and ("~acc" in self.spec): + if self.spec.satisfies("+cuda~kokkos~acc~omp~thrust~raja"): # Set up the 
cuda macros needed by the build cuda_arch_list = self.spec.variants["cuda_arch"].value + # "-DCUDA_ARCH" requires sm_ # the architecture value is only number so append sm_ to the name cuda_arch = "sm_" + cuda_arch_list[0] args.append("-DCUDA_ARCH=" + cuda_arch) cuda_dir = self.spec["cuda"].prefix cuda_comp = cuda_dir + "/bin/nvcc" args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp) - args.append("-DMEM=" + self.spec.variants["mem"].value) - if self.spec.variants["flags"].value != "none": - args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value) + args.append("-DMEM=" + self.spec.variants["cuda_memory_mode"].value.upper()) + if self.spec.variants["cuda_extra_flags"].value != "none": + args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["cuda_extra_flags"].value) # =================================== # OMP # =================================== # `~kokkos` option is there to prevent +kokkos +omp setting to use omp directly from here # Same applies for raja - if ("+omp" in self.spec) and ("~kokkos" in self.spec) and ("~raja" in self.spec): - args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx) - if "cuda_arch" in self.spec.variants: - cuda_arch_list = self.spec.variants["cuda_arch"].value - # the architecture value is only number so append sm_ to the name - cuda_arch = "sm_" + cuda_arch_list[0] - args.append("-DOFFLOAD= " + "NVIDIA:" + cuda_arch) - elif "amdgpu_target" in self.spec.variants: - rocm_arch = self.spec.variants["amdgpu_target"].value - # the architecture value is only number so append sm_ to the name - args.append("-DOFFLOAD=" + " AMD:" + rocm_arch) + if self.spec.satisfies("+omp~kokkos~raja"): + args.append("-DCMAKE_C_COMPILER=" + spack_cc) + if self.spec.satisfies("~omp_offload"): + args.append("-DOFFLOAD=" + "OFF") + # Check if the omp_flags variant is not set to "none" + args.append( + "-DCMAKE_CXX_FLAGS=" + + self.pkg.compiler.openmp_flag + + " " + + ( + self.spec.variants["omp_flags"].value + if 
self.spec.variants["omp_flags"].value != "none" + else "" + ) + ) else: - args.append("-DOFFLOAD=" + "INTEL") + offload_args = "" + args.append("-DOFFLOAD=ON") + if "cuda_arch" in self.spec.variants: + if self.spec.satisfies("%nvhpc"): + cuda_arch = "cc" + self.spec.variants["cuda_arch"].value[0] + offload_args = " -mp=gpu;" + "-gpu=" + cuda_arch + " " + if self.spec.satisfies("%clang"): + cuda_arch = "sm_" + self.spec.variants["cuda_arch"].value[0] + offload_args = "-fopenmp;--offload-arch=" + cuda_arch + elif ("amdgpu_target" in self.spec.variants) and ( + self.spec.variants["amdgpu_target"].value != "none" + ): + offload_args = ( + ";--offload-arch=" + self.spec.variants["amdgpu_target"].value[0] + ) + + args.append( + "-DOFFLOAD_FLAGS=" + + self.pkg.compiler.openmp_flag + + ";" + + offload_args + + ";" + + self.spec.variants["omp_flags"].value + ) # =================================== - # SYCL + # SYCL # =================================== - if self.spec.satisfies("+sycl"): - args.append("-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper()) - if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP": - args.append( - "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper() - ) - if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP": - args.append("-DOpenCL_LIBRARY=") + if "+sycl" in self.spec: + if self.spec.satisfies("%oneapi"): + # -fsycl flag is required for setting up sycl/sycl.hpp seems like + # it doesn't get it from the CMake file + args.append("-DSYCL_COMPILER=ONEAPI-ICPX") + args.append("-DCXX_EXTRA_FLAGS= -fsycl") + elif self.spec.satisfies("%clang"): + # this requires the clang inside oneapi installation + args.append("-DSYCL_COMPILER=ONEAPI-Clang") + args.append("-DCXX_EXTRA_FLAGS= -fsycl") + else: + args.append("-DSYCL_COMPILER=HIPSYCL") + args.append("-DSYCL_COMPILER_DIR=" + self.spec.variants["dir"].value) + args.append("-DCXX_EXTRA_FLAGS= -fsycl") # 
=================================== - # SYCL 2020 + # SYCL 2020 # =================================== - if self.spec.satisfies("+sycl2020"): + if "+sycl2020" in self.spec: if self.spec.satisfies("%oneapi"): # -fsycl flag is required for setting up sycl/sycl.hpp seems like # it doesn't get it from the CMake file - args.append("-DCXX_EXTRA_FLAGS= -fsycl -O3") - # this is required to enable -DCMAKE_CXX_COMPILER=icpx flag from CMake args.append("-DSYCL_COMPILER=ONEAPI-ICPX") + args.append("-DCXX_EXTRA_FLAGS= -fsycl") + elif self.spec.satisfies("%clang"): + # this requires the clang inside oneapi installation + args.append("-DSYCL_COMPILER=ONEAPI-Clang") + args.append("-DCXX_EXTRA_FLAGS= -fsycl") else: + args.append("-DSYCL_COMPILER=HIPSYCL") + args.append("-DSYCL_COMPILER_DIR=" + self.spec.variants["dir"].value) + args.append("-DCXX_EXTRA_FLAGS= -fsycl") + # if self.spec.variants["flags"].value != "none": + if self.spec.variants["sycl2020_offload"].value == "nvidia": + cuda_dir = self.spec["cuda"].prefix + cuda_arch = "sm_" + self.spec.variants["cuda_arch"].value[0] args.append( - "-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper() - ) - if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP": - args.append( - "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper() + "-DCXX_EXTRA_FLAGS=" + + "-fsycl;-fsycl-targets=nvptx64-nvidia-cuda;" + + self.spec.target.optimization_flags( + self.spec.compiler.name, str(self.spec.compiler.version) ) - if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP": - args.append("-DOpenCL_LIBRARY=") + + " --cuda-path=" + + cuda_dir + ) # =================================== # HIP(ROCM) # =================================== - if self.spec.satisfies("+rocm"): - hip_comp = self.spec["rocm"].prefix + "/bin/hipcc" + if "+hip" in self.spec: + hip_comp = self.spec["hip"].prefix + "/bin/hipcc" + offload_arch = str(self.spec.variants["amdgpu_target"].value[0]) + 
args.append("-DCMAKE_CXX_COMPILER=" + hip_comp) - args.append( - "-DCXX_EXTRA_FLAGS= --offload-arch=" - + self.spec.variants["amdgpu_target"].value - + " " - + self.spec.variants["flags"].value - + " -O3" - ) + args.append(f"-DCXX_EXTRA_FLAGS=--offload-arch={offload_arch} -O3") + if str(self.spec.variants["hip_mem_mode"].value) != "none": + args.append("-DMEM=" + self.spec.variants["hip_mem_mode"].value.upper()) # =================================== # TBB # =================================== - if self.spec.satisfies("+tbb"): - args.append("-DONE_TBB_DIR=" + self.spec["tbb"].prefix + "/tbb/latest/") - args.append("-DPARTITIONER=" + self.spec.variants["partitioner"].value.upper()) + if "+tbb" in self.spec: + args.append("-DONE_TBB_DIR=" + self.spec["intel-tbb"].prefix + "/tbb/latest/") + args.append("-DCXX_EXTRA_FLAGS=-ltbb") + args.append("-DPARTITIONER=" + self.spec.variants["tbb_partitioner"].value.upper()) + if self.spec.satisfies("+tbb_use_vector"): + args.append("-DUSE_VECTOR=ON") # =================================== # OpenCL (ocl) # =================================== - if self.spec.satisfies("+ocl"): - if "backend" in self.spec.variants: - if "cuda" in self.spec.variants["backend"].value: - cuda_dir = self.spec["cuda"].prefix - args.append("-DOpenCL_LIBRARY=" + cuda_dir + "/lib64/libOpenCL.so") - elif "amd" in self.spec.variants["backend"].value: - rocm_dir = self.spec["rocm-opencl"].prefix - args.append("-DOpenCL_LIBRARY=" + rocm_dir + "/lib64/libOpenCL.so") - elif "intel" in self.spec.variants["backend"].value: - intel_lib = ( - self.spec["intel-oneapi-compilers"].prefix - + "/compiler/2023.0.0/linux/lib/libOpenCL.so" - ) - args.append("-DOpenCL_LIBRARY=" + intel_lib) - elif "pocl" in self.spec.variants["backend"].value: - args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx) - pocl_lib = self.spec["pocl"].prefix + "/lib64/libOpenCL.so" - args.append("-DOpenCL_LIBRARY=" + pocl_lib) - args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx) + + if 
"+ocl" in self.spec: + if "cuda" in self.spec.variants["ocl_backend"].value: + cuda_dir = self.spec["cuda"].prefix + args.append("-DOpenCL_LIBRARY=" + cuda_dir + "/lib64/libOpenCL.so") + elif "amd" in self.spec.variants["ocl_backend"].value: + rocm_dir = self.spec["rocm-opencl"].prefix + args.append("-DOpenCL_LIBRARY=" + rocm_dir + "/lib64/libOpenCL.so") + elif "intel" in self.spec.variants["ocl_backend"].value: + intel_lib = ( + self.spec["intel-oneapi-compilers"].prefix + + "/compiler/" + + str(self.spec["intel-oneapi-compilers"].version) + + "/linux/lib/libOpenCL.so" + ) + args.append("-DOpenCL_LIBRARY=" + intel_lib) + elif "pocl" in self.spec.variants["ocl_backend"].value: + pocl_lib = self.spec["pocl"].prefix + "/lib64/libOpenCL.so" + args.append("-DOpenCL_LIBRARY=" + pocl_lib) # =================================== # RAJA # =================================== - if self.spec.satisfies("+raja"): - args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx) - args.append("-DRAJA_IN_TREE=" + self.spec.variants["dir"].value) - if "offload" in self.spec.variants: - if "nvidia" in self.spec.variants["offload"].value: - cuda_dir = self.spec["cuda"].prefix - cuda_comp = cuda_dir + "/bin/nvcc" - args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp) - args.append("-DTARGET=NVIDIA") - cuda_arch_list = self.spec.variants["cuda_arch"].value - cuda_arch = "sm_" + cuda_arch_list[0] - args.append("-DCUDA_ARCH=" + cuda_arch) - args.append("DCUDA_TOOLKIT_ROOT_DIR=" + self.spec["cuda"].prefix) - if self.spec.variants["flags"].value != "none": - args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value) - # if("cpu" in self.spec.variants['offload'].value): + if "+raja" in self.spec: + args.append("-DCMAKE_C_COMPILER=" + spack_cc) + args.append("-DRAJA_IN_PACKAGE=" + self.spec["raja"].prefix) + if "nvidia" in self.spec.variants["raja_offload"].value: + cuda_comp = self.spec["cuda"].prefix + "/bin/nvcc" + args.append("-DTARGET=NVIDIA") + cuda_arch = "sm_" + 
self.spec.variants["cuda_arch"].value[0] + args.append("-DCUDA_ARCH=" + cuda_arch) - if "omp" in self.spec.variants["backend"].value: - args.append("-DENABLE_OPENMP=ON") - if "cuda" in self.spec.variants["backend"].value: args.append("-DENABLE_CUDA=ON") + args.append("-DCUDA_TOOLKIT_ROOT_DIR=" + self.spec["cuda"].prefix) + if self.spec.variants["cuda_extra_flags"].value != "none": + args.append( + "-DCMAKE_CUDA_FLAGS=" + self.spec.variants["cuda_extra_flags"].value + ) # =================================== # THRUST # =================================== - if self.spec.satisfies("+thrust"): - if "cuda" in self.spec.variants["implementation"].value: - args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper()) + + if "+thrust" in self.spec: + if "cuda" in self.spec.variants["thrust_submodel"].value: + args.append("-DTHRUST_IMPL=" + self.spec.variants["thrust_submodel"].value.upper()) + args.append("-SDK_DIR=" + self.spec["thrust"].prefix + "/include") - cuda_arch_list = self.spec.variants["cuda_arch"].value - # the architecture value is only number so append sm_ to the name - cuda_arch = "sm_" + cuda_arch_list[0] - args.append("-DCUDA_ARCH=" + cuda_arch) + # this model uses CMAKE_CUDA_ARCHITECTURES which only requires number of cuda_arch + # no need to append sm_ or cc_ + args.append("-DCUDA_ARCH=" + self.spec.variants["cuda_arch"].value[0]) cuda_dir = self.spec["cuda"].prefix cuda_comp = cuda_dir + "/bin/nvcc" args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp) - args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper()) - if self.spec.variants["flags"].value != "none": - args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value) - - if "rocm" in self.spec.variants["implementation"].value: - args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper()) + # args.append("-DCMAKE_CUDA_COMPILER=" + spack_cxx) + # args.append("-DCMAKE_CUDA_FLAGS=-ccbin " + spack_cc) + args.append("-DBACKEND=" + 
self.spec.variants["thrust_backend"].value.upper()) + if self.spec.variants["cuda_extra_flags"].value != "none": + args.append( + "-DCUDA_EXTRA_FLAGS=" + self.spec.variants["cuda_extra_flags"].value + ) + if "rocm" in self.spec.variants["thrust_submodel"].value: + args.append("-DCMAKE_CXX_COMPILER=" + self.spec["hip"].hipcc) + args.append("-DTHRUST_IMPL=" + self.spec.variants["thrust_submodel"].value.upper()) args.append("-SDK_DIR=" + self.spec["rocthrust"].prefix) - args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper()) # =================================== # kokkos # =================================== # kokkos implementation is versatile and it could use cuda or omp architectures as backend - # The usage should be spack install babelstream +kokkos +cuda [or +omp] - if self.spec.satisfies("+kokkos"): - args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx) - args.append("-DKOKKOS_IN_TREE=" + self.spec.variants["dir"].value) - # args.append("-DKOKKOS_IN_PACKAGE=" + self.spec["kokkos"].prefix) - if "backend" in self.spec.variants: - if "cuda" in self.spec.variants["backend"].value: - args.append("-DKokkos_ENABLE_CUDA=ON") - cuda_arch_list = self.spec.variants["cuda_arch"].value - cuda_arch = cuda_arch_list[0] - # arhitecture kepler optimisations - if cuda_arch in ("30", "32", "35", "37"): - args.append("-D" + "Kokkos_ARCH_KEPLER" + cuda_arch + "=ON") - # arhitecture maxwell optimisations - if cuda_arch in ("50", "52", "53"): - args.append("-D" + "Kokkos_ARCH_MAXWELL" + cuda_arch + "=ON") - # arhitecture pascal optimisations - if cuda_arch in ("60", "61"): - args.append("-D" + "Kokkos_ARCH_PASCAL" + cuda_arch + "=ON") - # architecture volta optimisations - if cuda_arch in ("70", "72"): - args.append("-D" + "Kokkos_ARCH_VOLTA" + cuda_arch + "=ON") - if cuda_arch == "75": - args.append("-DKokkos_ARCH_TURING75=ON") - if "omp" in self.spec.variants["backend"].value: - args.append("-DKokkos_ENABLE_OPENMP=ON") + + # The usage should be spack install 
class MakefileBuilder(spack.build_systems.makefile.MakefileBuilder):
    """Makefile-based builder for the Fortran version of BabelStream.

    The ``edit`` step writes a ``make.inc.<compiler>`` file into
    ``src/fortran`` holding the compiler-specific flags; ``install`` copies
    the resulting ``BabelStream.<compiler>.<foption>`` binary into the prefix.
    """

    build_directory = "src/fortran"

    def edit(self, pkg, spec, prefix):
        """Generate the compiler-specific ``make.inc.<compiler>`` include file.

        Args:
            pkg: package object; its ``compiler`` supplies ``fc``,
                ``opt_flags`` and ``openmp_flag``.
            spec: concrete spec; ``compiler``, ``target`` and the ``foption``
                (and, for nvhpc, ``cuda_arch``) variants are read from it.
            prefix: installation prefix (unused here).

        Raises:
            InstallError: if the selected ``foption`` is listed as unsupported
                for the compiler in use.
        """
        import os

        compiler_name = spec.compiler.name
        foption = spec.variants["foption"].value

        # Insertion order is the order in which the keys are written out.
        config = {
            "FC": pkg.compiler.fc,
            "FCFLAGS": "",
            "ARCH": spec.target.family,
            "DOCONCURRENT_FLAG": "",
            "ARRAY_FLAG": "",
            "OPENMP_FLAG": "",
            "OPENACC_FLAG": "",
            "CUDA_FLAG": "",
            "SEQUENTIAL_FLAG": "",
        }

        # Compiler name -> Fortran options (``foption`` values) it cannot build.
        unsupported_options = {
            "arm": ["CUDA", "CUDAKernel", "OpenACC", "OpenACCArray"],
            "aocc": ["CUDA", "CUDAKernel"],
            "cce": ["CUDA", "CUDAKernel"],
            "gcc": ["CUDA", "CUDAKernel"],
            "nvhpc": ["OpenMPTaskloop"],
            "oneapi": ["CUDA", "CUDAKernel", "OpenACC", "OpenACCArray"],
            "fj": ["CUDA", "CUDAKernel", "OpenACC"],
        }

        # Full-string match on purpose: "OpenACC" must not reject "OpenACCArray".
        if foption in unsupported_options.get(compiler_name, ()):
            raise InstallError(f"{foption} is not supported by the {compiler_name} compiler")

        # ===================================
        #    ARM
        # ===================================
        if compiler_name == "arm":
            # Trailing space keeps the target flags from fusing with the
            # preceding -Wno-unused-variable.
            fortran_flags = (
                "-std=f2018 " + pkg.compiler.opt_flags[4] + " -Wall -Wno-unused-variable "
            )
            fortran_flags += spec.target.optimization_flags(
                compiler_name, str(spec.compiler.version)
            )

            config["FCFLAGS"] = fortran_flags
            config["DOCONCURRENT_FLAG"] = pkg.compiler.openmp_flag  # libomp.so required
            config["ARRAY_FLAG"] = pkg.compiler.openmp_flag  # libomp.so required
            config["OPENMP_FLAG"] = pkg.compiler.openmp_flag  # libomp.so required
            config["OPENACC_FLAG"] = "-fopenacc"

        # ===================================
        #    AMD
        # ===================================
        elif compiler_name == "aocc":
            config["FCFLAGS"] = (
                "-std=f2018 " + pkg.compiler.opt_flags[3] + " -Wall -Wno-unused-variable"
            )
            config["DOCONCURRENT_FLAG"] = pkg.compiler.openmp_flag  # libomp.so required
            config["ARRAY_FLAG"] = pkg.compiler.openmp_flag  # libomp.so required
            config["OPENMP_FLAG"] = pkg.compiler.openmp_flag  # libomp.so required
            config["OPENACC_FLAG"] = "-fopenacc"

        # ===================================
        #    CRAY
        # ===================================
        elif compiler_name == "cce":
            config["FCFLAGS"] = "-e F -O3"
            config["DOCONCURRENT_FLAG"] = "-h thread_do_concurrent -DCRAY_THREAD_DOCONCURRENT"
            config["ARRAY_FLAG"] = "-h autothread"
            config["OPENMP_FLAG"] = pkg.compiler.openmp_flag
            config["OPENACC_FLAG"] = "-h acc"  # for cpu only -h omp

        # ===================================
        #    GCC
        # ===================================
        elif compiler_name == "gcc":
            fortran_flags = "-std=f2018 -O3 "
            fortran_flags += "-Wall -Wno-unused-dummy-argument -Wno-unused-variable "
            fortran_flags += spec.target.optimization_flags(
                compiler_name, str(spec.compiler.version)
            )

            config["FCFLAGS"] = fortran_flags
            config["DOCONCURRENT_FLAG"] = "-ftree-parallelize-loops=4"
            config["OPENMP_FLAG"] = pkg.compiler.openmp_flag
            config["OPENACC_FLAG"] = "-fopenacc"

        # ===================================
        #    NVHPC
        # ===================================
        elif compiler_name == "nvhpc":
            fortran_flags = pkg.compiler.opt_flags[4]  # for -O3
            # FCFLAGS := -O3 -Minform=inform -Minfo=all
            fortran_flags += " -Minform=warn "
            target = "gpu"  # alternative: "multicore"
            config["TARGET"] = target

            # Default to no -gpu flag so the assignments below never hit an
            # undefined name when cuda_arch is absent or left at "none".
            gpu_flag = ""
            if "cuda_arch" in spec.variants:
                first_arch = spec.variants["cuda_arch"].value[0]
                if first_arch != "none":
                    # The variant holds only the number; prefix it with "cc".
                    gpu_flag = " -gpu=cc" + first_arch
            fortran_flags += "-tp=" + str(spec.target)

            # This is to allow apples-to-apples comparison with DC in non-DC
            # GPU impls; set exactly one of these pairs:
            #   managed = "-DUSE_MANAGED -gpu=managed"; device = ""
            #   device = "-DUSE_DEVICE -cuda -gpu=nomanaged"; managed = ""
            device = "-DUSE_DEVICE -cuda -gpu=nomanaged"
            managed = ""

            config["FCFLAGS"] = fortran_flags
            config["DOCONCURRENT_FLAG"] = gpu_flag + " -stdpar=" + target + " " + device
            config["ARRAY_FLAG"] = gpu_flag + " -stdpar=" + target + " " + managed
            config["OPENMP_FLAG"] = gpu_flag + " -mp=" + target + " " + managed
            config["OPENACC_FLAG"] = gpu_flag + " -acc=" + target + " " + managed
            config["CUDA_FLAG"] = gpu_flag + " -cuda -acc=gpu" + " " + managed

        # ===================================
        #    ONEAPI
        # ===================================
        elif compiler_name == "oneapi":
            # FC may be a path to the compiler; compare its basename so the
            # ifort/ifx specific flags are actually selected.
            fc_name = os.path.basename(str(config["FC"]))
            is_ifort = fc_name == "ifort"

            fortran_flags = "-std18 -Ofast -xHOST -qopt-zmm-usage=low"
            if is_ifort:
                fortran_flags += " -qopt-streaming-stores=always"
            parallel = " -parallel" if is_ifort else ""

            config["DOCONCURRENT_FLAG"] = "-qopenmp" + parallel
            config["ARRAY_FLAG"] = "-qopenmp" + parallel
            config["OPENMP_FLAG"] = "-qopenmp" + (
                " -fopenmp-targets=spir64 -DUSE_FLOAT=1" if fc_name == "ifx" else ""
            )
            config["FCFLAGS"] = fortran_flags

        # ===================================
        #    FJ
        # ===================================
        elif compiler_name == "fj":
            fortran_flags = "-X08 -Kfast -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 "
            fortran_flags += "-Kprefetch_sequential=soft "
            fortran_flags += "-Kprefetch_line=8 -Kprefetch_line_L2=16 -Koptmsg=2 "
            # FJ Fortran system_clock is low resolution
            fortran_flags += "-Keval -DUSE_OMP_GET_WTIME=1 "

            config["FCFLAGS"] = fortran_flags
            config["DOCONCURRENT_FLAG"] = "-Kparallel,reduction -DNOTSHARED"
            config["ARRAY_FLAG"] = "-Kparallel,reduction"
            config["OPENMP_FLAG"] = pkg.compiler.openmp_flag

        # One "KEY = value" line per entry.
        inc_path = os.path.join(self.build_directory, "make.inc." + compiler_name)
        with open(inc_path, "w") as inc:
            inc.writelines(f"{key} = {value}\n" for key, value in config.items())

    def setup_build_environment(self, env):
        """Export the compiler name and Fortran option for the build.

        Sets ``COMPILER`` and ``IMPLEMENTATION``; presumably these are read by
        the Makefile in ``src/fortran`` -- confirm against upstream.
        """
        env.set("COMPILER", self.spec.compiler.name)
        env.set("IMPLEMENTATION", self.spec.variants["foption"].value)

    @property
    def build_name(self):
        """File name of the built binary: ``BabelStream.<compiler>.<foption>``."""
        compiler_prefix = self.spec.compiler.name
        implementation_prefix = self.spec.variants["foption"].value
        return f"BabelStream.{compiler_prefix}.{implementation_prefix}"

    def install(self, pkg, spec, prefix):
        """Install the binary into ``prefix.bin`` and the build tree into ``prefix.lib``."""
        mkdir(prefix.bin)
        install(self.build_directory + "/" + self.build_name, prefix.bin)
        # Keep the whole build directory so the generated make.inc can be checked.
        install_tree(self.build_directory, prefix.lib)