From deca4ce107320a18d5c1701f95efb34cf0d5c2aa Mon Sep 17 00:00:00 2001
From: kaanolgu <61908449+kaanolgu@users.noreply.github.com>
Date: Mon, 27 Mar 2023 16:04:28 +0100
Subject: Babelstream Spack Package (#36164)

---
 .../repos/builtin/packages/babelstream/package.py | 415 +++++++++++++++++++++
 1 file changed, 415 insertions(+)
 create mode 100644 var/spack/repos/builtin/packages/babelstream/package.py

diff --git a/var/spack/repos/builtin/packages/babelstream/package.py b/var/spack/repos/builtin/packages/babelstream/package.py
new file mode 100644
index 0000000000..8c3af7704c
--- /dev/null
+++ b/var/spack/repos/builtin/packages/babelstream/package.py
@@ -0,0 +1,415 @@
+# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import re  # To extract the variant names that follow "+"
+
+from spack.package import *
+
+
+def find_model_flag(spec_str):
+    # Return the list of "+<variant>" names found in a spec string,
+    # e.g. "+cuda+thrust" -> ["cuda", "thrust"]; return "" if none are present.
+    res = re.findall(r"\+(\w+)", spec_str)
+    if not res:
+        return ""
+    return res
+
+
+class Babelstream(CMakePackage, CudaPackage, ROCmPackage):
+    """Measure memory transfer rates to/from global device memory on GPUs.
+    This benchmark is similar in spirit to, and based on, the STREAM benchmark for CPUs."""
+
+    homepage = "https://github.com/UoB-HPC/BabelStream"
+    url = "https://github.com/UoB-HPC/BabelStream/archive/refs/tags/v4.0.tar.gz"
+    git = "https://github.com/UoB-HPC/BabelStream.git"
+    version("4.0", sha256="a9cd39277fb15d977d468435eb9b894f79f468233f0131509aa540ffda4f5953")
+    version("main", branch="main")
+    version("develop", branch="develop")
+    maintainers = ["tomdeakin", "kaanolgu", "tom91136", "robj0nes"]
+
+    # Languages
+    # Also supported variants are cuda and rocm (for HIP)
+    variant("sycl", default=False, description="Enable SYCL support")
+    variant("sycl2020", default=False, description="Enable SYCL 2020 support")
+    variant("omp", default=False, description="Enable OpenMP support")
+    variant("ocl", default=False, description="Enable OpenCL support")
+    variant("tbb", default=False, description="Enable TBB support")
+    variant("acc", default=False, description="Enable OpenACC support")
+    variant("thrust", default=False, description="Enable Thrust support")
+    variant("raja", default=False, description="Enable RAJA support")
+    variant("stddata", default=False, description="Enable STD-data support")
+    variant("stdindices", default=False, description="Enable STD-indices support")
+    variant("stdranges", default=False, description="Enable STD-ranges support")
+
+    # Some models need the programming model abstraction to be downloaded separately -
+    # this variant lets a path to it be provided.
+    variant("dir", values=str, default="none", description="In-tree source path (Kokkos/RAJA)")
+
+    # Kokkos conflict and variant
+    conflicts(
+        "dir=none", when="+kokkos", msg="Kokkos requires the source path to be specified by dir="
+    )
+    variant("kokkos", default=False, description="Enable Kokkos support")
+
+    # ACC conflict
+    variant("cpu_arch", values=str, default="none", description="CPU target for OpenACC")
+    variant("target", values=str, default="none", description="Target device for OpenACC")
+
+    # STD conflicts
+    conflicts("+stddata", when="%gcc@:10.1.0", msg="STD-data requires GCC newer than 10.1.0")
+    conflicts("+stdindices", when="%gcc@:10.1.0", msg="STD-indices requires GCC newer than 10.1.0")
+    conflicts("+stdranges", when="%gcc@:10.1.0", msg="STD-ranges requires GCC newer than 10.1.0")
+
+    # CUDA conflict
+    conflicts(
+        "cuda_arch=none",
+        when="+cuda",
+        msg="CUDA requires architecture to be specified by cuda_arch=",
+    )
+    variant("mem", values=str, default="DEFAULT", description="CUDA memory mode (passed as -DMEM)")
+    # RAJA conflict
+    variant(
+        "offload", values=str, default="none", description="Enable RAJA target [CPU or NVIDIA]"
+    )
+    conflicts(
+        "offload=none",
+        when="+raja",
+        msg="RAJA requires the target to be specified by offload=[cpu,nvidia]",
+    )
+
+    # download RAJA from https://github.com/LLNL/RAJA
+    conflicts(
+        "dir=none",
+        when="+raja",
+        msg="RAJA implementation requires the source path to be specified by dir=",
+    )
+
+    # Thrust conflict
+    # conflicts("~cuda", when="+thrust", msg="Thrust requires +cuda variant")
+    depends_on("thrust", when="+thrust")
+    depends_on("rocthrust", when="+thrust implementation=rocm")
+
+    # TBB dependency
+    depends_on("intel-oneapi-tbb", when="+tbb")
+    partitioner_vals = ["auto", "affinity", "static", "simple"]
+    variant(
+        "partitioner",
+        values=partitioner_vals,
+        default="auto",
+        description="Partitioner specifies how a loop template should partition its work among threads.\
+            Possible values are:\
+            AUTO - Optimize range subdivision based on work-stealing events.\
+            AFFINITY - Proportional splitting that optimizes for cache affinity.\
+            STATIC - Distribute work uniformly with no additional load balancing.\
+            SIMPLE - Recursively split its range until it cannot be further subdivided.\
+            See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners for more details.",
+    )
+
+    # Kokkos dependency
+    depends_on("kokkos@3.7.1", when="+kokkos")
+
+    # OpenCL dependency
+
+    backends = {
+        "ocl": [
+            ("amd", "rocm-opencl", "enable ROCm backend"),
+            ("cuda", "cuda", "enable CUDA backend"),
+            ("intel", "intel-oneapi-compilers", "enable Intel backend"),
+            ("pocl", "pocl@1.5", "enable POCL backend"),
+        ],
+        "kokkos": [
+            ("cuda", "cuda", "enable CUDA backend"),
+            ("omp", "none", "enable OpenMP backend"),
+        ],
+    }
+    backend_vals = ["none"]
+    for lang in backends:
+        for item in backends[lang]:
+            backend, dpdncy, descr = item
+            backend_vals.append(backend.lower())
+
+    variant("backend", values=backend_vals, default="none", description="Enable backend support")
+
+    for lang in backends:
+        for item in backends[lang]:
+            backend, dpdncy, descr = item
+            if dpdncy.lower() != "none":
+                depends_on("%s" % dpdncy.lower(), when="backend=%s" % backend.lower())
+    # this flag can be used with all models that accept extra compiler flags
+    variant("flags", values=str, default="none", description="Additional CXX flags to be provided")
+
+    # comp_impl_vals=["ONEAPI-DPCPP","DPCPP","HIPSYCL","COMPUTECPP"]
+    variant(
+        "implementation",
+        values=str,
+        default="none",
+        description="Compile using the specified SYCL compiler implementation; also selects the Thrust implementation",
+    )
+
+    conflicts(
+        "implementation=none",
+        when="+sycl",
+        msg="SYCL requires the compiler implementation to be specified by implementation=",
+    )
+    conflicts(
+        "implementation=none",
+        when="+thrust",
+        msg="Which Thrust implementation to use, supported options include:\
+            - CUDA (via https://github.com/NVIDIA/thrust)\
+            - ROCM (via https://github.com/ROCmSoftwarePlatform/rocThrust)",
+    )
+
+    # These apply to all models
+    depends_on("cmake@3.14.0:", type="build")
+    depends_on("opencl-c-headers", when="+ocl")
+
+    def cmake_args(self):
+        # convert the spec to a string to work on it
+        spec_string = str(self.spec)
+
+        # take only the first portion of the spec, up to the first space
+        spec_string_truncate = spec_string.split(" ", 1)[0]
+        # e.g. "+cuda+thrust" yields ['cuda', 'thrust']
+        model_list = find_model_flag(spec_string_truncate)
+
+        if len(model_list) > 1:
+            ignore_list = ["cuda"]  # if +acc is provided, ignore the cuda model
+            model = list(set(model_list) - set(ignore_list))
+            # e.g. choose 'thrust' from ['cuda', 'thrust']
+            args = ["-DMODEL=" + model[0]]
+        else:
+            # +stddata, +stdindices and +stdranges have to be passed to CMake
+            # as std-data, std-indices and std-ranges
+            if "std" in model_list[0]:
+                args = ["-DMODEL=" + "std-" + model_list[0].split("d", 1)[1]]
+            else:
+                args = ["-DMODEL=" + model_list[0]]
+
+        # ===================================
+        # ACC
+        # ===================================
+        if ("+acc" in self.spec) and ("~cuda" in self.spec):
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+            if "cuda_arch" in self.spec.variants:
+                cuda_arch_list = self.spec.variants["cuda_arch"].value
+                # the architecture value is only a number, so prefix it with "cc"
+                cuda_arch = "cc" + cuda_arch_list[0]
+                args.append("-DTARGET_DEVICE=gpu")
+                args.append("-DCUDA_ARCH=" + cuda_arch)
+            elif "cpu_arch" in self.spec.variants:
+                # the cpu_arch variant holds the target processor name directly
+                cpu_arch = self.spec.variants["cpu_arch"].value
+                args.append("-DTARGET_DEVICE=multicore")
+                args.append("-DTARGET_PROCESSOR=" + cpu_arch)
+
+        # ===================================
+        # STDdata, STDindices, STDranges
+        # ===================================
+        std_list = ["+stddata", "+stdindices", "+stdranges"]
+        if spec_string.startswith(tuple(std_list)):
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+
+        # ===================================
+        # CUDA
+        # ===================================
+
+        if ("+cuda" in self.spec) and ("~kokkos" in self.spec) and ("~acc" in self.spec):
+            # Set up the CUDA macros needed by the build
+            cuda_arch_list = self.spec.variants["cuda_arch"].value
+            # the architecture value is only a number, so prefix it with "sm_"
+            cuda_arch = "sm_" + cuda_arch_list[0]
+            args.append("-DCUDA_ARCH=" + cuda_arch)
+            cuda_dir = self.spec["cuda"].prefix
+            cuda_comp = cuda_dir + "/bin/nvcc"
+            args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
+            args.append("-DMEM=" + self.spec.variants["mem"].value)
+            if self.spec.variants["flags"].value != "none":
+                args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
+
+        # ===================================
+        # OMP
+        # ===================================
+        # The ~kokkos and ~raja guards prevent +kokkos+omp or +raja+omp specs from
+        # selecting the plain OpenMP model here
+        if ("+omp" in self.spec) and ("~kokkos" in self.spec) and ("~raja" in self.spec):
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+            if "cuda_arch" in self.spec.variants:
+                cuda_arch_list = self.spec.variants["cuda_arch"].value
+                # the architecture value is only a number, so prefix it with "sm_"
+                cuda_arch = "sm_" + cuda_arch_list[0]
+                args.append("-DOFFLOAD=NVIDIA:" + cuda_arch)
+            elif "amdgpu_target" in self.spec.variants:
+                # amdgpu_target already holds the full architecture name
+                rocm_arch = self.spec.variants["amdgpu_target"].value
+                args.append("-DOFFLOAD=AMD:" + rocm_arch)
+            else:
+                args.append("-DOFFLOAD=INTEL")
+
+        # ===================================
+        # SYCL
+        # ===================================
+
+        if "+sycl" in self.spec:
+            args.append("-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper())
+            if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP":
+                args.append(
+                    "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper()
+                )
+                if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP":
+                    args.append("-DOpenCL_LIBRARY=")
+
+        # ===================================
+        # SYCL 2020
+        # ===================================
+
+        if "+sycl2020" in self.spec:
+            if self.spec.satisfies("%oneapi"):
+                # the -fsycl flag is required to find sycl/sycl.hpp; it is not
+                # added by the CMake file
+                args.append("-DCXX_EXTRA_FLAGS= -fsycl -O3")
+                # this is required so that CMake sets CMAKE_CXX_COMPILER=icpx
+                args.append("-DSYCL_COMPILER=ONEAPI-ICPX")
+            else:
+                args.append(
+                    "-DSYCL_COMPILER=" + self.spec.variants["implementation"].value.upper()
+                )
+                if self.spec.variants["implementation"].value.upper() != "ONEAPI-DPCPP":
+                    args.append(
+                        "-DSYCL_COMPILER_DIR=" + self.spec.variants["implementation"].value.upper()
+                    )
+                    if self.spec.variants["implementation"].value.upper() == "COMPUTE-CPP":
+                        args.append("-DOpenCL_LIBRARY=")
+
+        # ===================================
+        # HIP (ROCm)
+        # ===================================
+
+        if "+rocm" in self.spec:
+            hip_comp = self.spec["hip"].prefix + "/bin/hipcc"
+            args.append("-DCMAKE_CXX_COMPILER=" + hip_comp)
+            args.append(
+                "-DCXX_EXTRA_FLAGS= --offload-arch="
+                + self.spec.variants["amdgpu_target"].value
+                + " "
+                + self.spec.variants["flags"].value
+                + " -O3"
+            )
+
+        # ===================================
+        # TBB
+        # ===================================
+
+        if "+tbb" in self.spec:
+            args.append("-DONE_TBB_DIR=" + self.spec["tbb"].prefix + "/tbb/latest/")
+            args.append("-DPARTITIONER=" + self.spec.variants["partitioner"].value.upper())
+
+        # ===================================
+        # OpenCL (ocl)
+        # ===================================
+        if "+ocl" in self.spec:
+            if "backend" in self.spec.variants:
+                if "cuda" in self.spec.variants["backend"].value:
+                    cuda_dir = self.spec["cuda"].prefix
+                    args.append("-DOpenCL_LIBRARY=" + cuda_dir + "/lib64/libOpenCL.so")
+                elif "amd" in self.spec.variants["backend"].value:
+                    rocm_dir = self.spec["rocm-opencl"].prefix
+                    args.append("-DOpenCL_LIBRARY=" + rocm_dir + "/lib64/libOpenCL.so")
+                elif "intel" in self.spec.variants["backend"].value:
+                    intel_lib = (
+                        self.spec["intel-oneapi-compilers"].prefix
+                        + "/compiler/2023.0.0/linux/lib/libOpenCL.so"
+                    )
+                    args.append("-DOpenCL_LIBRARY=" + intel_lib)
+                elif "pocl" in self.spec.variants["backend"].value:
+                    pocl_lib = self.spec["pocl"].prefix + "/lib64/libOpenCL.so"
+                    args.append("-DOpenCL_LIBRARY=" + pocl_lib)
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+
+        # ===================================
+        # RAJA
+        # ===================================
+        if "+raja" in self.spec:
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+            args.append("-DRAJA_IN_TREE=" + self.spec.variants["dir"].value)
+            if "offload" in self.spec.variants:
+                if "nvidia" in self.spec.variants["offload"].value:
+                    cuda_dir = self.spec["cuda"].prefix
+                    cuda_comp = cuda_dir + "/bin/nvcc"
+                    args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
+                    args.append("-DTARGET=NVIDIA")
+                    cuda_arch_list = self.spec.variants["cuda_arch"].value
+                    # the architecture value is only a number, so prefix it with "sm_"
+                    cuda_arch = "sm_" + cuda_arch_list[0]
+                    args.append("-DCUDA_ARCH=" + cuda_arch)
+
+                    args.append("-DCUDA_TOOLKIT_ROOT_DIR=" + self.spec["cuda"].prefix)
+                    if self.spec.variants["flags"].value != "none":
+                        args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
+                # if "cpu" in self.spec.variants["offload"].value:
+
+            if "omp" in self.spec.variants["backend"].value:
+                args.append("-DENABLE_OPENMP=ON")
+            if "cuda" in self.spec.variants["backend"].value:
+                args.append("-DENABLE_CUDA=ON")
+
+        # ===================================
+        # THRUST
+        # ===================================
+        if "+thrust" in self.spec:
+            if "cuda" in self.spec.variants["implementation"].value:
+                args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper())
+                args.append("-DSDK_DIR=" + self.spec["thrust"].prefix + "/include")
+                cuda_arch_list = self.spec.variants["cuda_arch"].value
+                # the architecture value is only a number, so prefix it with "sm_"
+                cuda_arch = "sm_" + cuda_arch_list[0]
+                args.append("-DCUDA_ARCH=" + cuda_arch)
+                cuda_dir = self.spec["cuda"].prefix
+                cuda_comp = cuda_dir + "/bin/nvcc"
+                args.append("-DCMAKE_CUDA_COMPILER=" + cuda_comp)
+                args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper())
+                if self.spec.variants["flags"].value != "none":
+                    args.append("-DCUDA_EXTRA_FLAGS=" + self.spec.variants["flags"].value)
+
+            if "rocm" in self.spec.variants["implementation"].value:
+                args.append("-DTHRUST_IMPL=" + self.spec.variants["implementation"].value.upper())
+                args.append("-DSDK_DIR=" + self.spec["rocthrust"].prefix)
+                args.append("-DBACKEND=" + self.spec.variants["backend"].value.upper())
+
+        # ===================================
+        # kokkos
+        # ===================================
+        # The Kokkos implementation is versatile and can use either the CUDA or the
+        # OpenMP architecture as its backend.
+        # Usage: spack install babelstream +kokkos +cuda [or +omp]
+        if "+kokkos" in self.spec:
+            args.append("-DCMAKE_CXX_COMPILER=" + self.compiler.cxx)
+            args.append("-DKOKKOS_IN_TREE=" + self.spec.variants["dir"].value)
+            # args.append("-DKOKKOS_IN_PACKAGE=" + self.spec["kokkos"].prefix)
+            if "backend" in self.spec.variants:
+                if "cuda" in self.spec.variants["backend"].value:
+                    args.append("-DKokkos_ENABLE_CUDA=ON")
+                    cuda_arch_list = self.spec.variants["cuda_arch"].value
+                    int_cuda_arch = int(cuda_arch_list[0])
+                    # architecture-specific Kepler optimisations
+                    if int_cuda_arch in (30, 32, 35, 37):
+                        args.append("-D" + "Kokkos_ARCH_KEPLER" + str(int_cuda_arch) + "=ON")
+                    # architecture-specific Maxwell optimisations
+                    if int_cuda_arch in (50, 52, 53):
+                        args.append("-D" + "Kokkos_ARCH_MAXWELL" + str(int_cuda_arch) + "=ON")
+                    # architecture-specific Pascal optimisations
+                    if int_cuda_arch in (60, 61):
+                        args.append("-D" + "Kokkos_ARCH_PASCAL" + str(int_cuda_arch) + "=ON")
+                    # architecture-specific Volta optimisations
+                    if int_cuda_arch in (70, 72):
+                        args.append("-D" + "Kokkos_ARCH_VOLTA" + str(int_cuda_arch) + "=ON")
+                    if int_cuda_arch == 75:
+                        args.append("-DKokkos_ARCH_TURING75=ON")
+                if "omp" in self.spec.variants["backend"].value:
+                    args.append("-DKokkos_ENABLE_OPENMP=ON")
+
+        # if the model is not one of kokkos, raja, acc or hip, force the CXX compiler
+        if not set(model_list).intersection(["kokkos", "raja", "acc", "hip"]):
+            args.append("-DCMAKE_CXX_COMPILER_FORCED=True")
+
+        return args

--
cgit v1.2.3-70-g09d2
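
Editorial note: the snippet below is a minimal standalone sketch (outside of Spack) of how cmake_args derives the -DMODEL value from the spec string. It mirrors find_model_flag and the std-* translation from the patch; the helper name pick_model and the example spec strings are illustrative assumptions, not part of the package.

    import re


    def find_model_flag(spec_str):
        # Same regex as the package: collect the names that follow "+" in a spec string.
        return re.findall(r"\+(\w+)", spec_str)


    def pick_model(spec_str):
        # Take only the first whitespace-separated token of the spec string.
        models = find_model_flag(spec_str.split(" ", 1)[0])
        if len(models) > 1:
            # e.g. "+cuda+thrust": drop "cuda" and keep the other model.
            models = list(set(models) - {"cuda"})
        model = models[0]
        # "+stddata"/"+stdindices"/"+stdranges" map to std-data/std-indices/std-ranges.
        if "std" in model:
            model = "std-" + model.split("d", 1)[1]
        return "-DMODEL=" + model


    print(pick_model("babelstream+cuda+thrust"))  # -DMODEL=thrust
    print(pick_model("babelstream+stddata"))      # -DMODEL=std-data
    print(pick_model("babelstream+omp"))          # -DMODEL=omp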
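Usage note (editorial, not part of the patch): under the variants and conflicts above, representative specs would be "spack install babelstream+omp", "spack install babelstream+cuda cuda_arch=70" (expected to yield -DMODEL=cuda, -DCUDA_ARCH=sm_70, -DMEM=DEFAULT and -DCMAKE_CUDA_COMPILER pointing at the CUDA dependency's nvcc), and "spack install babelstream+kokkos+cuda backend=cuda cuda_arch=70 dir=/path/to/kokkos-source", where /path/to/kokkos-source is a placeholder for the in-tree Kokkos checkout required by the dir= conflict. These command lines are illustrative and have not been run.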