diff options
Diffstat (limited to 'var/spack/repos/builtin/packages/aluminum/package.py')
-rw-r--r-- | var/spack/repos/builtin/packages/aluminum/package.py | 353 |
1 files changed, 176 insertions, 177 deletions
diff --git a/var/spack/repos/builtin/packages/aluminum/package.py b/var/spack/repos/builtin/packages/aluminum/package.py index bb085f8681..7b15b23856 100644 --- a/var/spack/repos/builtin/packages/aluminum/package.py +++ b/var/spack/repos/builtin/packages/aluminum/package.py @@ -9,7 +9,7 @@ import spack.platforms.cray from spack.package import * -class Aluminum(CMakePackage, CudaPackage, ROCmPackage): +class Aluminum(CachedCMakePackage, CudaPackage, ROCmPackage): """Aluminum provides a generic interface to high-performance communication libraries, with a focus on allreduce algorithms. Blocking and non-blocking algorithms and GPU-aware @@ -22,208 +22,207 @@ class Aluminum(CMakePackage, CudaPackage, ROCmPackage): git = "https://github.com/LLNL/Aluminum.git" tags = ["ecp", "radiuss"] - maintainers("bvanessen") + maintainers("benson31", "bvanessen") version("master", branch="master") + version("1.4.1", sha256="d130a67fef1cb7a9cb3bbec1d0de426f020fe68c9df6e172c83ba42281cd90e3") + version("1.4.0", sha256="ac54de058f38cead895ec8163f7b1fa7674e4dc5aacba683a660a61babbfe0c6") version("1.3.1", sha256="28ce0af6c6f29f97b7f19c5e45184bd2f8a0b1428f1e898b027d96d47cb74b0b") version("1.3.0", sha256="d0442efbebfdfb89eec793ae65eceb8f1ba65afa9f2e48df009f81985a4c27e3") version("1.2.3", sha256="9b214bdf30f9b7e8e017f83e6615db6be2631f5be3dd186205dbe3aa62f4018a") - version( - "1.2.2", - sha256="c01d9dd98be4cab9b944bae99b403abe76d65e9e1750e7f23bf0105636ad5485", - deprecated=True, - ) - version( - "1.2.1", - sha256="869402708c8a102a67667b83527b4057644a32b8cdf4990bcd1a5c4e5f0e30af", - deprecated=True, - ) - version( - "1.2.0", - sha256="2f3725147f4dbc045b945af68d3d747f5dffbe2b8e928deed64136785210bc9a", - deprecated=True, - ) - version( - "1.1.0", - sha256="78b03e36e5422e8651f400feb4d8a527f87302db025d77aa37e223be6b9bdfc9", - deprecated=True, - ) - version("1.0.0-lbann", tag="v1.0.0-lbann", commit="40a062b1f63e84e074489c0f926f36b806c6b8f3") - version("1.0.0", sha256="028d12e271817214db5c07c77b0528f88862139c3e442e1b12f58717290f414a") - version( - "0.7.0", - sha256="bbb73d2847c56efbe6f99e46b41d837763938483f2e2d1982ccf8350d1148caa", - deprecated=True, - ) - version( - "0.6.0", - sha256="6ca329951f4c7ea52670e46e5020e7e7879d9b56fed5ff8c5df6e624b313e925", - deprecated=True, - ) - version( - "0.5.0", - sha256="dc365a5849eaba925355a8efb27005c5f22bcd1dca94aaed8d0d29c265c064c1", - deprecated=True, - ) - version( - "0.4.0", - sha256="4d6fab5481cc7c994b32fb23a37e9ee44041a9f91acf78f981a97cb8ef57bb7d", - deprecated=True, - ) - version( - "0.3.3", - sha256="26e7f263f53c6c6ee0fe216e981a558dfdd7ec997d0dd2a24285a609a6c68f3b", - deprecated=True, - ) - version( - "0.3.2", - sha256="09b6d1bcc02ac54ba269b1123eee7be20f0104b93596956c014b794ba96b037f", - deprecated=True, - ) - version( - "0.2.1-1", - sha256="066b750e9d1134871709a3e2414b96b166e0e24773efc7d512df2f1d96ee8eef", - deprecated=True, - ) - version( - "0.2.1", - sha256="3d5d15853cccc718f60df68205e56a2831de65be4d96e7f7e8497097e7905f89", - deprecated=True, - ) - version( - "0.2", - sha256="fc8f06c6d8faab17a2aedd408d3fe924043bf857da1094d5553f35c4d2af893b", - deprecated=True, - ) - version( - "0.1", - sha256="3880b736866e439dd94e6a61eeeb5bb2abccebbac82b82d52033bc6c94950bdb", - deprecated=True, - ) - variant("nccl", default=False, description="Builds with support for NCCL communication lib") + # Library capabilities + variant( + "cuda_rma", + default=False, + when="+cuda", + description="Builds with support for CUDA intra-node " + " Put/Get and IPC RMA functionality", + ) variant( "ht", default=False, description="Builds with support for host-enabled MPI" " communication of accelerator data", ) + variant("nccl", default=False, description="Builds with support for NCCL communication lib") + variant("shared", default=True, description="Build Aluminum as a shared library") + + # Debugging features + variant("hang_check", default=False, description="Enable hang checking") + variant("trace", default=False, description="Enable runtime tracing") + + # Profiler support + variant("nvtx", default=False, when="+cuda", description="Enable profiling via nvprof/NVTX") variant( - "cuda_rma", + "roctracer", default=False, when="+rocm", description="Enable profiling via rocprof/roctx" + ) + + # Advanced options + variant("mpi_serialize", default=False, description="Serialize MPI operations") + variant("stream_mem_ops", default=False, description="Enable stream memory operations") + variant( + "thread_multiple", default=False, - description="Builds with support for CUDA intra-node " - " Put/Get and IPC RMA functionality", + description="Allow multiple threads to call Aluminum concurrently", ) - variant("rccl", default=False, description="Builds with support for RCCL communication lib") + + # Benchmark/testing support variant( - "ofi_libfabric_plugin", - default=spack.platforms.cray.slingshot_network(), - when="+rccl", - sticky=True, - description="Builds with support for OFI libfabric enhanced RCCL/NCCL communication lib", + "benchmarks", + default=False, + description="Build the Aluminum benchmarking drivers " + "(warning: may significantly increase build time!)", ) variant( - "ofi_libfabric_plugin", - default=spack.platforms.cray.slingshot_network(), - when="+nccl", - sticky=True, - description="Builds with support for OFI libfabric enhanced RCCL/NCCL communication lib", + "tests", + default=False, + description="Build the Aluminum test drivers " + "(warning: may moderately increase build time!)", ) - depends_on("cmake@3.21.0:", type="build", when="@1.0.1:") - depends_on("cmake@3.17.0:", type="build", when="@:1.0.0") - depends_on("mpi") - depends_on("nccl@2.7.0-0:", when="+nccl") - depends_on("hwloc@1.11:") - depends_on("hwloc +cuda +nvml", when="+cuda") - depends_on("hwloc@2.3.0:", when="+rocm") - depends_on("cub", when="@:0.1,0.6.0: +cuda ^cuda@:10") - depends_on("hipcub", when="@:0.1,0.6.0: +rocm") - - depends_on("rccl", when="+rccl") - depends_on("aws-ofi-rccl", when="+rccl +ofi_libfabric_plugin") - depends_on("aws-ofi-nccl", when="+nccl +ofi_libfabric_plugin") + # FIXME: Do we want to expose tuning parameters to the Spack + # recipe? Some are numeric values, some are on/off switches. conflicts("~cuda", when="+cuda_rma", msg="CUDA RMA support requires CUDA") conflicts("+cuda", when="+rocm", msg="CUDA and ROCm support are mutually exclusive") - conflicts("+nccl", when="+rccl", msg="NCCL and RCCL support are mutually exclusive") - generator("ninja") + depends_on("mpi") - def cmake_args(self): - spec = self.spec - args = [ - "-DCMAKE_CXX_STANDARD:STRING=17", - "-DALUMINUM_ENABLE_CUDA:BOOL=%s" % ("+cuda" in spec), - "-DALUMINUM_ENABLE_NCCL:BOOL=%s" % ("+nccl" in spec or "+rccl" in spec), - "-DALUMINUM_ENABLE_ROCM:BOOL=%s" % ("+rocm" in spec), - ] - - if not spec.satisfies("^cmake@3.23.0"): - # There is a bug with using Ninja generator in this version - # of CMake - args.append("-DCMAKE_EXPORT_COMPILE_COMMANDS=ON") - - if "+cuda" in spec: - if self.spec.satisfies("%clang"): - for flag in self.spec.compiler_flags["cxxflags"]: - if "gcc-toolchain" in flag: - args.append("-DCMAKE_CUDA_FLAGS=-Xcompiler={0}".format(flag)) - if spec.satisfies("^cuda@11.0:"): - args.append("-DCMAKE_CUDA_STANDARD=17") - else: - args.append("-DCMAKE_CUDA_STANDARD=14") - archs = spec.variants["cuda_arch"].value - if archs != "none": - arch_str = ";".join(archs) - args.append("-DCMAKE_CUDA_ARCHITECTURES=%s" % arch_str) - - if spec.satisfies("%cce") and spec.satisfies("^cuda+allow-unsupported-compilers"): - args.append("-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler") - - if spec.satisfies("@0.5:"): - args.extend( - [ - "-DALUMINUM_ENABLE_HOST_TRANSFER:BOOL=%s" % ("+ht" in spec), - "-DALUMINUM_ENABLE_MPI_CUDA:BOOL=%s" % ("+cuda_rma" in spec), - "-DALUMINUM_ENABLE_MPI_CUDA_RMA:BOOL=%s" % ("+cuda_rma" in spec), - ] + depends_on("cmake@3.21.0:", type="build", when="@1.0.1:") + depends_on("hwloc@1.11:") + + with when("+cuda"): + depends_on("cub", when="^cuda@:10") + depends_on("hwloc +cuda +nvml") + with when("+nccl"): + depends_on("nccl@2.7.0-0:") + for arch in CudaPackage.cuda_arch_values: + depends_on( + "nccl +cuda cuda_arch={0}".format(arch), + when="+cuda cuda_arch={0}".format(arch), + ) + if spack.platforms.cray.slingshot_network(): + depends_on("aws-ofi-nccl") # Note: NOT a CudaPackage + + with when("+rocm"): + for val in ROCmPackage.amdgpu_targets: + depends_on( + "hipcub +rocm amdgpu_target={0}".format(val), when="amdgpu_target={0}".format(val) ) - else: - args.append("-DALUMINUM_ENABLE_MPI_CUDA:BOOL=%s" % ("+ht" in spec)) - - if spec.satisfies("@:0.1,0.6.0: +cuda ^cuda@:10"): - args.append("-DCUB_DIR:FILEPATH=%s" % spec["cub"].prefix) - - # Add support for OS X to find OpenMP (LLVM installed via brew) - if self.spec.satisfies("%clang platform=darwin"): - clang = self.compiler.cc - clang_bin = os.path.dirname(clang) - clang_root = os.path.dirname(clang_bin) - args.extend(["-DOpenMP_DIR={0}".format(clang_root)]) - - if "+rocm" in spec: - args.extend( - [ - "-DHIP_ROOT_DIR={0}".format(spec["hip"].prefix), - "-DHIP_CXX_COMPILER={0}".format(self.spec["hip"].hipcc), - "-DCMAKE_CXX_FLAGS=-std=c++17", - ] + depends_on( + "hwloc@2.3.0: +rocm amdgpu_target={0}".format(val), + when="amdgpu_target={0}".format(val), ) - archs = self.spec.variants["amdgpu_target"].value - if archs != "none": - arch_str = ",".join(archs) - if spec.satisfies("%rocmcc@:5"): - args.append( - "-DHIP_HIPCC_FLAGS=--amdgpu-target={0}" - " -g -fsized-deallocation -fPIC -std=c++17".format(arch_str) - ) - args.extend( - [ - "-DCMAKE_HIP_ARCHITECTURES=%s" % arch_str, - "-DAMDGPU_TARGETS=%s" % arch_str, - "-DGPU_TARGETS=%s" % arch_str, - ] - ) + # RCCL is *NOT* implented as a ROCmPackage + depends_on( + "rccl amdgpu_target={0}".format(val), when="+nccl amdgpu_target={0}".format(val) + ) + depends_on( + "roctracer-dev +rocm amdgpu_target={0}".format(val), + when="+roctracer amdgpu_target={0}".format(val), + ) + if spack.platforms.cray.slingshot_network(): + depends_on("aws-ofi-rccl", when="+nccl") + def cmake_args(self): + args = [] return args + + def get_cuda_flags(self): + spec = self.spec + args = [] + if spec.satisfies("^cuda+allow-unsupported-compilers"): + args.append("-allow-unsupported-compiler") + + if spec.satisfies("%clang"): + for flag in spec.compiler_flags["cxxflags"]: + if "gcc-toolchain" in flag: + args.append("-Xcompiler={0}".format(flag)) + return args + + def std_initconfig_entries(self): + entries = super(Aluminum, self).std_initconfig_entries() + + # CMAKE_PREFIX_PATH, in CMake types, is a "STRING", not a "PATH". :/ + entries = [x for x in entries if "CMAKE_PREFIX_PATH" not in x] + cmake_prefix_path = os.environ["CMAKE_PREFIX_PATH"].replace(":", ";") + entries.append(cmake_cache_string("CMAKE_PREFIX_PATH", cmake_prefix_path)) + return entries + + def initconfig_compiler_entries(self): + spec = self.spec + entries = super(Aluminum, self).initconfig_compiler_entries() + + # FIXME: Enforce this better in the actual CMake. + entries.append(cmake_cache_string("CMAKE_CXX_STANDARD", "17")) + entries.append(cmake_cache_option("BUILD_SHARED_LIBS", "+shared" in spec)) + entries.append(cmake_cache_option("CMAKE_EXPORT_COMPILE_COMMANDS", True)) + entries.append(cmake_cache_option("MPI_ASSUME_NO_BUILTIN_MPI", True)) + + return entries + + def initconfig_hardware_entries(self): + spec = self.spec + entries = super(Aluminum, self).initconfig_hardware_entries() + + entries.append(cmake_cache_option("ALUMINUM_ENABLE_CUDA", "+cuda" in spec)) + if spec.satisfies("+cuda"): + entries.append(cmake_cache_string("CMAKE_CUDA_STANDARD", "17")) + if not spec.satisfies("cuda_arch=none"): + archs = spec.variants["cuda_arch"].value + arch_str = ";".join(archs) + entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", arch_str)) + + # FIXME: Should this use the "cuda_flags" function of the + # CudaPackage class or something? There might be other + # flags in play, and we need to be sure to get them all. + cuda_flags = self.get_cuda_flags() + if len(cuda_flags) > 0: + entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags))) + + entries.append(cmake_cache_option("ALUMINUM_ENABLE_ROCM", "+rocm" in spec)) + if spec.satisfies("+rocm"): + entries.append(cmake_cache_string("CMAKE_HIP_STANDARD", "17")) + if not spec.satisfies("amdgpu_target=none"): + archs = self.spec.variants["amdgpu_target"].value + arch_str = ";".join(archs) + entries.append(cmake_cache_string("CMAKE_HIP_ARCHITECTURES", arch_str)) + entries.append(cmake_cache_string("AMDGPU_TARGETS", arch_str)) + entries.append(cmake_cache_string("GPU_TARGETS", arch_str)) + entries.append(cmake_cache_path("HIP_ROOT_DIR", spec["hip"].prefix)) + + return entries + + def initconfig_package_entries(self): + spec = self.spec + entries = super(Aluminum, self).initconfig_package_entries() + + # Library capabilities + entries.append(cmake_cache_option("ALUMINUM_ENABLE_MPI_CUDA", "+cuda_rma" in spec)) + entries.append(cmake_cache_option("ALUMINUM_ENABLE_MPI_CUDA_RMA", "+cuda_rma" in spec)) + entries.append(cmake_cache_option("ALUMINUM_ENABLE_HOST_TRANSFER", "+ht" in spec)) + entries.append(cmake_cache_option("ALUMINUM_ENABLE_NCCL", "+nccl" in spec)) + + # Debugging features + entries.append(cmake_cache_option("ALUMINUM_DEBUG_HANG_CHECK", "+hang_check" in spec)) + entries.append(cmake_cache_option("ALUMINUM_ENABLE_TRACE", "+trace" in spec)) + + # Profiler support + entries.append(cmake_cache_option("ALUMINUM_ENABLE_NVPROF", "+nvtx" in spec)) + entries.append(cmake_cache_option("ALUMINUM_ENABLE_ROCTRACER", "+roctracer" in spec)) + + # Advanced options + entries.append(cmake_cache_option("ALUMINUM_MPI_SERIALIZE", "+mpi_serialize" in spec)) + entries.append( + cmake_cache_option("ALUMINUM_ENABLE_STREAM_MEM_OPS", "+stream_mem_ops" in spec) + ) + entries.append( + cmake_cache_option("ALUMINUM_ENABLE_THREAD_MULTIPLE", "+thread_multiple" in spec) + ) + + # Benchmark/testing support + entries.append(cmake_cache_option("ALUMINUM_ENABLE_BENCHMARKS", "+benchmarks" in spec)) + entries.append(cmake_cache_option("ALUMINUM_ENABLE_TESTS", "+tests" in spec)) + + return entries |