summaryrefslogtreecommitdiff
path: root/var/spack/repos/builtin/packages/aluminum/package.py
blob: 63275c66ed9c5f2196459c95fbe26c0638a1b5eb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import os

import spack.platforms.cray
from spack.package import *


class Aluminum(CachedCMakePackage, CudaPackage, ROCmPackage):
    """Aluminum provides a generic interface to high-performance
    communication libraries, with a focus on allreduce
    algorithms. Blocking and non-blocking algorithms and GPU-aware
    algorithms are supported. Aluminum also contains custom
    implementations of select algorithms to optimize for certain
    situations."""

    homepage = "https://github.com/LLNL/Aluminum"
    url = "https://github.com/LLNL/Aluminum/archive/v1.0.0.tar.gz"
    git = "https://github.com/LLNL/Aluminum.git"
    tags = ["ecp", "radiuss"]

    maintainers("benson31", "bvanessen")

    license("Apache-2.0")

    version("master", branch="master")
    version("1.4.1", sha256="d130a67fef1cb7a9cb3bbec1d0de426f020fe68c9df6e172c83ba42281cd90e3")
    version("1.4.0", sha256="ac54de058f38cead895ec8163f7b1fa7674e4dc5aacba683a660a61babbfe0c6")
    version("1.3.1", sha256="28ce0af6c6f29f97b7f19c5e45184bd2f8a0b1428f1e898b027d96d47cb74b0b")
    version("1.3.0", sha256="d0442efbebfdfb89eec793ae65eceb8f1ba65afa9f2e48df009f81985a4c27e3")
    version("1.2.3", sha256="9b214bdf30f9b7e8e017f83e6615db6be2631f5be3dd186205dbe3aa62f4018a")

    # Library capabilities
    variant(
        "cuda_rma",
        default=False,
        when="+cuda",
        description="Builds with support for CUDA intra-node "
        " Put/Get and IPC RMA functionality",
    )
    variant(
        "ht",
        default=False,
        description="Builds with support for host-enabled MPI"
        " communication of accelerator data",
    )
    variant("nccl", default=False, description="Builds with support for NCCL communication lib")
    variant("shared", default=True, description="Build Aluminum as a shared library")

    # Debugging features
    variant("hang_check", default=False, description="Enable hang checking")
    variant("trace", default=False, description="Enable runtime tracing")

    # Profiler support
    variant("nvtx", default=False, when="+cuda", description="Enable profiling via nvprof/NVTX")
    variant(
        "roctracer", default=False, when="+rocm", description="Enable profiling via rocprof/roctx"
    )

    # Advanced options
    variant("mpi_serialize", default=False, description="Serialize MPI operations")
    variant("stream_mem_ops", default=False, description="Enable stream memory operations")
    variant(
        "thread_multiple",
        default=False,
        description="Allow multiple threads to call Aluminum concurrently",
    )

    # Benchmark/testing support
    variant(
        "benchmarks",
        default=False,
        description="Build the Aluminum benchmarking drivers "
        "(warning: may significantly increase build time!)",
    )
    variant(
        "tests",
        default=False,
        description="Build the Aluminum test drivers "
        "(warning: may moderately increase build time!)",
    )

    # FIXME: Do we want to expose tuning parameters to the Spack
    # recipe? Some are numeric values, some are on/off switches.

    # NOTE: redundant with the `when="+cuda"` on the cuda_rma variant,
    # but kept for the explicit error message it produces.
    conflicts("~cuda", when="+cuda_rma", msg="CUDA RMA support requires CUDA")
    conflicts("+cuda", when="+rocm", msg="CUDA and ROCm support are mutually exclusive")

    depends_on("mpi")

    depends_on("cmake@3.21.0:", type="build", when="@1.0.1:")
    depends_on("hwloc@1.11:")

    with when("+cuda"):
        # CUDA 11+ bundles CUB; older toolkits need it as a separate package.
        depends_on("cub", when="^cuda@:10")
        depends_on("hwloc +cuda +nvml")
        with when("+nccl"):
            depends_on("nccl@2.7.0-0:")
            # Propagate each requested CUDA arch to NCCL.
            for arch in CudaPackage.cuda_arch_values:
                depends_on(
                    "nccl +cuda cuda_arch={0}".format(arch),
                    when="+cuda cuda_arch={0}".format(arch),
                )
            # On Cray Slingshot systems, NCCL needs the libfabric plugin
            # to use the high-speed network.
            if spack.platforms.cray.slingshot_network():
                depends_on("aws-ofi-nccl")  # Note: NOT a CudaPackage

    with when("+rocm"):
        # Propagate each requested AMD GPU target to the ROCm dependencies.
        for val in ROCmPackage.amdgpu_targets:
            depends_on(
                "hipcub +rocm amdgpu_target={0}".format(val), when="amdgpu_target={0}".format(val)
            )
            depends_on(
                "hwloc@2.3.0: +rocm amdgpu_target={0}".format(val),
                when="amdgpu_target={0}".format(val),
            )
            # RCCL is *NOT* implemented as a ROCmPackage
            depends_on(
                "rccl amdgpu_target={0}".format(val), when="+nccl amdgpu_target={0}".format(val)
            )
            depends_on(
                "roctracer-dev +rocm amdgpu_target={0}".format(val),
                when="+roctracer amdgpu_target={0}".format(val),
            )
        # On Cray Slingshot systems, RCCL needs the libfabric plugin
        # to use the high-speed network.
        if spack.platforms.cray.slingshot_network():
            depends_on("aws-ofi-rccl", when="+nccl")

    def cmake_args(self):
        """Return extra CMake arguments.

        All configuration is routed through the CMake initial-cache file
        generated by the initconfig_* hooks below, so nothing is added here.
        """
        return []

    def get_cuda_flags(self):
        """Collect extra flags to pass to nvcc via CMAKE_CUDA_FLAGS."""
        spec = self.spec
        args = []
        if spec.satisfies("^cuda+allow-unsupported-compilers"):
            args.append("-allow-unsupported-compiler")

        # When clang is given a GCC toolchain, nvcc's host compiler needs
        # the same flag forwarded through -Xcompiler.
        if spec.satisfies("%clang"):
            for flag in spec.compiler_flags["cxxflags"]:
                if "gcc-toolchain" in flag:
                    args.append("-Xcompiler={0}".format(flag))
        return args

    def std_initconfig_entries(self):
        """Override the standard initial-cache entries.

        Replaces the inherited CMAKE_PREFIX_PATH entry: in CMake types it
        is a "STRING", not a "PATH". :/  The value is rebuilt from the
        build environment, converting the ':'-separated env format to
        CMake's ';'-separated list format.
        """
        entries = super().std_initconfig_entries()

        entries = [x for x in entries if "CMAKE_PREFIX_PATH" not in x]
        # NOTE(review): assumes CMAKE_PREFIX_PATH is always set in the
        # Spack build environment; a missing variable raises KeyError here.
        cmake_prefix_path = os.environ["CMAKE_PREFIX_PATH"].replace(":", ";")
        entries.append(cmake_cache_string("CMAKE_PREFIX_PATH", cmake_prefix_path))
        return entries

    def initconfig_compiler_entries(self):
        """Add compiler-related entries to the CMake initial cache."""
        spec = self.spec
        entries = super().initconfig_compiler_entries()

        # FIXME: Enforce this better in the actual CMake.
        entries.append(cmake_cache_string("CMAKE_CXX_STANDARD", "17"))
        entries.append(cmake_cache_option("BUILD_SHARED_LIBS", spec.satisfies("+shared")))
        entries.append(cmake_cache_option("CMAKE_EXPORT_COMPILE_COMMANDS", True))
        entries.append(cmake_cache_option("MPI_ASSUME_NO_BUILTIN_MPI", True))

        return entries

    def initconfig_hardware_entries(self):
        """Add GPU (CUDA/ROCm) entries to the CMake initial cache."""
        spec = self.spec
        entries = super().initconfig_hardware_entries()

        entries.append(cmake_cache_option("ALUMINUM_ENABLE_CUDA", spec.satisfies("+cuda")))
        if spec.satisfies("+cuda"):
            entries.append(cmake_cache_string("CMAKE_CUDA_STANDARD", "17"))
            if not spec.satisfies("cuda_arch=none"):
                archs = spec.variants["cuda_arch"].value
                entries.append(cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", ";".join(archs)))

            # FIXME: Should this use the "cuda_flags" function of the
            # CudaPackage class or something? There might be other
            # flags in play, and we need to be sure to get them all.
            cuda_flags = self.get_cuda_flags()
            if cuda_flags:
                entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))

        entries.append(cmake_cache_option("ALUMINUM_ENABLE_ROCM", spec.satisfies("+rocm")))
        if spec.satisfies("+rocm"):
            entries.append(cmake_cache_string("CMAKE_HIP_STANDARD", "17"))
            if not spec.satisfies("amdgpu_target=none"):
                archs = spec.variants["amdgpu_target"].value
                arch_str = ";".join(archs)
                # Cover the spelling used by CMake itself plus the two
                # used by various ROCm libraries' CMake code.
                entries.append(cmake_cache_string("CMAKE_HIP_ARCHITECTURES", arch_str))
                entries.append(cmake_cache_string("AMDGPU_TARGETS", arch_str))
                entries.append(cmake_cache_string("GPU_TARGETS", arch_str))
            entries.append(cmake_cache_path("HIP_ROOT_DIR", spec["hip"].prefix))

        return entries

    def initconfig_package_entries(self):
        """Map each Spack variant onto its ALUMINUM_* CMake cache option."""
        spec = self.spec
        entries = super().initconfig_package_entries()

        # Library capabilities
        entries.append(cmake_cache_option("ALUMINUM_ENABLE_MPI_CUDA", spec.satisfies("+cuda_rma")))
        entries.append(
            cmake_cache_option("ALUMINUM_ENABLE_MPI_CUDA_RMA", spec.satisfies("+cuda_rma"))
        )
        entries.append(cmake_cache_option("ALUMINUM_ENABLE_HOST_TRANSFER", spec.satisfies("+ht")))
        entries.append(cmake_cache_option("ALUMINUM_ENABLE_NCCL", spec.satisfies("+nccl")))

        # Debugging features
        entries.append(
            cmake_cache_option("ALUMINUM_DEBUG_HANG_CHECK", spec.satisfies("+hang_check"))
        )
        entries.append(cmake_cache_option("ALUMINUM_ENABLE_TRACE", spec.satisfies("+trace")))

        # Profiler support
        entries.append(cmake_cache_option("ALUMINUM_ENABLE_NVPROF", spec.satisfies("+nvtx")))
        entries.append(
            cmake_cache_option("ALUMINUM_ENABLE_ROCTRACER", spec.satisfies("+roctracer"))
        )

        # Advanced options
        entries.append(
            cmake_cache_option("ALUMINUM_MPI_SERIALIZE", spec.satisfies("+mpi_serialize"))
        )
        entries.append(
            cmake_cache_option("ALUMINUM_ENABLE_STREAM_MEM_OPS", spec.satisfies("+stream_mem_ops"))
        )
        entries.append(
            cmake_cache_option(
                "ALUMINUM_ENABLE_THREAD_MULTIPLE", spec.satisfies("+thread_multiple")
            )
        )

        # Benchmark/testing support
        entries.append(
            cmake_cache_option("ALUMINUM_ENABLE_BENCHMARKS", spec.satisfies("+benchmarks"))
        )
        entries.append(cmake_cache_option("ALUMINUM_ENABLE_TESTS", spec.satisfies("+tests")))

        return entries