summaryrefslogtreecommitdiff
path: root/lib/spack/llnl/util/link_tree.py
blob: 816c9049b11bd05569b8ecde31d232573369a122 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

"""LinkTree class for setting up trees of symbolic links."""

import filecmp
import os
import shutil
from collections import OrderedDict

import llnl.util.tty as tty
from llnl.util.filesystem import BaseDirectoryVisitor, mkdirp, touch, traverse_tree
from llnl.util.symlink import islink, symlink

__all__ = ["LinkTree"]

empty_file_name = ".spack-empty"


def remove_link(src, dest):
    """Delete ``dest`` if it is a link that compares equal to ``src``.

    Args:
        src: path of the original file inside the source tree.
        dest: path of the candidate link inside the destination tree.

    Raises:
        ValueError: if ``dest`` is not a link according to ``islink``.
    """
    dest_is_link = islink(dest)
    if not dest_is_link:
        raise ValueError("%s is not a link tree!" % dest)

    # A shallow comparison is enough to tell whether dest is a
    # hardlink/symlink to src. The two only differ when several packages
    # were merged into the same prefix and ship a conflicting file; in
    # that case dest belongs to someone else and is left untouched.
    belongs_to_src = filecmp.cmp(src, dest, shallow=True)
    if not belongs_to_src:
        return

    os.remove(dest)


class MergeConflict:
    """
    Record of two sources that want to occupy the same destination path.

    Both ``src_a`` and ``src_b`` map to ``dst``, i.e. the following
    invariant is expected to hold:

        project(src_a) == project(src_b) == dst
    """

    def __init__(self, dst, src_a=None, src_b=None):
        # dst:   destination path both sources project to
        # src_a: the source that claimed dst first (may be None)
        # src_b: the source that collided with it (may be None)
        self.dst, self.src_a, self.src_b = dst, src_a, src_b


class SourceMergeVisitor(BaseDirectoryVisitor):
    """
    Visitor that produces actions:
    - An ordered list of directories to create in dst
    - A list of files to link in dst
    - A list of merge conflicts in dst/

    State accumulates across calls: every directory or file that is
    registered is checked against what has been registered before, which
    is how conflicts between sources are detected.
    """

    def __init__(self, ignore=None):
        """
        Args:
            ignore: optional callable that receives a source-relative path
                and returns True when that path must be skipped. By default
                nothing is ignored.
        """
        self.ignore = ignore if ignore is not None else lambda f: False

        # When mapping <src root> to <dst root>/<projection>, we need
        # to prepend the <projection> bit to the relative path in the
        # destination dir.
        self.projection = ""

        # When a file blocks another file, the conflict can sometimes
        # be resolved / ignored (e.g. <prefix>/LICENSE or
        # <site-packages>/<namespace>/__init__.py conflicts can be
        # ignored).
        self.file_conflicts = []

        # When we have to create a dir where a file is, or a file
        # where a dir is, we have fatal errors, listed here.
        self.fatal_conflicts = []

        # What directories we have to make; this is an ordered set,
        # so that we have a fast lookup and can run mkdir in order.
        # Maps dst_rel to (src_root, src_rel).
        self.directories = OrderedDict()

        # Files to link. Maps dst_rel to (src_root, src_rel)
        self.files = OrderedDict()

    @staticmethod
    def _is_same_or_below(parent, child):
        """Return True if ``child`` is ``parent`` itself or a path underneath it.

        The comparison is done on whole path components, so a sibling that
        merely shares a name prefix (``/a/lib`` vs. ``/a/lib64``) does not
        count as being below the parent.
        """
        try:
            return os.path.commonpath([parent, child]) == parent
        except ValueError:
            # commonpath refuses paths that cannot share an ancestor
            # (e.g. different drives); such a child is not below parent.
            return False

    def before_visit_dir(self, root, rel_path, depth):
        """
        Register a directory if dst / rel_path is not blocked by a file or ignored.

        Returns:
            True if the directory should be descended into, False when it is
            ignored or blocked by an already registered file (the latter is
            recorded as a fatal conflict).
        """
        proj_rel_path = os.path.join(self.projection, rel_path)

        if self.ignore(rel_path):
            # Don't recurse when dir is ignored.
            return False
        elif proj_rel_path in self.files:
            # Can't create a dir where a file is.
            src_a_root, src_a_relpath = self.files[proj_rel_path]
            self.fatal_conflicts.append(
                MergeConflict(
                    dst=proj_rel_path,
                    src_a=os.path.join(src_a_root, src_a_relpath),
                    src_b=os.path.join(root, rel_path),
                )
            )
            return False
        elif proj_rel_path in self.directories:
            # No new directory, carry on.
            return True
        else:
            # Register new directory.
            self.directories[proj_rel_path] = (root, rel_path)
            return True

    def before_visit_symlinked_dir(self, root, rel_path, depth):
        """
        Replace symlinked dirs with actual directories when possible in low depths,
        otherwise handle it as a file (i.e. we link to the symlink).

        Transforming symlinks into dirs makes it more likely we can merge directories,
        e.g. when <prefix>/lib -> <prefix>/subdir/lib.

        We only do this when the symlink is pointing into a subdirectory from the
        symlink's directory, to avoid potential infinite recursion; and only at a
        constant level of nesting, to avoid potential exponential blowups in file
        duplication.

        Returns:
            The result of ``before_visit_dir`` when the symlink is treated as a
            directory; otherwise False, after the symlink has been handed to
            ``visit_file``.
        """
        if self.ignore(rel_path):
            return False

        # Only follow symlinked dirs in <prefix>/**/**/*
        if depth > 1:
            handle_as_dir = False
        else:
            # Only follow symlinked dirs when pointing deeper. Compare whole
            # path components: a plain string-prefix test would also accept
            # a sibling such as <prefix>-other/lib for parent <prefix>.
            src = os.path.join(root, rel_path)
            real_parent = os.path.realpath(os.path.dirname(src))
            real_child = os.path.realpath(src)
            handle_as_dir = self._is_same_or_below(real_parent, real_child)

        if handle_as_dir:
            return self.before_visit_dir(root, rel_path, depth)

        self.visit_file(root, rel_path, depth)
        return False

    def visit_file(self, root, rel_path, depth):
        """
        Register a file to be linked, or record a conflict when its projected
        destination is already taken by a directory (fatal) or by another
        file (possibly resolvable). Ignored paths are skipped silently.
        """
        proj_rel_path = os.path.join(self.projection, rel_path)

        if self.ignore(rel_path):
            pass
        elif proj_rel_path in self.directories:
            # Can't create a file where a dir is; fatal error
            src_a_root, src_a_relpath = self.directories[proj_rel_path]
            self.fatal_conflicts.append(
                MergeConflict(
                    dst=proj_rel_path,
                    src_a=os.path.join(src_a_root, src_a_relpath),
                    src_b=os.path.join(root, rel_path),
                )
            )
        elif proj_rel_path in self.files:
            # In some cases we can resolve file-file conflicts
            src_a_root, src_a_relpath = self.files[proj_rel_path]
            self.file_conflicts.append(
                MergeConflict(
                    dst=proj_rel_path,
                    src_a=os.path.join(src_a_root, src_a_relpath),
                    src_b=os.path.join(root, rel_path),
                )
            )
        else:
            # Otherwise register this file to be linked.
            self.files[proj_rel_path] = (root, rel_path)

    def visit_symlinked_file(self, root, rel_path, depth):
        # Treat symlinked files as ordinary files (without "dereferencing")
        self.visit_file(root, rel_path, depth)

    def set_projection(self, projection):
        """
        Set the destination-relative prefix that is prepended to every path
        visited from now on, and register the directories that make up that
        prefix. A prefix component that collides with an already registered
        file is recorded as a fatal conflict.
        """
        self.projection = os.path.normpath(projection)

        # Todo, is this how to check in general for empty projection?
        if self.projection == ".":
            self.projection = ""
            return

        # If there is a projection, we'll also create the directories
        # it consists of, and check whether that's causing conflicts.
        path = ""
        for part in self.projection.split(os.sep):
            path = os.path.join(path, part)
            if path not in self.files:
                # "<projection>" is a placeholder source root: these
                # directories do not come from any real source tree.
                self.directories[path] = ("<projection>", path)
            else:
                # Can't create a dir where a file is.
                src_a_root, src_a_relpath = self.files[path]
                self.fatal_conflicts.append(
                    MergeConflict(
                        dst=path,
                        src_a=os.path.join(src_a_root, src_a_relpath),
                        src_b=os.path.join("<projection>", path),
                    )
                )


class DestinationMergeVisitor(BaseDirectoryVisitor):
    """DestinationMergeVisitor refines the plan held by a SourceMergeVisitor
    while walking the destination prefix:

    a. it registers additional (fatal) conflicts for planned
       files and directories that clash with what is already
       present in the destination prefix;
    b. it drops mkdir operations for directories that already
       exist in the destination prefix.

    Symlinked directories in the destination prefix are never
    descended into, so they are never merged with directories
    coming from the sources.
    """

    def __init__(self, source_merge_visitor):
        # The visitor whose ``files``, ``directories`` and
        # ``fatal_conflicts`` are inspected and updated in place.
        self.src = source_merge_visitor

    def _record_fatal_conflict(self, planned_entry, root, rel_path):
        """Append a fatal conflict between a planned (src_root, src_rel)
        entry and the existing destination path root/rel_path."""
        planned_root, planned_rel = planned_entry
        clash = MergeConflict(
            rel_path, os.path.join(planned_root, planned_rel), os.path.join(root, rel_path)
        )
        self.src.fatal_conflicts.append(clash)

    def before_visit_dir(self, root, rel_path, depth):
        plan = self.src

        # A planned file cannot be linked where the destination has a
        # directory: record the conflict and stop descending.
        if rel_path in plan.files:
            self._record_fatal_conflict(plan.files[rel_path], root, rel_path)
            return False

        # A directory that exists on both sides needs no mkdir; descend
        # into it. Directories unknown to the sources are not traversed.
        also_in_sources = rel_path in plan.directories
        if also_in_sources:
            del plan.directories[rel_path]
        return also_in_sources

    def before_visit_symlinked_dir(self, root, rel_path, depth):
        """
        A symlinked directory in the destination prefix is treated
        like a file: anything the sources planned at this path,
        directory or file, is a fatal conflict, so that a source
        dir is never merged into a symlinked destination dir.
        """
        # Directories are checked before files, each independently.
        for planned in (self.src.directories, self.src.files):
            if rel_path in planned:
                self._record_fatal_conflict(planned[rel_path], root, rel_path)

        # Never descend into symlinked target dirs.
        return False

    def visit_file(self, root, rel_path, depth):
        # An existing destination file blocks whatever the sources planned
        # at the same path, be it a directory or a file.
        plan = self.src
        if rel_path in plan.directories:
            blocked = plan.directories[rel_path]
        elif rel_path in plan.files:
            blocked = plan.files[rel_path]
        else:
            return

        self._record_fatal_conflict(blocked, root, rel_path)

    def visit_symlinked_file(self, root, rel_path, depth):
        # Symlinked files are handled exactly like regular files; the link
        # itself is considered, not what it points to.
        self.visit_file(root, rel_path, depth)


class LinkTree:
    """Class to create trees of symbolic links from a source directory.

    LinkTree objects are constructed with a source root.  Their
    methods allow you to create and delete trees of symbolic links
    back to the source tree in specific destination directories.
    Trees comprise symlinks only to files; directories are never
    symlinked to, to prevent the source directory from ever being
    modified.
    """

    def __init__(self, source_root):
        """
        Args:
            source_root: path of the existing tree that links will point to.

        Raises:
            IOError: if ``source_root`` does not exist.
        """
        if not os.path.exists(source_root):
            # Interpolate the path into the message. Passing it as a second
            # positional argument would make OSError interpret the pair as
            # (errno, strerror) and leave the "%s" placeholder unformatted.
            raise IOError("No such file or directory: '%s'" % source_root)

        self._root = source_root

    def find_conflict(self, dest_root, ignore=None, ignore_file_conflicts=False):
        """Returns the first conflict found between the source tree and dest_root.

        Directory conflicts (reported as descriptive messages) come first;
        unless ``ignore_file_conflicts`` is set, they are followed by the
        destination paths of files that already exist.

        Returns:
            The first conflict, or None when there is none.
        """
        ignore = ignore or (lambda x: False)
        conflicts = self.find_dir_conflicts(dest_root, ignore)

        if not ignore_file_conflicts:
            conflicts.extend(
                dst for dst in self.get_file_map(dest_root, ignore).values() if os.path.exists(dst)
            )

        if conflicts:
            return conflicts[0]
        return None

    def find_dir_conflicts(self, dest_root, ignore):
        """Return messages for every place where a file and a directory clash
        between the source tree and dest_root (in either direction)."""
        conflicts = []
        kwargs = {"follow_nonexisting": False, "ignore": ignore}
        for src, dest in traverse_tree(self._root, dest_root, **kwargs):
            if os.path.isdir(src):
                if os.path.exists(dest) and not os.path.isdir(dest):
                    conflicts.append("File blocks directory: %s" % dest)
            elif os.path.isdir(dest):
                # src is not a directory here, so what is blocked is a file.
                conflicts.append("Directory blocks file: %s" % dest)
        return conflicts

    def get_file_map(self, dest_root, ignore):
        """Return a dict mapping every non-directory source path to its
        counterpart under dest_root."""
        merge_map = {}
        kwargs = {"follow_nonexisting": True, "ignore": ignore}
        for src, dest in traverse_tree(self._root, dest_root, **kwargs):
            if not os.path.isdir(src):
                merge_map[src] = dest
        return merge_map

    def merge_directories(self, dest_root, ignore):
        """Create under dest_root the directories found in the source tree.

        Raises:
            ValueError: if a non-directory exists where a directory is needed.
        """
        for src, dest in traverse_tree(self._root, dest_root, ignore=ignore):
            if os.path.isdir(src):
                if not os.path.exists(dest):
                    mkdirp(dest)
                    continue

                if not os.path.isdir(dest):
                    raise ValueError("File blocks directory: %s" % dest)

                # mark empty directories so they aren't removed on unmerge.
                if not os.listdir(dest):
                    marker = os.path.join(dest, empty_file_name)
                    touch(marker)

    def unmerge_directories(self, dest_root, ignore):
        """Remove from dest_root the directories of the source tree that are
        empty, and drop the empty-directory markers left by a merge.

        Raises:
            ValueError: if a non-directory exists where a directory is expected.
        """
        for src, dest in traverse_tree(self._root, dest_root, ignore=ignore, order="post"):
            if os.path.isdir(src):
                if not os.path.exists(dest):
                    continue
                elif not os.path.isdir(dest):
                    raise ValueError("File blocks directory: %s" % dest)

                # remove directory if it is empty.
                if not os.listdir(dest):
                    shutil.rmtree(dest, ignore_errors=True)

                # remove empty dir marker if present.
                marker = os.path.join(dest, empty_file_name)
                if os.path.exists(marker):
                    os.remove(marker)

    def merge(self, dest_root, ignore_conflicts=False, ignore=None, link=symlink, relative=False):
        """Link all files in src into dest, creating directories
           if necessary.

        Keyword Args:

        ignore_conflicts (bool): if True, do not break when the target exists;
            return a list of files that could not be linked

        ignore (callable): callable that returns True if a file is to be
            ignored in the merge (by default ignore nothing)

        link (callable): function to create links with (defaults to llnl.util.symlink)

        relative (bool): create all symlinks relative to the target
            (default False)

        Raises:
            SingleMergeConflictError: if a conflict is found before linking.
        """
        if ignore is None:
            ignore = lambda x: False

        conflict = self.find_conflict(
            dest_root, ignore=ignore, ignore_file_conflicts=ignore_conflicts
        )
        if conflict:
            raise SingleMergeConflictError(conflict)

        self.merge_directories(dest_root, ignore)
        existing = []
        for src, dst in self.get_file_map(dest_root, ignore).items():
            if os.path.exists(dst):
                existing.append(dst)
            elif relative:
                # Express the link target relative to the directory that
                # will contain the link.
                abs_src = os.path.abspath(src)
                dst_dir = os.path.dirname(os.path.abspath(dst))
                rel = os.path.relpath(abs_src, dst_dir)
                link(rel, dst)
            else:
                link(src, dst)

        # Files that were already present are only reported, not replaced.
        for c in existing:
            tty.warn("Could not merge: %s" % c)

    def unmerge(self, dest_root, ignore=None, remove_file=remove_link):
        """Unlink all files in dest that exist in src.

        Unlinks directories in dest if they are empty.
        """
        if ignore is None:
            ignore = lambda x: False

        for src, dst in self.get_file_map(dest_root, ignore).items():
            remove_file(src, dst)
        self.unmerge_directories(dest_root, ignore)


class MergeConflictError(Exception):
    """Common base class for the merge-conflict exceptions of this module."""


class ConflictingSpecsError(MergeConflictError):
    """Merge conflict described by the two specs involved."""

    def __init__(self, spec_1, spec_2):
        # Both specs are forwarded unchanged and end up in ``args``.
        conflicting_specs = (spec_1, spec_2)
        super().__init__(*conflicting_specs)


class SingleMergeConflictError(MergeConflictError):
    """Merge conflict caused by a single blocking path."""

    def __init__(self, path):
        message = "Package merge blocked by file: %s" % path
        super().__init__(message)


class MergeConflictSummary(MergeConflictError):
    """
    A human-readable summary of file system view merge conflicts.

    The message states the total number of conflicts but spells out
    only the first 3 of them.
    """

    def __init__(self, conflicts):
        """
        Args:
            conflicts: sequence of objects exposing ``src_a``, ``src_b``
                and ``dst`` attributes.
        """
        header = "{0} fatal error(s) when merging prefixes:".format(len(conflicts))
        # One line per reported conflict; each already starts with a newline.
        details = [
            "\n    `{0}` and `{1}` both project to `{2}`".format(
                conflict.src_a, conflict.src_b, conflict.dst
            )
            for conflict in conflicts[:3]
        ]
        super().__init__(header + "".join(details))