summaryrefslogtreecommitdiff
path: root/lib/spack/spack/stage.py
blob: 2e02abd9b4f4524d7c227ddb4608e1f506968f9e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import os
import stat
import sys
import errno
import hashlib
import shutil
import tempfile
import getpass
from six import string_types
from six import iteritems
from six.moves.urllib.parse import urljoin

import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp, can_access
from llnl.util.filesystem import remove_if_dead_link, remove_linked_tree

import spack.paths
import spack.caches
import spack.config
import spack.error
import spack.util.lock
import spack.fetch_strategy as fs
import spack.util.pattern as pattern
from spack.util.path import canonicalize_path
from spack.util.crypto import prefix_bits, bit_length

_stage_prefix = 'spack-stage-'


def _first_accessible_path(paths):
    """Find a tmp dir that exists that we can access."""
    for path in paths:
        try:
            # try to create the path if it doesn't exist.
            path = canonicalize_path(path)
            mkdirp(path)

            # ensure accessible
            if not can_access(path):
                continue

            # return it if successful.
            return path

        except OSError:
            tty.debug('OSError while checking temporary path: %s' % path)
            continue

    return None


# cached temporary root
_tmp_root = None
_use_tmp_stage = True


def get_tmp_root():
    global _tmp_root, _use_tmp_stage

    if not _use_tmp_stage:
        return None

    if _tmp_root is None:
        candidates = spack.config.get('config:build_stage')
        if isinstance(candidates, string_types):
            candidates = [candidates]

        path = _first_accessible_path(candidates)
        if not path:
            raise StageError("No accessible stage paths in %s", candidates)

        # Return None to indicate we're using a local staging area.
        if path == canonicalize_path(spack.paths.stage_path):
            _use_tmp_stage = False
            return None

        # ensure that any temp path is unique per user, so users don't
        # fight over shared temporary space.
        user = getpass.getuser()
        if user not in path:
            path = os.path.join(path, user, 'spack-stage')
        else:
            path = os.path.join(path, 'spack-stage')

        mkdirp(path)
        _tmp_root = path

    return _tmp_root


class Stage(object):
    """Manages a temporary stage directory for building.

    A Stage object is a context manager that handles a directory where
    some source code is downloaded and built before being installed.
    It handles fetching the source code, either as an archive to be
    expanded or by checking it out of a repository.  A stage's
    lifecycle looks like this::

        with Stage() as stage:      # Context manager creates and destroys the
                                    # stage directory
            stage.fetch()           # Fetch a source archive into the stage.
            stage.expand_archive()  # Expand the source archive.
            <install>               # Build and install the archive.
                                    # (handled by user of Stage)

    When used as a context manager, the stage is automatically
    destroyed if no exception is raised by the context. If an
    excpetion is raised, the stage is left in the filesystem and NOT
    destroyed, for potential reuse later.

    You can also use the stage's create/destroy functions manually,
    like this::

        stage = Stage()
        try:
            stage.create()          # Explicitly create the stage directory.
            stage.fetch()           # Fetch a source archive into the stage.
            stage.expand_archive()  # Expand the source archive.
            <install>               # Build and install the archive.
                                    # (handled by user of Stage)
        finally:
            stage.destroy()         # Explicitly destroy the stage directory.

    There are two kinds of stages: named and unnamed.  Named stages
    can persist between runs of spack, e.g. if you fetched a tarball
    but didn't finish building it, you won't have to fetch it again.

    Unnamed stages are created using standard mkdtemp mechanisms or
    similar, and are intended to persist for only one run of spack.
    """

    """Shared dict of all stage locks."""
    stage_locks = {}

    def __init__(
            self, url_or_fetch_strategy,
            name=None, mirror_path=None, keep=False, path=None, lock=True,
            search_fn=None):
        """Create a stage object.
           Parameters:
             url_or_fetch_strategy
                 URL of the archive to be downloaded into this stage, OR
                 a valid FetchStrategy.

             name
                 If a name is provided, then this stage is a named stage
                 and will persist between runs (or if you construct another
                 stage object later).  If name is not provided, then this
                 stage will be given a unique name automatically.

             mirror_path
                 If provided, Stage will search Spack's mirrors for
                 this archive at the mirror_path, before using the
                 default fetch strategy.

             keep
                 By default, when used as a context manager, the Stage
                 is deleted on exit when no exceptions are raised.
                 Pass True to keep the stage intact even if no
                 exceptions are raised.
        """
        # TODO: fetch/stage coupling needs to be reworked -- the logic
        # TODO: here is convoluted and not modular enough.
        if isinstance(url_or_fetch_strategy, string_types):
            self.fetcher = fs.from_url(url_or_fetch_strategy)
        elif isinstance(url_or_fetch_strategy, fs.FetchStrategy):
            self.fetcher = url_or_fetch_strategy
        else:
            raise ValueError(
                "Can't construct Stage without url or fetch strategy")
        self.fetcher.set_stage(self)
        # self.fetcher can change with mirrors.
        self.default_fetcher = self.fetcher
        self.search_fn = search_fn
        # used for mirrored archives of repositories.
        self.skip_checksum_for_mirror = True

        # TODO : this uses a protected member of tempfile, but seemed the only
        # TODO : way to get a temporary name besides, the temporary link name
        # TODO : won't be the same as the temporary stage area in tmp_root
        self.name = name
        if name is None:
            self.name = _stage_prefix + next(tempfile._get_candidate_names())
        self.mirror_path = mirror_path

        # Try to construct here a temporary name for the stage directory
        # If this is a named stage, then construct a named path.
        if path is not None:
            self.path = path
        else:
            self.path = os.path.join(spack.paths.stage_path, self.name)

        # Flag to decide whether to delete the stage folder on exit or not
        self.keep = keep

        # File lock for the stage directory.  We use one file for all
        # stage locks. See spack.database.Database.prefix_lock for
        # details on this approach.
        self._lock = None
        if lock:
            if self.name not in Stage.stage_locks:
                sha1 = hashlib.sha1(self.name.encode('utf-8')).digest()
                lock_id = prefix_bits(sha1, bit_length(sys.maxsize))
                stage_lock_path = os.path.join(spack.paths.stage_path, '.lock')

                Stage.stage_locks[self.name] = spack.util.lock.Lock(
                    stage_lock_path, lock_id, 1)

            self._lock = Stage.stage_locks[self.name]

        # When stages are reused, we need to know whether to re-create
        # it.  This marks whether it has been created/destroyed.
        self.created = False

    def __enter__(self):
        """
        Entering a stage context will create the stage directory

        Returns:
            self
        """
        if self._lock is not None:
            self._lock.acquire_write(timeout=60)
        self.create()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Exiting from a stage context will delete the stage directory unless:
        - it was explicitly requested not to do so
        - an exception has been raised

        Args:
            exc_type: exception type
            exc_val: exception value
            exc_tb: exception traceback

        Returns:
            Boolean
        """
        # Delete when there are no exceptions, unless asked to keep.
        if exc_type is None and not self.keep:
            self.destroy()

        if self._lock is not None:
            self._lock.release_write()

    def _need_to_create_path(self):
        """Makes sure nothing weird has happened since the last time we
           looked at path.  Returns True if path already exists and is ok.
           Returns False if path needs to be created."""
        # Path doesn't exist yet.  Will need to create it.
        if not os.path.exists(self.path):
            return True

        # Path exists but points at something else.  Blow it away.
        if not os.path.isdir(self.path):
            os.unlink(self.path)
            return True

        # Path looks ok, but need to check the target of the link.
        if os.path.islink(self.path):
            tmp_root = get_tmp_root()
            if tmp_root is not None:
                real_path = os.path.realpath(self.path)
                real_tmp = os.path.realpath(tmp_root)

                # If we're using a tmp dir, it's a link, and it points at the
                # right spot, then keep it.
                if (real_path.startswith(real_tmp) and
                        os.path.exists(real_path)):
                    return False
                else:
                    # otherwise, just unlink it and start over.
                    os.unlink(self.path)
                    return True

            else:
                # If we're not tmp mode, then it's a link and we want a
                # directory.
                os.unlink(self.path)
                return True

        return False

    @property
    def expected_archive_files(self):
        """Possible archive file paths."""
        paths = []
        if isinstance(self.default_fetcher, fs.URLFetchStrategy):
            paths.append(os.path.join(
                self.path, os.path.basename(self.default_fetcher.url)))

        if self.mirror_path:
            paths.append(os.path.join(
                self.path, os.path.basename(self.mirror_path)))

        return paths

    @property
    def save_filename(self):
        possible_filenames = self.expected_archive_files
        if possible_filenames:
            # This prefers using the URL associated with the default fetcher if
            # available, so that the fetched resource name matches the remote
            # name
            return possible_filenames[0]

    @property
    def archive_file(self):
        """Path to the source archive within this stage directory."""
        for path in self.expected_archive_files:
            if os.path.exists(path):
                return path
        else:
            return None

    @property
    def source_path(self):
        """Returns the path to the expanded/checked out source code.

        To find the source code, this method searches for the first
        subdirectory of the stage that it can find, and returns it.
        This assumes nothing besides the archive file will be in the
        stage path, but it has the advantage that we don't need to
        know the name of the archive or its contents.

        If the fetch strategy is not supposed to expand the downloaded
        file, it will just return the stage path. If the archive needs
        to be expanded, it will return None when no archive is found.
        """
        if isinstance(self.fetcher, fs.URLFetchStrategy):
            if not self.fetcher.expand_archive:
                return self.path

        for p in [os.path.join(self.path, f) for f in os.listdir(self.path)]:
            if os.path.isdir(p):
                return p
        return None

    def fetch(self, mirror_only=False):
        """Downloads an archive or checks out code from a repository."""
        fetchers = []
        if not mirror_only:
            fetchers.append(self.default_fetcher)

        # TODO: move mirror logic out of here and clean it up!
        # TODO: Or @alalazo may have some ideas about how to use a
        # TODO: CompositeFetchStrategy here.
        self.skip_checksum_for_mirror = True
        if self.mirror_path:
            mirrors = spack.config.get('mirrors')

            # Join URLs of mirror roots with mirror paths. Because
            # urljoin() will strip everything past the final '/' in
            # the root, so we add a '/' if it is not present.
            mirror_roots = [root if root.endswith('/') else root + '/'
                            for root in mirrors.values()]
            urls = [urljoin(root, self.mirror_path) for root in mirror_roots]

            # If this archive is normally fetched from a tarball URL,
            # then use the same digest.  `spack mirror` ensures that
            # the checksum will be the same.
            digest = None
            expand = True
            extension = None
            if isinstance(self.default_fetcher, fs.URLFetchStrategy):
                digest = self.default_fetcher.digest
                expand = self.default_fetcher.expand_archive
                extension = self.default_fetcher.extension

            # Have to skip the checksum for things archived from
            # repositories.  How can this be made safer?
            self.skip_checksum_for_mirror = not bool(digest)

            # Add URL strategies for all the mirrors with the digest
            for url in urls:
                fetchers.insert(
                    0, fs.URLFetchStrategy(
                        url, digest, expand=expand, extension=extension))
            if self.default_fetcher.cachable:
                fetchers.insert(
                    0, spack.caches.fetch_cache.fetcher(
                        self.mirror_path, digest, expand=expand,
                        extension=extension))

        def generate_fetchers():
            for fetcher in fetchers:
                yield fetcher
            # The search function may be expensive, so wait until now to
            # call it so the user can stop if a prior fetcher succeeded
            if self.search_fn and not mirror_only:
                dynamic_fetchers = self.search_fn()
                for fetcher in dynamic_fetchers:
                    yield fetcher

        for fetcher in generate_fetchers():
            try:
                fetcher.set_stage(self)
                self.fetcher = fetcher
                self.fetcher.fetch()
                break
            except spack.fetch_strategy.NoCacheError as e:
                # Don't bother reporting when something is not cached.
                continue
            except spack.error.SpackError as e:
                tty.msg("Fetching from %s failed." % fetcher)
                tty.debug(e)
                continue
        else:
            err_msg = "All fetchers failed for %s" % self.name
            self.fetcher = self.default_fetcher
            raise fs.FetchError(err_msg, None)

    def check(self):
        """Check the downloaded archive against a checksum digest.
           No-op if this stage checks code out of a repository."""
        if self.fetcher is not self.default_fetcher and \
           self.skip_checksum_for_mirror:
            tty.warn("Fetching from mirror without a checksum!",
                     "This package is normally checked out from a version "
                     "control system, but it has been archived on a spack "
                     "mirror.  This means we cannot know a checksum for the "
                     "tarball in advance. Be sure that your connection to "
                     "this mirror is secure!")
        elif spack.config.get('config:checksum'):
            self.fetcher.check()

    def cache_local(self):
        spack.caches.fetch_cache.store(self.fetcher, self.mirror_path)

        if spack.caches.mirror_cache:
            spack.caches.mirror_cache.store(self.fetcher, self.mirror_path)

    def expand_archive(self):
        """Changes to the stage directory and attempt to expand the downloaded
        archive.  Fail if the stage is not set up or if the archive is not yet
        downloaded."""
        archive_dir = self.source_path
        if not archive_dir:
            self.fetcher.expand()
            tty.msg("Created stage in %s" % self.path)
        else:
            tty.msg("Already staged %s in %s" % (self.name, self.path))

    def restage(self):
        """Removes the expanded archive path if it exists, then re-expands
           the archive.
        """
        self.fetcher.reset()

    def create(self):
        """Creates the stage directory.

        If get_tmp_root() is None, the stage directory is created
        directly under spack.paths.stage_path, otherwise this will attempt to
        create a stage in a temporary directory and link it into
        spack.paths.stage_path.

        """
        # Create the top-level stage directory
        mkdirp(spack.paths.stage_path)
        remove_if_dead_link(self.path)

        # If a tmp_root exists then create a directory there and then link it
        # in the stage area, otherwise create the stage directory in self.path
        if self._need_to_create_path():
            tmp_root = get_tmp_root()
            if tmp_root is not None:
                # tempfile.mkdtemp already sets mode 0700
                tmp_dir = tempfile.mkdtemp('', _stage_prefix, tmp_root)
                tty.debug('link %s -> %s' % (self.path, tmp_dir))
                os.symlink(tmp_dir, self.path)
            else:
                # emulate file permissions for tempfile.mkdtemp
                mkdirp(self.path, mode=stat.S_IRWXU)
        # Make sure we can actually do something with the stage we made.
        ensure_access(self.path)
        self.created = True

    def destroy(self):
        """Removes this stage directory."""
        remove_linked_tree(self.path)

        # Make sure we don't end up in a removed directory
        try:
            os.getcwd()
        except OSError:
            os.chdir(os.path.dirname(self.path))

        # mark as destroyed
        self.created = False


class ResourceStage(Stage):

    def __init__(self, url_or_fetch_strategy, root, resource, **kwargs):
        super(ResourceStage, self).__init__(url_or_fetch_strategy, **kwargs)
        self.root_stage = root
        self.resource = resource

    def restage(self):
        super(ResourceStage, self).restage()
        self._add_to_root_stage()

    def expand_archive(self):
        super(ResourceStage, self).expand_archive()
        self._add_to_root_stage()

    def _add_to_root_stage(self):
        """
        Move the extracted resource to the root stage (according to placement).
        """
        root_stage = self.root_stage
        resource = self.resource
        placement = os.path.basename(self.source_path) \
            if resource.placement is None \
            else resource.placement
        if not isinstance(placement, dict):
            placement = {'': placement}

        target_path = os.path.join(
            root_stage.source_path, resource.destination)

        try:
            os.makedirs(target_path)
        except OSError as err:
            if err.errno == errno.EEXIST and os.path.isdir(target_path):
                pass
            else:
                raise

        for key, value in iteritems(placement):
            destination_path = os.path.join(target_path, value)
            source_path = os.path.join(self.source_path, key)

            if not os.path.exists(destination_path):
                tty.info('Moving resource stage\n\tsource : '
                         '{stage}\n\tdestination : {destination}'.format(
                             stage=source_path, destination=destination_path
                         ))
                shutil.move(os.path.realpath(source_path), destination_path)


@pattern.composite(method_list=[
    'fetch', 'create', 'created', 'check', 'expand_archive', 'restage',
    'destroy', 'cache_local'])
class StageComposite:
    """Composite for Stage type objects. The first item in this composite is
    considered to be the root package, and operations that return a value are
    forwarded to it."""
    #
    # __enter__ and __exit__ delegate to all stages in the composite.
    #

    def __enter__(self):
        for item in self:
            item.__enter__()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        for item in reversed(self):
            item.keep = getattr(self, 'keep', False)
            item.__exit__(exc_type, exc_val, exc_tb)

    #
    # Below functions act only on the *first* stage in the composite.
    #
    @property
    def source_path(self):
        return self[0].source_path

    @property
    def path(self):
        return self[0].path

    @property
    def archive_file(self):
        return self[0].archive_file

    @property
    def mirror_path(self):
        return self[0].mirror_path


class DIYStage(object):
    """Simple class that allows any directory to be a spack stage."""

    def __init__(self, path):
        self.archive_file = None
        self.path = path
        self.source_path = path
        self.created = True

    # DIY stages do nothing as context managers.
    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass

    def fetch(self, *args, **kwargs):
        tty.msg("No need to fetch for DIY.")

    def check(self):
        tty.msg("No checksum needed for DIY.")

    def expand_archive(self):
        tty.msg("Using source directory: %s" % self.source_path)

    def restage(self):
        tty.die("Cannot restage DIY stage.")

    def create(self):
        self.created = True

    def destroy(self):
        # No need to destroy DIY stage.
        pass

    def cache_local(self):
        tty.msg("Sources for DIY stages are not cached")


def _get_mirrors():
    """Get mirrors from spack configuration."""
    config = spack.config.get('mirrors')
    return [val for name, val in iteritems(config)]


def ensure_access(file=spack.paths.stage_path):
    """Ensure we can access a directory and die with an error if we can't."""
    if not can_access(file):
        tty.die("Insufficient permissions for %s" % file)


def purge():
    """Remove all build directories in the top-level stage path."""
    if os.path.isdir(spack.paths.stage_path):
        for stage_dir in os.listdir(spack.paths.stage_path):
            stage_path = os.path.join(spack.paths.stage_path, stage_dir)
            remove_linked_tree(stage_path)


class StageError(spack.error.SpackError):
    """"Superclass for all errors encountered during staging."""


class RestageError(StageError):
    """"Error encountered during restaging."""


# Keep this in namespace for convenience
FailedDownloadError = fs.FailedDownloadError