summaryrefslogtreecommitdiff
path: root/lib/spack/spack/util/file_cache.py
blob: 2b98f291996ee8a4907f47568c1c16bcb306a472 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import errno
import math
import os
import shutil

from llnl.util.filesystem import mkdirp, rename

from spack.error import SpackError
from spack.util.lock import Lock, ReadTransaction, WriteTransaction


class FileCache:
    """This class manages cached data in the filesystem.

    - Cache files are fetched and stored by unique keys.  Keys can be relative
      paths, so that there can be some hierarchy in the cache.

    - The FileCache handles locking cache files for reading and writing, so
      client code need not manage locks for cache entries.

    """

    def __init__(self, root, timeout=120):
        """Create a file cache object.

        This will create the cache directory if it does not exist yet.

        Args:
            root: specifies the root directory where the cache stores files

            timeout: when there is contention among multiple Spack processes
                for cache files, this specifies how long Spack should wait
                before assuming that there is a deadlock.
        """
        # Trailing separators are stripped so that ``init_entry`` can compare
        # a key's parent directory against the root with a plain ``!=``.
        self.root = root.rstrip(os.path.sep)
        if not os.path.exists(self.root):
            mkdirp(self.root)

        # Maps cache key -> lock object; filled lazily by ``_get_lock``.
        self._locks = {}
        # Passed as ``default_timeout`` to every lock this cache creates.
        self.lock_timeout = timeout

    def destroy(self):
        """Remove all files under the cache root.

        Real subdirectories are removed recursively and errors while doing so
        are ignored.  Every other entry, including a symbolic link that points
        at a directory, is unlinked directly: ``shutil.rmtree`` refuses to
        operate on symlinks, so handing one to it (with errors ignored) would
        silently leave the link behind.
        """
        for name in os.listdir(self.root):
            path = os.path.join(self.root, name)
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path, ignore_errors=True)
            else:
                os.remove(path)

    def cache_path(self, key):
        """Path to the file in the cache for a particular key."""
        return os.path.join(self.root, key)

    def _lock_path(self, key):
        """Path to the lock file for a particular key.

        The lock file lives in the same directory as the cache file and is
        named ``.<basename>.lock``.
        """
        keyfile = os.path.basename(key)
        keydir = os.path.dirname(key)

        return os.path.join(self.root, keydir, "." + keyfile + ".lock")

    def _get_lock(self, key):
        """Create a lock for a key, if necessary, and return a lock object."""
        if key not in self._locks:
            self._locks[key] = Lock(self._lock_path(key), default_timeout=self.lock_timeout)
        return self._locks[key]

    def init_entry(self, key):
        """Ensure we can access a cache file.

        If the cache file does not exist yet, its parent directory is created
        when needed, checked for read/write access, and a lock object is
        created for the key.

        Return whether the cache file exists yet or not.

        Raises:
            CacheError: if the cache path exists but is not a regular file or
                is not readable, or if the parent directory of a missing
                entry is not readable and writable.
        """
        cache_path = self.cache_path(key)

        exists = os.path.exists(cache_path)
        if exists:
            if not os.path.isfile(cache_path):
                raise CacheError("Cache file is not a file: %s" % cache_path)

            if not os.access(cache_path, os.R_OK):
                raise CacheError("Cannot access cache file: %s" % cache_path)
        else:
            # if the file is hierarchical, make parent directories
            parent = os.path.dirname(cache_path)
            if parent.rstrip(os.path.sep) != self.root:
                mkdirp(parent)

            if not os.access(parent, os.R_OK | os.W_OK):
                raise CacheError("Cannot access cache directory: %s" % parent)

            # ensure lock is created for this key
            self._get_lock(key)
        return exists

    def read_transaction(self, key):
        """Get a read transaction on a file cache item.

        Returns a ReadTransaction context manager and opens the cache file for
        reading.  You can use it like this:

           with file_cache_object.read_transaction(key) as cache_file:
               cache_file.read()

        """
        return ReadTransaction(self._get_lock(key), acquire=lambda: open(self.cache_path(key)))

    def write_transaction(self, key):
        """Get a write transaction on a file cache item.

        Returns a WriteTransaction context manager that opens a temporary file
        for writing.  Once the context manager finishes, if nothing went wrong,
        moves the file into place on top of the old file atomically.

        Raises:
            CacheError: if the cache file already exists and is not writable.
        """
        filename = self.cache_path(key)
        if os.path.exists(filename) and not os.access(filename, os.W_OK):
            raise CacheError(
                "Insufficient permissions to write to file cache at {0}".format(filename)
            )

        # TODO: this nested context manager adds a lot of complexity and
        # TODO: is pretty hard to reason about in llnl.util.lock. At some
        # TODO: point we should just replace it with functions and simplify
        # TODO: the locking code.
        class WriteContextManager:
            # NOTE: the instance parameter is called ``cm`` because ``self``
            # in these methods is the enclosing FileCache, taken from the
            # closure.

            def __enter__(cm):
                cm.orig_filename = filename
                cm.orig_file = None
                if os.path.exists(cm.orig_filename):
                    cm.orig_file = open(cm.orig_filename, "r")

                cm.tmp_filename = cm.orig_filename + ".tmp"
                try:
                    cm.tmp_file = open(cm.tmp_filename, "w")
                except Exception:
                    # Don't leak the handle on the existing cache file if the
                    # temporary file cannot be created.
                    if cm.orig_file:
                        cm.orig_file.close()
                    raise

                return cm.orig_file, cm.tmp_file

            def __exit__(cm, exc_type, value, traceback):
                if cm.orig_file:
                    cm.orig_file.close()
                cm.tmp_file.close()

                # Decide on the exception *type*: it is None exactly when the
                # body finished without raising, whereas the truthiness of the
                # exception value is up to the exception class.
                if exc_type is not None:
                    # Something went wrong: discard the partial write.
                    os.remove(cm.tmp_filename)

                else:
                    # Success: move the new contents over the old file.
                    rename(cm.tmp_filename, cm.orig_filename)

        return WriteTransaction(self._get_lock(key), acquire=WriteContextManager)

    def mtime(self, key) -> float:
        """Return modification time of cache file, or -inf if it does not exist.

        Time is in units returned by os.stat in the mtime field, which is
        platform-dependent.

        """
        if not self.init_entry(key):
            return -math.inf
        else:
            return os.stat(self.cache_path(key)).st_mtime

    def remove(self, key):
        """Delete the cache file for ``key`` while holding its write lock.

        Removing a key whose file does not exist is not an error, so this
        operation is idempotent.  Any other ``OSError`` from the unlink is
        propagated.
        """
        file = self.cache_path(key)
        lock = self._get_lock(key)

        # Acquire outside the ``try`` so that a failed acquisition propagates
        # as-is and we never release a lock that was not taken.
        lock.acquire_write()
        try:
            os.unlink(file)
        except OSError as e:
            # File not found is OK, so remove is idempotent.
            if e.errno != errno.ENOENT:
                raise
        finally:
            lock.release_write()


class CacheError(SpackError):
    """Raised when a cache file or its directory cannot be used or accessed."""