From c15e55c6686dc5640f36e833902b9ce3889bb13b Mon Sep 17 00:00:00 2001 From: Paul Ferrell <51765748+Paul-Ferrell@users.noreply.github.com> Date: Wed, 20 Nov 2019 17:00:44 -0700 Subject: mirror bug fixes: symlinks, duplicate patch names, and exception handling (#13789) * Some packages (e.g. mpfr at the time of this patch) can have patches with the same name but different contents (which apply to different versions of the package). This appends part of the patch hash to the cache file name to avoid conflicts. * Some exceptions which occur during fetching are not a subclass of SpackError and therefore do not have a 'message' attribute. This updates the logic for mirroring a single spec (add_single_spec) to produce an appropriate error message in that case (where before it failed with an AttributeError) * In various circumstances, a mirror can contain the universal storage path but not a cosmetic symlink; in this case it would not generate a symlink. Now "spack mirror create" will create a symlink for any package that doesn't have one. --- lib/spack/spack/caches.py | 17 ++++++++++------- lib/spack/spack/mirror.py | 2 +- lib/spack/spack/patch.py | 10 +++++++--- lib/spack/spack/stage.py | 12 ++++++------ 4 files changed, 24 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py index bdece50421..a7910905bc 100644 --- a/lib/spack/spack/caches.py +++ b/lib/spack/spack/caches.py @@ -53,20 +53,23 @@ class MirrorCache(object): def __init__(self, root): self.root = os.path.abspath(root) - def store(self, fetcher, relative_dest, cosmetic_path=None): + def store(self, fetcher, relative_dest): + """Fetch and relocate the fetcher's target into our mirror cache.""" + # Note this will archive package sources even if they would not # normally be cached (e.g. 
the current tip of an hg/git branch) dst = os.path.join(self.root, relative_dest) mkdirp(os.path.dirname(dst)) fetcher.archive(dst) - # Add a symlink path that a human can read to understand what resource - # the archive path refers to - if not cosmetic_path: - return - cosmetic_path = os.path.join(self.root, cosmetic_path) + def symlink(self, mirror_ref): + """Symlink a human readible path in our mirror to the actual + storage location.""" + + cosmetic_path = os.path.join(self.root, mirror_ref.cosmetic_path) relative_dst = os.path.relpath( - dst, start=os.path.dirname(cosmetic_path)) + mirror_ref.storage_path, + start=os.path.dirname(cosmetic_path)) if not os.path.exists(cosmetic_path): mkdirp(os.path.dirname(cosmetic_path)) os.symlink(relative_dst, cosmetic_path) diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py index f7e8e73ea9..db32ceb5a5 100644 --- a/lib/spack/spack/mirror.py +++ b/lib/spack/spack/mirror.py @@ -519,7 +519,7 @@ def add_single_spec(spec, mirror_root, mirror_stats): else: tty.warn( "Error while fetching %s" % spec.cformat('{name}{@version}'), - exception.message) + getattr(exception, 'message', exception)) mirror_stats.error() diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py index 79550538db..ba8d4c5ef8 100644 --- a/lib/spack/spack/patch.py +++ b/lib/spack/spack/patch.py @@ -199,11 +199,15 @@ class UrlPatch(Patch): fetcher = fs.URLFetchStrategy(self.url, fetch_digest, expand=bool(self.archive_sha256)) - per_package_ref = os.path.join( - self.owner.split('.')[-1], os.path.basename(self.url)) + # The same package can have multiple patches with the same name but + # with different contents, therefore apply a subset of the hash. + name = '{0}-{1}'.format(os.path.basename(self.url), fetch_digest[:7]) + + per_package_ref = os.path.join(self.owner.split('.')[-1], name) # Reference starting with "spack." 
is required to avoid cyclic imports mirror_ref = spack.mirror.mirror_archive_paths( - fetcher, per_package_ref) + fetcher, + per_package_ref) self.stage = spack.stage.Stage(fetcher, mirror_paths=mirror_ref) self.stage.create() diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index d2dd3e6e7a..e46354d963 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -498,13 +498,13 @@ class Stage(object): if os.path.exists(absolute_storage_path): stats.already_existed(absolute_storage_path) - return + else: + self.fetch() + spack.caches.mirror_cache.store( + self.fetcher, self.mirror_paths.storage_path) + stats.added(absolute_storage_path) - self.fetch() - spack.caches.mirror_cache.store( - self.fetcher, self.mirror_paths.storage_path, - self.mirror_paths.cosmetic_path) - stats.added(absolute_storage_path) + spack.caches.mirror_cache.symlink(self.mirror_paths) def expand_archive(self): """Changes to the stage directory and attempt to expand the downloaded -- cgit v1.2.3-70-g09d2 From 64209dda977c907da435faed6c9e030a50c520ab Mon Sep 17 00:00:00 2001 From: Peter Josef Scheibel Date: Tue, 26 Nov 2019 18:42:21 -0800 Subject: Allow repeated invocations of 'mirror create' When creating a cosmetic symlink for a resource in a mirror, remove it if it already exists. The symlink is removed in case the logic to create the symlink has changed. 
--- lib/spack/spack/caches.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py index a7910905bc..61a47eaa6e 100644 --- a/lib/spack/spack/caches.py +++ b/lib/spack/spack/caches.py @@ -70,7 +70,13 @@ class MirrorCache(object): relative_dst = os.path.relpath( mirror_ref.storage_path, start=os.path.dirname(cosmetic_path)) + if not os.path.exists(cosmetic_path): + if os.path.lexists(cosmetic_path): + # In this case the link itself exists but it is broken: remove + # it and recreate it (in order to fix any symlinks broken prior + # to https://github.com/spack/spack/pull/13908) + os.unlink(cosmetic_path) mkdirp(os.path.dirname(cosmetic_path)) os.symlink(relative_dst, cosmetic_path) -- cgit v1.2.3-70-g09d2 From 98b498c671fe4cec0c3b1ecfc09fa959faa713bd Mon Sep 17 00:00:00 2001 From: Peter Josef Scheibel Date: Tue, 26 Nov 2019 18:50:32 -0800 Subject: Mirrors: fix cosmetic symlink targets The targets for the cosmetic paths in mirrors were being calculated incorrectly as of fb3a3ba: the symlinks used relative paths as targets, and the relative path was computed relative to the wrong directory. 
--- lib/spack/spack/caches.py | 3 ++- lib/spack/spack/test/mirror.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/spack/spack/caches.py b/lib/spack/spack/caches.py index 61a47eaa6e..ce2f8a6371 100644 --- a/lib/spack/spack/caches.py +++ b/lib/spack/spack/caches.py @@ -67,8 +67,9 @@ class MirrorCache(object): storage location.""" cosmetic_path = os.path.join(self.root, mirror_ref.cosmetic_path) + storage_path = os.path.join(self.root, mirror_ref.storage_path) relative_dst = os.path.relpath( - mirror_ref.storage_path, + storage_path, start=os.path.dirname(cosmetic_path)) if not os.path.exists(cosmetic_path): diff --git a/lib/spack/spack/test/mirror.py b/lib/spack/spack/test/mirror.py index 9068db7193..e1b31695e3 100644 --- a/lib/spack/spack/test/mirror.py +++ b/lib/spack/spack/test/mirror.py @@ -14,6 +14,8 @@ from spack.spec import Spec from spack.stage import Stage from spack.util.executable import which +from llnl.util.filesystem import resolve_link_target_relative_to_the_link + pytestmark = pytest.mark.usefixtures('config', 'mutable_mock_packages') # paths in repos that shouldn't be in the mirror tarballs. @@ -192,3 +194,33 @@ def test_mirror_with_url_patches(mock_packages, config, monkeypatch): 'abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234', 'abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd.gz' # NOQA: ignore=E501 ]) - files_cached_in_mirror) + + +class MockFetcher(object): + """Mock fetcher object which implements the necessary functionality for + testing MirrorCache + """ + @staticmethod + def archive(dst): + with open(dst, 'w'): + pass + + +@pytest.mark.regression('14067') +def test_mirror_cache_symlinks(tmpdir): + """Confirm that the cosmetic symlink created in the mirror cache (which may + be relative) targets the storage path correctly. 
+ """ + cosmetic_path = 'zlib/zlib-1.2.11.tar.gz' + global_path = '_source-cache/archive/c3/c3e5.tar.gz' + cache = spack.caches.MirrorCache(str(tmpdir)) + reference = spack.mirror.MirrorReference(cosmetic_path, global_path) + + cache.store(MockFetcher(), reference.storage_path) + cache.symlink(reference) + + link_target = resolve_link_target_relative_to_the_link( + os.path.join(cache.root, reference.cosmetic_path)) + assert os.path.exists(link_target) + assert (os.path.normpath(link_target) == + os.path.join(cache.root, reference.storage_path)) -- cgit v1.2.3-70-g09d2 From a69b3c85b01075703dc480b490e894ee14d00125 Mon Sep 17 00:00:00 2001 From: Peter Josef Scheibel Date: Tue, 26 Nov 2019 19:23:38 -0800 Subject: Mirrors: perform checksum of fetched sources Since cache_mirror does the fetch itself, it also needs to do the checksum itself if it wants to verify that the source stored in the mirror is valid. Note that this isn't strictly required because fetching (including from mirrors) always separately verifies the checksum. --- lib/spack/spack/stage.py | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index e46354d963..ffad242f27 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -500,6 +500,7 @@ class Stage(object): stats.already_existed(absolute_storage_path) else: self.fetch() + self.check() spack.caches.mirror_cache.store( self.fetcher, self.mirror_paths.storage_path) stats.added(absolute_storage_path) -- cgit v1.2.3-70-g09d2 From d71428622bebc54db4a1903e2026067fc085cb1b Mon Sep 17 00:00:00 2001 From: Peter Josef Scheibel Date: Tue, 26 Nov 2019 19:35:58 -0800 Subject: Mirrors: avoid re-downloading patches When updating a mirror, Spack was re-retrieving all patches (since the fetch logic for patches is separate). This updates the patch logic to allow the mirror logic to avoid this. 
--- lib/spack/spack/mirror.py | 1 - lib/spack/spack/patch.py | 47 ++++++++++++++++++++++++++++------------------- 2 files changed, 28 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py index db32ceb5a5..da9b20472a 100644 --- a/lib/spack/spack/mirror.py +++ b/lib/spack/spack/mirror.py @@ -502,7 +502,6 @@ def add_single_spec(spec, mirror_root, mirror_stats): with spec.package.stage as pkg_stage: pkg_stage.cache_mirror(mirror_stats) for patch in spec.package.all_patches(): - patch.fetch(pkg_stage) if patch.cache(): patch.cache().cache_mirror(mirror_stats) patch.clean() diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py index ba8d4c5ef8..9e29dcc638 100644 --- a/lib/spack/spack/patch.py +++ b/lib/spack/spack/patch.py @@ -171,6 +171,7 @@ class UrlPatch(Patch): super(UrlPatch, self).__init__(pkg, url, level, working_dir) self.url = url + self._stage = None self.ordering_key = ordering_key @@ -191,25 +192,6 @@ class UrlPatch(Patch): Args: stage: stage for the package that needs to be patched """ - # use archive digest for compressed archives - fetch_digest = self.sha256 - if self.archive_sha256: - fetch_digest = self.archive_sha256 - - fetcher = fs.URLFetchStrategy(self.url, fetch_digest, - expand=bool(self.archive_sha256)) - - # The same package can have multiple patches with the same name but - # with different contents, therefore apply a subset of the hash. - name = '{0}-{1}'.format(os.path.basename(self.url), fetch_digest[:7]) - - per_package_ref = os.path.join(self.owner.split('.')[-1], name) - # Reference starting with "spack." 
is required to avoid cyclic imports - mirror_ref = spack.mirror.mirror_archive_paths( - fetcher, - per_package_ref) - - self.stage = spack.stage.Stage(fetcher, mirror_paths=mirror_ref) self.stage.create() self.stage.fetch() self.stage.check() @@ -243,6 +225,33 @@ class UrlPatch(Patch): "sha256 checksum failed for %s" % self.path, "Expected %s but got %s" % (self.sha256, checker.sum)) + @property + def stage(self): + if self._stage: + return self._stage + + # use archive digest for compressed archives + fetch_digest = self.sha256 + if self.archive_sha256: + fetch_digest = self.archive_sha256 + + fetcher = fs.URLFetchStrategy(self.url, fetch_digest, + expand=bool(self.archive_sha256)) + + # The same package can have multiple patches with the same name but + # with different contents, therefore apply a subset of the hash. + name = '{0}-{1}'.format(os.path.basename(self.url), fetch_digest[:7]) + + per_package_ref = os.path.join(self.owner.split('.')[-1], name) + # Reference starting with "spack." is required to avoid cyclic imports + mirror_ref = spack.mirror.mirror_archive_paths( + fetcher, + per_package_ref) + + self._stage = spack.stage.Stage(fetcher, mirror_paths=mirror_ref) + self._stage.create() + return self._stage + def cache(self): return self.stage -- cgit v1.2.3-70-g09d2 From 587c650b8891ff1a633d4ccfcfcae74e079b1c8d Mon Sep 17 00:00:00 2001 From: Peter Josef Scheibel Date: Tue, 26 Nov 2019 19:50:06 -0800 Subject: Mirrors: skip attempts to fetch BundlePackages BundlePackages use a noop fetch strategy. The mirror logic was assuming that the fetcher had a resource to cache after performing a fetch. This adds a special check to skip caching if the stage is associated with a BundleFetchStrategy. Note that this should allow caching resources associated with BundlePackages. 
--- lib/spack/spack/stage.py | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index ffad242f27..bf65ee0b01 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -492,6 +492,16 @@ class Stage(object): def cache_mirror(self, stats): """Perform a fetch if the resource is not already cached""" + if isinstance(self.default_fetcher, fs.BundleFetchStrategy): + # BundleFetchStrategy has no source to fetch. The associated + # fetcher does nothing but the associated stage may still exist. + # There is currently no method available on the fetcher to + # distinguish this ('cachable' refers to whether the fetcher + # refers to a resource with a fixed ID, which is not the same + # concept as whether there is anything to fetch at all) so we + # must examine the type of the fetcher. + return + dst_root = spack.caches.mirror_cache.root absolute_storage_path = os.path.join( dst_root, self.mirror_paths.storage_path) -- cgit v1.2.3-70-g09d2 From 639156130b54a05b22b5e6338431b378ff8b5030 Mon Sep 17 00:00:00 2001 From: Peter Josef Scheibel Date: Wed, 27 Nov 2019 14:04:45 -0800 Subject: Patch fetching: remove unnecessary argument --- lib/spack/spack/package.py | 2 +- lib/spack/spack/patch.py | 8 ++------ lib/spack/spack/test/patch.py | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index 38631c7a0e..26335ed2ff 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -1086,7 +1086,7 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)): self.stage.cache_local() for patch in self.spec.patches: - patch.fetch(self.stage) + patch.fetch() if patch.cache(): patch.cache().cache_local() diff --git a/lib/spack/spack/patch.py b/lib/spack/spack/patch.py index 9e29dcc638..73fdf4df25 100644 --- a/lib/spack/spack/patch.py +++ b/lib/spack/spack/patch.py @@ -64,11 +64,8 
@@ class Patch(object): self.level = level self.working_dir = working_dir - def fetch(self, stage): + def fetch(self): """Fetch the patch in case of a UrlPatch - - Args: - stage: stage for the package that needs to be patched """ def clean(self): @@ -185,8 +182,7 @@ class UrlPatch(Patch): if not self.sha256: raise PatchDirectiveError("URL patches require a sha256 checksum") - # TODO: this function doesn't use the stage arg - def fetch(self, stage): + def fetch(self): """Retrieve the patch in a temporary stage and compute self.path Args: diff --git a/lib/spack/spack/test/patch.py b/lib/spack/spack/test/patch.py index c3df33dc8b..b705d01e42 100644 --- a/lib/spack/spack/test/patch.py +++ b/lib/spack/spack/test/patch.py @@ -79,7 +79,7 @@ first line third line """) # apply the patch and compare files - patch.fetch(stage) + patch.fetch() patch.apply(stage) patch.clean() -- cgit v1.2.3-70-g09d2 From 37eac1a2269fcaf1bd0e1fa51851150874676e1a Mon Sep 17 00:00:00 2001 From: Sajid Ali <30510036+s-sajid-ali@users.noreply.github.com> Date: Sat, 21 Dec 2019 02:02:28 -0600 Subject: use `sys.executable` instead of `python` in `_source_single_file` (#14252) --- lib/spack/spack/util/environment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/spack/spack/util/environment.py b/lib/spack/spack/util/environment.py index b85ec963e2..f7dc728e7c 100644 --- a/lib/spack/spack/util/environment.py +++ b/lib/spack/spack/util/environment.py @@ -919,7 +919,7 @@ def environment_after_sourcing_files(*files, **kwargs): source_file = ' '.join(source_file) dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))' - dump_environment = 'python -c "{0}"'.format(dump_cmd) + dump_environment = sys.executable + ' -c "{0}"'.format(dump_cmd) # Try to source the file source_file_arguments = ' '.join([ -- cgit v1.2.3-70-g09d2 From 48befd67b565efcb218849644c9dc1f728e2f095 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Mon, 16 Dec 2019 16:56:55 -0800 Subject: 
performance: memoize spack.architecture.get_platform() `get_platform()` is pretty expensive and can be called many times in a spack invocation. - [x] memoize `get_platform()` --- lib/spack/spack/architecture.py | 1 + lib/spack/spack/test/concretize.py | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/spack/spack/architecture.py b/lib/spack/spack/architecture.py index 0e318bbccf..7552795cd2 100644 --- a/lib/spack/spack/architecture.py +++ b/lib/spack/spack/architecture.py @@ -436,6 +436,7 @@ class Arch(object): return arch_for_spec(spec) +@memoized def get_platform(platform_name): """Returns a platform object that corresponds to the given name.""" platform_list = all_platforms() diff --git a/lib/spack/spack/test/concretize.py b/lib/spack/spack/test/concretize.py index e0774909f4..4b17e755ed 100644 --- a/lib/spack/spack/test/concretize.py +++ b/lib/spack/spack/test/concretize.py @@ -94,6 +94,10 @@ def current_host(request, monkeypatch): # preferred target via packages.yaml cpu, _, is_preference = request.param.partition('-') target = llnl.util.cpu.targets[cpu] + + # this function is memoized, so clear its state for testing + spack.architecture.get_platform.cache.clear() + if not is_preference: monkeypatch.setattr(llnl.util.cpu, 'host', lambda: target) monkeypatch.setattr(spack.platforms.test.Test, 'default', cpu) @@ -104,6 +108,9 @@ def current_host(request, monkeypatch): with spack.config.override('packages:all', {'target': [cpu]}): yield target + # clear any test values fetched + spack.architecture.get_platform.cache.clear() + @pytest.mark.usefixtures('config', 'mock_packages') class TestConcretize(object): -- cgit v1.2.3-70-g09d2 From cbf8553406c9b7d7ec529751f39ebbad331cbdc7 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Mon, 16 Dec 2019 17:37:47 -0800 Subject: concretization: improve performance by avoiding database locks Checks for deprecated specs were repeatedly taking out read locks on the database, which can be very slow. 
- [x] put a read transaction around the deprecation check --- lib/spack/spack/spec.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 050f027679..86ac62e0d7 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -2243,10 +2243,12 @@ class Spec(object): # If any spec in the DAG is deprecated, throw an error deprecated = [] - for x in self.traverse(): - _, rec = spack.store.db.query_by_spec_hash(x.dag_hash()) - if rec and rec.deprecated_for: - deprecated.append(rec) + with spack.store.db.read_transaction(): + for x in self.traverse(): + _, rec = spack.store.db.query_by_spec_hash(x.dag_hash()) + if rec and rec.deprecated_for: + deprecated.append(rec) + if deprecated: msg = "\n The following specs have been deprecated" msg += " in favor of specs with the hashes shown:\n" -- cgit v1.2.3-70-g09d2 From 5bdba9883735eb64a5d598f95bb774637e27e9db Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Tue, 17 Dec 2019 12:50:44 -0800 Subject: performance: `spack spec` should use a read transaction with -I `spack spec -I` queries the database for installation status and should use a read transaction around calls to `Spec.tree()`. 
--- lib/spack/spack/cmd/spec.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/cmd/spec.py b/lib/spack/spack/cmd/spec.py index f63ac638c4..f10eef9c73 100644 --- a/lib/spack/spack/cmd/spec.py +++ b/lib/spack/spack/cmd/spec.py @@ -6,6 +6,7 @@ from __future__ import print_function import argparse +import contextlib import sys import llnl.util.tty as tty @@ -14,6 +15,7 @@ import spack import spack.cmd import spack.cmd.common.arguments as arguments import spack.spec +import spack.store import spack.hash_types as ht description = "show what would be installed, given a spec" @@ -45,6 +47,14 @@ def setup_parser(subparser): 'specs', nargs=argparse.REMAINDER, help="specs of packages") +@contextlib.contextmanager +def nullcontext(): + """Empty context manager. + TODO: replace with contextlib.nullcontext() if we ever require python 3.7. + """ + yield + + def spec(parser, args): name_fmt = '{namespace}.{name}' if args.namespaces else '{name}' fmt = '{@version}{%compiler}{compiler_flags}{variants}{arch=architecture}' @@ -57,6 +67,12 @@ def spec(parser, args): 'status_fn': install_status_fn if args.install_status else None } + # use a read transaction if we are getting install status for every + # spec in the DAG. This avoids repeatedly querying the DB. 
+ tree_context = nullcontext + if args.install_status: + tree_context = spack.store.db.read_transaction + if not args.specs: tty.die("spack spec requires at least one spec") @@ -73,13 +89,14 @@ def spec(parser, args): print(spec.to_json(hash=ht.build_hash)) continue - kwargs['hashes'] = False # Always False for input spec - print("Input spec") - print("--------------------------------") - print(spec.tree(**kwargs)) + with tree_context(): + kwargs['hashes'] = False # Always False for input spec + print("Input spec") + print("--------------------------------") + print(spec.tree(**kwargs)) - kwargs['hashes'] = args.long or args.very_long - print("Concretized") - print("--------------------------------") - spec.concretize() - print(spec.tree(**kwargs)) + kwargs['hashes'] = args.long or args.very_long + print("Concretized") + print("--------------------------------") + spec.concretize() + print(spec.tree(**kwargs)) -- cgit v1.2.3-70-g09d2 From 91ea90c25347a33ab4b7f7d7d390af048b4ab809 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Wed, 18 Dec 2019 14:25:21 -0800 Subject: performance: speed up `spack find` in environments `Environment.added_specs()` has a loop around calls to `Package.installed()`, which can result in repeated DB queries. Optimize this with a read transaction in `Environment`. 
--- lib/spack/spack/environment.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py index 2c7a0cf098..4a23c52622 100644 --- a/lib/spack/spack/environment.py +++ b/lib/spack/spack/environment.py @@ -25,6 +25,7 @@ import spack.hash_types as ht import spack.repo import spack.schema.env import spack.spec +import spack.store import spack.util.spack_json as sjson import spack.util.spack_yaml as syaml import spack.config @@ -1232,13 +1233,16 @@ class Environment(object): Yields the user spec for non-concretized specs, and the concrete spec for already concretized but not yet installed specs. """ - concretized = dict(self.concretized_specs()) - for spec in self.user_specs: - concrete = concretized.get(spec) - if not concrete: - yield spec - elif not concrete.package.installed: - yield concrete + # use a transaction to avoid overhead of repeated calls + # to `package.installed` + with spack.store.db.read_transaction(): + concretized = dict(self.concretized_specs()) + for spec in self.user_specs: + concrete = concretized.get(spec) + if not concrete: + yield spec + elif not concrete.package.installed: + yield concrete def concretized_specs(self): """Tuples of (user spec, concrete spec) for all concrete specs.""" -- cgit v1.2.3-70-g09d2 From a85b9070cb37103c45e0e9b58732b4d92c09288b Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Fri, 20 Dec 2019 23:38:23 -0800 Subject: performance: avoid repeated DB locking on view generation `ViewDescriptor.regenerate()` checks repeatedly whether packages are installed and also does a lot of DB queries. Put a read transaction around the whole thing to avoid repeatedly locking and unlocking the DB. 
--- lib/spack/spack/environment.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py index 4a23c52622..f7a1310459 100644 --- a/lib/spack/spack/environment.py +++ b/lib/spack/spack/environment.py @@ -516,20 +516,23 @@ class ViewDescriptor(object): if spec.concrete: # Do not link unconcretized roots specs_for_view.append(spec.copy(deps=('link', 'run'))) - installed_specs_for_view = set(s for s in specs_for_view - if s in self and s.package.installed) + # regeneration queries the database quite a bit; this read + # transaction ensures that we don't repeatedly lock/unlock. + with spack.store.db.read_transaction(): + installed_specs_for_view = set( + s for s in specs_for_view if s in self and s.package.installed) - view = self.view() + view = self.view() - view.clean() - specs_in_view = set(view.get_all_specs()) - tty.msg("Updating view at {0}".format(self.root)) + view.clean() + specs_in_view = set(view.get_all_specs()) + tty.msg("Updating view at {0}".format(self.root)) - rm_specs = specs_in_view - installed_specs_for_view - view.remove_specs(*rm_specs, with_dependents=False) + rm_specs = specs_in_view - installed_specs_for_view + view.remove_specs(*rm_specs, with_dependents=False) - add_specs = installed_specs_for_view - specs_in_view - view.add_specs(*add_specs, with_dependencies=False) + add_specs = installed_specs_for_view - specs_in_view + view.add_specs(*add_specs, with_dependencies=False) class Environment(object): -- cgit v1.2.3-70-g09d2 From 98577e3af5cf571d5408fcb1da32de3586238ead Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 16:23:54 -0800 Subject: lock transactions: fix non-transactional writes Lock transactions were actually writing *after* the lock was released. The code was looking at the result of `release_write()` before writing, then writing based on whether the lock was released. 
This is pretty obviously wrong. - [x] Refactor `Lock` so that a release function can be passed to the `Lock` and called *only* when a lock is really released. - [x] Refactor `LockTransaction` classes to use the release function instead of checking the return value of `release_read()` / `release_write()` --- lib/spack/llnl/util/lock.py | 100 ++++++---- lib/spack/spack/database.py | 7 +- lib/spack/spack/test/llnl/util/lock.py | 340 ++++++++++++++++++++------------- lib/spack/spack/util/file_cache.py | 10 +- 4 files changed, 285 insertions(+), 172 deletions(-) (limited to 'lib') diff --git a/lib/spack/llnl/util/lock.py b/lib/spack/llnl/util/lock.py index 66cb067c88..c675c7c452 100644 --- a/lib/spack/llnl/util/lock.py +++ b/lib/spack/llnl/util/lock.py @@ -95,10 +95,6 @@ class Lock(object): The lock is implemented as a spin lock using a nonblocking call to ``lockf()``. - On acquiring an exclusive lock, the lock writes this process's - pid and host to the lock file, in case the holding process needs - to be killed later. - If the lock times out, it raises a ``LockError``. If the lock is successfully acquired, the total wait time and the number of attempts is returned. @@ -284,11 +280,19 @@ class Lock(object): self._writes += 1 return False - def release_read(self): + def release_read(self, release_fn=None): """Releases a read lock. - Returns True if the last recursive lock was released, False if - there are still outstanding locks. + Arguments: + release_fn (callable): function to call *before* the last recursive + lock (read or write) is released. + + If the last recursive lock will be released, then this will call + release_fn and return its result (if provided), or return True + (if release_fn was not provided). + + Otherwise, we are still nested inside some other lock, so do not + call the release_fn and, return False. Does limited correctness checking: if a read lock is released when none are held, this will raise an assertion error. 
@@ -300,18 +304,30 @@ class Lock(object): self._debug( 'READ LOCK: {0.path}[{0._start}:{0._length}] [Released]' .format(self)) + + result = True + if release_fn is not None: + result = release_fn() + self._unlock() # can raise LockError. self._reads -= 1 - return True + return result else: self._reads -= 1 return False - def release_write(self): + def release_write(self, release_fn=None): """Releases a write lock. - Returns True if the last recursive lock was released, False if - there are still outstanding locks. + Arguments: + release_fn (callable): function to call before the last recursive + write is released. + + If the last recursive *write* lock will be released, then this + will call release_fn and return its result (if provided), or + return True (if release_fn was not provided). Otherwise, we are + still nested inside some other write lock, so do not call the + release_fn, and return False. Does limited correctness checking: if a read lock is released when none are held, this will raise an assertion error. @@ -323,9 +339,16 @@ class Lock(object): self._debug( 'WRITE LOCK: {0.path}[{0._start}:{0._length}] [Released]' .format(self)) + + # we need to call release_fn before releasing the lock + result = True + if release_fn is not None: + result = release_fn() + self._unlock() # can raise LockError. self._writes -= 1 - return True + return result + else: self._writes -= 1 return False @@ -349,28 +372,36 @@ class Lock(object): class LockTransaction(object): """Simple nested transaction context manager that uses a file lock. - This class can trigger actions when the lock is acquired for the - first time and released for the last. + Arguments: + lock (Lock): underlying lock for this transaction to be accquired on + enter and released on exit + acquire (callable or contextmanager): function to be called after lock + is acquired, or contextmanager to enter after acquire and leave + before release. + release (callable): function to be called before release. 
If + ``acquire`` is a contextmanager, this will be called *after* + exiting the nexted context and before the lock is released. + timeout (float): number of seconds to set for the timeout when + accquiring the lock (default no timeout) If the ``acquire_fn`` returns a value, it is used as the return value for ``__enter__``, allowing it to be passed as the ``as`` argument of a ``with`` statement. If ``acquire_fn`` returns a context manager, *its* ``__enter__`` function - will be called in ``__enter__`` after ``acquire_fn``, and its ``__exit__`` - funciton will be called before ``release_fn`` in ``__exit__``, allowing you - to nest a context manager to be used along with the lock. + will be called after the lock is acquired, and its ``__exit__`` funciton + will be called before ``release_fn`` in ``__exit__``, allowing you to + nest a context manager inside this one. Timeout for lock is customizable. """ - def __init__(self, lock, acquire_fn=None, release_fn=None, - timeout=None): + def __init__(self, lock, acquire=None, release=None, timeout=None): self._lock = lock self._timeout = timeout - self._acquire_fn = acquire_fn - self._release_fn = release_fn + self._acquire_fn = acquire + self._release_fn = release self._as = None def __enter__(self): @@ -383,13 +414,18 @@ class LockTransaction(object): def __exit__(self, type, value, traceback): suppress = False - if self._exit(): - if self._as and hasattr(self._as, '__exit__'): - if self._as.__exit__(type, value, traceback): - suppress = True - if self._release_fn: - if self._release_fn(type, value, traceback): - suppress = True + + def release_fn(): + if self._release_fn is not None: + return self._release_fn(type, value, traceback) + + if self._as and hasattr(self._as, '__exit__'): + if self._as.__exit__(type, value, traceback): + suppress = True + + if self._exit(release_fn): + suppress = True + return suppress @@ -398,8 +434,8 @@ class ReadTransaction(LockTransaction): def _enter(self): return 
self._lock.acquire_read(self._timeout) - def _exit(self): - return self._lock.release_read() + def _exit(self, release_fn): + return self._lock.release_read(release_fn) class WriteTransaction(LockTransaction): @@ -407,8 +443,8 @@ class WriteTransaction(LockTransaction): def _enter(self): return self._lock.acquire_write(self._timeout) - def _exit(self): - return self._lock.release_write() + def _exit(self, release_fn): + return self._lock.release_write(release_fn) class LockError(Exception): diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index e6e82f9803..243f1a20d5 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -332,11 +332,12 @@ class Database(object): def write_transaction(self): """Get a write lock context manager for use in a `with` block.""" - return WriteTransaction(self.lock, self._read, self._write) + return WriteTransaction( + self.lock, acquire=self._read, release=self._write) def read_transaction(self): """Get a read lock context manager for use in a `with` block.""" - return ReadTransaction(self.lock, self._read) + return ReadTransaction(self.lock, acquire=self._read) def prefix_lock(self, spec): """Get a lock on a particular spec's installation directory. @@ -624,7 +625,7 @@ class Database(object): self._data = {} transaction = WriteTransaction( - self.lock, _read_suppress_error, self._write + self.lock, acquire=_read_suppress_error, release=self._write ) with transaction: diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index d8081d108c..3bf8a236b1 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -42,6 +42,7 @@ node-local filesystem, and multi-node tests will fail if the locks aren't actually on a shared filesystem. 
""" +import collections import os import socket import shutil @@ -776,189 +777,258 @@ def test_complex_acquire_and_release_chain(lock_path): multiproc_test(p1, p2, p3) -def test_transaction(lock_path): +class AssertLock(lk.Lock): + """Test lock class that marks acquire/release events.""" + def __init__(self, lock_path, vals): + super(AssertLock, self).__init__(lock_path) + self.vals = vals + + def acquire_read(self, timeout=None): + self.assert_acquire_read() + result = super(AssertLock, self).acquire_read(timeout) + self.vals['acquired_read'] = True + return result + + def acquire_write(self, timeout=None): + self.assert_acquire_write() + result = super(AssertLock, self).acquire_write(timeout) + self.vals['acquired_write'] = True + return result + + def release_read(self, release_fn=None): + self.assert_release_read() + result = super(AssertLock, self).release_read(release_fn) + self.vals['released_read'] = True + return result + + def release_write(self, release_fn=None): + self.assert_release_write() + result = super(AssertLock, self).release_write(release_fn) + self.vals['released_write'] = True + return result + + +@pytest.mark.parametrize( + "transaction,type", + [(lk.ReadTransaction, "read"), (lk.WriteTransaction, "write")] +) +def test_transaction(lock_path, transaction, type): + class MockLock(AssertLock): + def assert_acquire_read(self): + assert not vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_release_read(self): + assert vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_acquire_write(self): + assert not vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_release_write(self): + assert vals['entered_fn'] + assert not vals['exited_fn'] + def enter_fn(): - vals['entered'] = True + # assert enter_fn is called while lock is held + assert vals['acquired_%s' % type] + vals['entered_fn'] = True def exit_fn(t, v, tb): - vals['exited'] = True + # assert exit_fn is called while lock is held + assert not 
vals['released_%s' % type] + vals['exited_fn'] = True vals['exception'] = (t or v or tb) - lock = lk.Lock(lock_path) - vals = {'entered': False, 'exited': False, 'exception': False} - with lk.ReadTransaction(lock, enter_fn, exit_fn): - pass + vals = collections.defaultdict(lambda: False) + lock = MockLock(lock_path, vals) + + with transaction(lock, acquire=enter_fn, release=exit_fn): + assert vals['acquired_%s' % type] + assert not vals['released_%s' % type] - assert vals['entered'] - assert vals['exited'] + assert vals['entered_fn'] + assert vals['exited_fn'] + assert vals['acquired_%s' % type] + assert vals['released_%s' % type] assert not vals['exception'] - vals = {'entered': False, 'exited': False, 'exception': False} - with lk.WriteTransaction(lock, enter_fn, exit_fn): - pass - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] +@pytest.mark.parametrize( + "transaction,type", + [(lk.ReadTransaction, "read"), (lk.WriteTransaction, "write")] +) +def test_transaction_with_exception(lock_path, transaction, type): + class MockLock(AssertLock): + def assert_acquire_read(self): + assert not vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_release_read(self): + assert vals['entered_fn'] + assert not vals['exited_fn'] + def assert_acquire_write(self): + assert not vals['entered_fn'] + assert not vals['exited_fn'] + + def assert_release_write(self): + assert vals['entered_fn'] + assert not vals['exited_fn'] -def test_transaction_with_exception(lock_path): def enter_fn(): - vals['entered'] = True + assert vals['acquired_%s' % type] + vals['entered_fn'] = True def exit_fn(t, v, tb): - vals['exited'] = True + assert not vals['released_%s' % type] + vals['exited_fn'] = True vals['exception'] = (t or v or tb) + return exit_result - lock = lk.Lock(lock_path) - - def do_read_with_exception(): - with lk.ReadTransaction(lock, enter_fn, exit_fn): - raise Exception() - - def do_write_with_exception(): - with lk.WriteTransaction(lock, 
enter_fn, exit_fn): - raise Exception() + exit_result = False + vals = collections.defaultdict(lambda: False) + lock = MockLock(lock_path, vals) - vals = {'entered': False, 'exited': False, 'exception': False} with pytest.raises(Exception): - do_read_with_exception() - assert vals['entered'] - assert vals['exited'] - assert vals['exception'] + with transaction(lock, acquire=enter_fn, release=exit_fn): + raise Exception() - vals = {'entered': False, 'exited': False, 'exception': False} - with pytest.raises(Exception): - do_write_with_exception() - assert vals['entered'] - assert vals['exited'] + assert vals['entered_fn'] + assert vals['exited_fn'] assert vals['exception'] + # test suppression of exceptions from exit_fn + exit_result = True + vals.clear() -def test_transaction_with_context_manager(lock_path): - class TestContextManager(object): - - def __enter__(self): - vals['entered'] = True - - def __exit__(self, t, v, tb): - vals['exited'] = True - vals['exception'] = (t or v or tb) - - def exit_fn(t, v, tb): - vals['exited_fn'] = True - vals['exception_fn'] = (t or v or tb) - - lock = lk.Lock(lock_path) - - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with lk.ReadTransaction(lock, TestContextManager, exit_fn): - pass + # should not raise now. 
+ with transaction(lock, acquire=enter_fn, release=exit_fn): + raise Exception() - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] + assert vals['entered_fn'] assert vals['exited_fn'] - assert not vals['exception_fn'] - - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with lk.ReadTransaction(lock, TestContextManager): - pass - - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] - assert not vals['exited_fn'] - assert not vals['exception_fn'] + assert vals['exception'] - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with lk.WriteTransaction(lock, TestContextManager, exit_fn): - pass - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] - assert vals['exited_fn'] - assert not vals['exception_fn'] +@pytest.mark.parametrize( + "transaction,type", + [(lk.ReadTransaction, "read"), (lk.WriteTransaction, "write")] +) +def test_transaction_with_context_manager(lock_path, transaction, type): + class MockLock(AssertLock): + def assert_acquire_read(self): + assert not vals['entered_ctx'] + assert not vals['exited_ctx'] - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with lk.WriteTransaction(lock, TestContextManager): - pass + def assert_release_read(self): + assert vals['entered_ctx'] + assert vals['exited_ctx'] - assert vals['entered'] - assert vals['exited'] - assert not vals['exception'] - assert not vals['exited_fn'] - assert not vals['exception_fn'] + def assert_acquire_write(self): + assert not vals['entered_ctx'] + assert not vals['exited_ctx'] + def assert_release_write(self): + assert vals['entered_ctx'] + assert vals['exited_ctx'] -def test_transaction_with_context_manager_and_exception(lock_path): class TestContextManager(object): def __enter__(self): - vals['entered'] = True + 
vals['entered_ctx'] = True def __exit__(self, t, v, tb): - vals['exited'] = True - vals['exception'] = (t or v or tb) + assert not vals['released_%s' % type] + vals['exited_ctx'] = True + vals['exception_ctx'] = (t or v or tb) + return exit_ctx_result def exit_fn(t, v, tb): + assert not vals['released_%s' % type] vals['exited_fn'] = True vals['exception_fn'] = (t or v or tb) + return exit_fn_result - lock = lk.Lock(lock_path) - - def do_read_with_exception(exit_fn): - with lk.ReadTransaction(lock, TestContextManager, exit_fn): - raise Exception() + exit_fn_result, exit_ctx_result = False, False + vals = collections.defaultdict(lambda: False) + lock = MockLock(lock_path, vals) - def do_write_with_exception(exit_fn): - with lk.WriteTransaction(lock, TestContextManager, exit_fn): - raise Exception() + with transaction(lock, acquire=TestContextManager, release=exit_fn): + pass - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with pytest.raises(Exception): - do_read_with_exception(exit_fn) - assert vals['entered'] - assert vals['exited'] - assert vals['exception'] + assert vals['entered_ctx'] + assert vals['exited_ctx'] assert vals['exited_fn'] - assert vals['exception_fn'] - - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with pytest.raises(Exception): - do_read_with_exception(None) - assert vals['entered'] - assert vals['exited'] - assert vals['exception'] - assert not vals['exited_fn'] + assert not vals['exception_ctx'] assert not vals['exception_fn'] - vals = {'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with pytest.raises(Exception): - do_write_with_exception(exit_fn) - assert vals['entered'] - assert vals['exited'] - assert vals['exception'] - assert vals['exited_fn'] - assert vals['exception_fn'] + vals.clear() + with transaction(lock, acquire=TestContextManager): + pass - vals = 
{'entered': False, 'exited': False, 'exited_fn': False, - 'exception': False, 'exception_fn': False} - with pytest.raises(Exception): - do_write_with_exception(None) - assert vals['entered'] - assert vals['exited'] - assert vals['exception'] + assert vals['entered_ctx'] + assert vals['exited_ctx'] assert not vals['exited_fn'] + assert not vals['exception_ctx'] assert not vals['exception_fn'] + # below are tests for exceptions with and without suppression + def assert_ctx_and_fn_exception(raises=True): + vals.clear() + + if raises: + with pytest.raises(Exception): + with transaction( + lock, acquire=TestContextManager, release=exit_fn): + raise Exception() + else: + with transaction( + lock, acquire=TestContextManager, release=exit_fn): + raise Exception() + + assert vals['entered_ctx'] + assert vals['exited_ctx'] + assert vals['exited_fn'] + assert vals['exception_ctx'] + assert vals['exception_fn'] + + def assert_only_ctx_exception(raises=True): + vals.clear() + + if raises: + with pytest.raises(Exception): + with transaction(lock, acquire=TestContextManager): + raise Exception() + else: + with transaction(lock, acquire=TestContextManager): + raise Exception() + + assert vals['entered_ctx'] + assert vals['exited_ctx'] + assert not vals['exited_fn'] + assert vals['exception_ctx'] + assert not vals['exception_fn'] + + # no suppression + assert_ctx_and_fn_exception(raises=True) + assert_only_ctx_exception(raises=True) + + # suppress exception only in function + exit_fn_result, exit_ctx_result = True, False + assert_ctx_and_fn_exception(raises=False) + assert_only_ctx_exception(raises=True) + + # suppress exception only in context + exit_fn_result, exit_ctx_result = False, True + assert_ctx_and_fn_exception(raises=False) + assert_only_ctx_exception(raises=False) + + # suppress exception in function and context + exit_fn_result, exit_ctx_result = True, True + assert_ctx_and_fn_exception(raises=False) + assert_only_ctx_exception(raises=False) + def 
test_lock_debug_output(lock_path): host = socket.getfqdn() diff --git a/lib/spack/spack/util/file_cache.py b/lib/spack/spack/util/file_cache.py index d56f2b33c5..0227edf155 100644 --- a/lib/spack/spack/util/file_cache.py +++ b/lib/spack/spack/util/file_cache.py @@ -107,7 +107,8 @@ class FileCache(object): """ return ReadTransaction( - self._get_lock(key), lambda: open(self.cache_path(key))) + self._get_lock(key), acquire=lambda: open(self.cache_path(key)) + ) def write_transaction(self, key): """Get a write transaction on a file cache item. @@ -117,6 +118,10 @@ class FileCache(object): moves the file into place on top of the old file atomically. """ + # TODO: this nested context manager adds a lot of complexity and + # TODO: is pretty hard to reason about in llnl.util.lock. At some + # TODO: point we should just replace it with functions and simplify + # TODO: the locking code. class WriteContextManager(object): def __enter__(cm): # noqa @@ -142,7 +147,8 @@ class FileCache(object): else: os.rename(cm.tmp_filename, cm.orig_filename) - return WriteTransaction(self._get_lock(key), WriteContextManager) + return WriteTransaction( + self._get_lock(key), acquire=WriteContextManager) def mtime(self, key): """Return modification time of cache file, or 0 if it does not exist. -- cgit v1.2.3-70-g09d2 From b3a5f2e3c3ce4e3e9836301504f5b5987117248e Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 16:29:53 -0800 Subject: lock transactions: ensure that nested write transactions write If a write transaction was nested inside a read transaction, it would not write properly on release, e.g., in a sequence like this, inside our `LockTransaction` class: ``` 1 with spack.store.db.read_transaction(): 2 with spack.store.db.write_transaction(): 3 ... 4 with spack.store.db.read_transaction(): ... ``` The WriteTransaction on line 2 had no way of knowing that its `__exit__()` call was the last *write* in the nesting, and it would skip calling its write function. 
The `__exit__()` call of the `ReadTransaction` on line 1 wouldn't know how to write, and the file would never be written. The DB would be correct in memory, but the `ReadTransaction` on line 4 would re-read the whole DB assuming that other processes may have modified it. Since the DB was never written, we got stale data. - [x] Make `Lock.release_write()` return `True` whenever we release the *last write* in a nest. --- lib/spack/llnl/util/lock.py | 8 ++++- lib/spack/spack/test/llnl/util/lock.py | 57 ++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/spack/llnl/util/lock.py b/lib/spack/llnl/util/lock.py index c675c7c452..86a45e2d7c 100644 --- a/lib/spack/llnl/util/lock.py +++ b/lib/spack/llnl/util/lock.py @@ -351,7 +351,13 @@ class Lock(object): else: self._writes -= 1 - return False + + # when the last *write* is released, we call release_fn here + # instead of immediately before releasing the lock. + if self._writes == 0: + return release_fn() if release_fn is not None else True + else: + return False def _debug(self, *args): tty.debug(*args) diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 3bf8a236b1..2b0892a25e 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -783,6 +783,12 @@ class AssertLock(lk.Lock): super(AssertLock, self).__init__(lock_path) self.vals = vals + # assert hooks for subclasses + assert_acquire_read = lambda self: None + assert_acquire_write = lambda self: None + assert_release_read = lambda self: None + assert_release_write = lambda self: None + def acquire_read(self, timeout=None): self.assert_acquire_read() result = super(AssertLock, self).acquire_read(timeout) @@ -1030,6 +1036,57 @@ def test_transaction_with_context_manager(lock_path, transaction, type): assert_only_ctx_exception(raises=False) +def test_nested_write_transaction(lock_path): + """Ensure that the outermost 
write transaction writes.""" + + def write(t, v, tb): + vals['wrote'] = True + + vals = collections.defaultdict(lambda: False) + lock = AssertLock(lock_path, vals) + + # write/write + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + assert not vals['wrote'] + assert vals['wrote'] + + # read/write + vals.clear() + with lk.ReadTransaction(lock): + assert not vals['wrote'] + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + assert vals['wrote'] + + # write/read/write + vals.clear() + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + with lk.ReadTransaction(lock): + assert not vals['wrote'] + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + assert not vals['wrote'] + assert not vals['wrote'] + assert vals['wrote'] + + # read/write/read/write + vals.clear() + with lk.ReadTransaction(lock): + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + with lk.ReadTransaction(lock): + assert not vals['wrote'] + with lk.WriteTransaction(lock, release=write): + assert not vals['wrote'] + assert not vals['wrote'] + assert not vals['wrote'] + assert vals['wrote'] + + def test_lock_debug_output(lock_path): host = socket.getfqdn() -- cgit v1.2.3-70-g09d2 From d87ededddce53a8e3ae18ba8f75e206f6e395793 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 16:31:28 -0800 Subject: lock transactions: avoid redundant reading in write transactions Our `LockTransaction` class was reading overly aggressively. In cases like this: ``` 1 with spack.store.db.read_transaction(): 2 with spack.store.db.write_transaction(): 3 ... ``` The `ReadTransaction` on line 1 would read in the DB, but the WriteTransaction on line 2 would read in the DB *again*, even though we had a read lock the whole time. 
`WriteTransaction`s were only considering nested writes to decide when to read, but they didn't know when we already had a read lock. - [x] `Lock.acquire_write()` return `False` in cases where we already had a read lock. --- lib/spack/llnl/util/lock.py | 7 ++++- lib/spack/spack/test/llnl/util/lock.py | 56 ++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/spack/llnl/util/lock.py b/lib/spack/llnl/util/lock.py index 86a45e2d7c..3a58093491 100644 --- a/lib/spack/llnl/util/lock.py +++ b/lib/spack/llnl/util/lock.py @@ -275,7 +275,12 @@ class Lock(object): wait_time, nattempts = self._lock(fcntl.LOCK_EX, timeout=timeout) self._acquired_debug('WRITE LOCK', wait_time, nattempts) self._writes += 1 - return True + + # return True only if we weren't nested in a read lock. + # TODO: we may need to return two values: whether we got + # the write lock, and whether this is acquiring a read OR + # write lock for the first time. Now it returns the latter. 
+ return self._reads == 0 else: self._writes += 1 return False diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 2b0892a25e..ca879cdc0b 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -1087,6 +1087,62 @@ def test_nested_write_transaction(lock_path): assert vals['wrote'] +def test_nested_reads(lock_path): + """Ensure that write transactions won't re-read data.""" + + def read(): + vals['read'] += 1 + + vals = collections.defaultdict(lambda: 0) + lock = AssertLock(lock_path, vals) + + # read/read + vals.clear() + assert vals['read'] == 0 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + + # write/write + vals.clear() + assert vals['read'] == 0 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + + # read/write + vals.clear() + assert vals['read'] == 0 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + + # write/read/write + vals.clear() + assert vals['read'] == 0 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + + # read/write/read/write + vals.clear() + assert vals['read'] == 0 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.ReadTransaction(lock, acquire=read): + assert vals['read'] == 1 + with lk.WriteTransaction(lock, acquire=read): + assert vals['read'] == 1 + + def test_lock_debug_output(lock_path): host = socket.getfqdn() -- cgit v1.2.3-70-g09d2 From 
be6d7db2a81d9013c4fd05c15b2a01288c5725f0 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 00:21:59 -0800 Subject: performance: add read transactions for `install_all()` and `install()` Environments need to read the DB a lot when installing all specs. - [x] Put a read transaction around `install_all()` and `install()` to avoid repeated locking --- lib/spack/spack/environment.py | 66 ++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py index f7a1310459..2a2be00425 100644 --- a/lib/spack/spack/environment.py +++ b/lib/spack/spack/environment.py @@ -992,19 +992,22 @@ class Environment(object): spec = Spec(user_spec) - if self.add(spec): - concrete = concrete_spec if concrete_spec else spec.concretized() - self._add_concrete_spec(spec, concrete) - else: - # spec might be in the user_specs, but not installed. - # TODO: Redo name-based comparison for old style envs - spec = next(s for s in self.user_specs if s.satisfies(user_spec)) - concrete = self.specs_by_hash.get(spec.build_hash()) - if not concrete: - concrete = spec.concretized() + with spack.store.db.read_transaction(): + if self.add(spec): + concrete = concrete_spec or spec.concretized() self._add_concrete_spec(spec, concrete) + else: + # spec might be in the user_specs, but not installed. 
+ # TODO: Redo name-based comparison for old style envs + spec = next( + s for s in self.user_specs if s.satisfies(user_spec) + ) + concrete = self.specs_by_hash.get(spec.build_hash()) + if not concrete: + concrete = spec.concretized() + self._add_concrete_spec(spec, concrete) - self._install(concrete, **install_args) + self._install(concrete, **install_args) def _install(self, spec, **install_args): spec.package.do_install(**install_args) @@ -1177,26 +1180,27 @@ class Environment(object): def install_all(self, args=None): """Install all concretized specs in an environment.""" - for concretized_hash in self.concretized_order: - spec = self.specs_by_hash[concretized_hash] - - # Parse cli arguments and construct a dictionary - # that will be passed to Package.do_install API - kwargs = dict() - if args: - spack.cmd.install.update_kwargs_from_args(args, kwargs) - - self._install(spec, **kwargs) - - if not spec.external: - # Link the resulting log file into logs dir - build_log_link = os.path.join( - self.log_path, '%s-%s.log' % (spec.name, spec.dag_hash(7))) - if os.path.lexists(build_log_link): - os.remove(build_log_link) - os.symlink(spec.package.build_log_path, build_log_link) - - self.regenerate_views() + with spack.store.db.read_transaction(): + for concretized_hash in self.concretized_order: + spec = self.specs_by_hash[concretized_hash] + + # Parse cli arguments and construct a dictionary + # that will be passed to Package.do_install API + kwargs = dict() + if args: + spack.cmd.install.update_kwargs_from_args(args, kwargs) + + self._install(spec, **kwargs) + + if not spec.external: + # Link the resulting log file into logs dir + log_name = '%s-%s' % (spec.name, spec.dag_hash(7)) + build_log_link = os.path.join(self.log_path, log_name) + if os.path.lexists(build_log_link): + os.remove(build_log_link) + os.symlink(spec.package.build_log_path, build_log_link) + + self.regenerate_views() def all_specs_by_hash(self): """Map of hashes to spec for all specs in this 
environment.""" -- cgit v1.2.3-70-g09d2 From 79ddf6cf0daa95ffef3aa1457b893bb245fa23de Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 11:34:12 -0800 Subject: performance: only regenerate env views once in `spack install` `spack install` previously concretized, writes the entire environment out, regenerated views, then wrote and regenerated views again. Regenerating views is slow, so ensure that we only do that once. - [x] add an option to env.write() to skip view regeneration - [x] add a note on whether regenerate_views() shouldn't just be a separate operation -- not clear if we want to keep it as part of write to ensure consistency, or take it out to avoid performance issues. --- lib/spack/spack/cmd/install.py | 7 ++++++- lib/spack/spack/environment.py | 31 +++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/cmd/install.py b/lib/spack/spack/cmd/install.py index 8fd63beede..ab012eaead 100644 --- a/lib/spack/spack/cmd/install.py +++ b/lib/spack/spack/cmd/install.py @@ -229,9 +229,14 @@ def install(parser, args, **kwargs): if not args.only_concrete: concretized_specs = env.concretize() ev.display_specs(concretized_specs) - env.write() + + # save view regeneration for later, so that we only do it + # once, as it can be slow. + env.write(regenerate_views=False) + tty.msg("Installing environment %s" % env.name) env.install_all(args) + env.regenerate_views() return else: tty.die("install requires a package argument or a spack.yaml file") diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py index 2a2be00425..5b526bbe11 100644 --- a/lib/spack/spack/environment.py +++ b/lib/spack/spack/environment.py @@ -1179,7 +1179,12 @@ class Environment(object): self.specs_by_hash[h] = concrete def install_all(self, args=None): - """Install all concretized specs in an environment.""" + """Install all concretized specs in an environment. 
+ + Note: this does not regenerate the views for the environment; + that needs to be done separately with a call to write(). + + """ with spack.store.db.read_transaction(): for concretized_hash in self.concretized_order: spec = self.specs_by_hash[concretized_hash] @@ -1200,8 +1205,6 @@ class Environment(object): os.remove(build_log_link) os.symlink(spec.package.build_log_path, build_log_link) - self.regenerate_views() - def all_specs_by_hash(self): """Map of hashes to spec for all specs in this environment.""" # Note this uses dag-hashes calculated without build deps as keys, @@ -1367,10 +1370,17 @@ class Environment(object): self.concretized_order = [ old_hash_to_new.get(h, h) for h in self.concretized_order] - def write(self): + def write(self, regenerate_views=True): """Writes an in-memory environment to its location on disk. - This will also write out package files for each newly concretized spec. + Write out package files for each newly concretized spec. Also + regenerate any views associated with the environment, if + regenerate_views is True. + + Arguments: + regenerate_views (bool): regenerate views as well as + writing if True. + """ # ensure path in var/spack/environments fs.mkdirp(self.path) @@ -1478,9 +1488,14 @@ class Environment(object): with fs.write_tmp_and_move(self.manifest_path) as f: _write_yaml(self.yaml, f) - # TODO: for operations that just add to the env (install etc.) this - # could just call update_view - self.regenerate_views() + # TODO: rethink where this needs to happen along with + # writing. For some of the commands (like install, which write + # concrete specs AND regen) this might as well be a separate + # call. But, having it here makes the views consistent witht the + # concretized environment for most operations. Which is the + # special case? 
+ if regenerate_views: + self.regenerate_views() def __enter__(self): self._previous_active = _active_environment -- cgit v1.2.3-70-g09d2 From f01368739741ef9b3bcc827f7dbc2ab9d5b48df4 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 16:50:15 -0800 Subject: performance: reduce system calls required for remove_dead_links `os.path.exists()` will report False if the target of a symlink doesn't exist, so we can avoid a costly call to realpath here. --- lib/spack/llnl/util/filesystem.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/spack/llnl/util/filesystem.py b/lib/spack/llnl/util/filesystem.py index 0095e3fd21..f6c8d161d7 100644 --- a/lib/spack/llnl/util/filesystem.py +++ b/lib/spack/llnl/util/filesystem.py @@ -917,10 +917,8 @@ def remove_if_dead_link(path): Parameters: path (str): The potential dead link """ - if os.path.islink(path): - real_path = os.path.realpath(path) - if not os.path.exists(real_path): - os.unlink(path) + if os.path.islink(path) and not os.path.exists(path): + os.unlink(path) def remove_linked_tree(path): -- cgit v1.2.3-70-g09d2 From e3939b0c72b2c6bc5f41e9f33de3c04cf4785c05 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 23:45:30 -0800 Subject: performance: don't recompute hashes when regenerating environments `ViewDescriptor.regenerate()` was copying specs and stripping build dependencies, which clears `_hash` and other cached fields on concrete specs, which causes a bunch of YAML hashes to be recomputed. - [x] Preserve the `_hash` and `_normal` fields on stripped specs, as these will be unchanged. 
--- lib/spack/spack/environment.py | 11 ++++++++--- lib/spack/spack/spec.py | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py index 5b526bbe11..56440fbe92 100644 --- a/lib/spack/spack/environment.py +++ b/lib/spack/spack/environment.py @@ -511,10 +511,15 @@ class ViewDescriptor(object): specs = all_specs if self.link == 'all' else roots for spec in specs: # The view does not store build deps, so if we want it to - # recognize environment specs (which do store build deps), then - # they need to be stripped + # recognize environment specs (which do store build deps), + # then they need to be stripped. if spec.concrete: # Do not link unconcretized roots - specs_for_view.append(spec.copy(deps=('link', 'run'))) + # We preserve _hash _normal to avoid recomputing DAG + # hashes (DAG hashes don't consider build deps) + spec_copy = spec.copy(deps=('link', 'run')) + spec_copy._hash = spec._hash + spec_copy._normal = spec._normal + specs_for_view.append(spec_copy) # regeneration queries the database quite a bit; this read # transaction ensures that we don't repeatedly lock/unlock. diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 86ac62e0d7..c553da796d 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -2999,7 +2999,7 @@ class Spec(object): before possibly copying the dependencies of ``other`` onto ``self`` caches (bool or None): preserve cached fields such as - ``_normal``, ``_concrete``, and ``_cmp_key_cache``. By + ``_normal``, ``_hash``, and ``_cmp_key_cache``. By default this is ``False`` if DAG structure would be changed by the copy, ``True`` if it's an exact copy. 
-- cgit v1.2.3-70-g09d2 From e22d3250dd78eac769906620da55594690fbbca7 Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Sat, 21 Dec 2019 23:45:47 -0800 Subject: performance: don't read `spec.yaml` files twice in view regeneration `ViewDescriptor.regenerate()` calls `get_all_specs()`, which reads `spec.yaml` files, which is slow. It's fine to do this once, but `view.remove_specs()` *also* calls it immediately afterwards. - [x] Pass the result of `get_all_specs()` as an optional parameter to `view.remove_specs()` to avoid reading `spec.yaml` files twice. --- lib/spack/spack/environment.py | 7 +++++-- lib/spack/spack/filesystem_view.py | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/environment.py b/lib/spack/spack/environment.py index 56440fbe92..bf9af075ce 100644 --- a/lib/spack/spack/environment.py +++ b/lib/spack/spack/environment.py @@ -534,9 +534,12 @@ class ViewDescriptor(object): tty.msg("Updating view at {0}".format(self.root)) rm_specs = specs_in_view - installed_specs_for_view - view.remove_specs(*rm_specs, with_dependents=False) - add_specs = installed_specs_for_view - specs_in_view + + # pass all_specs in, as it's expensive to read all the + # spec.yaml files twice. 
+ view.remove_specs(*rm_specs, with_dependents=False, + all_specs=specs_in_view) view.add_specs(*add_specs, with_dependencies=False) diff --git a/lib/spack/spack/filesystem_view.py b/lib/spack/spack/filesystem_view.py index 448254f26b..5455ccb107 100644 --- a/lib/spack/spack/filesystem_view.py +++ b/lib/spack/spack/filesystem_view.py @@ -371,6 +371,9 @@ class YamlFilesystemView(FilesystemView): with_dependents = kwargs.get("with_dependents", True) with_dependencies = kwargs.get("with_dependencies", False) + # caller can pass this in, as get_all_specs() is expensive + all_specs = kwargs.get("all_specs", None) or set(self.get_all_specs()) + specs = set(specs) if with_dependencies: @@ -379,8 +382,6 @@ class YamlFilesystemView(FilesystemView): if kwargs.get("exclude", None): specs = set(filter_exclude(specs, kwargs["exclude"])) - all_specs = set(self.get_all_specs()) - to_deactivate = specs to_keep = all_specs - to_deactivate -- cgit v1.2.3-70-g09d2 From 231e237764e9bcf96c8ac233d05e302a16b2c02d Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Mon, 23 Dec 2019 23:23:22 -0800 Subject: version bump: 0.13.3 --- lib/spack/spack/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/spack/spack/__init__.py b/lib/spack/spack/__init__.py index f7caf373e3..0074409661 100644 --- a/lib/spack/spack/__init__.py +++ b/lib/spack/spack/__init__.py @@ -5,7 +5,7 @@ #: major, minor, patch version for Spack, in a tuple -spack_version_info = (0, 13, 2) +spack_version_info = (0, 13, 3) #: String containing Spack version joined with .'s spack_version = '.'.join(str(v) for v in spack_version_info) -- cgit v1.2.3-70-g09d2