summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTodd Gamblin <tgamblin@llnl.gov>2022-01-19 01:04:34 -0800
committerTodd Gamblin <tgamblin@llnl.gov>2022-05-13 10:45:12 -0700
commite02020c80a8749d82c271913df0d0f9d9e4fac0b (patch)
tree9636b64488641ee07bf01ac0c1f47d74cbe90ead
parentd900ac2003a39d1789ee62107e3fc2c6e2d8a78f (diff)
downloadspack-e02020c80a8749d82c271913df0d0f9d9e4fac0b.tar.gz
spack-e02020c80a8749d82c271913df0d0f9d9e4fac0b.tar.bz2
spack-e02020c80a8749d82c271913df0d0f9d9e4fac0b.tar.xz
spack-e02020c80a8749d82c271913df0d0f9d9e4fac0b.zip
Include all deps and package content in the `dag_hash()`
For a long time, Spack has used a coarser hash to identify packages than it likely should. Packages are identified by `dag_hash()`, which includes only link and run dependencies. Build dependencies are stripped before hashing, and we have notincluded hashes of build artifacts or the `package.py` files used to build. This means the DAG hash actually doesn't represent all the things Spack can build, and it reduces reproducibility. We did this because, in the early days, users were (rightly) annoyed when a new version of CMake, autotools, or some other build dependency would necessitate a rebuild of their entire stack. Coarsening the hash avoided this issue and enabled a modicum of stability when only reusing packages by hash match. Now that we have `--reuse`, we don't need to be so careful. Users can avoid unnecessary rebuilds much more easily, and we can add more provenance to the spec without worrying that frequent hash changes will cause too many rebuilds. This commit starts the refactor with the following major change: - [x] Make `Spec.dag_hash()` include build, run, and link dependencides and the package hash (it is now equivalent to `full_hash()`). It also adds a couple of bugfixes for problems discovered during the switch: - [x] Don't add a `package_hash()` in `to_node_dict()` unless the spec is concrete (fixes breaks on abstract specs) - [x] Don't add source ids to the package hash for packages without a known fetch strategy (may mock packages are like this) - [x] Change how `Spec.patches` is memoized. Using `llnl.util.lang.memoized` on `Spec` objects causes specs to be stored in a `dict`, which means they need a hash. But, `dag_hash()` now includes patch `sha256`'s via the package hash, which can lead to infinite recursion
-rw-r--r--lib/spack/spack/hash_types.py13
-rw-r--r--lib/spack/spack/package.py7
-rw-r--r--lib/spack/spack/spec.py62
-rw-r--r--lib/spack/spack/test/spec_yaml.py4
4 files changed, 45 insertions, 41 deletions
diff --git a/lib/spack/spack/hash_types.py b/lib/spack/spack/hash_types.py
index 000e2885fe..b6e8f12a74 100644
--- a/lib/spack/spack/hash_types.py
+++ b/lib/spack/spack/hash_types.py
@@ -34,9 +34,14 @@ class SpecHashDescriptor(object):
return '_' + self.name
-#: Default Hash descriptor, used by Spec.dag_hash() and stored in the DB.
+#: Spack's deployment hash. Includes all inputs that can affect how a package is built.
dag_hash = SpecHashDescriptor(
- deptype=('link', 'run'), package_hash=False, name='hash')
+ deptype=('build', 'link', 'run'), package_hash=True, name='hash')
+
+
+#: Same as dag_hash; old name.
+full_hash = SpecHashDescriptor(
+ deptype=('build', 'link', 'run'), package_hash=True, name='full_hash')
#: Hash descriptor that includes build dependencies.
@@ -51,10 +56,6 @@ process_hash = SpecHashDescriptor(
name='process_hash'
)
-#: Full hash used in build pipelines to determine when to rebuild packages.
-full_hash = SpecHashDescriptor(
- deptype=('build', 'link', 'run'), package_hash=True, name='full_hash')
-
#: Package hash used as part of full hash
package_hash = SpecHashDescriptor(
diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py
index 95b029f3bb..55129d318e 100644
--- a/lib/spack/spack/package.py
+++ b/lib/spack/spack/package.py
@@ -1684,8 +1684,12 @@ class PackageBase(six.with_metaclass(PackageMeta, PackageViewMixin, object)):
hash_content = list()
try:
source_id = fs.for_package_version(self, self.version).source_id()
- except fs.ExtrapolationError:
+ except (fs.ExtrapolationError, fs.InvalidArgsError):
+ # ExtrapolationError happens if the package has no fetchers defined.
+ # InvalidArgsError happens when there are version directives with args,
+ # but none of them identifies an actual fetcher.
source_id = None
+
if not source_id:
# TODO? in cases where a digest or source_id isn't available,
# should this attempt to download the source and set one? This
@@ -1699,6 +1703,7 @@ class PackageBase(six.with_metaclass(PackageMeta, PackageViewMixin, object)):
hash_content.append(''.encode('utf-8'))
else:
hash_content.append(source_id.encode('utf-8'))
+
hash_content.extend(':'.join((p.sha256, str(p.level))).encode('utf-8')
for p in self.spec.patches)
hash_content.append(package_hash(self.spec, source=content).encode('utf-8'))
diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py
index 5189844b63..ef75eaff2c 100644
--- a/lib/spack/spack/spec.py
+++ b/lib/spack/spack/spec.py
@@ -1803,13 +1803,20 @@ class Spec(object):
def dag_hash(self, length=None):
"""This is Spack's default hash, used to identify installations.
- At the moment, it excludes build dependencies to avoid rebuilding
- packages whenever build dependency versions change. We will
- revise this to include more detailed provenance when the
- concretizer can more aggressievly reuse installed dependencies.
+ Same as the full hash (includes package hash and build/link/run deps).
+
+ NOTE: Versions of Spack prior to 0.18 only included link and run deps.
"""
return self._cached_hash(ht.dag_hash, length)
+ def full_hash(self, length=None):
+ """Hash that includes all build and run inputs for a spec.
+
+ Inputs are: the package hash (to identify the package.py),
+ build, link, and run dependencies.
+ """
+ return self._cached_hash(ht.full_hash, length)
+
def build_hash(self, length=None):
"""Hash used to store specs in environments.
@@ -1819,21 +1826,13 @@ class Spec(object):
return self._cached_hash(ht.build_hash, length)
def process_hash(self, length=None):
- """Hash used to store specs in environments.
+ """Hash used to transfer specs among processes.
This hash includes build and test dependencies and is only used to
serialize a spec and pass it around among processes.
"""
return self._cached_hash(ht.process_hash, length)
- def full_hash(self, length=None):
- """Hash to determine when to rebuild packages in the build pipeline.
-
- This hash includes the package hash, so that we know when package
- files has changed between builds.
- """
- return self._cached_hash(ht.full_hash, length)
-
def dag_hash_bit_prefix(self, bits):
"""Get the first <bits> bits of the DAG hash as an integer type."""
return spack.util.hash.base32_prefix_bits(self.dag_hash(), bits)
@@ -1931,7 +1930,7 @@ class Spec(object):
if hasattr(variant, '_patches_in_order_of_appearance'):
d['patches'] = variant._patches_in_order_of_appearance
- if hash.package_hash:
+ if self._concrete and hash.package_hash:
package_hash = self.package_hash()
# Full hashes are in bytes
@@ -2845,13 +2844,13 @@ class Spec(object):
@staticmethod
def ensure_no_deprecated(root):
- """Raise is a deprecated spec is in the dag.
+ """Raise if a deprecated spec is in the dag.
Args:
root (Spec): root spec to be analyzed
Raises:
- SpecDeprecatedError: is any deprecated spec is found
+ SpecDeprecatedError: if any deprecated spec is found
"""
deprecated = []
with spack.store.db.read_transaction():
@@ -3683,7 +3682,6 @@ class Spec(object):
return [spec for spec in self.traverse() if spec.virtual]
@property # type: ignore[misc] # decorated prop not supported in mypy
- @lang.memoized
def patches(self):
"""Return patch objects for any patch sha256 sums on this Spec.
@@ -3696,21 +3694,21 @@ class Spec(object):
if not self.concrete:
raise spack.error.SpecError("Spec is not concrete: " + str(self))
- if 'patches' not in self.variants:
- return []
-
- # FIXME: _patches_in_order_of_appearance is attached after
- # FIXME: concretization to store the order of patches somewhere.
- # FIXME: Needs to be refactored in a cleaner way.
-
- # translate patch sha256sums to patch objects by consulting the index
- patches = []
- for sha256 in self.variants['patches']._patches_in_order_of_appearance:
- index = spack.repo.path.patch_index
- patch = index.patch_for_package(sha256, self.package)
- patches.append(patch)
-
- return patches
+ if not hasattr(self, "_patches"):
+ self._patches = []
+ if 'patches' in self.variants:
+ # FIXME: _patches_in_order_of_appearance is attached after
+ # FIXME: concretization to store the order of patches somewhere.
+ # FIXME: Needs to be refactored in a cleaner way.
+
+ # translate patch sha256sums to patch objects by consulting the index
+ self._patches = []
+ for sha256 in self.variants['patches']._patches_in_order_of_appearance:
+ index = spack.repo.path.patch_index
+ patch = index.patch_for_package(sha256, self.package)
+ self._patches.append(patch)
+
+ return self._patches
def _dup(self, other, deps=True, cleardeps=True):
"""Copy the spec other into self. This is an overwriting
diff --git a/lib/spack/spack/test/spec_yaml.py b/lib/spack/spack/test/spec_yaml.py
index 8aa14b9230..e3a7308605 100644
--- a/lib/spack/spack/test/spec_yaml.py
+++ b/lib/spack/spack/test/spec_yaml.py
@@ -3,9 +3,9 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-"""Test YAML serialization for specs.
+"""Test YAML and JSON serialization for specs.
-YAML format preserves DAG information in the spec.
+The YAML and JSON formats preserve DAG information in the spec.
"""
import ast