diff options
author | Todd Gamblin <tgamblin@llnl.gov> | 2022-01-19 01:04:34 -0800 |
---|---|---|
committer | Todd Gamblin <tgamblin@llnl.gov> | 2022-05-13 10:45:12 -0700 |
commit | e02020c80a8749d82c271913df0d0f9d9e4fac0b (patch) | |
tree | 9636b64488641ee07bf01ac0c1f47d74cbe90ead | |
parent | d900ac2003a39d1789ee62107e3fc2c6e2d8a78f (diff) | |
download | spack-e02020c80a8749d82c271913df0d0f9d9e4fac0b.tar.gz spack-e02020c80a8749d82c271913df0d0f9d9e4fac0b.tar.bz2 spack-e02020c80a8749d82c271913df0d0f9d9e4fac0b.tar.xz spack-e02020c80a8749d82c271913df0d0f9d9e4fac0b.zip |
Include all deps and package content in the `dag_hash()`
For a long time, Spack has used a coarser hash to identify packages
than it likely should. Packages are identified by `dag_hash()`, which
includes only link and run dependencies. Build dependencies are
stripped before hashing, and we have notincluded hashes of build
artifacts or the `package.py` files used to build. This means the
DAG hash actually doesn't represent all the things Spack can build,
and it reduces reproducibility.
We did this because, in the early days, users were (rightly) annoyed
when a new version of CMake, autotools, or some other build dependency
would necessitate a rebuild of their entire stack. Coarsening the hash
avoided this issue and enabled a modicum of stability when only reusing
packages by hash match.
Now that we have `--reuse`, we don't need to be so careful. Users can
avoid unnecessary rebuilds much more easily, and we can add more
provenance to the spec without worrying that frequent hash changes
will cause too many rebuilds.
This commit starts the refactor with the following major change:
- [x] Make `Spec.dag_hash()` include build, run, and link
dependencides and the package hash (it is now equivalent to
`full_hash()`).
It also adds a couple of bugfixes for problems discovered during
the switch:
- [x] Don't add a `package_hash()` in `to_node_dict()` unless
the spec is concrete (fixes breaks on abstract specs)
- [x] Don't add source ids to the package hash for packages without
a known fetch strategy (may mock packages are like this)
- [x] Change how `Spec.patches` is memoized. Using
`llnl.util.lang.memoized` on `Spec` objects causes specs to
be stored in a `dict`, which means they need a hash. But,
`dag_hash()` now includes patch `sha256`'s via the package
hash, which can lead to infinite recursion
-rw-r--r-- | lib/spack/spack/hash_types.py | 13 | ||||
-rw-r--r-- | lib/spack/spack/package.py | 7 | ||||
-rw-r--r-- | lib/spack/spack/spec.py | 62 | ||||
-rw-r--r-- | lib/spack/spack/test/spec_yaml.py | 4 |
4 files changed, 45 insertions, 41 deletions
diff --git a/lib/spack/spack/hash_types.py b/lib/spack/spack/hash_types.py index 000e2885fe..b6e8f12a74 100644 --- a/lib/spack/spack/hash_types.py +++ b/lib/spack/spack/hash_types.py @@ -34,9 +34,14 @@ class SpecHashDescriptor(object): return '_' + self.name -#: Default Hash descriptor, used by Spec.dag_hash() and stored in the DB. +#: Spack's deployment hash. Includes all inputs that can affect how a package is built. dag_hash = SpecHashDescriptor( - deptype=('link', 'run'), package_hash=False, name='hash') + deptype=('build', 'link', 'run'), package_hash=True, name='hash') + + +#: Same as dag_hash; old name. +full_hash = SpecHashDescriptor( + deptype=('build', 'link', 'run'), package_hash=True, name='full_hash') #: Hash descriptor that includes build dependencies. @@ -51,10 +56,6 @@ process_hash = SpecHashDescriptor( name='process_hash' ) -#: Full hash used in build pipelines to determine when to rebuild packages. -full_hash = SpecHashDescriptor( - deptype=('build', 'link', 'run'), package_hash=True, name='full_hash') - #: Package hash used as part of full hash package_hash = SpecHashDescriptor( diff --git a/lib/spack/spack/package.py b/lib/spack/spack/package.py index 95b029f3bb..55129d318e 100644 --- a/lib/spack/spack/package.py +++ b/lib/spack/spack/package.py @@ -1684,8 +1684,12 @@ class PackageBase(six.with_metaclass(PackageMeta, PackageViewMixin, object)): hash_content = list() try: source_id = fs.for_package_version(self, self.version).source_id() - except fs.ExtrapolationError: + except (fs.ExtrapolationError, fs.InvalidArgsError): + # ExtrapolationError happens if the package has no fetchers defined. + # InvalidArgsError happens when there are version directives with args, + # but none of them identifies an actual fetcher. source_id = None + if not source_id: # TODO? in cases where a digest or source_id isn't available, # should this attempt to download the source and set one? This @@ -1699,6 +1703,7 @@ class PackageBase(six.with_metaclass(PackageMeta, PackageViewMixin, object)): hash_content.append(''.encode('utf-8')) else: hash_content.append(source_id.encode('utf-8')) + hash_content.extend(':'.join((p.sha256, str(p.level))).encode('utf-8') for p in self.spec.patches) hash_content.append(package_hash(self.spec, source=content).encode('utf-8')) diff --git a/lib/spack/spack/spec.py b/lib/spack/spack/spec.py index 5189844b63..ef75eaff2c 100644 --- a/lib/spack/spack/spec.py +++ b/lib/spack/spack/spec.py @@ -1803,13 +1803,20 @@ class Spec(object): def dag_hash(self, length=None): """This is Spack's default hash, used to identify installations. - At the moment, it excludes build dependencies to avoid rebuilding - packages whenever build dependency versions change. We will - revise this to include more detailed provenance when the - concretizer can more aggressievly reuse installed dependencies. + Same as the full hash (includes package hash and build/link/run deps). + + NOTE: Versions of Spack prior to 0.18 only included link and run deps. """ return self._cached_hash(ht.dag_hash, length) + def full_hash(self, length=None): + """Hash that includes all build and run inputs for a spec. + + Inputs are: the package hash (to identify the package.py), + build, link, and run dependencies. + """ + return self._cached_hash(ht.full_hash, length) + def build_hash(self, length=None): """Hash used to store specs in environments. @@ -1819,21 +1826,13 @@ class Spec(object): return self._cached_hash(ht.build_hash, length) def process_hash(self, length=None): - """Hash used to store specs in environments. + """Hash used to transfer specs among processes. This hash includes build and test dependencies and is only used to serialize a spec and pass it around among processes. """ return self._cached_hash(ht.process_hash, length) - def full_hash(self, length=None): - """Hash to determine when to rebuild packages in the build pipeline. - - This hash includes the package hash, so that we know when package - files has changed between builds. - """ - return self._cached_hash(ht.full_hash, length) - def dag_hash_bit_prefix(self, bits): """Get the first <bits> bits of the DAG hash as an integer type.""" return spack.util.hash.base32_prefix_bits(self.dag_hash(), bits) @@ -1931,7 +1930,7 @@ class Spec(object): if hasattr(variant, '_patches_in_order_of_appearance'): d['patches'] = variant._patches_in_order_of_appearance - if hash.package_hash: + if self._concrete and hash.package_hash: package_hash = self.package_hash() # Full hashes are in bytes @@ -2845,13 +2844,13 @@ class Spec(object): @staticmethod def ensure_no_deprecated(root): - """Raise is a deprecated spec is in the dag. + """Raise if a deprecated spec is in the dag. Args: root (Spec): root spec to be analyzed Raises: - SpecDeprecatedError: is any deprecated spec is found + SpecDeprecatedError: if any deprecated spec is found """ deprecated = [] with spack.store.db.read_transaction(): @@ -3683,7 +3682,6 @@ class Spec(object): return [spec for spec in self.traverse() if spec.virtual] @property # type: ignore[misc] # decorated prop not supported in mypy - @lang.memoized def patches(self): """Return patch objects for any patch sha256 sums on this Spec. @@ -3696,21 +3694,21 @@ class Spec(object): if not self.concrete: raise spack.error.SpecError("Spec is not concrete: " + str(self)) - if 'patches' not in self.variants: - return [] - - # FIXME: _patches_in_order_of_appearance is attached after - # FIXME: concretization to store the order of patches somewhere. - # FIXME: Needs to be refactored in a cleaner way. - - # translate patch sha256sums to patch objects by consulting the index - patches = [] - for sha256 in self.variants['patches']._patches_in_order_of_appearance: - index = spack.repo.path.patch_index - patch = index.patch_for_package(sha256, self.package) - patches.append(patch) - - return patches + if not hasattr(self, "_patches"): + self._patches = [] + if 'patches' in self.variants: + # FIXME: _patches_in_order_of_appearance is attached after + # FIXME: concretization to store the order of patches somewhere. + # FIXME: Needs to be refactored in a cleaner way. + + # translate patch sha256sums to patch objects by consulting the index + self._patches = [] + for sha256 in self.variants['patches']._patches_in_order_of_appearance: + index = spack.repo.path.patch_index + patch = index.patch_for_package(sha256, self.package) + self._patches.append(patch) + + return self._patches def _dup(self, other, deps=True, cleardeps=True): """Copy the spec other into self. This is an overwriting diff --git a/lib/spack/spack/test/spec_yaml.py b/lib/spack/spack/test/spec_yaml.py index 8aa14b9230..e3a7308605 100644 --- a/lib/spack/spack/test/spec_yaml.py +++ b/lib/spack/spack/test/spec_yaml.py @@ -3,9 +3,9 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -"""Test YAML serialization for specs. +"""Test YAML and JSON serialization for specs. -YAML format preserves DAG information in the spec. +The YAML and JSON formats preserve DAG information in the spec. """ import ast |