From e0017a66668c855dcf21d5f1e5185f8bdd2963e4 Mon Sep 17 00:00:00 2001 From: Scott Wittenburg Date: Sat, 10 Jul 2021 00:49:09 -0600 Subject: Fix out of date remote spec.yamls while updating the index (#24811) --- lib/spack/spack/binary_distribution.py | 85 ++++++++++++++++++++++++++++++---- lib/spack/spack/test/bindist.py | 55 ++++++++++++++++++++++ 2 files changed, 130 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index 6ca15df122..a9a2cf6af2 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -13,6 +13,7 @@ import shutil import sys import tarfile import tempfile +import traceback from contextlib import closing import ruamel.yaml as yaml @@ -27,6 +28,7 @@ import spack.cmd import spack.config as config import spack.database as spack_db import spack.fetch_strategy as fs +import spack.hash_types as ht import spack.mirror import spack.relocate as relocate import spack.util.file_cache as file_cache @@ -709,12 +711,6 @@ def generate_package_index(cache_prefix): cache_prefix. This page contains a link for each binary package (.yaml) under cache_prefix. """ - tmpdir = tempfile.mkdtemp() - db_root_dir = os.path.join(tmpdir, 'db_root') - db = spack_db.Database(None, db_dir=db_root_dir, - enable_transaction_locking=False, - record_fields=['spec', 'ref_count', 'in_buildcache']) - try: file_list = ( entry @@ -735,22 +731,90 @@ def generate_package_index(cache_prefix): tty.debug('Retrieving spec.yaml files from {0} to build index'.format( cache_prefix)) + + all_mirror_specs = {} + for file_path in file_list: try: yaml_url = url_util.join(cache_prefix, file_path) tty.debug('fetching {0}'.format(yaml_url)) _, _, yaml_file = web_util.read_from_url(yaml_url) yaml_contents = codecs.getreader('utf-8')(yaml_file).read() - # yaml_obj = syaml.load(yaml_contents) - # s = Spec.from_yaml(yaml_obj) + spec_dict = syaml.load(yaml_contents) s = Spec.from_yaml(yaml_contents) - db.add(s, None) - db.mark(s, 'in_buildcache', True) + all_mirror_specs[s.dag_hash()] = { + 'yaml_url': yaml_url, + 'spec': s, + 'num_deps': len(list(s.traverse(root=False))), + 'binary_cache_checksum': spec_dict['binary_cache_checksum'], + 'buildinfo': spec_dict['buildinfo'], + } except (URLError, web_util.SpackWebError) as url_err: tty.error('Error reading spec.yaml: {0}'.format(file_path)) tty.error(url_err) + sorted_specs = sorted(all_mirror_specs.keys(), + key=lambda k: all_mirror_specs[k]['num_deps']) + + tmpdir = tempfile.mkdtemp() + db_root_dir = os.path.join(tmpdir, 'db_root') + db = spack_db.Database(None, db_dir=db_root_dir, + enable_transaction_locking=False, + record_fields=['spec', 'ref_count', 'in_buildcache']) + try: + tty.debug('Specs sorted by number of dependencies:') + for dag_hash in sorted_specs: + spec_record = all_mirror_specs[dag_hash] + s = spec_record['spec'] + num_deps = spec_record['num_deps'] + tty.debug(' {0}/{1} -> {2}'.format( + s.name, dag_hash[:7], num_deps)) + if num_deps > 0: + # Check each of this spec's dependencies (which we have already + # processed), as they are the source of truth for their own + # full hash. If the full hash we have for any deps does not + # match what those deps have themselves, then we need to splice + # this spec with those deps, and push this spliced spec + # (spec.yaml file) back to the mirror, as well as update the + # all_mirror_specs dictionary with this spliced spec. + to_splice = [] + for dep in s.dependencies(): + dep_dag_hash = dep.dag_hash() + if dep_dag_hash in all_mirror_specs: + true_dep = all_mirror_specs[dep_dag_hash]['spec'] + if true_dep.full_hash() != dep.full_hash(): + to_splice.append(true_dep) + + if to_splice: + tty.debug(' needs the following deps spliced:') + for true_dep in to_splice: + tty.debug(' {0}/{1}'.format( + true_dep.name, true_dep.dag_hash()[:7])) + s = s.splice(true_dep, True) + + # Push this spliced spec back to the mirror + spliced_yaml = s.to_dict(hash=ht.full_hash) + for key in ['binary_cache_checksum', 'buildinfo']: + spliced_yaml[key] = spec_record[key] + + temp_yaml_path = os.path.join(tmpdir, 'spliced.spec.yaml') + with open(temp_yaml_path, 'w') as fd: + fd.write(syaml.dump(spliced_yaml)) + + spliced_yaml_url = spec_record['yaml_url'] + web_util.push_to_url( + temp_yaml_path, spliced_yaml_url, keep_original=False) + tty.debug(' spliced and wrote {0}'.format( + spliced_yaml_url)) + spec_record['spec'] = s + + db.add(s, None) + db.mark(s, 'in_buildcache', True) + + # Now that we have fixed any old spec yamls that might have had the wrong + # full hash for their dependencies, we can generate the index, compute + # the hash, and push those files to the mirror. index_json_path = os.path.join(db_root_dir, 'index.json') with open(index_json_path, 'w') as f: db._write_to_file(f) @@ -782,6 +846,7 @@ def generate_package_index(cache_prefix): msg = 'Encountered problem pushing package index to {0}: {1}'.format( cache_prefix, err) tty.warn(msg) + tty.debug('\n' + traceback.format_exc()) finally: shutil.rmtree(tmpdir) diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py index 2e6d599422..76d51aa3f2 100644 --- a/lib/spack/spack/test/bindist.py +++ b/lib/spack/spack/test/bindist.py @@ -10,12 +10,15 @@ import sys import py import pytest +import llnl.util.filesystem as fs + import spack.binary_distribution as bindist import spack.config import spack.hooks.sbang as sbang import spack.main import spack.mirror import spack.repo +import spack.spec as spec import spack.store import spack.util.gpg import spack.util.web as web_util @@ -589,3 +592,55 @@ def test_update_sbang(tmpdir, test_mirror): open(str(installed_script_style_2_path)).read() uninstall_cmd('-y', '/%s' % new_spec.dag_hash()) + + +@pytest.mark.usefixtures( + 'install_mockery_mutable_config', 'mock_packages', 'mock_fetch', +) +def test_update_index_fix_deps(monkeypatch, tmpdir, mutable_config): + """Ensure spack buildcache update-index properly fixes up spec.yaml + files on the mirror when updating the buildcache index.""" + + # Create a temp mirror directory for buildcache usage + mirror_dir = tmpdir.join('mirror_dir') + mirror_url = 'file://{0}'.format(mirror_dir.strpath) + spack.config.set('mirrors', {'test': mirror_url}) + + a = Spec('a').concretized() + b = Spec('b').concretized() + new_b_full_hash = 'abcdef' + + # Install package a with dep b + install_cmd('--no-cache', a.name) + + # Create a buildcache for a and its dep b, and update index + buildcache_cmd('create', '-uad', mirror_dir.strpath, a.name) + buildcache_cmd('update-index', '-d', mirror_dir.strpath) + + # Simulate an update to b that only affects full hash by simply overwriting + # the full hash in the spec.yaml file on the mirror + b_spec_yaml_name = bindist.tarball_name(b, '.spec.yaml') + b_spec_yaml_path = os.path.join(mirror_dir.strpath, + bindist.build_cache_relative_path(), + b_spec_yaml_name) + fs.filter_file(r"full_hash:\s[^\s]+$", + "full_hash: {0}".format(new_b_full_hash), + b_spec_yaml_path) + + # When we update the index, spack should notice that a's notion of the + # full hash of b doesn't match b's notion of it's own full hash, and as + # a result, spack should fix the spec.yaml for a + buildcache_cmd('update-index', '-d', mirror_dir.strpath) + + # Read in the concrete spec yaml of a + a_spec_yaml_name = bindist.tarball_name(a, '.spec.yaml') + a_spec_yaml_path = os.path.join(mirror_dir.strpath, + bindist.build_cache_relative_path(), + a_spec_yaml_name) + + # Turn concrete spec yaml into a concrete spec (a) + with open(a_spec_yaml_path) as fd: + a_prime = spec.Spec.from_yaml(fd.read()) + + # Make sure the full hash of b in a's spec yaml matches the new value + assert(a_prime[b.name].full_hash() == new_b_full_hash) -- cgit v1.2.3-60-g2f50