From 24d12c632c483fb84bac2f6bc64bb62593035e3f Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Tue, 26 Dec 2023 14:52:10 -0800 Subject: `spack gc`: add options for environments and build dependencies (#41731) This adds a few options to `spack gc`. One to give you a little more control over dependencies: * `-b` / `--keep-build-dependencies`: By default, `spack gc` considers build dependencies to be "no longer needed" once their dependents are installed. With this option, we'll keep build dependencies of needed installations as well. And two more to make working with environments easier: * `-E` / `--except-any-environment`: Garbage collect anything NOT needed by an environment. `spack gc -E` and `spack gc -bE` are now easy ways to get rid of everytihng not used by some environment. * `-e` / `--except-environment` `ENV`: Instead of considering all environments, garbage collect everything not needed by a *specific* environment. Note that you can use this with `-E` to add directory environments to the list of considered envs, e.g.: spack gc -E -e /path/to/direnv1 -e /path/to/direnv2 #... - [x] rework `unused_specs()` method on DB to add options for roots and deptypes - [x] add `all_hashes()` method on DB - [x] rework `spack gc` command to add 3 more options - [x] tests --- lib/spack/spack/cmd/common/confirmation.py | 7 +- lib/spack/spack/cmd/gc.py | 109 +++++++++++++++++++------ lib/spack/spack/cmd/uninstall.py | 2 +- lib/spack/spack/database.py | 69 +++++++++------- lib/spack/spack/environment/environment.py | 2 +- lib/spack/spack/test/cmd/gc.py | 125 ++++++++++++++++++++++++++--- lib/spack/spack/test/conftest.py | 12 +-- lib/spack/spack/test/database.py | 47 ++++++++++- 8 files changed, 297 insertions(+), 76 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/cmd/common/confirmation.py b/lib/spack/spack/cmd/common/confirmation.py index 8a5cd2592b..0bdc9a2202 100644 --- a/lib/spack/spack/cmd/common/confirmation.py +++ b/lib/spack/spack/cmd/common/confirmation.py @@ -21,10 +21,11 @@ def confirm_action(specs: List[spack.spec.Spec], participle: str, noun: str): participle: action expressed as a participle, e.g. "uninstalled" noun: action expressed as a noun, e.g. "uninstallation" """ - tty.msg(f"The following {len(specs)} packages will be {participle}:\n") spack.cmd.display_specs(specs, **display_args) - print("") - answer = tty.get_yes_or_no("Do you want to proceed?", default=False) + print() + answer = tty.get_yes_or_no( + f"{len(specs)} packages will be {participle}. Do you want to proceed?", default=False + ) if not answer: tty.msg(f"Aborting {noun}") sys.exit(0) diff --git a/lib/spack/spack/cmd/gc.py b/lib/spack/spack/cmd/gc.py index 9918bf7479..ff352db78a 100644 --- a/lib/spack/spack/cmd/gc.py +++ b/lib/spack/spack/cmd/gc.py @@ -8,6 +8,7 @@ import llnl.util.tty as tty import spack.cmd.common.arguments import spack.cmd.common.confirmation import spack.cmd.uninstall +import spack.deptypes as dt import spack.environment as ev import spack.store @@ -17,31 +18,91 @@ level = "short" def setup_parser(subparser): + subparser.add_argument( + "-E", + "--except-any-environment", + action="store_true", + help="remove everything unless needed by an environment", + ) + subparser.add_argument( + "-e", + "--except-environment", + metavar="ENV", + action="append", + default=[], + help="remove everything unless needed by specified environment\n" + "you can list multiple environments, or specify directory\n" + "environments by path.", + ) + subparser.add_argument( + "-b", + "--keep-build-dependencies", + action="store_true", + help="do not remove installed build-only dependencies of roots\n" + "(default is to keep only link & run dependencies)", + ) spack.cmd.common.arguments.add_common_arguments(subparser, ["yes_to_all"]) +def roots_from_environments(args, active_env): + # if we're using -E or -e, make a list of environments whose roots we should consider. + all_environments = [] + + # -E will garbage collect anything not needed by any env, including the current one + if args.except_any_environment: + all_environments += list(ev.all_environments()) + if active_env: + all_environments.append(active_env) + + # -e says "also preserve things needed by this particular env" + for env_name_or_dir in args.except_environment: + print("HMM", env_name_or_dir) + if ev.exists(env_name_or_dir): + env = ev.read(env_name_or_dir) + elif ev.is_env_dir(env_name_or_dir): + env = ev.Environment(env_name_or_dir) + else: + tty.die(f"No such environment: '{env_name_or_dir}'") + all_environments.append(env) + + # add root hashes from all considered environments to list of roots + root_hashes = set() + for env in all_environments: + root_hashes |= set(env.concretized_order) + + return root_hashes + + def gc(parser, args): - specs = spack.store.STORE.db.unused_specs - - # Restrict garbage collection to the active environment - # speculating over roots that are yet to be installed - env = ev.active_environment() - if env: - msg = 'Restricting the garbage collection to the "{0}" environment' - tty.msg(msg.format(env.name)) - env.concretize() - roots = [s for s in env.roots()] - all_hashes = set([s.dag_hash() for r in roots for s in r.traverse()]) - lr_hashes = set([s.dag_hash() for r in roots for s in r.traverse(deptype=("link", "run"))]) - maybe_to_be_removed = all_hashes - lr_hashes - specs = [s for s in specs if s.dag_hash() in maybe_to_be_removed] - - if not specs: - msg = "There are no unused specs. Spack's store is clean." - tty.msg(msg) - return - - if not args.yes_to_all: - spack.cmd.common.confirmation.confirm_action(specs, "uninstalled", "uninstallation") - - spack.cmd.uninstall.do_uninstall(specs, force=False) + deptype = dt.LINK | dt.RUN + if args.keep_build_dependencies: + deptype |= dt.BUILD + + active_env = ev.active_environment() + + # wrap the whole command with a read transaction to avoid multiple + with spack.store.STORE.db.read_transaction(): + if args.except_environment or args.except_any_environment: + # if either of these is specified, we ignore the active environment and garbage + # collect anything NOT in specified environments. + root_hashes = roots_from_environments(args, active_env) + + elif active_env: + # only gc what's in current environment + tty.msg(f"Restricting garbage collection to environment '{active_env.name}'") + root_hashes = set(spack.store.STORE.db.all_hashes()) # keep everything + root_hashes -= set(active_env.all_hashes()) # except this env + root_hashes |= set(active_env.concretized_order) # but keep its roots + else: + # consider all explicit specs roots (the default for db.unused_specs()) + root_hashes = None + + specs = spack.store.STORE.db.unused_specs(root_hashes=root_hashes, deptype=deptype) + if not specs: + tty.msg("There are no unused specs. Spack's store is clean.") + return + + if not args.yes_to_all: + spack.cmd.common.confirmation.confirm_action(specs, "uninstalled", "uninstall") + + spack.cmd.uninstall.do_uninstall(specs, force=False) diff --git a/lib/spack/spack/cmd/uninstall.py b/lib/spack/spack/cmd/uninstall.py index 59ea18edc8..9cf15c4279 100644 --- a/lib/spack/spack/cmd/uninstall.py +++ b/lib/spack/spack/cmd/uninstall.py @@ -277,7 +277,7 @@ def uninstall_specs(args, specs): return if not args.yes_to_all: - confirmation.confirm_action(uninstall_list, "uninstalled", "uninstallation") + confirmation.confirm_action(uninstall_list, "uninstalled", "uninstall") # Uninstall everything on the list do_uninstall(uninstall_list, args.force) diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index ecda8c36b0..84c17a6a0d 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -25,9 +25,20 @@ import pathlib import socket import sys import time -from typing import Any, Callable, Dict, Generator, List, NamedTuple, Set, Type, Union - -import spack.deptypes as dt +from typing import ( + Any, + Callable, + Container, + Dict, + Generator, + List, + NamedTuple, + Optional, + Set, + Tuple, + Type, + Union, +) try: import uuid @@ -37,13 +48,13 @@ except ImportError: _use_uuid = False pass -from typing import Optional, Tuple - import llnl.util.filesystem as fs import llnl.util.tty as tty +import spack.deptypes as dt import spack.hash_types as ht import spack.spec +import spack.traverse as tr import spack.util.lock as lk import spack.util.spack_json as sjson import spack.version as vn @@ -297,7 +308,7 @@ _QUERY_DOCSTRING = """ end_date (datetime.datetime or None): filters the query discarding specs that have been installed after ``end_date``. - hashes (typing.Container): list or set of hashes that we can use to + hashes (Container): list or set of hashes that we can use to restrict the search in_buildcache (bool or None): Specs that are marked in @@ -1648,31 +1659,35 @@ class Database: with self.read_transaction(): return path in self._installed_prefixes - @property - def unused_specs(self): - """Return all the specs that are currently installed but not needed - at runtime to satisfy user's requests. - - Specs in the return list are those which are not either: - 1. Installed on an explicit user request - 2. Installed as a "run" or "link" dependency (even transitive) of - a spec at point 1. - """ - needed, visited = set(), set() + def all_hashes(self): + """Return dag hash of every spec in the database.""" with self.read_transaction(): - for key, rec in self._data.items(): - if not rec.explicit: - continue + return list(self._data.keys()) - # recycle `visited` across calls to avoid redundantly traversing - for spec in rec.spec.traverse(visited=visited, deptype=("link", "run")): - needed.add(spec.dag_hash()) + def unused_specs( + self, + root_hashes: Optional[Container[str]] = None, + deptype: Union[dt.DepFlag, dt.DepTypes] = dt.LINK | dt.RUN, + ) -> "List[spack.spec.Spec]": + """Return all specs that are currently installed but not needed by root specs. - unused = [ - rec.spec for key, rec in self._data.items() if key not in needed and rec.installed - ] + By default, roots are all explicit specs in the database. If a set of root + hashes are passed in, they are instead used as the roots. - return unused + Arguments: + root_hashes: optional list of roots to consider when evaluating needed installations. + deptype: if a spec is reachable from a root via these dependency types, it is + considered needed. By default only link and run dependency types are considered. + """ + + def root(key, record): + """Whether a DB record is a root for garbage collection.""" + return key in root_hashes if root_hashes is not None else record.explicit + + with self.read_transaction(): + roots = [rec.spec for key, rec in self._data.items() if root(key, rec)] + needed = set(id(spec) for spec in tr.traverse_nodes(roots, deptype=deptype)) + return [rec.spec for rec in self._data.values() if id(rec.spec) not in needed] def update_explicit(self, spec, explicit): """ diff --git a/lib/spack/spack/environment/environment.py b/lib/spack/spack/environment/environment.py index 2721ddb828..e2ea4b27e9 100644 --- a/lib/spack/spack/environment/environment.py +++ b/lib/spack/spack/environment/environment.py @@ -793,7 +793,7 @@ class Environment: #: User specs from the last concretization self.concretized_user_specs: List[Spec] = [] #: Roots associated with the last concretization, in order - self.concretized_order: List[Spec] = [] + self.concretized_order: List[str] = [] #: Concretized specs by hash self.specs_by_hash: Dict[str, Spec] = {} #: Repository for this environment (memoized) diff --git a/lib/spack/spack/test/cmd/gc.py b/lib/spack/spack/test/cmd/gc.py index d692628e10..9defb9d99c 100644 --- a/lib/spack/spack/test/cmd/gc.py +++ b/lib/spack/spack/test/cmd/gc.py @@ -11,37 +11,140 @@ import spack.main import spack.spec gc = spack.main.SpackCommand("gc") +add = spack.main.SpackCommand("add") +install = spack.main.SpackCommand("install") +find = spack.main.SpackCommand("find") pytestmark = pytest.mark.not_on_windows("does not run on windows") @pytest.mark.db -def test_no_packages_to_remove(config, mutable_database, capsys): - with capsys.disabled(): - output = gc("-y") +def test_gc_without_build_dependency(config, mutable_database): + output = gc("-yb") + assert "There are no unused specs." in output + + output = gc("-y") assert "There are no unused specs." in output @pytest.mark.db -def test_packages_are_removed(config, mutable_database, capsys): +def test_gc_with_build_dependency(config, mutable_database): s = spack.spec.Spec("simple-inheritance") s.concretize() s.package.do_install(fake=True, explicit=True) - with capsys.disabled(): - output = gc("-y") + + output = gc("-yb") + assert "There are no unused specs." in output + + output = gc("-y") assert "Successfully uninstalled cmake" in output @pytest.mark.db -def test_gc_with_environment(config, mutable_database, mutable_mock_env_path, capsys): +def test_gc_with_environment(config, mutable_database, mutable_mock_env_path): + s = spack.spec.Spec("simple-inheritance") + s.concretize() + s.package.do_install(fake=True, explicit=True) + + e = ev.create("test_gc") + with e: + add("cmake") + install() + assert "cmake" in find() + output = gc("-y") + assert "Restricting garbage collection" in output + assert "There are no unused specs" in output + + +@pytest.mark.db +def test_gc_with_build_dependency_in_environment(config, mutable_database, mutable_mock_env_path): s = spack.spec.Spec("simple-inheritance") s.concretize() s.package.do_install(fake=True, explicit=True) e = ev.create("test_gc") - e.add("cmake") with e: - with capsys.disabled(): - output = gc("-y") - assert "Restricting the garbage collection" in output + add("simple-inheritance") + install() + assert "simple-inheritance" in find() + output = gc("-yb") + assert "Restricting garbage collection" in output assert "There are no unused specs" in output + + with e: + assert "simple-inheritance" in find() + output = gc("-y") + assert "Restricting garbage collection" in output + assert "Successfully uninstalled cmake" in output + + +@pytest.mark.db +def test_gc_except_any_environments(config, mutable_database, mutable_mock_env_path): + s = spack.spec.Spec("simple-inheritance") + s.concretize() + s.package.do_install(fake=True, explicit=True) + + assert "zmpi" in find() + + e = ev.create("test_gc") + with e: + add("simple-inheritance") + install() + assert "simple-inheritance" in find() + + output = gc("-yE") + assert "Restricting garbage collection" not in output + assert "Successfully uninstalled zmpi" in output + assert "zmpi" not in find() + + with e: + output = gc("-yE") + assert "Restricting garbage collection" not in output + assert "There are no unused specs" not in find() + + +@pytest.mark.db +def test_gc_except_specific_environments(config, mutable_database, mutable_mock_env_path): + s = spack.spec.Spec("simple-inheritance") + s.concretize() + s.package.do_install(fake=True, explicit=True) + + assert "zmpi" in find() + + e = ev.create("test_gc") + with e: + add("simple-inheritance") + install() + assert "simple-inheritance" in find() + + output = gc("-ye", "test_gc") + assert "Restricting garbage collection" not in output + assert "Successfully uninstalled zmpi" in output + assert "zmpi" not in find() + + +@pytest.mark.db +def test_gc_except_nonexisting_dir_env(config, mutable_database, mutable_mock_env_path, tmpdir): + output = gc("-ye", tmpdir.strpath, fail_on_error=False) + assert "No such environment" in output + gc.returncode == 1 + + +@pytest.mark.db +def test_gc_except_specific_dir_env(config, mutable_database, mutable_mock_env_path, tmpdir): + s = spack.spec.Spec("simple-inheritance") + s.concretize() + s.package.do_install(fake=True, explicit=True) + + assert "zmpi" in find() + + e = ev.create_in_dir(tmpdir.strpath) + with e: + add("simple-inheritance") + install() + assert "simple-inheritance" in find() + + output = gc("-ye", tmpdir.strpath) + assert "Restricting garbage collection" not in output + assert "Successfully uninstalled zmpi" in output + assert "zmpi" not in find() diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index 7b396a0358..6b714e64af 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -801,13 +801,13 @@ def mock_low_high_config(tmpdir): def _populate(mock_db): r"""Populate a mock database with packages. - Here is what the mock DB looks like: + Here is what the mock DB looks like (explicit roots at top): - o mpileaks o mpileaks' o mpileaks'' - |\ |\ |\ - | o callpath | o callpath' | o callpath'' - |/| |/| |/| - o | mpich o | mpich2 o | zmpi + o mpileaks o mpileaks' o mpileaks'' o externaltest o trivial-smoke-test + |\ |\ |\ | + | o callpath | o callpath' | o callpath'' o externaltool + |/| |/| |/| | + o | mpich o | mpich2 o | zmpi o externalvirtual | | o | fake | | | | |______________/ diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py index ee3e5da81e..ace7ff01e3 100644 --- a/lib/spack/spack/test/database.py +++ b/lib/spack/spack/test/database.py @@ -25,6 +25,7 @@ import llnl.util.lock as lk from llnl.util.tty.colify import colify import spack.database +import spack.deptypes as dt import spack.package_base import spack.repo import spack.spec @@ -778,9 +779,39 @@ def test_query_unused_specs(mutable_database): s.concretize() s.package.do_install(fake=True, explicit=True) - unused = spack.store.STORE.db.unused_specs - assert len(unused) == 1 - assert unused[0].name == "cmake" + si = s.dag_hash() + ml_mpich = spack.store.STORE.db.query_one("mpileaks ^mpich").dag_hash() + ml_mpich2 = spack.store.STORE.db.query_one("mpileaks ^mpich2").dag_hash() + ml_zmpi = spack.store.STORE.db.query_one("mpileaks ^zmpi").dag_hash() + externaltest = spack.store.STORE.db.query_one("externaltest").dag_hash() + trivial_smoke_test = spack.store.STORE.db.query_one("trivial-smoke-test").dag_hash() + + def check_unused(roots, deptype, expected): + unused = spack.store.STORE.db.unused_specs(root_hashes=roots, deptype=deptype) + assert set(u.name for u in unused) == set(expected) + + default_dt = dt.LINK | dt.RUN + check_unused(None, default_dt, ["cmake"]) + check_unused( + [si, ml_mpich, ml_mpich2, ml_zmpi, externaltest], + default_dt, + ["trivial-smoke-test", "cmake"], + ) + check_unused( + [si, ml_mpich, ml_mpich2, ml_zmpi, externaltest], + dt.LINK | dt.RUN | dt.BUILD, + ["trivial-smoke-test"], + ) + check_unused( + [si, ml_mpich, ml_mpich2, externaltest, trivial_smoke_test], + dt.LINK | dt.RUN | dt.BUILD, + ["mpileaks", "callpath", "zmpi", "fake"], + ) + check_unused( + [si, ml_mpich, ml_mpich2, ml_zmpi], + default_dt, + ["trivial-smoke-test", "cmake", "externaltest", "externaltool", "externalvirtual"], + ) @pytest.mark.regression("10019") @@ -1008,6 +1039,16 @@ def test_check_parents(spec_str, parent_name, expected_nparents, database): assert len(edges) == expected_nparents +def test_db_all_hashes(database): + # ensure we get the right number of hashes without a read transaction + hashes = database.all_hashes() + assert len(hashes) == 17 + + # and make sure the hashes match + with database.read_transaction(): + assert set(s.dag_hash() for s in database.query()) == set(hashes) + + def test_consistency_of_dependents_upon_remove(mutable_database): # Check the initial state s = mutable_database.query_one("dyninst") -- cgit v1.2.3-70-g09d2