From f6ad1e23f8f99b3dc7fa10d616921081127c3a2d Mon Sep 17 00:00:00 2001 From: Massimiliano Culpo Date: Thu, 24 Oct 2024 08:13:07 +0200 Subject: Improve `Database.query*` methods (#47116) * Add type hints to all query* methods * Inline docstrings * Change defaults from `any` to `None` so they can be type hinted in old Python * Pre-filter on given hashes instead of iterating over all db specs * Fix a bug where the `--origin` option of uninstall had no effect * Fix a bug where query args were not applied when searching by concrete spec Signed-off-by: Massimiliano Culpo --- lib/spack/spack/binary_distribution.py | 2 +- lib/spack/spack/cmd/deconcretize.py | 2 +- lib/spack/spack/cmd/find.py | 2 +- lib/spack/spack/cmd/mark.py | 8 +- lib/spack/spack/cmd/test.py | 2 +- lib/spack/spack/cmd/uninstall.py | 11 +- lib/spack/spack/database.py | 280 +++++++++++++++++++-------------- lib/spack/spack/test/cmd/find.py | 2 +- 8 files changed, 181 insertions(+), 128 deletions(-) (limited to 'lib') diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index bf1d352124..2f74f38dac 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -252,7 +252,7 @@ class BinaryCacheIndex: spec_list = [ s - for s in db.query_local(installed=any, in_buildcache=any) + for s in db.query_local(installed=any) if s.external or db.query_local_by_spec_hash(s.dag_hash()).in_buildcache ] diff --git a/lib/spack/spack/cmd/deconcretize.py b/lib/spack/spack/cmd/deconcretize.py index ffb05eebe7..7e2feab5aa 100644 --- a/lib/spack/spack/cmd/deconcretize.py +++ b/lib/spack/spack/cmd/deconcretize.py @@ -99,5 +99,5 @@ def deconcretize(parser, args): " Use `spack deconcretize --all` to deconcretize ALL specs.", ) - specs = spack.cmd.parse_specs(args.specs) if args.specs else [any] + specs = spack.cmd.parse_specs(args.specs) if args.specs else [None] deconcretize_specs(args, specs) diff --git a/lib/spack/spack/cmd/find.py b/lib/spack/spack/cmd/find.py index afa830fb8e..079c5bf4d3 100644 --- a/lib/spack/spack/cmd/find.py +++ b/lib/spack/spack/cmd/find.py @@ -178,7 +178,7 @@ def query_arguments(args): if args.unknown: predicate_fn = lambda x: not spack.repo.PATH.exists(x.spec.name) - explicit = any + explicit = None if args.explicit: explicit = True if args.implicit: diff --git a/lib/spack/spack/cmd/mark.py b/lib/spack/spack/cmd/mark.py index 38701b9747..0069008c4f 100644 --- a/lib/spack/spack/cmd/mark.py +++ b/lib/spack/spack/cmd/mark.py @@ -80,8 +80,8 @@ def find_matching_specs(specs, allow_multiple_matches=False): has_errors = True # No installed package matches the query - if len(matching) == 0 and spec is not any: - tty.die("{0} does not match any installed packages.".format(spec)) + if len(matching) == 0 and spec is not None: + tty.die(f"{spec} does not match any installed packages.") specs_from_cli.extend(matching) @@ -116,6 +116,6 @@ def mark(parser, args): " Use `spack mark --all` to mark ALL packages.", ) - # [any] here handles the --all case by forcing all specs to be returned - specs = spack.cmd.parse_specs(args.specs) if args.specs else [any] + # [None] here handles the --all case by forcing all specs to be returned + specs = spack.cmd.parse_specs(args.specs) if args.specs else [None] mark_specs(args, specs) diff --git a/lib/spack/spack/cmd/test.py b/lib/spack/spack/cmd/test.py index d3d45dbe46..1457804382 100644 --- a/lib/spack/spack/cmd/test.py +++ b/lib/spack/spack/cmd/test.py @@ -165,7 +165,7 @@ def test_run(args): if args.fail_fast: spack.config.set("config:fail_fast", True, scope="command_line") - explicit = args.explicit or any + explicit = args.explicit or None explicit_str = "explicitly " if args.explicit else "" # Get specs to test diff --git a/lib/spack/spack/cmd/uninstall.py b/lib/spack/spack/cmd/uninstall.py index d51a8f8e3b..5d6779eea9 100644 --- a/lib/spack/spack/cmd/uninstall.py +++ b/lib/spack/spack/cmd/uninstall.py @@ -90,6 +90,7 @@ def find_matching_specs( env: optional active environment specs: list of specs to be matched against installed packages allow_multiple_matches: if True multiple matches are admitted + origin: origin of the spec Return: list: list of specs @@ -98,7 +99,7 @@ def find_matching_specs( hashes = env.all_hashes() if env else None # List of specs that match expressions given via command line - specs_from_cli = [] + specs_from_cli: List["spack.spec.Spec"] = [] has_errors = False for spec in specs: install_query = [InstallStatuses.INSTALLED, InstallStatuses.DEPRECATED] @@ -116,7 +117,7 @@ def find_matching_specs( has_errors = True # No installed package matches the query - if len(matching) == 0 and spec is not any: + if len(matching) == 0 and spec is not None: if env: pkg_type = "packages in environment '%s'" % env.name else: @@ -213,7 +214,7 @@ def get_uninstall_list(args, specs: List[spack.spec.Spec], env: Optional[ev.Envi # Gets the list of installed specs that match the ones given via cli # args.all takes care of the case where '-a' is given in the cli - matching_specs = find_matching_specs(env, specs, args.all) + matching_specs = find_matching_specs(env, specs, args.all, origin=args.origin) dependent_specs = installed_dependents(matching_specs) all_uninstall_specs = matching_specs + dependent_specs if args.dependents else matching_specs other_dependent_envs = dependent_environments(all_uninstall_specs, current_env=env) @@ -301,6 +302,6 @@ def uninstall(parser, args): " Use `spack uninstall --all` to uninstall ALL packages.", ) - # [any] here handles the --all case by forcing all specs to be returned - specs = spack.cmd.parse_specs(args.specs) if args.specs else [any] + # [None] here handles the --all case by forcing all specs to be returned + specs = spack.cmd.parse_specs(args.specs) if args.specs else [None] uninstall_specs(args, specs) diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index 9813c7c18d..12f6ac2465 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -32,6 +32,7 @@ from typing import ( Container, Dict, Generator, + Iterable, List, NamedTuple, Optional, @@ -290,52 +291,6 @@ class ForbiddenLock: return ForbiddenLock, tuple() -_QUERY_DOCSTRING = """ - - Args: - query_spec: queries iterate through specs in the database and - return those that satisfy the supplied ``query_spec``. If - query_spec is `any`, This will match all specs in the - database. If it is a spec, we'll evaluate - ``spec.satisfies(query_spec)`` - - predicate_fn: optional predicate taking an InstallRecord as argument, and returning - whether that record is selected for the query. It can be used to craft criteria - that need some data for selection not provided by the Database itself. - - installed (bool or InstallStatus or typing.Iterable or None): - if ``True``, includes only installed - specs in the search; if ``False`` only missing specs, and if - ``any``, all specs in database. If an InstallStatus or iterable - of InstallStatus, returns specs whose install status - (installed, deprecated, or missing) matches (one of) the - InstallStatus. (default: True) - - explicit (bool or None): A spec that was installed - following a specific user request is marked as explicit. If - instead it was pulled-in as a dependency of a user requested - spec it's considered implicit. - - start_date (datetime.datetime or None): filters the query - discarding specs that have been installed before ``start_date``. - - end_date (datetime.datetime or None): filters the query discarding - specs that have been installed after ``end_date``. - - hashes (Container): list or set of hashes that we can use to - restrict the search - - in_buildcache (bool or None): Specs that are marked in - this database as part of an associated binary cache are - ``in_buildcache``. All other specs are not. This field is used - for querying mirror indices. Default is ``any``. - - Returns: - list of specs that match the query - - """ - - class LockConfiguration(NamedTuple): """Data class to configure locks in Database objects @@ -1525,59 +1480,48 @@ class Database: def _query( self, - query_spec=any, + query_spec: Optional[Union[str, "spack.spec.Spec"]] = None, + *, predicate_fn: Optional[SelectType] = None, - installed=True, - explicit=any, - start_date=None, - end_date=None, - hashes=None, - in_buildcache=any, - origin=None, + installed: Union[bool, InstallStatus, List[InstallStatus]] = True, + explicit: Optional[bool] = None, + start_date: Optional[datetime.datetime] = None, + end_date: Optional[datetime.datetime] = None, + hashes: Optional[Iterable[str]] = None, + in_buildcache: Optional[bool] = None, + origin: Optional[str] = None, ) -> List["spack.spec.Spec"]: - """Run a query on the database.""" - - # TODO: Specs are a lot like queries. Should there be a - # TODO: wildcard spec object, and should specs have attributes - # TODO: like installed and known that can be queried? Or are - # TODO: these really special cases that only belong here? - - if query_spec is not any: - if not isinstance(query_spec, spack.spec.Spec): - query_spec = spack.spec.Spec(query_spec) - - # Just look up concrete specs with hashes; no fancy search. - if query_spec.concrete: - # TODO: handling of hashes restriction is not particularly elegant. - hash_key = query_spec.dag_hash() - if hash_key in self._data and (not hashes or hash_key in hashes): - return [self._data[hash_key].spec] - else: - return [] - - # Abstract specs require more work -- currently we test - # against everything. + + # Restrict the set of records over which we iterate first + matching_hashes = self._data + if hashes is not None: + matching_hashes = {h: self._data[h] for h in hashes if h in self._data} + + if isinstance(query_spec, str): + query_spec = spack.spec.Spec(query_spec) + + if query_spec is not None and query_spec.concrete: + hash_key = query_spec.dag_hash() + if hash_key not in matching_hashes: + return [] + matching_hashes = {hash_key: matching_hashes[hash_key]} + results = [] start_date = start_date or datetime.datetime.min end_date = end_date or datetime.datetime.max - # save specs whose name doesn't match for last, to avoid a virtual check deferred = [] - - for key, rec in self._data.items(): - if hashes is not None and rec.spec.dag_hash() not in hashes: - continue - + for rec in matching_hashes.values(): if origin and not (origin == rec.origin): continue if not rec.install_type_matches(installed): continue - if in_buildcache is not any and rec.in_buildcache != in_buildcache: + if in_buildcache is not None and rec.in_buildcache != in_buildcache: continue - if explicit is not any and rec.explicit != explicit: + if explicit is not None and rec.explicit != explicit: continue if predicate_fn is not None and not predicate_fn(rec): @@ -1588,7 +1532,7 @@ class Database: if not (start_date < inst_date < end_date): continue - if query_spec is any: + if query_spec is None or query_spec.concrete: results.append(rec.spec) continue @@ -1606,36 +1550,118 @@ class Database: # If we did fine something, the query spec can't be virtual b/c we matched an actual # package installation, so skip the virtual check entirely. If we *didn't* find anything, # check all the deferred specs *if* the query is virtual. - if not results and query_spec is not any and deferred and query_spec.virtual: + if not results and query_spec is not None and deferred and query_spec.virtual: results = [spec for spec in deferred if spec.satisfies(query_spec)] return results - if _query.__doc__ is None: - _query.__doc__ = "" - _query.__doc__ += _QUERY_DOCSTRING + def query_local( + self, + query_spec: Optional[Union[str, "spack.spec.Spec"]] = None, + *, + predicate_fn: Optional[SelectType] = None, + installed: Union[bool, InstallStatus, List[InstallStatus]] = True, + explicit: Optional[bool] = None, + start_date: Optional[datetime.datetime] = None, + end_date: Optional[datetime.datetime] = None, + hashes: Optional[List[str]] = None, + in_buildcache: Optional[bool] = None, + origin: Optional[str] = None, + ) -> List["spack.spec.Spec"]: + """Queries the local Spack database. - def query_local(self, *args, **kwargs): - """Query only the local Spack database. + This function doesn't guarantee any sorting of the returned data for performance reason, + since comparing specs for __lt__ may be an expensive operation. - This function doesn't guarantee any sorting of the returned - data for performance reason, since comparing specs for __lt__ - may be an expensive operation. + Args: + query_spec: if query_spec is ``None``, match all specs in the database. + If it is a spec, return all specs matching ``spec.satisfies(query_spec)``. + + predicate_fn: optional predicate taking an InstallRecord as argument, and returning + whether that record is selected for the query. It can be used to craft criteria + that need some data for selection not provided by the Database itself. + + installed: if ``True``, includes only installed specs in the search. If ``False`` only + missing specs, and if ``any``, all specs in database. If an InstallStatus or + iterable of InstallStatus, returns specs whose install status matches at least + one of the InstallStatus. + + explicit: a spec that was installed following a specific user request is marked as + explicit. If instead it was pulled-in as a dependency of a user requested spec + it's considered implicit. + + start_date: if set considers only specs installed from the starting date. + + end_date: if set considers only specs installed until the ending date. + + in_buildcache: specs that are marked in this database as part of an associated binary + cache are ``in_buildcache``. All other specs are not. This field is used for + querying mirror indices. By default, it does not check this status. + + hashes: list of hashes used to restrict the search + + origin: origin of the spec """ with self.read_transaction(): - return self._query(*args, **kwargs) + return self._query( + query_spec, + predicate_fn=predicate_fn, + installed=installed, + explicit=explicit, + start_date=start_date, + end_date=end_date, + hashes=hashes, + in_buildcache=in_buildcache, + origin=origin, + ) + + def query( + self, + query_spec: Optional[Union[str, "spack.spec.Spec"]] = None, + *, + predicate_fn: Optional[SelectType] = None, + installed: Union[bool, InstallStatus, List[InstallStatus]] = True, + explicit: Optional[bool] = None, + start_date: Optional[datetime.datetime] = None, + end_date: Optional[datetime.datetime] = None, + in_buildcache: Optional[bool] = None, + hashes: Optional[List[str]] = None, + origin: Optional[str] = None, + install_tree: str = "all", + ): + """Queries the Spack database including all upstream databases. - if query_local.__doc__ is None: - query_local.__doc__ = "" - query_local.__doc__ += _QUERY_DOCSTRING + Args: + query_spec: if query_spec is ``None``, match all specs in the database. + If it is a spec, return all specs matching ``spec.satisfies(query_spec)``. + + predicate_fn: optional predicate taking an InstallRecord as argument, and returning + whether that record is selected for the query. It can be used to craft criteria + that need some data for selection not provided by the Database itself. + + installed: if ``True``, includes only installed specs in the search. If ``False`` only + missing specs, and if ``any``, all specs in database. If an InstallStatus or + iterable of InstallStatus, returns specs whose install status matches at least + one of the InstallStatus. + + explicit: a spec that was installed following a specific user request is marked as + explicit. If instead it was pulled-in as a dependency of a user requested spec + it's considered implicit. + + start_date: if set considers only specs installed from the starting date. + + end_date: if set considers only specs installed until the ending date. + + in_buildcache: specs that are marked in this database as part of an associated binary + cache are ``in_buildcache``. All other specs are not. This field is used for + querying mirror indices. By default, it does not check this status. - def query(self, *args, **kwargs): - """Query the Spack database including all upstream databases. + hashes: list of hashes used to restrict the search - Additional Arguments: - install_tree (str): query 'all' (default), 'local', 'upstream', or upstream path + install_tree: query 'all' (default), 'local', 'upstream', or upstream path + + origin: origin of the spec """ - install_tree = kwargs.pop("install_tree", "all") valid_trees = ["all", "upstream", "local", self.root] + [u.root for u in self.upstream_dbs] if install_tree not in valid_trees: msg = "Invalid install_tree argument to Database.query()\n" @@ -1651,26 +1677,52 @@ class Database: # queries for upstream DBs need to *not* lock - we may not # have permissions to do this and the upstream DBs won't know about # us anyway (so e.g. they should never uninstall specs) - upstream_results.extend(upstream_db._query(*args, **kwargs) or []) + upstream_results.extend( + upstream_db._query( + query_spec, + predicate_fn=predicate_fn, + installed=installed, + explicit=explicit, + start_date=start_date, + end_date=end_date, + hashes=hashes, + in_buildcache=in_buildcache, + origin=origin, + ) + or [] + ) - local_results = [] + local_results: Set["spack.spec.Spec"] = set() if install_tree in ("all", "local") or self.root == install_tree: - local_results = set(self.query_local(*args, **kwargs)) + local_results = set( + self.query_local( + query_spec, + predicate_fn=predicate_fn, + installed=installed, + explicit=explicit, + start_date=start_date, + end_date=end_date, + hashes=hashes, + in_buildcache=in_buildcache, + origin=origin, + ) + ) results = list(local_results) + list(x for x in upstream_results if x not in local_results) - return sorted(results) - if query.__doc__ is None: - query.__doc__ = "" - query.__doc__ += _QUERY_DOCSTRING - - def query_one(self, query_spec, predicate_fn=None, installed=True): + def query_one( + self, + query_spec: Optional[Union[str, "spack.spec.Spec"]], + predicate_fn: Optional[SelectType] = None, + installed: Union[bool, InstallStatus, List[InstallStatus]] = True, + ) -> Optional["spack.spec.Spec"]: """Query for exactly one spec that matches the query spec. - Raises an assertion error if more than one spec matches the - query. Returns None if no installed package matches. + Returns None if no installed package matches. + Raises: + AssertionError: if more than one spec matches the query. """ concrete_specs = self.query(query_spec, predicate_fn=predicate_fn, installed=installed) assert len(concrete_specs) <= 1 diff --git a/lib/spack/spack/test/cmd/find.py b/lib/spack/spack/test/cmd/find.py index a29b56708f..d947362f18 100644 --- a/lib/spack/spack/test/cmd/find.py +++ b/lib/spack/spack/test/cmd/find.py @@ -74,7 +74,7 @@ def test_query_arguments(): assert "explicit" in q_args assert q_args["installed"] == ["installed"] assert q_args["predicate_fn"] is None - assert q_args["explicit"] is any + assert q_args["explicit"] is None assert "start_date" in q_args assert "end_date" not in q_args assert q_args["install_tree"] == "all" -- cgit v1.2.3-70-g09d2