From 44c22a54c97b5f7c3569ff0b4b394825cbfd7a13 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Tue, 22 Nov 2022 03:33:35 +0100 Subject: Spec traversal: add option for topological ordering (#33817) Spec traversals can now specify a topological ordering. A topologically- ordered traversal with input specs X1, X2... will * include all of X1, X2... and their children * be ordered such that a given node is guaranteed to appear before any of its children in the traversal Other notes: * Input specs can be children of other input specs (this is useful if a user specifies a set of specs to uninstall: some of those specs might be children of others) * `direction="parents"` will produce a reversed topological order (children always come before parents). * `cover="edges"` will generate a list of edges L such that (a) input edges will always appear before output edges and (b) if you create a list with the destination of each edge in L the result is topologically ordered --- lib/spack/spack/test/traverse.py | 243 ++++++++++++++++++++++++++++++--------- lib/spack/spack/traverse.py | 227 ++++++++++++++++++++++++++++-------- 2 files changed, 371 insertions(+), 99 deletions(-) diff --git a/lib/spack/spack/test/traverse.py b/lib/spack/spack/test/traverse.py index b88bfdb95e..727265446d 100644 --- a/lib/spack/spack/test/traverse.py +++ b/lib/spack/spack/test/traverse.py @@ -25,64 +25,109 @@ def create_dag(nodes, edges): @pytest.fixture() def abstract_specs_dtuse(): - nodes = [ - "dtbuild1", - "dtbuild2", - "dtbuild3", - "dtlink1", - "dtlink2", - "dtlink3", - "dtlink4", - "dtlink5", - "dtrun1", - "dtrun2", - "dtrun3", - "dttop", - "dtuse", - ] - edges = [ - ("dtbuild1", "dtbuild2", ("build")), - ("dtbuild1", "dtlink2", ("build", "link")), - ("dtbuild1", "dtrun2", ("run")), - ("dtlink1", "dtlink3", ("build", "link")), - ("dtlink3", "dtbuild2", ("build")), - ("dtlink3", "dtlink4", ("build", "link")), - ("dtrun1", "dtlink5", ("build", "link")), - ("dtrun1", "dtrun3", ("run")), - ("dtrun3", "dtbuild3", ("build")), - ("dttop", "dtbuild1", ("build",)), - ("dttop", "dtlink1", ("build", "link")), - ("dttop", "dtrun1", ("run")), - ("dtuse", "dttop", ("build", "link")), - ] - return create_dag(nodes, edges) + return create_dag( + nodes=[ + "dtbuild1", + "dtbuild2", + "dtbuild3", + "dtlink1", + "dtlink2", + "dtlink3", + "dtlink4", + "dtlink5", + "dtrun1", + "dtrun2", + "dtrun3", + "dttop", + "dtuse", + ], + edges=[ + ("dtbuild1", "dtbuild2", ("build")), + ("dtbuild1", "dtlink2", ("build", "link")), + ("dtbuild1", "dtrun2", ("run")), + ("dtlink1", "dtlink3", ("build", "link")), + ("dtlink3", "dtbuild2", ("build")), + ("dtlink3", "dtlink4", ("build", "link")), + ("dtrun1", "dtlink5", ("build", "link")), + ("dtrun1", "dtrun3", ("run")), + ("dtrun3", "dtbuild3", ("build")), + ("dttop", "dtbuild1", ("build",)), + ("dttop", "dtlink1", ("build", "link")), + ("dttop", "dtrun1", ("run")), + ("dtuse", "dttop", ("build", "link")), + ], + ) @pytest.fixture() def abstract_specs_dt_diamond(): - nodes = ["dt-diamond", "dt-diamond-left", "dt-diamond-right", "dt-diamond-bottom"] - edges = [ - ("dt-diamond", "dt-diamond-left", ("build", "link")), - ("dt-diamond", "dt-diamond-right", ("build", "link")), - ("dt-diamond-right", "dt-diamond-bottom", ("build", "link", "run")), - ("dt-diamond-left", "dt-diamond-bottom", ("build")), - ] - return create_dag(nodes, edges) + return create_dag( + nodes=["dt-diamond", "dt-diamond-left", "dt-diamond-right", "dt-diamond-bottom"], + edges=[ + ("dt-diamond", "dt-diamond-left", ("build", "link")), + ("dt-diamond", "dt-diamond-right", ("build", "link")), + ("dt-diamond-right", "dt-diamond-bottom", ("build", "link", "run")), + ("dt-diamond-left", "dt-diamond-bottom", ("build")), + ], + ) @pytest.fixture() def abstract_specs_chain(): # Chain a -> b -> c -> d with skip connections # from a -> c and a -> d. - nodes = ["chain-a", "chain-b", "chain-c", "chain-d"] - edges = [ - ("chain-a", "chain-b", ("build", "link")), - ("chain-b", "chain-c", ("build", "link")), - ("chain-c", "chain-d", ("build", "link")), - ("chain-a", "chain-c", ("build", "link")), - ("chain-a", "chain-d", ("build", "link")), - ] - return create_dag(nodes, edges) + return create_dag( + nodes=["chain-a", "chain-b", "chain-c", "chain-d"], + edges=[ + ("chain-a", "chain-b", ("build", "link")), + ("chain-b", "chain-c", ("build", "link")), + ("chain-c", "chain-d", ("build", "link")), + ("chain-a", "chain-c", ("build", "link")), + ("chain-a", "chain-d", ("build", "link")), + ], + ) + + +@pytest.mark.parametrize("direction", ("children", "parents")) +@pytest.mark.parametrize("deptype", ("all", ("link", "build"), ("run", "link"))) +def test_all_orders_traverse_the_same_nodes(direction, deptype, abstract_specs_dtuse): + # Test whether all graph traversal methods visit the same set of vertices. + # When testing cover=nodes, the traversal methods may reach the same vertices + # through different edges, so we're using traverse_nodes here to only verify the + # vertices. + # + # NOTE: root=False currently means "yield nodes discovered at depth > 0", + # meaning that depth first search will yield dtlink5 as it is first found through + # dtuse, whereas breadth first search considers dtlink5 at depth 0 and does not + # yield it since it is a root. Therefore, we only use root=True. + # (The inconsistency cannot be resolved by making root=False mean "don't yield + # vertices without in-edges", since this is not how it's used; it's typically used + # as "skip the input specs".) + specs = [abstract_specs_dtuse["dtuse"], abstract_specs_dtuse["dtlink5"]] + kwargs = {"root": True, "direction": direction, "deptype": deptype, "cover": "nodes"} + + def nodes(order): + s = traverse.traverse_nodes(specs, order=order, **kwargs) + return sorted(list(s)) + + assert nodes("pre") == nodes("post") == nodes("breadth") == nodes("topo") + + +@pytest.mark.parametrize("direction", ("children", "parents")) +@pytest.mark.parametrize("root", (True, False)) +@pytest.mark.parametrize("deptype", ("all", ("link", "build"), ("run", "link"))) +def test_all_orders_traverse_the_same_edges(direction, root, deptype, abstract_specs_dtuse): + # Test whether all graph traversal methods visit the same set of edges. + # All edges should be returned, including the artificial edges to the input + # specs when root=True. + specs = [abstract_specs_dtuse["dtuse"], abstract_specs_dtuse["dtlink5"]] + kwargs = {"root": root, "direction": direction, "deptype": deptype, "cover": "edges"} + + def edges(order): + s = traverse.traverse_edges(specs, order=order, **kwargs) + return sorted(list(s)) + + assert edges("pre") == edges("post") == edges("breadth") == edges("topo") def test_breadth_first_traversal(abstract_specs_dtuse): @@ -169,18 +214,18 @@ def test_breadth_first_traversal_reverse(abstract_specs_dt_diamond): ] -def test_breadth_first_traversal_multiple_roots(abstract_specs_dt_diamond): +def test_breadth_first_traversal_multiple_input_specs(abstract_specs_dt_diamond): # With DFS, the branch dt-diamond -> dt-diamond-left -> dt-diamond-bottom # is followed, with BFS, dt-diamond-bottom should be traced through the second - # root dt-diamond-right at depth 1 instead. - roots = [ + # input spec dt-diamond-right at depth 1 instead. + input_specs = [ abstract_specs_dt_diamond["dt-diamond"], abstract_specs_dt_diamond["dt-diamond-right"], ] - gen = traverse.traverse_edges(roots, order="breadth", key=id, depth=True, root=False) + gen = traverse.traverse_edges(input_specs, order="breadth", key=id, depth=True, root=False) assert [(depth, edge.parent.name, edge.spec.name) for (depth, edge) in gen] == [ - (1, "dt-diamond", "dt-diamond-left"), # edge from first root "to" depth 1 - (1, "dt-diamond-right", "dt-diamond-bottom"), # edge from second root "to" depth 1 + (1, "dt-diamond", "dt-diamond-left"), # edge from first input spec "to" depth 1 + (1, "dt-diamond-right", "dt-diamond-bottom"), # edge from second input spec "to" depth 1 ] @@ -284,3 +329,93 @@ chain-a ^chain-d """ assert s.tree(depth_first=False, cover="edges", **args) == bfs_tree_edges + + +@pytest.fixture() +def abstract_specs_toposort(): + # Create a graph that both BFS and DFS would not traverse in topo order, given the + # default edge ordering (by target spec name). Roots are {A, E} in forward order + # and {F, G} in backward order. + # forward: DFS([A, E]) traverses [A, B, F, G, C, D, E] (not topo since C < B) + # forward: BFS([A, E]) traverses [A, E, B, C, D, F, G] (not topo since C < B) + # reverse: DFS([F, G]) traverses [F, B, A, D, C, E, G] (not topo since D < A) + # reverse: BFS([F, G]) traverses [F, G, B, A, D, C, E] (not topo since D < A) + # E + # | A + # | | \ + # | C | + # \ | | + # D | + # | / + # B + # / \ + # F G + return create_dag( + nodes=["A", "B", "C", "D", "E", "F", "G"], + edges=( + ("A", "B", "all"), + ("A", "C", "all"), + ("B", "F", "all"), + ("B", "G", "all"), + ("C", "D", "all"), + ("D", "B", "all"), + ("E", "D", "all"), + ), + ) + + +def test_traverse_nodes_topo(abstract_specs_toposort): + # Test whether we get topologically ordered specs when using traverse_nodes with + # order=topo and cover=nodes. + nodes = abstract_specs_toposort + + def test_topo(input_specs, direction="children"): + # Ensure the invariant that all parents of specs[i] are in specs[0:i] + specs = list( + traverse.traverse_nodes(input_specs, order="topo", cover="nodes", direction=direction) + ) + reverse = "parents" if direction == "children" else "children" + for i in range(len(specs)): + parents = specs[i].traverse(cover="nodes", direction=reverse, root=False) + assert set(list(parents)).issubset(set(specs[:i])) + + # Traverse forward from roots A and E and a non-root D. Notice that adding D has no + # effect, it's just to make the test case a bit more complicated, as D is a starting + # point for traversal, but it's also discovered as a descendant of E and A. + test_topo([nodes["D"], nodes["E"], nodes["A"]], direction="children") + + # Traverse reverse from leafs F and G and non-leaf D + test_topo([nodes["F"], nodes["D"], nodes["G"]], direction="parents") + + +def test_traverse_edges_topo(abstract_specs_toposort): + # Test the invariant that for each node in-edges precede out-edges when + # using traverse_edges with order=topo. + nodes = abstract_specs_toposort + input_specs = [nodes["E"], nodes["A"]] + + # Collect pairs of (parent spec name, child spec name) + edges = [ + (e.parent.name, e.spec.name) + for e in traverse.traverse_edges(input_specs, order="topo", cover="edges", root=False) + ] + + # See figure above, we have 7 edges (excluding artifical ones to the root) + assert set(edges) == set( + [ + ("A", "B"), + ("A", "C"), + ("B", "F"), + ("B", "G"), + ("C", "D"), + ("D", "B"), + ("E", "D"), + ] + ) + + # Verify that all in-edges precede all out-edges + for node in nodes.keys(): + in_edge_indices = [i for (i, (parent, child)) in enumerate(edges) if node == child] + out_edge_indices = [i for (i, (parent, child)) in enumerate(edges) if node == parent] + if in_edge_indices and out_edge_indices: + assert max(in_edge_indices) < min(out_edge_indices) diff --git a/lib/spack/spack/traverse.py b/lib/spack/spack/traverse.py index 6263f83896..c352bf74aa 100644 --- a/lib/spack/spack/traverse.py +++ b/lib/spack/spack/traverse.py @@ -29,10 +29,10 @@ class BaseVisitor(object): def __init__(self, deptype="all"): self.deptype = deptype - def accept(self, node): + def accept(self, item): """ Arguments: - node (EdgeAndDepth): Provides the depth and the edge through which the + item (EdgeAndDepth): Provides the depth and the edge through which the node was discovered Returns: @@ -42,8 +42,8 @@ class BaseVisitor(object): """ return True - def neighbors(self, node): - return sort_edges(node.edge.spec.edges_to_dependencies(deptype=self.deptype)) + def neighbors(self, item): + return sort_edges(item.edge.spec.edges_to_dependencies(deptype=self.deptype)) class ReverseVisitor(object): @@ -53,13 +53,13 @@ class ReverseVisitor(object): self.visitor = visitor self.deptype = deptype - def accept(self, node): - return self.visitor.accept(node) + def accept(self, item): + return self.visitor.accept(item) - def neighbors(self, node): + def neighbors(self, item): """Return dependents, note that we actually flip the edge direction to allow generic programming""" - spec = node.edge.spec + spec = item.edge.spec return sort_edges( [edge.flip() for edge in spec.edges_from_dependents(deptype=self.deptype)] ) @@ -73,19 +73,19 @@ class CoverNodesVisitor(object): self.key = key self.visited = set() if visited is None else visited - def accept(self, node): + def accept(self, item): # Covering nodes means: visit nodes once and only once. - key = self.key(node.edge.spec) + key = self.key(item.edge.spec) if key in self.visited: return False - accept = self.visitor.accept(node) + accept = self.visitor.accept(item) self.visited.add(key) return accept - def neighbors(self, node): - return self.visitor.neighbors(node) + def neighbors(self, item): + return self.visitor.neighbors(item) class CoverEdgesVisitor(object): @@ -96,18 +96,82 @@ class CoverEdgesVisitor(object): self.visited = set() if visited is None else visited self.key = key - def accept(self, node): - return self.visitor.accept(node) + def accept(self, item): + return self.visitor.accept(item) - def neighbors(self, node): + def neighbors(self, item): # Covering edges means: drop dependencies of visited nodes. - key = self.key(node.edge.spec) + key = self.key(item.edge.spec) if key in self.visited: return [] self.visited.add(key) - return self.visitor.neighbors(node) + return self.visitor.neighbors(item) + + +class TopoVisitor(object): + """Visitor that can be used in :py:func:`depth-first traversal + ` to generate + a topologically ordered list of specs. + + Algorithm based on "Section 22.4: Topological sort", Introduction to Algorithms + (2001, 2nd edition) by Cormen, Thomas H.; Leiserson, Charles E.; Rivest, Ronald L.; + Stein, Clifford. + + Summary of the algorithm: prepend each vertex to a list in depth-first post-order, + not following edges to nodes already seen. This ensures all descendants occur after + their parent, yielding a topological order. + + Note: in this particular implementation we collect the *edges* through which the + vertices are discovered, meaning that a topological order of *vertices* is obtained + by taking the specs pointed to: ``map(lambda edge: edge.spec, visitor.edges)``. + Lastly, ``all_edges=True`` can be used to retrieve a list of all reachable + edges, with the property that for each vertex all in-edges precede all out-edges. + """ + + def __init__(self, visitor, key=id, root=True, all_edges=False): + """ + Arguments: + visitor: visitor that implements accept(), pre(), post() and neighbors() + key: uniqueness key for nodes + root (bool): Whether to include the root node. + all_edges (bool): when ``False`` (default): Each node is reached once, + and ``map(lambda edge: edge.spec, visitor.edges)`` is topologically + ordered. When ``True``, every edge is listed, ordered such that for + each node all in-edges precede all out-edges. + """ + self.visited = set() + self.visitor = visitor + self.key = key + self.root = root + self.reverse_order = [] + self.all_edges = all_edges + + def accept(self, item): + if self.key(item.edge.spec) not in self.visited: + return True + if self.all_edges and (self.root or item.depth > 0): + self.reverse_order.append(item.edge) + return False + + def pre(self, item): + # You could add a temporary marker for cycle detection + # that's cleared in `post`, but we assume no cycles. + pass + + def post(self, item): + self.visited.add(self.key(item.edge.spec)) + if self.root or item.depth > 0: + self.reverse_order.append(item.edge) + + def neighbors(self, item): + return self.visitor.neighbors(item) + + @property + def edges(self): + """Return edges in topological order (in-edges precede out-edges).""" + return list(reversed(self.reverse_order)) def get_visitor_from_args(cover, direction, deptype, key=id, visited=None, visitor=None): @@ -144,7 +208,7 @@ def get_visitor_from_args(cover, direction, deptype, key=id, visited=None, visit return visitor -def root_specs(specs): +def with_artificial_edges(specs): """Initialize a list of edges from an imaginary root node to the root specs.""" return [ EdgeAndDepth(edge=spack.spec.DependencySpec(parent=None, spec=s, deptypes=()), depth=0) @@ -152,23 +216,31 @@ def root_specs(specs): ] -def traverse_depth_first_edges_generator(nodes, visitor, post_order=False, root=True, depth=False): - # This is a somewhat non-standard implementation, but the reason to start with - # edges is that we don't have to deal with an artificial root node when doing DFS - # on multiple (root) specs. - for node in nodes: - if not visitor.accept(node): +def traverse_depth_first_edges_generator(edges, visitor, post_order=False, root=True, depth=False): + """Generator that takes explores a DAG in depth-first fashion starting from + a list of edges. Note that typically DFS would take a vertex not a list of edges, + but the API is like this so we don't have to create an artificial root node when + traversing from multiple roots in a DAG. + + Arguments: + edges (list): List of EdgeAndDepth instances + visitor: class instance implementing accept() and neigbors() + post_order (bool): Whether to yield nodes when backtracking + root (bool): whether to yield at depth 0 + depth (bool): when ``True`` yield a tuple of depth and edge, otherwise only the + edge. + """ + for edge in edges: + if not visitor.accept(edge): continue - yield_me = root or node.depth > 0 + yield_me = root or edge.depth > 0 # Pre if yield_me and not post_order: - yield (node.depth, node.edge) if depth else node.edge + yield (edge.depth, edge.edge) if depth else edge.edge - neighbors = [ - EdgeAndDepth(edge=edge, depth=node.depth + 1) for edge in visitor.neighbors(node) - ] + neighbors = [EdgeAndDepth(edge=n, depth=edge.depth + 1) for n in visitor.neighbors(edge)] # This extra branch is just for efficiency. if len(neighbors) >= 0: @@ -179,22 +251,22 @@ def traverse_depth_first_edges_generator(nodes, visitor, post_order=False, root= # Post if yield_me and post_order: - yield (node.depth, node.edge) if depth else node.edge + yield (edge.depth, edge.edge) if depth else edge.edge def traverse_breadth_first_edges_generator(queue, visitor, root=True, depth=False): while len(queue) > 0: - node = queue.pop(0) + edge = queue.pop(0) # If the visitor doesn't accept the node, we don't yield it nor follow its edges. - if not visitor.accept(node): + if not visitor.accept(edge): continue - if root or node.depth > 0: - yield (node.depth, node.edge) if depth else node.edge + if root or edge.depth > 0: + yield (edge.depth, edge.edge) if depth else edge.edge - for edge in visitor.neighbors(node): - queue.append(EdgeAndDepth(edge, node.depth + 1)) + for e in visitor.neighbors(edge): + queue.append(EdgeAndDepth(e, edge.depth + 1)) def traverse_breadth_first_with_visitor(specs, visitor): @@ -205,16 +277,39 @@ def traverse_breadth_first_with_visitor(specs, visitor): visitor: object that implements accept and neighbors interface, see for example BaseVisitor. """ - queue = root_specs(specs) + queue = with_artificial_edges(specs) while len(queue) > 0: - node = queue.pop(0) + edge = queue.pop(0) # If the visitor doesn't accept the node, we don't traverse it further. - if not visitor.accept(node): + if not visitor.accept(edge): + continue + + for e in visitor.neighbors(edge): + queue.append(EdgeAndDepth(e, edge.depth + 1)) + + +def traverse_depth_first_with_visitor(edges, visitor): + """Traverse a DAG in depth-first fashion using a visitor, starting from + a list of edges. Note that typically DFS would take a vertex not a list of edges, + but the API is like this so we don't have to create an artificial root node when + traversing from multiple roots in a DAG. + + Arguments: + edges (list): List of EdgeAndDepth instances + visitor: class instance implementing accept(), pre(), post() and neighbors() + """ + for edge in edges: + if not visitor.accept(edge): continue - for edge in visitor.neighbors(node): - queue.append(EdgeAndDepth(edge, node.depth + 1)) + visitor.pre(edge) + + neighbors = [EdgeAndDepth(edge=e, depth=edge.depth + 1) for e in visitor.neighbors(edge)] + + traverse_depth_first_with_visitor(neighbors, visitor) + + visitor.post(edge) # Helper functions for generating a tree using breadth-first traversal @@ -275,6 +370,34 @@ def traverse_breadth_first_tree_nodes(parent_id, edges, key=id, depth=0): yield item +# Topologic order +def traverse_edges_topo( + specs, direction="children", deptype="all", key=id, root=True, all_edges=False +): + """ + Returns a list of edges in topological order, in the sense that all in-edges of a + vertex appear before all out-edges. By default with direction=children edges are + directed from dependent to dependency. With directions=parents, the edges are + directed from dependency to dependent. + + Arguments: + specs (list): List of root specs (considered to be depth 0) + direction (str): ``children`` (edges are directed from dependent to dependency) + or ``parents`` (edges are flipped / directed from dependency to dependent) + deptype (str or tuple): allowed dependency types + key: function that takes a spec and outputs a key for uniqueness test. + root (bool): Yield the root nodes themselves + all_edges (bool): When ``False`` only one in-edge per node is returned, when + ``True`` all reachable edges are returned. + """ + visitor = BaseVisitor(deptype) + if direction == "parents": + visitor = ReverseVisitor(visitor, deptype) + visitor = TopoVisitor(visitor, key=key, root=root, all_edges=all_edges) + traverse_depth_first_with_visitor(with_artificial_edges(specs), visitor) + return visitor.edges + + # High-level API: traverse_edges, traverse_nodes, traverse_tree. @@ -298,6 +421,7 @@ def traverse_edges( root (bool): Yield the root nodes themselves order (str): What order of traversal to use in the DAG. For depth-first search this can be ``pre`` or ``post``. For BFS this should be ``breadth``. + For topological order use ``topo`` cover (str): Determines how extensively to cover the dag. Possible values: ``nodes`` -- Visit each unique node in the dag only once. ``edges`` -- If a node has been visited once but is reached along a @@ -320,7 +444,19 @@ def traverse_edges( A generator that yields ``DependencySpec`` if depth is ``False`` or a tuple of ``(depth, DependencySpec)`` if depth is ``True``. """ - root_edges = root_specs(specs) + + if order == "topo": + if cover == "paths": + raise ValueError("cover=paths not supported for order=topo") + # TODO: There is no known need for topological ordering of traversals (edge or node) + # with an initialized "visited" set. Revisit if needed. + if visited is not None: + raise ValueError("visited set not implemented for order=topo") + return traverse_edges_topo( + specs, direction, deptype, key, root, all_edges=cover == "edges" + ) + + root_edges = with_artificial_edges(specs) visitor = get_visitor_from_args(cover, direction, deptype, key, visited) # Depth-first @@ -328,9 +464,10 @@ def traverse_edges( return traverse_depth_first_edges_generator( root_edges, visitor, order == "post", root, depth ) + elif order == "breadth": + return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth) - # Breadth-first - return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth) + raise ValueError("Unknown order {}".format(order)) def traverse_nodes( -- cgit v1.2.3-60-g2f50