diff options
author | Harmen Stoppels <me@harmenstoppels.nl> | 2024-11-22 15:04:19 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-22 15:04:19 +0100 |
commit | cb3d6549c988cb914583e4d220a2d1c0b0aa6ae2 (patch) | |
tree | 312d43a5241eee65e9b4bc72e581f6a852a6efdc /lib | |
parent | 559c2f1eb9ea7fdaeecf7d4fdf556c544d9de45e (diff) | |
download | spack-cb3d6549c988cb914583e4d220a2d1c0b0aa6ae2.tar.gz spack-cb3d6549c988cb914583e4d220a2d1c0b0aa6ae2.tar.bz2 spack-cb3d6549c988cb914583e4d220a2d1c0b0aa6ae2.tar.xz spack-cb3d6549c988cb914583e4d220a2d1c0b0aa6ae2.zip |
traverse.py: ensure topo order is bfs for trees (#47720)
Diffstat (limited to 'lib')
-rw-r--r-- | lib/spack/spack/build_environment.py | 13 | ||||
-rw-r--r-- | lib/spack/spack/graph.py | 7 | ||||
-rw-r--r-- | lib/spack/spack/test/graph.py | 13 | ||||
-rw-r--r-- | lib/spack/spack/test/traverse.py | 23 | ||||
-rw-r--r-- | lib/spack/spack/traverse.py | 149 |
5 files changed, 96 insertions, 109 deletions
diff --git a/lib/spack/spack/build_environment.py b/lib/spack/spack/build_environment.py index 3a4f8b3fa9..c46db63c83 100644 --- a/lib/spack/spack/build_environment.py +++ b/lib/spack/spack/build_environment.py @@ -882,6 +882,9 @@ class EnvironmentVisitor: elif context == Context.RUN: self.root_depflag = dt.RUN | dt.LINK + def accept(self, item): + return True + def neighbors(self, item): spec = item.edge.spec if spec.dag_hash() in self.root_hashes: @@ -919,19 +922,19 @@ def effective_deptypes( a flag specifying in what way they do so. The list is ordered topologically from root to leaf, meaning that environment modifications should be applied in reverse so that dependents override dependencies, not the other way around.""" - visitor = traverse.TopoVisitor( - EnvironmentVisitor(*specs, context=context), - key=lambda x: x.dag_hash(), + topo_sorted_edges = traverse.traverse_topo_edges_generator( + traverse.with_artificial_edges(specs), + visitor=EnvironmentVisitor(*specs, context=context), + key=traverse.by_dag_hash, root=True, all_edges=True, ) - traverse.traverse_depth_first_with_visitor(traverse.with_artificial_edges(specs), visitor) # Dictionary with "no mode" as default value, so it's easy to write modes[x] |= flag. use_modes = defaultdict(lambda: UseMode(0)) nodes_with_type = [] - for edge in visitor.edges: + for edge in topo_sorted_edges: parent, child, depflag = edge.parent, edge.spec, edge.depflag # Mark the starting point diff --git a/lib/spack/spack/graph.py b/lib/spack/spack/graph.py index f4ac437df9..2d0fc9c3a8 100644 --- a/lib/spack/spack/graph.py +++ b/lib/spack/spack/graph.py @@ -325,12 +325,7 @@ class AsciiGraph: self._out = llnl.util.tty.color.ColorStream(out, color=color) # We'll traverse the spec in topological order as we graph it. - nodes_in_topological_order = [ - edge.spec - for edge in spack.traverse.traverse_edges_topo( - [spec], direction="children", deptype=self.depflag - ) - ] + nodes_in_topological_order = list(spec.traverse(order="topo", deptype=self.depflag)) nodes_in_topological_order.reverse() # Work on a copy to be nondestructive diff --git a/lib/spack/spack/test/graph.py b/lib/spack/spack/test/graph.py index c26363bb46..a622e855e5 100644 --- a/lib/spack/spack/test/graph.py +++ b/lib/spack/spack/test/graph.py @@ -74,4 +74,17 @@ o | libdwarf |/ o libelf """ + or graph_str + == r"""o mpileaks +|\ +| o callpath +|/| +| o dyninst +| |\ +o | | mpich + / / +| o libdwarf +|/ +o libelf +""" ) diff --git a/lib/spack/spack/test/traverse.py b/lib/spack/spack/test/traverse.py index 79e05c0b2c..62ce24d366 100644 --- a/lib/spack/spack/test/traverse.py +++ b/lib/spack/spack/test/traverse.py @@ -431,3 +431,26 @@ def test_traverse_nodes_no_deps(abstract_specs_dtuse): ] outputs = [x for x in traverse.traverse_nodes(inputs, deptype=dt.NONE)] assert outputs == [abstract_specs_dtuse["dtuse"], abstract_specs_dtuse["dtlink5"]] + + +@pytest.mark.parametrize("cover", ["nodes", "edges"]) +def test_topo_is_bfs_for_trees(cover): + """For trees, both DFS and BFS produce a topological order, but BFS is the most sensible for + our applications, where we typically want to avoid that transitive dependencies shadow direct + depenencies in global search paths, etc. This test ensures that for trees, the default topo + order coincides with BFS.""" + binary_tree = create_dag( + nodes=["A", "B", "C", "D", "E", "F", "G"], + edges=( + ("A", "B", "all"), + ("A", "C", "all"), + ("B", "D", "all"), + ("B", "E", "all"), + ("C", "F", "all"), + ("C", "G", "all"), + ), + ) + + assert list(traverse.traverse_nodes([binary_tree["A"]], order="topo", cover=cover)) == list( + traverse.traverse_nodes([binary_tree["A"]], order="breadth", cover=cover) + ) diff --git a/lib/spack/spack/traverse.py b/lib/spack/spack/traverse.py index f6c5589b2a..158f4d892e 100644 --- a/lib/spack/spack/traverse.py +++ b/lib/spack/spack/traverse.py @@ -115,70 +115,6 @@ class CoverEdgesVisitor: return self.visitor.neighbors(item) -class TopoVisitor: - """Visitor that can be used in :py:func:`depth-first traversal - <spack.traverse.traverse_depth_first_with_visitor>` to generate - a topologically ordered list of specs. - - Algorithm based on "Section 22.4: Topological sort", Introduction to Algorithms - (2001, 2nd edition) by Cormen, Thomas H.; Leiserson, Charles E.; Rivest, Ronald L.; - Stein, Clifford. - - Summary of the algorithm: prepend each vertex to a list in depth-first post-order, - not following edges to nodes already seen. This ensures all descendants occur after - their parent, yielding a topological order. - - Note: in this particular implementation we collect the *edges* through which the - vertices are discovered, meaning that a topological order of *vertices* is obtained - by taking the specs pointed to: ``map(lambda edge: edge.spec, visitor.edges)``. - Lastly, ``all_edges=True`` can be used to retrieve a list of all reachable - edges, with the property that for each vertex all in-edges precede all out-edges. - """ - - def __init__(self, visitor, key=id, root=True, all_edges=False): - """ - Arguments: - visitor: visitor that implements accept(), pre(), post() and neighbors() - key: uniqueness key for nodes - root (bool): Whether to include the root node. - all_edges (bool): when ``False`` (default): Each node is reached once, - and ``map(lambda edge: edge.spec, visitor.edges)`` is topologically - ordered. When ``True``, every edge is listed, ordered such that for - each node all in-edges precede all out-edges. - """ - self.visited = set() - self.visitor = visitor - self.key = key - self.root = root - self.reverse_order = [] - self.all_edges = all_edges - - def accept(self, item): - if self.key(item.edge.spec) not in self.visited: - return True - if self.all_edges and (self.root or item.depth > 0): - self.reverse_order.append(item.edge) - return False - - def pre(self, item): - # You could add a temporary marker for cycle detection - # that's cleared in `post`, but we assume no cycles. - pass - - def post(self, item): - self.visited.add(self.key(item.edge.spec)) - if self.root or item.depth > 0: - self.reverse_order.append(item.edge) - - def neighbors(self, item): - return self.visitor.neighbors(item) - - @property - def edges(self): - """Return edges in topological order (in-edges precede out-edges).""" - return list(reversed(self.reverse_order)) - - def get_visitor_from_args( cover, direction, depflag: Union[dt.DepFlag, dt.DepTypes], key=id, visited=None, visitor=None ): @@ -381,39 +317,54 @@ def traverse_breadth_first_tree_nodes(parent_id, edges, key=id, depth=0): yield item -# Topologic order -def traverse_edges_topo( - specs, - direction="children", - deptype: Union[dt.DepFlag, dt.DepTypes] = "all", - key=id, - root=True, - all_edges=False, -): +def traverse_topo_edges_generator(edges, visitor, key=id, root=True, all_edges=False): """ - Returns a list of edges in topological order, in the sense that all in-edges of a - vertex appear before all out-edges. By default with direction=children edges are - directed from dependent to dependency. With directions=parents, the edges are - directed from dependency to dependent. + Returns a list of edges in topological order, in the sense that all in-edges of a vertex appear + before all out-edges. Arguments: - specs (list): List of root specs (considered to be depth 0) - direction (str): ``children`` (edges are directed from dependent to dependency) - or ``parents`` (edges are flipped / directed from dependency to dependent) - deptype: allowed dependency types + edges (list): List of EdgeAndDepth instances + visitor: visitor instance that defines the sub-DAG to traverse key: function that takes a spec and outputs a key for uniqueness test. root (bool): Yield the root nodes themselves all_edges (bool): When ``False`` only one in-edge per node is returned, when ``True`` all reachable edges are returned. """ - if not isinstance(deptype, dt.DepFlag): - deptype = dt.canonicalize(deptype) - visitor: Union[BaseVisitor, ReverseVisitor, TopoVisitor] = BaseVisitor(deptype) - if direction == "parents": - visitor = ReverseVisitor(visitor, deptype) - visitor = TopoVisitor(visitor, key=key, root=root, all_edges=all_edges) - traverse_depth_first_with_visitor(with_artificial_edges(specs), visitor) - return visitor.edges + # Topo order used to be implemented using a DFS visitor, which was relatively efficient in that + # it would visit nodes only once, and it was composable. In practice however it would yield a + # DFS order on DAGs that are trees, which is undesirable in many cases. For example, a list of + # search paths for trees is better in BFS order, so that direct dependencies are listed first. + # That way a transitive dependency cannot shadow a direct one. So, here we collect the sub-DAG + # of interest and then compute a topological order that is the most breadth-first possible. + + # maps node identifier to the number of remaining in-edges + in_edge_count = defaultdict(int) + # maps parent identifier to a list of edges, where None is a special identifier + # for the artificial root/source. + node_to_edges = defaultdict(list) + for edge in traverse_breadth_first_edges_generator( + edges, CoverEdgesVisitor(visitor, key=key), root=True, depth=False + ): + in_edge_count[key(edge.spec)] += 1 + parent_id = key(edge.parent) if edge.parent is not None else None + node_to_edges[parent_id].append(edge) + + queue = [None] + + while queue: + for edge in node_to_edges[queue.pop(0)]: + child_id = key(edge.spec) + in_edge_count[child_id] -= 1 + + should_yield = root or edge.parent is not None + + if all_edges and should_yield: + yield edge + + if in_edge_count[child_id] == 0: + if not all_edges and should_yield: + yield edge + queue.append(key(edge.spec)) # High-level API: traverse_edges, traverse_nodes, traverse_tree. @@ -462,20 +413,20 @@ def traverse_edges( A generator that yields ``DependencySpec`` if depth is ``False`` or a tuple of ``(depth, DependencySpec)`` if depth is ``True``. """ - + # validate input if order == "topo": if cover == "paths": raise ValueError("cover=paths not supported for order=topo") - # TODO: There is no known need for topological ordering of traversals (edge or node) - # with an initialized "visited" set. Revisit if needed. if visited is not None: raise ValueError("visited set not implemented for order=topo") - return traverse_edges_topo( - specs, direction, deptype, key, root, all_edges=cover == "edges" - ) + elif order not in ("post", "pre", "breadth"): + raise ValueError(f"Unknown order {order}") + # In topo traversal we need to construct a sub-DAG including all edges even if we are yielding + # a subset of them, hence "paths". + _cover = "paths" if order == "topo" else cover + visitor = get_visitor_from_args(_cover, direction, deptype, key, visited) root_edges = with_artificial_edges(specs) - visitor = get_visitor_from_args(cover, direction, deptype, key, visited) # Depth-first if order in ("pre", "post"): @@ -484,8 +435,10 @@ def traverse_edges( ) elif order == "breadth": return traverse_breadth_first_edges_generator(root_edges, visitor, root, depth) - - raise ValueError("Unknown order {}".format(order)) + elif order == "topo": + return traverse_topo_edges_generator( + root_edges, visitor, key, root, all_edges=cover == "edges" + ) def traverse_nodes( |