feat(graph): Implement graph functions

tcjennings · tcjennings · commit 29780eee320a · 2025-06-26T16:14:29.000-05:00
diff --git a/src/lsst/cmservice/common/graph.py b/src/lsst/cmservice/common/graph.py
@@ -1,8 +1,10 @@
-from collections.abc import Mapping, Sequence
+from collections.abc import Iterable, Mapping, MutableSet, Sequence
 
 import networkx as nx
+from sqlalchemy import select
 
 from ..db import Script, ScriptDependency, Step, StepDependency
+from ..db.campaigns_v2 import Edge, Node
 from ..parsing.string import parse_element_fullname
 from .types import AnyAsyncSession
 
@@ -35,8 +37,125 @@ async def graph_from_edge_list(
     return g
 
 
+async def graph_from_edge_list_v2(
+    edges: Sequence[Edge],
+    node_type: type[Node],
+    session: AnyAsyncSession,
+) -> nx.DiGraph:
+    """Given a sequence of Edges, create a directed graph for these
+    edges with nodes derived from database lookups of the related objects.
+    """
+    g = nx.DiGraph()
+    g.add_edges_from([(e.source, e.target) for e in edges])
+
+    # The graph understands the nodes in terms of the IDs used in the edges,
+    # but we want to hydrate the entire Node model for subsequent users of this
+    # graph to reference without dipping back to the Database.
+    for node in g.nodes:
+        s = select(Node).where(Node.id == node)
+        db_node: Node = (await session.execute(s)).scalars().one()
+
+        # This Node is going on an adventure where it does not need to drag its
+        # SQLAlchemy baggage along, so we expunge it from the session before
+        # adding it to the graph.
+        session.expunge(db_node)
+        g.nodes[node]["model"] = db_node
+
+    # TODO validate graph now raise exception, or leave it to the caller?
+    return g
+
+
 def graph_to_dict(g: nx.DiGraph) -> Mapping:
     """Renders a networkx directed graph to a mapping format suitable for JSON
     serialization.
     """
     return nx.node_link_data(g, edges="edges")
+
+
+def validate_graph(g: nx.DiGraph, source: str = "START", sink: str = "END") -> bool:
+    """Validates a graph by asserting by traversal that a complete and correct
+    path exists between `source` and `sink` nodes.
+
+    "Correct" means that there are no cycles or isolate nodes (nodes with
+    degree 0) and no nodes with degree 1.
+    """
+    try:
+        # Test that G is a directed graph with no cycles
+        is_valid = nx.is_directed_acyclic_graph(g)
+        assert is_valid
+
+        # And that any path from source to sink exists
+        is_valid = nx.has_path(g, source, sink)
+        assert is_valid
+
+        # Guard against bad graphs where START and/or END have been connected
+        # such that they are no longer the only source and sink
+        ...
+
+        # Test that there are no isolated Nodes in the graph. A node becomes
+        # isolated if it was involved with an edge that has been removed from
+        # G with no replacement edge added, in which case the node should also
+        # be removed.
+        is_valid = nx.number_of_isolates(g) == 0
+        assert is_valid
+
+        # TODO Given the set of nodes in the graph, consider all paths in G
+        #      from source to sink, making sure every node appears in a path?
+
+        # Every node in G that is not the START/END node must have a degree
+        # of at least 2 (one inbound and one outbound edge). If G has any
+        # node with a degree of 1, it cannot be considered valid.
+        g_degree_view: Iterable = nx.degree(g, (n for n in g.nodes if n not in [source, sink]))
+        is_valid = min([d[1] for d in g_degree_view]) > 1
+        assert is_valid
+    except (nx.exception.NodeNotFound, AssertionError):
+        return False
+    return True
+
+
+def processable_graph_nodes(g: nx.DiGraph) -> Iterable[Node]:
+    """Traverse the graph G and produce an iterator of any nodes that are
+    candidates for processing, i.e., their status is waiting/prepared/running
+    and their ancestors are complete/successful. Graph nodes in a failed state
+    will block the graph and prevent candidacy for subsequent nodes.
+
+    Yields
+    ------
+    `lsst.cmservice.db.campaigns_v2.Node`
+        A Node ORM object that has been ``expunge``d from its ``Session``.
+
+    Notes
+    -----
+    This function operates only on valid graphs (see `validate_graph()`) that
+    have been built by the `graph_from_edge_list_v2()` function, where each
+    graph-node is decorated with a "model" attribute referring to an expunged
+    instance of ``Node``. This ``Node`` can be ``add``ed back to a ``Session``
+    and manipulated in the usual way.
+    """
+    processable_nodes: MutableSet[Node] = set()
+
+    # A valid campaign graph will have only one source (START) with in_degree 0
+    # and only one sink (END) with out_degree 0
+    source = next(v for v, d in g.in_degree() if d == 0)
+    sink = next(v for v, d in g.out_degree() if d == 0)
+
+    # For each path through the graph, evaluate the state of nodes to determine
+    # which nodes are up for processing. When there are multiple paths, we have
+    # parallelization and common ancestors may be evaluated more than once,
+    # which is an exercise in optimization left as a TODO
+    for path in nx.all_simple_paths(g, source, sink):
+        for n in path:
+            node: Node = g.nodes[n]["model"]
+            if node.status.is_processable_element():
+                processable_nodes.add(node)
+                # We found a processable node in this path, stop traversal
+                break
+            elif node.status.is_bad():
+                # We reached a failed node in this path, it is blocked
+                break
+            else:
+                # This node must be in a "successful" terminal state
+                continue
+
+    # the inspection should stop when there are no more nodes to check
+    yield from processable_nodes
diff --git a/src/lsst/cmservice/db/campaigns_v2.py b/src/lsst/cmservice/db/campaigns_v2.py
@@ -100,6 +100,12 @@ class CampaignUpdate(BaseSQLModel):
 class NodeBase(BaseSQLModel):
     """nodes_v2 db table"""
 
+    def __hash__(self) -> int:
+        """Return the integer form of this Node's ``id`` as its hash.
+
+        A Node is hashable according to its unique ID, so it can be used in
+        sets and other places hashable types are required.
+        """
+        return self.id.int
+
     id: UUID = Field(primary_key=True)
     name: str
     namespace: UUID
@@ -108,7 +114,7 @@ class NodeBase(BaseSQLModel):
         default=ManifestKind.other,
         sa_column=Column("kind", Enum(ManifestKind, length=20, native_enum=False, create_constraint=False)),
     )
-    status: StatusField | None = Field(
+    status: StatusField = Field(
         default=StatusEnum.waiting,
         sa_column=Column("status", Enum(StatusEnum, length=20, native_enum=False, create_constraint=False)),
     )
diff --git a/src/lsst/cmservice/routers/v2/edges.py b/src/lsst/cmservice/routers/v2/edges.py
@@ -126,7 +126,7 @@ async def create_edge_resource(
         namespace: {campaign uuid}
     spec:
         source: {node name or id}
-        target: {ndoe name or id}
+        target: {node name or id}
     ```
     """
     edge_name = manifest.metadata_.name
@@ -153,6 +153,7 @@ async def create_edge_resource(
     # TODO the edge spec should support mappings for source/target nodes but
     # for now assume the provided name has `.vN` appended to it already or
     # default to v1
+    # TODO support node id in spec
     source_node = f"{source_node}.1" if "." not in source_node else str(source_node)
     target_node = f"{target_node}.1" if "." not in target_node else str(target_node)
 
diff --git a/tests/v2/test_graph.py b/tests/v2/test_graph.py
@@ -0,0 +1,217 @@
+"""Tests graph operations using v2 objects"""
+
+from collections.abc import AsyncGenerator
+from uuid import uuid4
+
+import networkx as nx
+import pytest
+import pytest_asyncio
+from httpx import AsyncClient
+
+from lsst.cmservice.common.enums import StatusEnum
+from lsst.cmservice.common.graph import graph_from_edge_list_v2, processable_graph_nodes, validate_graph
+from lsst.cmservice.common.types import AnyAsyncSession
+from lsst.cmservice.db.campaigns_v2 import Edge, Node
+
+pytestmark = pytest.mark.asyncio(loop_scope="module")
+"""All tests in this module will run in the same event loop."""
+
+
+@pytest_asyncio.fixture(scope="module", loop_scope="module")
+async def test_campaign(aclient: AsyncClient) -> AsyncGenerator[str]:
+    """Fixture managing a test campaign with two (additional) nodes."""
+    campaign_name = uuid4().hex[-8:]
+    node_ids = []
+
+    x = await aclient.post(
+        "/cm-service/v2/campaigns",
+        json={
+            "apiVersion": "io.lsst.cmservice/v1",
+            "kind": "campaign",
+            "metadata": {"name": campaign_name},
+            "spec": {},
+        },
+    )
+    campaign_edge_url = x.headers["Edges"]
+    campaign = x.json()
+
+    # create a trio of nodes for the campaign
+    for _ in range(3):
+        x = await aclient.post(
+            "/cm-service/v2/nodes",
+            json={
+                "apiVersion": "io.lsst.cmservice/v1",
+                "kind": "node",
+                "metadata": {"name": uuid4().hex[-8:], "namespace": campaign["id"]},
+                "spec": {},
+            },
+        )
+        node = x.json()
+        node_ids.append(node["name"])
+
+    # Create edges between each campaign node with parallelization
+    _ = await aclient.post(
+        "/cm-service/v2/edges",
+        json={
+            "apiVersion": "io.lsst.cmservice/v1",
+            "kind": "edge",
+            "metadata": {"name": uuid4().hex[-8:], "namespace": campaign["id"]},
+            "spec": {
+                "source": "START",
+                "target": node_ids[0],
+            },
+        },
+    )
+    _ = await aclient.post(
+        "/cm-service/v2/edges",
+        json={
+            "apiVersion": "io.lsst.cmservice/v1",
+            "kind": "edge",
+            "metadata": {"name": uuid4().hex[-8:], "namespace": campaign["id"]},
+            "spec": {
+                "source": node_ids[0],
+                "target": node_ids[1],
+            },
+        },
+    )
+    _ = await aclient.post(
+        "/cm-service/v2/edges",
+        json={
+            "apiVersion": "io.lsst.cmservice/v1",
+            "kind": "edge",
+            "metadata": {"name": uuid4().hex[-8:], "namespace": campaign["id"]},
+            "spec": {
+                "source": node_ids[0],
+                "target": node_ids[2],
+            },
+        },
+    )
+    _ = await aclient.post(
+        "/cm-service/v2/edges",
+        json={
+            "apiVersion": "io.lsst.cmservice/v1",
+            "kind": "edge",
+            "metadata": {"name": uuid4().hex[-8:], "namespace": campaign["id"]},
+            "spec": {
+                "source": node_ids[1],
+                "target": "END",
+            },
+        },
+    )
+    _ = await aclient.post(
+        "/cm-service/v2/edges",
+        json={
+            "apiVersion": "io.lsst.cmservice/v1",
+            "kind": "edge",
+            "metadata": {"name": uuid4().hex[-8:], "namespace": campaign["id"]},
+            "spec": {
+                "source": node_ids[2],
+                "target": "END",
+            },
+        },
+    )
+    yield campaign_edge_url
+
+
+async def test_build_and_walk_graph(
+    aclient: AsyncClient, session: AnyAsyncSession, test_campaign: str
+) -> None:
+    """Test the generation and traversal of a campaign graph as created in the
+    ``test_campaign`` fixture.
+
+    Test that the graph is traversed from START to END in order, and that as
+    graph nodes are "processable" they can be handled. In this test, the status
+    of each node is set to "accepted" and updated in the databse. The campaign
+    graph is recreated between each stage of the mock graph processing.
+
+    The test campaign is a set of 3 nodes arranged in a graph:
+
+    ```
+    START --> A --> B --> END
+                --> C -->
+    ```
+    """
+    edge_list = [Edge.model_validate(edge) for edge in (await aclient.get(test_campaign)).json()]
+    graph = await graph_from_edge_list_v2(edge_list, Node, session)
+
+    # the START node should be the only processable Node
+    for node in processable_graph_nodes(graph):
+        assert node.name == "START"
+        assert node.status is StatusEnum.waiting
+        # Add the Node back to the session and update its status
+        session.add(node)
+        await session.refresh(node)
+        node.status = StatusEnum.accepted
+        await session.commit()
+
+    # Repeat the graph building and traversal, this time expecting a single
+    # node that is not "START"
+    graph = await graph_from_edge_list_v2(edge_list, Node, session)
+    for node in processable_graph_nodes(graph):
+        assert node.name != "START"
+        assert node.status is StatusEnum.waiting
+        # Add the Node back to the session and update its status
+        session.add(node)
+        await session.refresh(node)
+        node.status = StatusEnum.accepted
+        await session.commit()
+
+    # Repeat the graph building and traversal, this time expecting a pair of
+    # nodes processable in parallel
+    graph = await graph_from_edge_list_v2(edge_list, Node, session)
+    count = 0
+    for node in processable_graph_nodes(graph):
+        count += 1
+        assert node.name != "START"
+        assert node.status is StatusEnum.waiting
+        # Add the Node back to the session and update its status
+        session.add(node)
+        await session.refresh(node)
+        node.status = StatusEnum.accepted
+        await session.commit()
+    assert count == 2
+
+    # Finally, expect the END node
+    graph = await graph_from_edge_list_v2(edge_list, Node, session)
+    for node in processable_graph_nodes(graph):
+        assert node.name == "END"
+        assert node.status is StatusEnum.waiting
+        # Add the Node back to the session and update its status
+        session.add(node)
+        await session.refresh(node)
+        node.status = StatusEnum.accepted
+        await session.commit()
+
+
+def test_validate_graph() -> None:
+    """Test basic graph validation operations using a simple DAG."""
+    edge_list = [("A", "B"), ("B", "C"), ("C", "D"), ("C", "E"), ("D", "F"), ("E", "F")]
+
+    g = nx.DiGraph()
+    g.add_edges_from(edge_list)
+
+    # this is a valid graph
+    assert validate_graph(g, "A", "F")
+
+    # add a new parallel node with no path to sink
+    g.add_edge("C", "CC")
+    assert not validate_graph(g, "A", "F")
+
+    # create a cycle with the new node
+    g.add_edge("CC", "A")
+    assert not validate_graph(g, "A", "F")
+
+    # correct the path
+    g.remove_edge("CC", "A")
+    g.add_edge("CC", "F")
+    assert validate_graph(g, "A", "F")
+
+    # remove the edges from a node
+    g.remove_edge("CC", "F")
+    g.remove_edge("C", "CC")
+    # the graph is invalid because "CC" is now an isolate
+    assert not validate_graph(g, "A", "F")
+
+    # remove the unneeded node
+    g.remove_node("CC")
+    assert validate_graph(g, "A", "F")