Skip to content

Commit 415c2b3

Browse files
authored
feat(cli): add orphan entity command (#816)
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 4aa0cbd commit 415c2b3

12 files changed

Lines changed: 598 additions & 2 deletions

File tree

src/basic_memory/api/v2/routers/knowledge_router.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
MoveEntityRequestV2,
3939
MoveDirectoryRequestV2,
4040
DeleteDirectoryRequestV2,
41+
OrphanEntitiesResponse,
4142
)
4243
from basic_memory.schemas.response import DirectoryMoveResult, DirectoryDeleteResult
4344

@@ -110,6 +111,38 @@ async def get_graph(
110111
return GraphResponse(nodes=nodes, edges=edges)
111112

112113

114+
## Orphan entities endpoint
115+
116+
117+
@router.get("/orphans", response_model=OrphanEntitiesResponse)
async def get_orphan_entities(
    project_id: ProjectExternalIdPathDep,
    entity_repository: EntityRepositoryV2ExternalDep,
) -> OrphanEntitiesResponse:
    """Return entities that have no incoming or outgoing relations."""
    # project_id is not referenced in the body.
    # NOTE(review): presumably the path parameter scopes the injected
    # repository to one project - confirm against the dependency definitions.
    span_attributes = {
        "entrypoint": "api",
        "domain": "knowledge",
        "action": "get_orphans",
    }
    with logfire.span("api.request.knowledge.get_orphans", **span_attributes):
        logger.info("API v2 request: get_orphan_entities")

        # Project each entity returned by the repository onto the four
        # fields a GraphNode is built from here.
        nodes = []
        for orphan in await entity_repository.find_without_relations():
            nodes.append(
                GraphNode(
                    external_id=orphan.external_id,
                    title=orphan.title,
                    note_type=orphan.note_type,
                    file_path=orphan.file_path,
                )
            )

        logger.info(f"API v2 response: {len(nodes)} orphan entities")
        # total is always the length of the returned list.
        return OrphanEntitiesResponse(entities=nodes, total=len(nodes))
144+
145+
113146
## Resolution endpoint
114147

115148

src/basic_memory/cli/commands/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""CLI commands for basic-memory."""
22

3-
from . import status, db, doctor, import_memory_json, mcp, import_claude_conversations
3+
from . import status, db, doctor, import_memory_json, mcp, import_claude_conversations, orphans
44
from . import (
55
import_claude_projects,
66
import_chatgpt,
@@ -18,6 +18,7 @@
1818
"import_memory_json",
1919
"mcp",
2020
"import_claude_conversations",
21+
"orphans",
2122
"import_claude_projects",
2223
"import_chatgpt",
2324
"tool",
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""Orphans command - show entities with no relations in the knowledge graph."""
2+
3+
import json
4+
from typing import Annotated, Optional
5+
6+
import typer
7+
from loguru import logger
8+
from mcp.server.fastmcp.exceptions import ToolError
9+
from rich.console import Console
10+
from rich.table import Table
11+
12+
from basic_memory.cli.app import app
13+
from basic_memory.cli.commands.routing import force_routing, validate_routing_flags
14+
from basic_memory.config import ConfigManager
15+
from basic_memory.mcp.async_client import get_client
16+
from basic_memory.mcp.clients.knowledge import KnowledgeClient
17+
from basic_memory.mcp.project_context import get_active_project
18+
from basic_memory.schemas.v2.graph import GraphNode
19+
20+
console = Console()
21+
22+
23+
async def run_orphans(project: Optional[str] = None) -> tuple[str, list[GraphNode]]:
    """Look up the orphan entities of a project through the knowledge client.

    Args:
        project: Project name. When falsy (``None`` or empty), the value of
            ``ConfigManager().default_project`` is used instead.

    Returns:
        A ``(project_name, entities)`` tuple: the name of the active project
        and the nodes returned by ``KnowledgeClient.get_orphans()``.
    """
    if not project:
        project = ConfigManager().default_project

    async with get_client(project_name=project) as client:
        active_project = await get_active_project(client, project, None)
        knowledge_client = KnowledgeClient(client, active_project.external_id)
        orphan_nodes = await knowledge_client.get_orphans()
        return active_project.name, orphan_nodes
31+
32+
33+
@app.command()
def orphans(
    project: Annotated[
        Optional[str],
        typer.Option(help="The project name."),
    ] = None,
    json_output: bool = typer.Option(False, "--json", help="Output in JSON format"),
    local: bool = typer.Option(
        False, "--local", help="Force local API routing (ignore cloud mode)"
    ),
    cloud: bool = typer.Option(False, "--cloud", help="Force cloud API routing"),
):
    """Show entities that have no relations in the knowledge graph.

    Orphan entities have no incoming or outgoing connections. These may indicate
    newly created notes not yet linked to other entities, or notes that have had
    their relations removed.
    """
    # The docstring above doubles as the CLI help text, so implementation notes
    # live in comments:
    #   * --json prints a JSON array of nodes (or {"error": ...} on failure);
    #     otherwise a Rich table (or a one-line "no orphans" message) is shown.
    #   * Any failure is reported and converted into exit code 1.
    from rich.markup import escape

    from basic_memory.cli.commands.command_utils import run_with_cleanup

    def _report_error(exc: Exception) -> None:
        """Emit the failure in the format selected by --json."""
        if json_output:
            print(json.dumps({"error": str(exc)}, indent=2))
        else:
            # Escape the message: Rich would otherwise parse bracketed text in
            # it as markup, and a stray closing tag raises MarkupError from
            # inside the error handler itself.
            console.print(f"[red]Error: {escape(str(exc))}[/red]")

    try:
        validate_routing_flags(local, cloud)
        with force_routing(local=local, cloud=cloud):
            project_name, entities = run_with_cleanup(run_orphans(project))

        if json_output:
            print(json.dumps([entity.model_dump(mode="json") for entity in entities], indent=2))
            return

        # User-controlled text (project name, titles, paths, types) is escaped
        # everywhere it is handed to Rich so bracketed text such as "[draft]"
        # is displayed literally instead of being consumed as a style tag.
        safe_project_name = escape(project_name)

        if not entities:
            console.print(f"[green]No orphan entities in project '{safe_project_name}'[/green]")
            return

        table = Table(
            title=f"{safe_project_name}: Entities Without Relations ({len(entities)} total)"
        )
        table.add_column("Title", style="cyan")
        table.add_column("File Path", style="yellow")
        table.add_column("Type", style="green")

        for entity in entities:
            table.add_row(
                escape(entity.title),
                escape(entity.file_path),
                escape(entity.note_type or ""),
            )

        console.print(table)
    except (ValueError, ToolError) as exc:
        # Expected failures: reported to the user without being logged.
        _report_error(exc)
        raise typer.Exit(code=1)
    except typer.Exit:
        # Let an exit requested inside the try body pass through untouched.
        raise
    except Exception as exc:
        # Top-level boundary: log the unexpected failure, then report it.
        logger.error(f"Error fetching orphan entities: {exc}")
        _report_error(exc)
        raise typer.Exit(code=1)  # pragma: no cover

src/basic_memory/cli/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def _version_only_invocation(argv: list[str]) -> bool:
2424
import_claude_projects,
2525
import_memory_json,
2626
mcp,
27+
orphans,
2728
project,
2829
schema,
2930
status,

src/basic_memory/mcp/clients/knowledge.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
DirectoryMoveResult,
1616
DirectoryDeleteResult,
1717
)
18+
from basic_memory.schemas.v2.graph import GraphNode, OrphanEntitiesResponse
1819

1920

2021
class KnowledgeClient:
@@ -275,6 +276,24 @@ async def delete_directory(self, directory: str) -> DirectoryDeleteResult:
275276
)
276277
return DirectoryDeleteResult.model_validate(response.json())
277278

279+
# --- Orphan detection ---
280+
281+
async def get_orphans(self) -> list[GraphNode]:
    """Fetch the project's orphan entities from the ``/orphans`` endpoint.

    Returns:
        The ``entities`` list of the validated ``OrphanEntitiesResponse``.
    """
    # The same two labels tag both the span and the HTTP call.
    labels = {"client_name": "knowledge", "operation": "get_orphans"}
    with logfire.span("mcp.client.knowledge.get_orphans", **labels):
        response = await call_get(
            self.http_client,
            f"{self._base_path}/orphans",
            path_template="/v2/projects/{project_id}/knowledge/orphans",
            **labels,
        )
        payload = OrphanEntitiesResponse.model_validate(response.json())
        return payload.entities
296+
278297
# --- Resolution ---
279298

280299
async def resolve_entity(self, identifier: str, *, strict: bool = False) -> str:

src/basic_memory/repository/entity_repository.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
from loguru import logger
8-
from sqlalchemy import select, func
8+
from sqlalchemy import exists, func, select
99
from sqlalchemy.exc import IntegrityError
1010
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
1111
from sqlalchemy.orm import load_only, selectinload
@@ -454,6 +454,22 @@ async def get_all_file_paths(self) -> List[str]:
454454
result = await self.execute_query(query, use_query_options=False)
455455
return list(result.scalars().all())
456456

457+
async def find_without_relations(self) -> Sequence[Entity]:
    """Select entities that are neither source nor target of any relation.

    Returns:
        The matching entities, ordered by ``Entity.file_path``.
    """
    # Outgoing side: any Relation row whose from_id is this entity counts,
    # so an entity that links anywhere (even to an unresolved target) is
    # never reported as an orphan.
    is_relation_source = exists().where(Relation.from_id == Entity.id)

    # Incoming side: only a Relation row whose to_id is this entity counts,
    # i.e. a link that has been resolved to it.
    is_relation_target = exists().where(Relation.to_id == Entity.id)

    # Both negated conditions are AND-ed by a single where() call.
    orphan_query = (
        self.select()
        .where(~is_relation_source, ~is_relation_target)
        .order_by(Entity.file_path)
    )
    rows = await self.execute_query(orphan_query, use_query_options=False)
    return list(rows.scalars().all())
472+
457473
async def get_distinct_directories(self) -> List[str]:
458474
"""Extract unique directory paths from file_path column.
459475

src/basic_memory/schemas/v2/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
GraphEdge,
1515
GraphNode,
1616
GraphResponse,
17+
OrphanEntitiesResponse,
1718
)
1819
from basic_memory.schemas.v2.resource import (
1920
CreateResourceRequest,
@@ -33,6 +34,7 @@
3334
"GraphEdge",
3435
"GraphNode",
3536
"GraphResponse",
37+
"OrphanEntitiesResponse",
3638
"CreateResourceRequest",
3739
"UpdateResourceRequest",
3840
"ResourceResponse",

src/basic_memory/schemas/v2/graph.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,12 @@ class GraphResponse(BaseModel):
2929
edges: list[GraphEdge] = Field(
3030
default_factory=list, description="All resolved relations as edges"
3131
)
32+
33+
34+
class OrphanEntitiesResponse(BaseModel):
    """Entities that have no incoming or outgoing relations in the knowledge graph."""

    # Orphan entities as graph nodes; defaults to a fresh empty list.
    entities: list[GraphNode] = Field(
        description="Entities with no relations",
        default_factory=list,
    )

    # Required field: the number of orphan entities.
    total: int = Field(
        ...,
        description="Total count of orphan entities",
    )

tests/api/v2/test_orphan_router.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""Tests for the /knowledge/orphans API endpoint."""
2+
3+
import pytest
4+
from httpx import AsyncClient
5+
6+
7+
@pytest.mark.asyncio
async def test_get_orphan_entities_empty_project(client: AsyncClient, v2_project_url):
    """With nothing created, the endpoint answers 200 with no entities and total 0."""
    orphans_url = f"{v2_project_url}/knowledge/orphans"

    result = await client.get(orphans_url)

    assert result.status_code == 200
    expected_body = {"entities": [], "total": 0}
    assert result.json() == expected_body
14+
15+
16+
@pytest.mark.asyncio
async def test_get_orphan_entities_returns_unlinked_entities(client: AsyncClient, v2_project_url):
    """Two notes created without any links are both reported as orphans."""
    entities_url = f"{v2_project_url}/knowledge/entities"
    payloads = (
        {"title": "Orphan One", "directory": "orphan", "content": "No links here"},
        {"title": "Orphan Two", "directory": "orphan", "content": "Also no links"},
    )

    # Create both notes first, then verify each creation succeeded.
    creations = [await client.post(entities_url, json=payload) for payload in payloads]
    for creation in creations:
        assert creation.status_code == 200

    result = await client.get(f"{v2_project_url}/knowledge/orphans")

    assert result.status_code == 200
    body = result.json()
    reported_titles = {node["title"] for node in body["entities"]}
    assert reported_titles == {"Orphan One", "Orphan Two"}
    assert body["total"] == 2
37+
38+
39+
@pytest.mark.asyncio
async def test_get_orphan_entities_excludes_incoming_and_outgoing_relation_nodes(
    client: AsyncClient, v2_project_url
):
    """Both ends of a link are left out of the orphan list; an unlinked note is kept."""
    entities_url = f"{v2_project_url}/knowledge/entities"

    # Create the link target before the note that references it.
    target_payload = {
        "title": "Target Note",
        "directory": "linked",
        "content": "Referenced entity",
    }
    target = await client.post(entities_url, json=target_payload)

    source_payload = {
        "title": "Source Note",
        "directory": "linked",
        "content": "- links_to [[Target Note]]",
    }
    source = await client.post(entities_url, json=source_payload)

    standalone_payload = {"title": "Standalone Note", "directory": "linked", "content": "No links"}
    standalone = await client.post(entities_url, json=standalone_payload)

    assert source.status_code == 200
    assert target.status_code == 200
    assert standalone.status_code == 200

    result = await client.get(f"{v2_project_url}/knowledge/orphans")

    assert result.status_code == 200
    reported_titles = {node["title"] for node in result.json()["entities"]}
    assert "Source Note" not in reported_titles
    assert "Target Note" not in reported_titles
    assert "Standalone Note" in reported_titles
75+
76+
77+
@pytest.mark.asyncio
async def test_get_orphan_entities_response_shape(client: AsyncClient, v2_project_url):
    """A returned orphan carries exactly the four graph-node keys and a .md file path."""
    note_payload = {"title": "Shape Test", "directory": "shape", "content": "Testing shape"}
    creation = await client.post(f"{v2_project_url}/knowledge/entities", json=note_payload)
    assert creation.status_code == 200

    result = await client.get(f"{v2_project_url}/knowledge/orphans")

    assert result.status_code == 200
    body = result.json()
    # Pick out the note created above by its title.
    shape_node = next(node for node in body["entities"] if node["title"] == "Shape Test")
    expected_keys = {"external_id", "title", "note_type", "file_path"}
    assert set(shape_node.keys()) == expected_keys
    assert shape_node["file_path"].endswith(".md")

0 commit comments

Comments
 (0)