Skip to content

Commit f998ddd

Browse files
slobentanzer and claude committed
feat(map): project-level preview with merged KG schema
Aggregate per-file MappingPreviews into a global KG topology (entity --[relation]--> entity), a slot-resolution index annotating which mapping file resolves each slot, and cross-file conflict findings. Drops the per-file Projected-schema panel — its arrows suggested KG structure but only encoded intent→label. Per-file Samples panels are kept for sanity checks.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 5bc79cf commit f998ddd

4 files changed

Lines changed: 488 additions & 40 deletions

File tree

biotope/commands/map.py

Lines changed: 97 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@
2626
from biotope.croissant.api import scaffold_mapping
2727
from biotope.croissant.mapping import (
2828
Mapping,
29+
MultiMappingPreview,
30+
aggregate_previews,
2931
inspect_dataset,
3032
load_mapping,
3133
preview_mapping,
@@ -335,18 +337,21 @@ def preview(mapping_path: Path | None, as_json: bool, sample_rows: int) -> None:
335337
)
336338
previews.append((path, mapping, result))
337339

340+
aggregated = aggregate_previews([(path.name, result) for path, _, result in previews])
341+
338342
if as_json:
339-
if len(previews) == 1:
340-
click.echo(json.dumps(previews[0][2].to_json(), indent=2, default=str))
341-
else:
342-
payload = {path.name: result.to_json() for path, _, result in previews}
343-
click.echo(json.dumps(payload, indent=2, default=str))
343+
payload = {
344+
"global": aggregated.to_json(),
345+
"mappings": {path.name: result.to_json() for path, _, result in previews},
346+
}
347+
click.echo(json.dumps(payload, indent=2, default=str))
344348
return
345349

350+
_render_global_schema_rich(aggregated)
351+
_render_slot_resolution_rich(aggregated, [path.name for path, _, _ in previews])
352+
_render_global_findings_rich(aggregated)
346353
for path, mapping, result in previews:
347-
if len(previews) > 1:
348-
console.print(Panel(f"[bold]{path.name}[/bold]", border_style="cyan", expand=False))
349-
_render_preview_rich(mapping, result)
354+
_render_per_file_panels(path, result)
350355

351356

352357
# ---------------------------------------------------------------------------
@@ -579,26 +584,96 @@ def _discover_project_mappings() -> list[Path]:
579584
return _discover_mapping_paths(mappings_dir)
580585

581586

582-
def _render_preview_rich(mapping: Mapping, result) -> None:
583-
if result.resolved_slots:
587+
def _render_global_schema_rich(agg: MultiMappingPreview) -> None:
588+
"""Render the merged KG topology across all mapping files."""
589+
if not (agg.entities or agg.relations):
584590
console.print(
585591
Panel(
586-
"\n".join(f"✓ {s}" for s in result.resolved_slots),
587-
title="Resolved slots",
588-
border_style="green",
592+
"[dim]No resolved entities or relations yet. Run [bold]biotope map[/bold] to "
593+
"fill in the mapping files.[/dim]",
594+
title="KG schema",
595+
border_style="dim",
589596
expand=False,
590597
)
591598
)
592-
if result.unresolved_slots:
593-
console.print(
594-
Panel(
595-
"\n".join(f"○ {s}" for s in result.unresolved_slots),
596-
title="Unresolved slots",
597-
border_style="yellow",
598-
expand=False,
599+
return
600+
601+
sections: list[str] = []
602+
if agg.entities:
603+
sections.append("[bold]Entities:[/bold]")
604+
for e in agg.entities:
605+
label = e.schema_term if e.schema_term == e.key else f"{e.key} (label: {e.schema_term})"
606+
props = ", ".join(f"{k}:{v}" for k, v in e.properties.items()) or "(none)"
607+
sources = ", ".join(e.sources)
608+
sections.append(f" {label} namespace: {e.namespace}")
609+
sections.append(f" properties: {props}")
610+
sections.append(f" [dim]from: {sources}[/dim]")
611+
if agg.relations:
612+
if sections:
613+
sections.append("")
614+
sections.append("[bold]Relations:[/bold]")
615+
for r in agg.relations:
616+
label = r.schema_term if r.schema_term == r.key else f"{r.key} (label: {r.schema_term})"
617+
props = ", ".join(f"{k}:{v}" for k, v in r.properties.items()) or "(none)"
618+
sources = ", ".join(r.sources)
619+
# Escape `[` so Rich doesn't interpret `[label]` as a style tag.
620+
sections.append(f" {r.source_entity_key} --\\[{label}]--> {r.target_entity_key}")
621+
sections.append(f" properties: {props}")
622+
sections.append(f" [dim]from: {sources}[/dim]")
623+
console.print(Panel("\n".join(sections), title="KG schema", border_style="cyan", expand=False))
624+
625+
626+
def _render_slot_resolution_rich(agg: MultiMappingPreview, all_files: list[str]) -> None:
    """Print a "Slot resolution" panel listing every slot and the files that resolve it.

    The slot names are the union of the keys of ``agg.slot_resolution`` and
    ``agg.slot_unresolved``, rendered in sorted order. Nothing is printed when
    that union is empty.

    Args:
        agg: Aggregated preview; ``slot_resolution`` and ``slot_unresolved``
            are read as mappings from slot name to a list of file names.
        all_files: Accepted for the caller's convenience; not read here.
    """
    slot_names = sorted({*agg.slot_resolution, *agg.slot_unresolved})
    if not slot_names:
        return

    rows: list[str] = []
    for name in slot_names:
        resolved_by = agg.slot_resolution.get(name, [])
        if not resolved_by:
            # No file resolves this slot: list the files that still carry a stub for it.
            stub_files = ", ".join(agg.slot_unresolved.get(name, []))
            rows.append(f"[red]○[/red] {name} [dim](stub present in: {stub_files})[/dim]")
            continue

        files = ", ".join(resolved_by)
        if len(resolved_by) > 1:
            # More than one resolver is reported as a warning rather than a success.
            glyph, tint = "⚠", "yellow"
            files = f"{files} (resolved by multiple — should be a single source of truth)"
        else:
            glyph, tint = "✓", "green"
        # NOTE(review): the slot name and the resolver list are joined with no
        # separator between them — confirm this is the intended layout.
        rows.append(f"[{tint}]{glyph}[/{tint}] {name}{files}")

    # The border turns yellow as soon as any slot has no entry in slot_resolution.
    all_resolved = all(name in agg.slot_resolution for name in slot_names)
    panel = Panel(
        "\n".join(rows),
        title="Slot resolution",
        border_style="green" if all_resolved else "yellow",
        expand=False,
    )
    console.print(panel)
651+
652+
653+
def _render_global_findings_rich(agg: MultiMappingPreview) -> None:
654+
"""Project-level findings (cross-file conflicts, double-resolution)."""
655+
if not agg.findings:
656+
return
657+
lines = [f"[{f.severity}] {f.path}: {f.message}" for f in agg.findings]
658+
border = "red" if any(f.severity == "error" for f in agg.findings) else "yellow"
659+
console.print(
660+
Panel(
661+
"\n".join(lines),
662+
title="Project-level findings",
663+
border_style=border,
664+
expand=False,
600665
)
601-
if result.findings:
666+
)
667+
668+
669+
def _render_per_file_panels(path: Path, result) -> None:
670+
"""Render file-local information: validation findings and sample tuples."""
671+
has_findings = bool(result.findings)
672+
has_samples = bool(result.sample_node_tuples or result.sample_edge_tuples)
673+
if not (has_findings or has_samples):
674+
return
675+
console.print(Panel(f"[bold]{path.name}[/bold]", border_style="cyan", expand=False))
676+
if has_findings:
602677
lines = [f"[{f.severity}] {f.path}: {f.message}" for f in result.findings]
603678
console.print(
604679
Panel(
@@ -608,23 +683,7 @@ def _render_preview_rich(mapping: Mapping, result) -> None:
608683
expand=False,
609684
)
610685
)
611-
if result.entities or result.relations:
612-
sections: list[str] = ["[bold]Entities:[/bold]"]
613-
for e in result.entities:
614-
props = ", ".join(f"{k}:{v}" for k, v in e.properties.items()) or "(none)"
615-
sections.append(
616-
f" {e.key} -> {e.schema_term} [namespace={e.namespace}, input_label={e.input_label}]\n"
617-
f" properties: {props}"
618-
)
619-
sections.append("[bold]Relations:[/bold]")
620-
for r in result.relations:
621-
props = ", ".join(f"{k}:{v}" for k, v in r.properties.items()) or "(none)"
622-
sections.append(
623-
f" {r.key} -> {r.schema_term} [{r.source} -> {r.target}, input_label={r.input_label}]\n"
624-
f" properties: {props}"
625-
)
626-
console.print(Panel("\n".join(sections), title="Projected schema", border_style="cyan", expand=False))
627-
if result.sample_node_tuples or result.sample_edge_tuples:
686+
if has_samples:
628687
lines = []
629688
if result.sample_node_tuples:
630689
lines.append("[bold]Sample node tuples:[/bold]")

biotope/croissant/mapping/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,14 +28,23 @@
2828
Selector,
2929
to_snake_case,
3030
)
31-
from biotope.croissant.mapping.preview import MappingPreview, preview_mapping
31+
from biotope.croissant.mapping.preview import (
32+
AggregatedEntity,
33+
AggregatedRelation,
34+
MappingPreview,
35+
MultiMappingPreview,
36+
aggregate_previews,
37+
preview_mapping,
38+
)
3239
from biotope.croissant.mapping.render import (
3340
build_inspector_appendix,
3441
render_mapping_with_appendix,
3542
render_mapping_yaml,
3643
)
3744

3845
__all__ = [
46+
"AggregatedEntity",
47+
"AggregatedRelation",
3948
"CompiledAdapter",
4049
"DatasetInspection",
4150
"Endpoint",
@@ -44,11 +53,13 @@
4453
"FieldInfo",
4554
"Mapping",
4655
"MappingPreview",
56+
"MultiMappingPreview",
4757
"RecordSetInfo",
4858
"RelationMapping",
4959
"RowScan",
5060
"Scan",
5161
"Selector",
62+
"aggregate_previews",
5263
"build_inspector_appendix",
5364
"compile_mapping",
5465
"derive_namespace",

0 commit comments

Comments
 (0)