diff --git a/.clinerules b/.clinerules index b333fda8..fd75036b 100644 --- a/.clinerules +++ b/.clinerules @@ -267,6 +267,64 @@ Canvas scene builders read CSS variables via `getComputedStyle` on the canvas DO ## AI Features `onObservation`/`useChartObserver`, `toConfig`/`fromConfig`/`toURL`/`fromURL`/`copyConfig`/`configToJSX`, `validateProps(component, props)`, `diagnoseConfig(component, props)`, `exportChart(div, { format })`, `npx semiotic-ai --doctor` +### Conversational Interrogation (`semiotic/ai`) +Headless hook for "chat with the chart" interactions. The library ships no UI — bring your own chat surface. +- **`useChartInterrogation({ data, onQuery, componentName?, props?, initialAnnotations? })`** → `{ ask(query), history, summary, annotations, loading, error, reset }` +- **`onQuery: (query, context) => Promise<{ answer, annotations? }>`** — call your LLM here. `context` is `{ data, summary, componentName?, props? }`. +- **`summary`**: LLM-friendly statistical summary (`rowCount`, per-field `{ min, max, mean, median }` for numerics, top-k for categoricals, ISO range for dates). Available before any ask(). +- **`annotations`**: Merged `initialAnnotations` + latest AI response. Wire to the chart's `annotations` prop for visual highlighting. +- **`summarizeData(data, options?)`**: Standalone for server-side prompting or batch jobs. +- **MCP Tool**: `interrogateChart(component, props, query)` returns the same statistical summary and AI-facing instructions. 
+ +```jsx +import { LineChart, useChartInterrogation } from "semiotic/ai" + +function InterrogatableChart({ data }) { + const { ask, history, annotations, loading } = useChartInterrogation({ + data, + componentName: "LineChart", + props: { xAccessor: "month", yAccessor: "revenue" }, + onQuery: async (query, { summary }) => { + const res = await myLLMCall(query, summary) + return { answer: res.text, annotations: res.highlights } + }, + }) + return ( + <> + + + + ) +} +``` + +### Chart Capability Layer (`semiotic/ai`) +Heuristic chart-suggestion engine. Charts ship capability descriptors next to their TSX files; the engine ranks them against a profiled dataset by intent. No LLM call required. + +- **`profileData(data, { rawInput?, seriesField? })`** → `ChartDataProfile` (extends `DataSummary`): candidate fields per role (x/y/series/category/size/time), distinct counts, monotonicity, structure detection (hierarchy/network/geo). +- **`suggestCharts(data, { intent?, allow?, deny?, maxResults?, includeVariants?, minScore? })`** → ranked `Suggestion[]` with `{ component, family, importPath, variant?, score, intentScores, rubric, reasons, caveats, props }`. `props` is spreadable directly into the matching chart. +- **`scoreChart(component, data, { intent?, variantKey? })`** → evaluate a specific chart for a dataset (does it fit, how well, why/why not). +- **`useChartSuggestions(data, options)`** → memoized React hook returning `{ suggestions, profile }`. +- **`registerChartCapability(capability)`** / **`unregisterChartCapability(name)`** — runtime registration for custom charts. +- **Intent taxonomy**: 13 built-in intents (`trend`, `compare-series`, `compare-categories`, `rank`, `part-to-whole`, `distribution`, `correlation`, `flow`, `hierarchy`, `geo`, `outlier-detection`, `composition-over-time`, `change-detection`). Extend via `registerIntent(descriptor)`. 
+- **Capability authoring**: create `Foo.capability.ts` next to `Foo.tsx`, then append to the registry in `src/components/ai/chartCapabilities.ts`. Each capability declares `family`, `rubric` (familiarity/accuracy/precision 1-5), `fits(profile)` gate, `intentScores`, optional `variants` with `intentDeltas`, and `buildProps(profile, variant)`. +- **Variants encode that settings change what a chart is good for**: e.g. `StackedAreaChart`'s `streamgraph` variant boosts trend but penalizes part-to-whole. +- **Interrogation tie-in**: pass `includeSuggestions: true` to `useChartInterrogation` and the same ranked list lands in `context.suggestions` for the LLM. +- **MCP tool**: `suggestCharts(data, intent?)` returns the ranked list as structured content. + +```jsx +import { useChartSuggestions, LineChart, BarChart, /* ... */ } from "semiotic/ai" + +const COMPONENT_MAP = { LineChart, BarChart, /* ... */ } +function SuggestedChart({ data, intent }) { + const { suggestions } = useChartSuggestions(data, { intent }) + const top = suggestions[0] + if (!top) return null + const Component = COMPONENT_MAP[top.component] + return +} +``` + ## AI Behavior Contracts diff --git a/.cursorrules b/.cursorrules index b333fda8..fd75036b 100644 --- a/.cursorrules +++ b/.cursorrules @@ -267,6 +267,64 @@ Canvas scene builders read CSS variables via `getComputedStyle` on the canvas DO ## AI Features `onObservation`/`useChartObserver`, `toConfig`/`fromConfig`/`toURL`/`fromURL`/`copyConfig`/`configToJSX`, `validateProps(component, props)`, `diagnoseConfig(component, props)`, `exportChart(div, { format })`, `npx semiotic-ai --doctor` +### Conversational Interrogation (`semiotic/ai`) +Headless hook for "chat with the chart" interactions. The library ships no UI — bring your own chat surface. +- **`useChartInterrogation({ data, onQuery, componentName?, props?, initialAnnotations? 
})`** → `{ ask(query), history, summary, annotations, loading, error, reset }` +- **`onQuery: (query, context) => Promise<{ answer, annotations? }>`** — call your LLM here. `context` is `{ data, summary, componentName?, props? }`. +- **`summary`**: LLM-friendly statistical summary (`rowCount`, per-field `{ min, max, mean, median }` for numerics, top-k for categoricals, ISO range for dates). Available before any ask(). +- **`annotations`**: Merged `initialAnnotations` + latest AI response. Wire to the chart's `annotations` prop for visual highlighting. +- **`summarizeData(data, options?)`**: Standalone for server-side prompting or batch jobs. +- **MCP Tool**: `interrogateChart(component, props, query)` returns the same statistical summary and AI-facing instructions. + +```jsx +import { LineChart, useChartInterrogation } from "semiotic/ai" + +function InterrogatableChart({ data }) { + const { ask, history, annotations, loading } = useChartInterrogation({ + data, + componentName: "LineChart", + props: { xAccessor: "month", yAccessor: "revenue" }, + onQuery: async (query, { summary }) => { + const res = await myLLMCall(query, summary) + return { answer: res.text, annotations: res.highlights } + }, + }) + return ( + <> + + + + ) +} +``` + +### Chart Capability Layer (`semiotic/ai`) +Heuristic chart-suggestion engine. Charts ship capability descriptors next to their TSX files; the engine ranks them against a profiled dataset by intent. No LLM call required. + +- **`profileData(data, { rawInput?, seriesField? })`** → `ChartDataProfile` (extends `DataSummary`): candidate fields per role (x/y/series/category/size/time), distinct counts, monotonicity, structure detection (hierarchy/network/geo). +- **`suggestCharts(data, { intent?, allow?, deny?, maxResults?, includeVariants?, minScore? })`** → ranked `Suggestion[]` with `{ component, family, importPath, variant?, score, intentScores, rubric, reasons, caveats, props }`. `props` is spreadable directly into the matching chart. 
+- **`scoreChart(component, data, { intent?, variantKey? })`** → evaluate a specific chart for a dataset (does it fit, how well, why/why not). +- **`useChartSuggestions(data, options)`** → memoized React hook returning `{ suggestions, profile }`. +- **`registerChartCapability(capability)`** / **`unregisterChartCapability(name)`** — runtime registration for custom charts. +- **Intent taxonomy**: 13 built-in intents (`trend`, `compare-series`, `compare-categories`, `rank`, `part-to-whole`, `distribution`, `correlation`, `flow`, `hierarchy`, `geo`, `outlier-detection`, `composition-over-time`, `change-detection`). Extend via `registerIntent(descriptor)`. +- **Capability authoring**: create `Foo.capability.ts` next to `Foo.tsx`, then append to the registry in `src/components/ai/chartCapabilities.ts`. Each capability declares `family`, `rubric` (familiarity/accuracy/precision 1-5), `fits(profile)` gate, `intentScores`, optional `variants` with `intentDeltas`, and `buildProps(profile, variant)`. +- **Variants encode that settings change what a chart is good for**: e.g. `StackedAreaChart`'s `streamgraph` variant boosts trend but penalizes part-to-whole. +- **Interrogation tie-in**: pass `includeSuggestions: true` to `useChartInterrogation` and the same ranked list lands in `context.suggestions` for the LLM. +- **MCP tool**: `suggestCharts(data, intent?)` returns the ranked list as structured content. + +```jsx +import { useChartSuggestions, LineChart, BarChart, /* ... */ } from "semiotic/ai" + +const COMPONENT_MAP = { LineChart, BarChart, /* ... 
*/ } +function SuggestedChart({ data, intent }) { + const { suggestions } = useChartSuggestions(data, { intent }) + const top = suggestions[0] + if (!top) return null + const Component = COMPONENT_MAP[top.component] + return +} +``` + ## AI Behavior Contracts diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index b333fda8..fd75036b 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -267,6 +267,64 @@ Canvas scene builders read CSS variables via `getComputedStyle` on the canvas DO ## AI Features `onObservation`/`useChartObserver`, `toConfig`/`fromConfig`/`toURL`/`fromURL`/`copyConfig`/`configToJSX`, `validateProps(component, props)`, `diagnoseConfig(component, props)`, `exportChart(div, { format })`, `npx semiotic-ai --doctor` +### Conversational Interrogation (`semiotic/ai`) +Headless hook for "chat with the chart" interactions. The library ships no UI — bring your own chat surface. +- **`useChartInterrogation({ data, onQuery, componentName?, props?, initialAnnotations? })`** → `{ ask(query), history, summary, annotations, loading, error, reset }` +- **`onQuery: (query, context) => Promise<{ answer, annotations? }>`** — call your LLM here. `context` is `{ data, summary, componentName?, props? }`. +- **`summary`**: LLM-friendly statistical summary (`rowCount`, per-field `{ min, max, mean, median }` for numerics, top-k for categoricals, ISO range for dates). Available before any ask(). +- **`annotations`**: Merged `initialAnnotations` + latest AI response. Wire to the chart's `annotations` prop for visual highlighting. +- **`summarizeData(data, options?)`**: Standalone for server-side prompting or batch jobs. +- **MCP Tool**: `interrogateChart(component, props, query)` returns the same statistical summary and AI-facing instructions. 
+ +```jsx +import { LineChart, useChartInterrogation } from "semiotic/ai" + +function InterrogatableChart({ data }) { + const { ask, history, annotations, loading } = useChartInterrogation({ + data, + componentName: "LineChart", + props: { xAccessor: "month", yAccessor: "revenue" }, + onQuery: async (query, { summary }) => { + const res = await myLLMCall(query, summary) + return { answer: res.text, annotations: res.highlights } + }, + }) + return ( + <> + + + + ) +} +``` + +### Chart Capability Layer (`semiotic/ai`) +Heuristic chart-suggestion engine. Charts ship capability descriptors next to their TSX files; the engine ranks them against a profiled dataset by intent. No LLM call required. + +- **`profileData(data, { rawInput?, seriesField? })`** → `ChartDataProfile` (extends `DataSummary`): candidate fields per role (x/y/series/category/size/time), distinct counts, monotonicity, structure detection (hierarchy/network/geo). +- **`suggestCharts(data, { intent?, allow?, deny?, maxResults?, includeVariants?, minScore? })`** → ranked `Suggestion[]` with `{ component, family, importPath, variant?, score, intentScores, rubric, reasons, caveats, props }`. `props` is spreadable directly into the matching chart. +- **`scoreChart(component, data, { intent?, variantKey? })`** → evaluate a specific chart for a dataset (does it fit, how well, why/why not). +- **`useChartSuggestions(data, options)`** → memoized React hook returning `{ suggestions, profile }`. +- **`registerChartCapability(capability)`** / **`unregisterChartCapability(name)`** — runtime registration for custom charts. +- **Intent taxonomy**: 13 built-in intents (`trend`, `compare-series`, `compare-categories`, `rank`, `part-to-whole`, `distribution`, `correlation`, `flow`, `hierarchy`, `geo`, `outlier-detection`, `composition-over-time`, `change-detection`). Extend via `registerIntent(descriptor)`. 
+- **Capability authoring**: create `Foo.capability.ts` next to `Foo.tsx`, then append to the registry in `src/components/ai/chartCapabilities.ts`. Each capability declares `family`, `rubric` (familiarity/accuracy/precision 1-5), `fits(profile)` gate, `intentScores`, optional `variants` with `intentDeltas`, and `buildProps(profile, variant)`. +- **Variants encode that settings change what a chart is good for**: e.g. `StackedAreaChart`'s `streamgraph` variant boosts trend but penalizes part-to-whole. +- **Interrogation tie-in**: pass `includeSuggestions: true` to `useChartInterrogation` and the same ranked list lands in `context.suggestions` for the LLM. +- **MCP tool**: `suggestCharts(data, intent?)` returns the ranked list as structured content. + +```jsx +import { useChartSuggestions, LineChart, BarChart, /* ... */ } from "semiotic/ai" + +const COMPONENT_MAP = { LineChart, BarChart, /* ... */ } +function SuggestedChart({ data, intent }) { + const { suggestions } = useChartSuggestions(data, { intent }) + const top = suggestions[0] + if (!top) return null + const Component = COMPONENT_MAP[top.component] + return +} +``` + ## AI Behavior Contracts diff --git a/.windsurfrules b/.windsurfrules index b333fda8..fd75036b 100644 --- a/.windsurfrules +++ b/.windsurfrules @@ -267,6 +267,64 @@ Canvas scene builders read CSS variables via `getComputedStyle` on the canvas DO ## AI Features `onObservation`/`useChartObserver`, `toConfig`/`fromConfig`/`toURL`/`fromURL`/`copyConfig`/`configToJSX`, `validateProps(component, props)`, `diagnoseConfig(component, props)`, `exportChart(div, { format })`, `npx semiotic-ai --doctor` +### Conversational Interrogation (`semiotic/ai`) +Headless hook for "chat with the chart" interactions. The library ships no UI — bring your own chat surface. +- **`useChartInterrogation({ data, onQuery, componentName?, props?, initialAnnotations? 
})`** → `{ ask(query), history, summary, annotations, loading, error, reset }` +- **`onQuery: (query, context) => Promise<{ answer, annotations? }>`** — call your LLM here. `context` is `{ data, summary, componentName?, props? }`. +- **`summary`**: LLM-friendly statistical summary (`rowCount`, per-field `{ min, max, mean, median }` for numerics, top-k for categoricals, ISO range for dates). Available before any ask(). +- **`annotations`**: Merged `initialAnnotations` + latest AI response. Wire to the chart's `annotations` prop for visual highlighting. +- **`summarizeData(data, options?)`**: Standalone for server-side prompting or batch jobs. +- **MCP Tool**: `interrogateChart(component, props, query)` returns the same statistical summary and AI-facing instructions. + +```jsx +import { LineChart, useChartInterrogation } from "semiotic/ai" + +function InterrogatableChart({ data }) { + const { ask, history, annotations, loading } = useChartInterrogation({ + data, + componentName: "LineChart", + props: { xAccessor: "month", yAccessor: "revenue" }, + onQuery: async (query, { summary }) => { + const res = await myLLMCall(query, summary) + return { answer: res.text, annotations: res.highlights } + }, + }) + return ( + <> + + + + ) +} +``` + +### Chart Capability Layer (`semiotic/ai`) +Heuristic chart-suggestion engine. Charts ship capability descriptors next to their TSX files; the engine ranks them against a profiled dataset by intent. No LLM call required. + +- **`profileData(data, { rawInput?, seriesField? })`** → `ChartDataProfile` (extends `DataSummary`): candidate fields per role (x/y/series/category/size/time), distinct counts, monotonicity, structure detection (hierarchy/network/geo). +- **`suggestCharts(data, { intent?, allow?, deny?, maxResults?, includeVariants?, minScore? })`** → ranked `Suggestion[]` with `{ component, family, importPath, variant?, score, intentScores, rubric, reasons, caveats, props }`. `props` is spreadable directly into the matching chart. 
+- **`scoreChart(component, data, { intent?, variantKey? })`** → evaluate a specific chart for a dataset (does it fit, how well, why/why not). +- **`useChartSuggestions(data, options)`** → memoized React hook returning `{ suggestions, profile }`. +- **`registerChartCapability(capability)`** / **`unregisterChartCapability(name)`** — runtime registration for custom charts. +- **Intent taxonomy**: 13 built-in intents (`trend`, `compare-series`, `compare-categories`, `rank`, `part-to-whole`, `distribution`, `correlation`, `flow`, `hierarchy`, `geo`, `outlier-detection`, `composition-over-time`, `change-detection`). Extend via `registerIntent(descriptor)`. +- **Capability authoring**: create `Foo.capability.ts` next to `Foo.tsx`, then append to the registry in `src/components/ai/chartCapabilities.ts`. Each capability declares `family`, `rubric` (familiarity/accuracy/precision 1-5), `fits(profile)` gate, `intentScores`, optional `variants` with `intentDeltas`, and `buildProps(profile, variant)`. +- **Variants encode that settings change what a chart is good for**: e.g. `StackedAreaChart`'s `streamgraph` variant boosts trend but penalizes part-to-whole. +- **Interrogation tie-in**: pass `includeSuggestions: true` to `useChartInterrogation` and the same ranked list lands in `context.suggestions` for the LLM. +- **MCP tool**: `suggestCharts(data, intent?)` returns the ranked list as structured content. + +```jsx +import { useChartSuggestions, LineChart, BarChart, /* ... */ } from "semiotic/ai" + +const COMPONENT_MAP = { LineChart, BarChart, /* ... 
*/ } +function SuggestedChart({ data, intent }) { + const { suggestions } = useChartSuggestions(data, { intent }) + const top = suggestions[0] + if (!top) return null + const Component = COMPONENT_MAP[top.component] + return +} +``` + ## AI Behavior Contracts diff --git a/CHANGELOG.md b/CHANGELOG.md index 68b2d0b8..4c207956 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [3.6.0] - 2026-05-31 + +### Added + +- **`semiotic/ai` subpath — the AI-facing API surface as a first-class entry point.** 211 KB gzip; the heuristic engine works without any LLM call, but every primitive returns LLM-friendly structured context so a model can ride on top. The entry covers four families of capability: + - **Recommendation.** `suggestCharts(data, options?)` returns ranked chart suggestions for a profiled dataset and optional intent; each suggestion carries a runnable `props` object, an intent-score breakdown, the chart's rubric (familiarity / accuracy / precision), human-readable `reasons[]`, and `caveats[]`. `suggestDashboard` returns a multi-panel composite covering distinct analytical intents (with `intentsMissing` for honesty about what the data can't show). `suggestStretchCharts` returns the literacy-growth surface — charts the audience is unfamiliar with but the data actually supports. `scoreChart` and `explainCapabilityFit` give single-chart introspection. `useChartSuggestions` is the React hook wrapping the same engine for live UI. + - **Profiling.** `profileData(data)` returns a `ChartDataProfile` with candidate fields per role (x / y / size / category / series / time), distinct counts, monotonicity, structure detection (hierarchy / network / geo). `diffProfile` reports schema changes between two profiles. 
`inferIntent` is a zero-dependency regex classifier that maps natural-language phrases (`"why is X different?"`, `"compare these"`, `"trend over time"`) to one of 13 built-in intents. + - **Audience calibration.** `AudienceProfile` is a serializable per-organization config — `familiarity` (chart → 1-5 number map) and `targets` (chart → `{direction: "increase" | "decrease", weight, reason}`) — that biases recommendations toward what a specific audience already knows AND toward charts the organization is trying to grow into. Three built-in personas (`executivePersona`, `analystPersona`, `dataScientistPersona`) ship as starting points; bias is meaningful (target weight 2 = ±2.0 on a 5-point composite score) and visible (the audience's verbatim rationale string lands on `reasons[]` so the policy is auditable in the UI). + - **Capability descriptors per chart.** Every chart now ships a `.capability.ts` next to its TSX, declaring `family`, `rubric`, `fits(profile) → reason | null`, `intentScores`, optional `variants`, `caveats`, and `buildProps`. The registry is runtime-extensible via `registerChartCapability` / `unregisterChartCapability` so consumers can add their own charts to the recommendation pool without forking the engine. + - 13 built-in intents in `intents.ts`: `trend`, `compare-series`, `compare-categories`, `rank`, `part-to-whole`, `distribution`, `correlation`, `flow`, `hierarchy`, `geo`, `outlier-detection`, `composition-over-time`, `change-detection`. Each carries a descriptor with synonyms, alias phrases, and a default scorer; `registerIntent` extends the taxonomy at runtime. +- **`useChartInterrogation` and `useChartFocus` hooks (`semiotic/ai`)** — the headless conversational primitives. 
`useChartInterrogation` gives consumers a `{ ask, history, summary, annotations, loading, error, reset }` surface; the consumer brings their own LLM via `onQuery`, and the hook supplies it with the profiled summary, the suggestion list, and the current focus datum as structured context. Returned annotations route directly to the chart's standard `annotations` prop so the AI's response can render as callouts, threshold lines, and bands, not just text. `useChartFocus` subscribes to the chart's observation store and returns the current point-of-focus (`{ datum, x, y, source }`), with configurable event-type filtering for sticky-focus UIs. +- **`semiotic-mcp` server** — Model Context Protocol server (`npx semiotic-mcp`) exposing `renderChart`, `interrogateChart`, `suggestCharts`, and `diagnoseConfig` as MCP tools so agents inside Claude Code, Cursor, Windsurf, and other MCP-aware environments can drive Semiotic directly. The interrogation tool returns the same statistical summary and AI-facing instructions the hook produces; the suggestion tool returns ranked structured content with runnable props. +- **`semiotic-ai` CLI extensions** — `--doctor` validates a `{component, props, data}` JSON spec against `validateProps` + `diagnoseConfig`; `--schema` emits the chart-schema JSON; `--compact` and `--examples` produce LLM-prompt-sized context. Pair with the MCP server for agent workflows that need both schema and validation in one place. +- **Three case-study blog posts** — `/blog/charts-that-know-what-theyre-for` (the recommendation engine and audience layer), `/blog/anchored-conversations` (point-anchored AI conversation via `useChartFocus` + `useChartInterrogation`), and `/blog/live-conversational-dashboard` (the streaming + interrogation + annotation composition). The three together describe the product surface 3.6.0 makes possible. 
+ +### Changed + +- **AreaChart is now a single-series chart.** Multi-series area overlays are an occlusion nightmare; the capability drops the multi-series intent scores it previously claimed, and `buildProps` subselects to the leading series (largest cumulative y) when the input has 2+ groups, surfacing a `caveats[]` line so the reader knows they're looking at one slice. A gradient fill (`gradientFill: true`, `areaOpacity: 0.55`) is the baseline default. The `trend` score is 5 for clean single-series data and 3 when subselected. `LineChart.trend` yields to AreaChart on single-series (4 vs AreaChart's 5) but still wins on multi-series (5 vs AreaChart's 3) because LineChart shows the whole dataset. +- **DifferenceChart accepts 2+ series via top-2 subselection.** It previously rejected anything other than `seriesCount === 2`; it now picks the two series with the highest cumulative y from the input and emits a `caveats[]` line when subselecting from 3+ series. It now applies the same ordered-x guard as the other time-series capabilities (`xProvenance === "scatter" && !monotonicX` is rejected), so the chart no longer shows up for scatter-shaped data with two categorical groups. +- **Scatterplot and ConnectedScatterplot prefer the canonical 2-numeric form when a sequence axis is present.** With a strong-x (time or named) AND 2+ other numerics in the dataset, both charts plot the two numerics against each other (revenue × profit) instead of recapitulating a line chart on the sequence axis. ConnectedScatterplot threads the sequence as `orderAccessor` so the path encodes temporal progression. ConnectedScatterplot's `correlation` intent scores 5 when canonical (vs 4 otherwise), and Scatterplot's `correlation` steps back to 4 when canonical is available so ConnectedScatterplot wins the tiebreak — both charts fit, but the one with the temporal annotation is strictly more informative. 
+- **`X_FIELD_HINT` recognizes calendar-segment field names.** The profiler's x-axis name regex now matches `quarter`, `qtr`, `fiscal`, and `week` in addition to the existing `year` / `month` / `day` / `date` / `time` / `timestamp`. Without this, data shaped as `{quarter, revenue, region}` fell into scatter-fallback provenance and series detection never fired — `lineBy` / `areaBy` were silently dropped and multi-series time-series charts zigzagged across regions. + ## [3.5.4] - 2026-05-21 ### Added diff --git a/CLAUDE.md b/CLAUDE.md index b333fda8..06981165 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ - Install: `npm install semiotic` -- **Use sub-path imports** — `semiotic/xy` (85KB gz), `semiotic/ordinal` (69KB gz), `semiotic/network` (63KB gz), `semiotic/geo` (52KB gz), `semiotic/realtime` (90KB gz), `semiotic/server` (122KB gz), `semiotic/utils` (22KB gz), `semiotic/recipes` (5KB gz), `semiotic/themes` (4KB gz), `semiotic/data` (3KB gz), `semiotic/ai` (189KB gz). Full `semiotic` is 188KB gz. +- **Use sub-path imports** — `semiotic/xy` (86KB gz), `semiotic/ordinal` (70KB gz), `semiotic/network` (64KB gz), `semiotic/geo` (52KB gz), `semiotic/realtime` (91KB gz), `semiotic/server` (122KB gz), `semiotic/utils` (22KB gz), `semiotic/recipes` (5KB gz), `semiotic/themes` (4KB gz), `semiotic/data` (3KB gz), `semiotic/ai` (211KB gz). Full `semiotic` is 190KB gz. - CLI: `npx semiotic-ai [--schema|--compact|--examples|--doctor]` - MCP: `npx semiotic-mcp` @@ -267,6 +267,64 @@ Canvas scene builders read CSS variables via `getComputedStyle` on the canvas DO ## AI Features `onObservation`/`useChartObserver`, `toConfig`/`fromConfig`/`toURL`/`fromURL`/`copyConfig`/`configToJSX`, `validateProps(component, props)`, `diagnoseConfig(component, props)`, `exportChart(div, { format })`, `npx semiotic-ai --doctor` +### Conversational Interrogation (`semiotic/ai`) +Headless hook for "chat with the chart" interactions. The library ships no UI — bring your own chat surface. 
+- **`useChartInterrogation({ data, onQuery, componentName?, props?, initialAnnotations?, includeSuggestions? })`** → `{ ask(query), history, summary, annotations, loading, error, reset }` +- **`onQuery: (query, context) => Promise<{ answer, annotations? }>`** — call your LLM here. `context` is `{ data, summary, componentName?, props? }`, plus `suggestions` when `includeSuggestions: true` is set. +- **`summary`**: LLM-friendly statistical summary (`rowCount`, per-field `{ min, max, mean, median }` for numerics, top-k for categoricals, ISO range for dates). Available before any ask(). +- **`annotations`**: Merged `initialAnnotations` + latest AI response. Wire to the chart's `annotations` prop for visual highlighting. +- **`summarizeData(data, options?)`**: Standalone for server-side prompting or batch jobs. +- **MCP Tool**: `interrogateChart(component, props, query)` returns the same statistical summary and AI-facing instructions. + +```jsx +import { LineChart, useChartInterrogation } from "semiotic/ai" + +function InterrogatableChart({ data }) { + const { ask, history, annotations, loading } = useChartInterrogation({ + data, + componentName: "LineChart", + props: { xAccessor: "month", yAccessor: "revenue" }, + onQuery: async (query, { summary }) => { + const res = await myLLMCall(query, summary) + return { answer: res.text, annotations: res.highlights } + }, + }) + return ( + <> + <LineChart data={data} xAccessor="month" yAccessor="revenue" annotations={annotations} /> + </> + ) +} +``` + +### Chart Capability Layer (`semiotic/ai`) +Heuristic chart-suggestion engine. Charts ship capability descriptors next to their TSX files; the engine ranks them against a profiled dataset by intent. No LLM call required. + +- **`profileData(data, { rawInput?, seriesField? })`** → `ChartDataProfile` (extends `DataSummary`): candidate fields per role (x/y/series/category/size/time), distinct counts, monotonicity, structure detection (hierarchy/network/geo). +- **`suggestCharts(data, { intent?, allow?, deny?, maxResults?, includeVariants?, minScore? 
})`** → ranked `Suggestion[]` with `{ component, family, importPath, variant?, score, intentScores, rubric, reasons, caveats, props }`. `props` is spreadable directly into the matching chart. +- **`scoreChart(component, data, { intent?, variantKey? })`** → evaluate a specific chart for a dataset (does it fit, how well, why/why not). +- **`useChartSuggestions(data, options)`** → memoized React hook returning `{ suggestions, profile }`. +- **`registerChartCapability(capability)`** / **`unregisterChartCapability(name)`** — runtime registration for custom charts. +- **Intent taxonomy**: 13 built-in intents (`trend`, `compare-series`, `compare-categories`, `rank`, `part-to-whole`, `distribution`, `correlation`, `flow`, `hierarchy`, `geo`, `outlier-detection`, `composition-over-time`, `change-detection`). Extend via `registerIntent(descriptor)`. +- **Capability authoring**: create `Foo.capability.ts` next to `Foo.tsx`, then append to the registry in `src/components/ai/chartCapabilities.ts`. Each capability declares `family`, `rubric` (familiarity/accuracy/precision 1-5), `fits(profile)` gate, `intentScores`, optional `variants` with `intentDeltas`, and `buildProps(profile, variant)`. +- **Variants encode that settings change what a chart is good for**: e.g. `StackedAreaChart`'s `streamgraph` variant boosts trend but penalizes part-to-whole. +- **Interrogation tie-in**: pass `includeSuggestions: true` to `useChartInterrogation` and the same ranked list lands in `context.suggestions` for the LLM. +- **MCP tool**: `suggestCharts(data, intent?)` returns the ranked list as structured content. + +```jsx +import { useChartSuggestions, LineChart, BarChart, /* ... */ } from "semiotic/ai" + +const COMPONENT_MAP = { LineChart, BarChart, /* ... 
*/ } +function SuggestedChart({ data, intent }) { + const { suggestions } = useChartSuggestions(data, { intent }) + const top = suggestions[0] + if (!top) return null + const Component = COMPONENT_MAP[top.component] + return <Component data={data} {...top.props} /> +} +``` + ## AI Behavior Contracts diff --git a/IMPROVEMENTS.md b/IMPROVEMENTS.md new file mode 100644 index 00000000..c78e3164 --- /dev/null +++ b/IMPROVEMENTS.md @@ -0,0 +1,72 @@ +# Semiotic: Strategic Roadmap & Areas for Improvement + +## Executive Summary +Semiotic is a Tier-1 React data visualization library that has successfully carved out a unique niche: **AI-assisted development**. Its deep integration with the Model Context Protocol (MCP), robust JSON schemas, and semantic "behavior contracts" make it the best-in-class choice for the LLM-driven future of software engineering. + +However, as the project matures, there are opportunities to evolve from being a **developer tool for building charts** into a **data platform for understanding visualizations**. + +--- + +## 1. Stakeholder Analysis + +### 🌍 For the Readers (Visualization Consumers) +*The people looking at the dashboards, monitoring live feeds, and trying to extract meaning from data.* + +* **Interactivity Depth**: While hover and brush are supported, there is a gap in "Exploratory Interactivity." Readers would benefit from built-in zoom/pan for XY charts, drill-down patterns, and "Filter-in-place" UI that doesn't require developer wiring. +* **Narrative & Insights**: Charts often show *what* happened, but not *why*. Automated labeling of "Key Insights" (e.g., "All-time high," "Outlier detected") should be first-class props rather than manual annotations. +* **Export Utility**: A standard "Export to CSV/JSON/PNG" button as a first-class feature of the HOC charts would empower non-technical readers. 
+ +### 🛠 For the Developers (Maintainers) +*The contributors keeping the engine running and the ecosystem growing.* + +* **Local Development Friction**: Currently, the documentation site imports from the built `dist/` bundle. This requires a full rebuild of the library to see source changes in the docs. Migrating to a **Monorepo / Workspace** (e.g., Turborepo) where the docs site can alias to the `src/` directory would radically improve DX. +* **Build Complexity**: The `scripts/build.mjs` is a custom Rollup orchestration. While high-performance, it's a barrier to entry for new contributors. Modernizing this with a standard tool like **Vite** or **Rspack** for the website could simplify the setup. +* **Technical Debt in Frames**: The `StreamXYFrame` and `PipelineStore` are massive (1.7k+ lines). Decomposing these into smaller, functional modules (e.g., a standalone `ScaleManager`, `LayoutEngine`, and `InteractionManager`) would make the core logic easier to test and extend. + +### 🤝 For Integration Partners +*Companies and products embedding Semiotic or using its output.* + +* **Design System Synchronization**: Theming is currently proprietary. Supporting **W3C Design Tokens** or providing a direct **Tailwind CSS** plugin to map theme variables to utility classes would make integration into enterprise design systems seamless. +* **Headless Layout Engine**: Integration partners often want the *math* of Semiotic (the layouts, the scales, the bins) without the *DOM*. Exposing a "Headless" version of `PipelineStore` that works in pure Node or Worker environments without JSDOM would be a game-changer for data processing pipelines. +* **Bundle Optimization**: The bundle sizes (85KB+ for XY) are large for modern web standards. More aggressive code-splitting (e.g., lazy-loading specific chart types like Candlestick or Swarm) could reduce the initial load for simple use cases. + +--- + +## 2. 
Core Technical Improvements + +### 🎨 Graphical Functionality +* **Complex Axis Types**: Better support for non-linear scales (Log, Power) and "broken" axes for handling massive outliers. +* **Touch Optimization**: Improved gesture support for mobile readers (pinch-to-zoom, long-press for tooltips). +* **Statistical Overlays**: First-class support for LOESS smoothing, confidence intervals, and regression lines as simple props rather than complex annotation objects. + +### ⚡ Performance +* **OffscreenCanvas**: Offloading heavy canvas rendering to Web Workers for charts with >100k data points. +* **Virtualization**: Virtualizing the `AccessibleDataTable` to prevent DOM bloat when the reader toggle-opens the data view for huge datasets. + +### 🤖 AI Integration (The "Assistant" Layer) +* **Generative Layouts**: Allow the AI to suggest not just the *chart type*, but the *visual priority* (e.g., "Highlight the trend, de-emphasize the points"). +* **Prop Auto-Correction**: The `diagnoseConfig` tool is excellent. The next step is "Auto-Fix" — an AI tool that takes a broken config and returns a corrected one that is guaranteed to render. + +--- + +## 3. The "Hidden" High-Impact Feature: **Conversational Chart Interrogation** + +### The Concept +Today, Semiotic helps an AI **build** a chart. The high-impact move is to help an AI **explain** the chart to the reader. + +We propose a new dedicated chart-interrogation component or a prop `interactiveExplain`. + +### How it works: +1. **Context Awareness**: Because Semiotic knows the `schema`, the `data`, and the `intent` (via behavior contracts), it can provide a "narrow" context to an LLM. +2. **Narrative Interaction**: A reader can ask: *"What was the highest peak in March?"* +3. **Visual Feedback**: The AI doesn't just answer with text. It returns an **Annotation Object** that Semiotic dynamically renders onto the chart. The chart literally "highlights" what the AI is talking about. +4. 
**Why this is huge**: It bridges the gap between **Static Visualization** and **Data Science**. It makes every chart an interactive data analyst. It's the ultimate Accessibility feature: those who can't see the chart can *talk* to it to understand the trends. + +### Implementation Path: +* Leverage the existing `semiotic-mcp` server. +* Create a "Small Context" generator that extracts the statistical summary of the data (min, max, mean, outliers). +* Build a UI bridge that turns AI responses into Semiotic `annotations`. + +--- + +*Authored by Gemini CLI for Elijah Meeks* diff --git a/README.md b/README.md index 843c84ec..e88775d1 100644 --- a/README.md +++ b/README.md @@ -287,18 +287,18 @@ Semiotic ships 12 entry points. **Don't import from `"semiotic"` unless you need | Entry Point | gzip | What's inside | |---|---|---| -| `semiotic/xy` | **85 KB** | LineChart, AreaChart, Scatterplot, Heatmap, + 8 more XY charts | -| `semiotic/ordinal` | **69 KB** | BarChart, PieChart, BoxPlot, Histogram, + 11 more categorical charts | -| `semiotic/network` | **63 KB** | ForceDirectedGraph, SankeyDiagram, ProcessSankey, Treemap, + 4 more | +| `semiotic/xy` | **86 KB** | LineChart, AreaChart, Scatterplot, Heatmap, + 8 more XY charts | +| `semiotic/ordinal` | **70 KB** | BarChart, PieChart, BoxPlot, Histogram, + 11 more categorical charts | +| `semiotic/network` | **64 KB** | ForceDirectedGraph, SankeyDiagram, ProcessSankey, Treemap, + 4 more | | `semiotic/geo` | **52 KB** | ChoroplethMap, FlowMap, DistanceCartogram, ProportionalSymbolMap | -| `semiotic/realtime` | **90 KB** | RealtimeLineChart, RealtimeHistogram, + 3 streaming charts | +| `semiotic/realtime` | **91 KB** | RealtimeLineChart, RealtimeHistogram, + 3 streaming charts | | `semiotic/server` | **122 KB** | renderChart, renderDashboard, renderToImage, renderToAnimatedGif | | `semiotic/utils` | **22 KB** | ThemeProvider, validators, serialization — no chart components | | `semiotic/recipes` | **5 KB** | Pure layout 
functions (waffle, marimekko, flextree, dagre, …) | | `semiotic/themes` | **4 KB** | Theme presets only (tufte, carbon, etc.) | | `semiotic/data` | **3 KB** | bin, rollup, groupBy, pivot, fromVegaLite | -| `semiotic/ai` | **189 KB** | All 41 HOCs + validation — optimized for LLM code generation | -| `semiotic` | **188 KB** | Everything below (full bundle) | +| `semiotic/ai` | **211 KB** | All 41 HOCs + validation — optimized for LLM code generation | +| `semiotic` | **190 KB** | Everything below (full bundle) | diff --git a/ai/capability-scorecard.json b/ai/capability-scorecard.json new file mode 100644 index 00000000..adec0c14 --- /dev/null +++ b/ai/capability-scorecard.json @@ -0,0 +1,1211 @@ +{ + "perCapability": [ + { + "component": "QuadrantChart", + "family": "relationship", + "fitsOn": 13, + "rejectedOn": 10, + "inTopThreeOn": 1, + "expertAgreementCount": 0, + "averageScore": 1.0769230769230769, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "MinimapChart", + "family": "time-series", + "fitsOn": 5, + "rejectedOn": 18, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 1.6, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "Heatmap", + "family": "relationship", + "fitsOn": 4, + "rejectedOn": 21, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 0, + "caveatCoverage": 0.5, + "variantUtilization": 1 + }, + { + "component": "SwarmPlot", + "family": "distribution", + "fitsOn": 4, + "rejectedOn": 19, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 1.75, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "LikertChart", + "family": "categorical", + "fitsOn": 2, + "rejectedOn": 21, + "inTopThreeOn": 1, + "expertAgreementCount": 0, + "averageScore": 3.5, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "SwimlaneChart", + "family": "categorical", + "fitsOn": 2, + "rejectedOn": 21, + "inTopThreeOn": 1, + 
"expertAgreementCount": 0, + "averageScore": 1.5, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "RidgelinePlot", + "family": "distribution", + "fitsOn": 2, + "rejectedOn": 21, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 3.5, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "CandlestickChart", + "family": "time-series", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 4, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "ForceDirectedGraph", + "family": "network", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 1, + "expertAgreementCount": 0, + "averageScore": 3, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "CirclePack", + "family": "hierarchy", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 4, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "OrbitDiagram", + "family": "hierarchy", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 3, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "GaugeChart", + "family": "categorical", + "fitsOn": 0, + "rejectedOn": 23, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 0, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "ProcessSankey", + "family": "flow", + "fitsOn": 0, + "rejectedOn": 23, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 0, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "FlowMap", + "family": "geo", + "fitsOn": 0, + "rejectedOn": 23, + "inTopThreeOn": 0, + "expertAgreementCount": 0, + "averageScore": 0, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "DistanceCartogram", + "family": "geo", + "fitsOn": 0, + "rejectedOn": 23, + "inTopThreeOn": 0, + 
"expertAgreementCount": 0, + "averageScore": 0, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "DonutChart", + "family": "categorical", + "fitsOn": 8, + "rejectedOn": 15, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 1.25, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "ConnectedScatterplot", + "family": "relationship", + "fitsOn": 4, + "rejectedOn": 19, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 2.5, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "BubbleChart", + "family": "relationship", + "fitsOn": 4, + "rejectedOn": 19, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 2, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "ViolinPlot", + "family": "distribution", + "fitsOn": 4, + "rejectedOn": 21, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 4.5, + "caveatCoverage": 0, + "variantUtilization": 1 + }, + { + "component": "MultiAxisLineChart", + "family": "time-series", + "fitsOn": 3, + "rejectedOn": 20, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 2.3333333333333335, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "GroupedBarChart", + "family": "categorical", + "fitsOn": 2, + "rejectedOn": 21, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 2, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "DifferenceChart", + "family": "time-series", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 5, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "FunnelChart", + "family": "flow", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 4, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "SankeyDiagram", + "family": "flow", + "fitsOn": 1, + "rejectedOn": 
22, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 5, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "ChordDiagram", + "family": "flow", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 4, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "Treemap", + "family": "hierarchy", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 4, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "ChoroplethMap", + "family": "geo", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 5, + "caveatCoverage": 1, + "variantUtilization": 0 + }, + { + "component": "ProportionalSymbolMap", + "family": "geo", + "fitsOn": 1, + "rejectedOn": 22, + "inTopThreeOn": 1, + "expertAgreementCount": 1, + "averageScore": 4, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "AreaChart", + "family": "time-series", + "fitsOn": 24, + "rejectedOn": 15, + "inTopThreeOn": 4, + "expertAgreementCount": 2, + "averageScore": 2.625, + "caveatCoverage": 0, + "variantUtilization": 1 + }, + { + "component": "DotPlot", + "family": "categorical", + "fitsOn": 9, + "rejectedOn": 14, + "inTopThreeOn": 2, + "expertAgreementCount": 2, + "averageScore": 1.4444444444444444, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "BoxPlot", + "family": "distribution", + "fitsOn": 4, + "rejectedOn": 19, + "inTopThreeOn": 2, + "expertAgreementCount": 2, + "averageScore": 2.25, + "caveatCoverage": 0, + "variantUtilization": 0 + }, + { + "component": "TreeDiagram", + "family": "hierarchy", + "fitsOn": 2, + "rejectedOn": 22, + "inTopThreeOn": 2, + "expertAgreementCount": 2, + "averageScore": 5, + "caveatCoverage": 0, + "variantUtilization": 1 + }, + { + "component": "PieChart", + "family": "categorical", + "fitsOn": 16, + "rejectedOn": 15, + 
"inTopThreeOn": 3, + "expertAgreementCount": 3, + "averageScore": 1.5, + "caveatCoverage": 1, + "variantUtilization": 1 + }, + { + "component": "StackedAreaChart", + "family": "time-series", + "fitsOn": 12, + "rejectedOn": 19, + "inTopThreeOn": 3, + "expertAgreementCount": 3, + "averageScore": 3.0833333333333335, + "caveatCoverage": 1, + "variantUtilization": 1 + }, + { + "component": "StackedBarChart", + "family": "categorical", + "fitsOn": 4, + "rejectedOn": 21, + "inTopThreeOn": 3, + "expertAgreementCount": 3, + "averageScore": 3.25, + "caveatCoverage": 1, + "variantUtilization": 1 + }, + { + "component": "Histogram", + "family": "distribution", + "fitsOn": 32, + "rejectedOn": 7, + "inTopThreeOn": 5, + "expertAgreementCount": 4, + "averageScore": 0.75, + "caveatCoverage": 0, + "variantUtilization": 1 + }, + { + "component": "BarChart", + "family": "categorical", + "fitsOn": 27, + "rejectedOn": 14, + "inTopThreeOn": 6, + "expertAgreementCount": 4, + "averageScore": 2.074074074074074, + "caveatCoverage": 0, + "variantUtilization": 1 + }, + { + "component": "Scatterplot", + "family": "relationship", + "fitsOn": 26, + "rejectedOn": 10, + "inTopThreeOn": 8, + "expertAgreementCount": 8, + "averageScore": 2.3846153846153846, + "caveatCoverage": 0, + "variantUtilization": 1 + }, + { + "component": "LineChart", + "family": "time-series", + "fitsOn": 24, + "rejectedOn": 15, + "inTopThreeOn": 12, + "expertAgreementCount": 12, + "averageScore": 3.0833333333333335, + "caveatCoverage": 0.3333333333333333, + "variantUtilization": 1 + } + ], + "perFixture": [ + { + "fixture": "monthly revenue with regions, intent=trend", + "shape": "12 months × 3 regions, numeric month, numeric revenue", + "intent": "trend", + "expected": [ + "LineChart", + "AreaChart", + "MinimapChart" + ], + "topPick": { + "component": "LineChart", + "variantKey": "linear", + "score": 5 + }, + "topThree": [ + { + "component": "LineChart", + "variantKey": "linear", + "score": 5 + }, + { + "component": 
"LineChart", + "variantKey": "smooth", + "score": 5 + }, + { + "component": "AreaChart", + "variantKey": "smooth-gradient", + "score": 5 + } + ], + "fittingCount": 15, + "rejectedCount": 32, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "monthly revenue with regions, intent=compare-series", + "shape": "12 months × 3 regions", + "intent": "compare-series", + "expected": [ + "LineChart", + "GroupedBarChart" + ], + "topPick": { + "component": "LineChart", + "variantKey": "linear", + "score": 4 + }, + "topThree": [ + { + "component": "LineChart", + "variantKey": "linear", + "score": 4 + }, + { + "component": "LineChart", + "variantKey": "smooth", + "score": 4 + }, + { + "component": "LineChart", + "variantKey": "stepped-with-points", + "score": 4 + } + ], + "fittingCount": 15, + "rejectedCount": 32, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "monthly revenue with regions, intent=composition-over-time", + "shape": "12 months × 3 regions, additive", + "intent": "composition-over-time", + "expected": [ + "StackedAreaChart", + "StackedBarChart" + ], + "topPick": { + "component": "StackedAreaChart", + "variantKey": "baseline-zero", + "score": 5 + }, + "topThree": [ + { + "component": "StackedAreaChart", + "variantKey": "baseline-zero", + "score": 5 + }, + { + "component": "StackedAreaChart", + "variantKey": "centered", + "score": 5 + }, + { + "component": "StackedAreaChart", + "variantKey": "streamgraph", + "score": 5 + } + ], + "fittingCount": 15, + "rejectedCount": 32, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "monthly revenue single series, intent=trend", + "shape": "12 months, no series", + "intent": "trend", + "expected": [ + "LineChart", + "AreaChart" + ], + "topPick": { + "component": "LineChart", + "variantKey": "linear", + "score": 5 + }, + "topThree": [ + { + "component": "LineChart", + "variantKey": "linear", + "score": 5 + }, + { + "component": "LineChart", + "variantKey": 
"smooth", + "score": 5 + }, + { + "component": "AreaChart", + "variantKey": "smooth-gradient", + "score": 5 + } + ], + "fittingCount": 12, + "rejectedCount": 33, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "product sales, intent=rank", + "shape": "5 products, single numeric measure", + "intent": "rank", + "expected": [ + "BarChart", + "DotPlot" + ], + "topPick": { + "component": "BarChart", + "variantKey": "sorted-desc", + "score": 5 + }, + "topThree": [ + { + "component": "BarChart", + "variantKey": "sorted-desc", + "score": 5 + }, + { + "component": "BarChart", + "variantKey": "horizontal", + "score": 5 + }, + { + "component": "DotPlot", + "score": 5 + } + ], + "fittingCount": 7, + "rejectedCount": 35, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "product sales, intent=part-to-whole", + "shape": "5 products, single numeric measure", + "intent": "part-to-whole", + "expected": [ + "PieChart", + "DonutChart", + "BarChart" + ], + "topPick": { + "component": "PieChart", + "variantKey": "pie", + "score": 4 + }, + "topThree": [ + { + "component": "PieChart", + "variantKey": "pie", + "score": 4 + }, + { + "component": "PieChart", + "variantKey": "donut", + "score": 4 + }, + { + "component": "DonutChart", + "score": 4 + } + ], + "fittingCount": 7, + "rejectedCount": 35, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "satisfaction scores, intent=distribution", + "shape": "150 numeric observations across 3 cohorts", + "intent": "distribution", + "expected": [ + "Histogram", + "BoxPlot", + "ViolinPlot" + ], + "topPick": { + "component": "Histogram", + "variantKey": "count-bins", + "score": 5 + }, + "topThree": [ + { + "component": "Histogram", + "variantKey": "count-bins", + "score": 5 + }, + { + "component": "Histogram", + "variantKey": "share-bins", + "score": 5 + }, + { + "component": "BoxPlot", + "score": 5 + } + ], + "fittingCount": 18, + "rejectedCount": 27, + "expertAgreement": true, + 
"noFitHonored": null + }, + { + "fixture": "satisfaction scores, intent=compare-categories", + "shape": "150 obs × 3 cohorts", + "intent": "compare-categories", + "expected": [ + "BoxPlot", + "ViolinPlot", + "SwarmPlot" + ], + "topPick": { + "component": "BoxPlot", + "score": 4 + }, + "topThree": [ + { + "component": "BoxPlot", + "score": 4 + }, + { + "component": "LikertChart", + "score": 4 + }, + { + "component": "ViolinPlot", + "variantKey": "density", + "score": 4 + } + ], + "fittingCount": 18, + "rejectedCount": 27, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "hours vs grade, intent=correlation", + "shape": "80 students, hours + grade", + "intent": "correlation", + "expected": [ + "Scatterplot" + ], + "topPick": { + "component": "Scatterplot", + "variantKey": "points", + "score": 5 + }, + "topThree": [ + { + "component": "Scatterplot", + "variantKey": "points", + "score": 5 + }, + { + "component": "Scatterplot", + "variantKey": "with-trend", + "score": 5 + }, + { + "component": "QuadrantChart", + "score": 3 + } + ], + "fittingCount": 5, + "rejectedCount": 36, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "hours vs grade, intent=outlier-detection", + "shape": "80 students", + "intent": "outlier-detection", + "expected": [ + "Scatterplot" + ], + "topPick": { + "component": "Scatterplot", + "variantKey": "points", + "score": 5 + }, + "topThree": [ + { + "component": "Scatterplot", + "variantKey": "points", + "score": 5 + }, + { + "component": "Scatterplot", + "variantKey": "with-trend", + "score": 5 + }, + { + "component": "Histogram", + "variantKey": "count-bins", + "score": 3 + } + ], + "fittingCount": 5, + "rejectedCount": 36, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "conversion funnel, intent=flow", + "shape": "4 stages, descending values", + "intent": "flow", + "expected": [ + "FunnelChart" + ], + "topPick": { + "component": "FunnelChart", + "score": 4 + }, + "topThree": [ + 
{ + "component": "FunnelChart", + "score": 4 + }, + { + "component": "BarChart", + "variantKey": "sorted-desc", + "score": 0 + }, + { + "component": "BarChart", + "variantKey": "source-order", + "score": 0 + } + ], + "fittingCount": 8, + "rejectedCount": 34, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "org chart, intent=hierarchy", + "shape": "3-deep org tree", + "intent": "hierarchy", + "expected": [ + "TreeDiagram", + "Treemap", + "CirclePack" + ], + "topPick": { + "component": "TreeDiagram", + "variantKey": "vertical-tree", + "score": 5 + }, + "topThree": [ + { + "component": "TreeDiagram", + "variantKey": "vertical-tree", + "score": 5 + }, + { + "component": "TreeDiagram", + "variantKey": "horizontal-cluster", + "score": 5 + }, + { + "component": "Treemap", + "score": 4 + } + ], + "fittingCount": 5, + "rejectedCount": 35, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "approval workflow transitions, intent=flow", + "shape": "5 nodes / 4 weighted edges", + "intent": "flow", + "expected": [ + "SankeyDiagram", + "ChordDiagram" + ], + "topPick": { + "component": "SankeyDiagram", + "score": 5 + }, + "topThree": [ + { + "component": "SankeyDiagram", + "score": 5 + }, + { + "component": "ChordDiagram", + "score": 4 + }, + { + "component": "ForceDirectedGraph", + "score": 3 + } + ], + "fittingCount": 3, + "rejectedCount": 36, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "US states with values, intent=geo", + "shape": "3 polygon features with numeric values", + "intent": "geo", + "expected": [ + "ChoroplethMap", + "ProportionalSymbolMap" + ], + "topPick": { + "component": "ChoroplethMap", + "score": 5 + }, + "topThree": [ + { + "component": "ChoroplethMap", + "score": 5 + }, + { + "component": "ProportionalSymbolMap", + "score": 4 + } + ], + "fittingCount": 2, + "rejectedCount": 37, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "country economies, 
intent=correlation", + "shape": "10 countries × 3 numeric measures (gdp, hours, population)", + "intent": "correlation", + "expected": [ + "Scatterplot", + "BubbleChart" + ], + "topPick": { + "component": "Scatterplot", + "variantKey": "points", + "score": 5 + }, + "topThree": [ + { + "component": "Scatterplot", + "variantKey": "points", + "score": 5 + }, + { + "component": "Scatterplot", + "variantKey": "with-trend", + "score": 5 + }, + { + "component": "BubbleChart", + "score": 4 + } + ], + "fittingCount": 10, + "rejectedCount": 33, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "website metrics with 3 measures, intent=compare-series", + "shape": "24 months × 3 numeric measures with different ranges", + "intent": "compare-series", + "expected": [ + "MultiAxisLineChart", + "LineChart" + ], + "topPick": { + "component": "MultiAxisLineChart", + "score": 4 + }, + "topThree": [ + { + "component": "MultiAxisLineChart", + "score": 4 + }, + { + "component": "AreaChart", + "variantKey": "smooth-gradient", + "score": 2 + }, + { + "component": "AreaChart", + "variantKey": "linear", + "score": 2 + } + ], + "fittingCount": 14, + "rejectedCount": 31, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "sales by region and product, intent=compare-series", + "shape": "12 rows = 4 products × 3 regions", + "intent": "compare-series", + "expected": [ + "GroupedBarChart", + "StackedBarChart" + ], + "topPick": { + "component": "GroupedBarChart", + "score": 4 + }, + "topThree": [ + { + "component": "GroupedBarChart", + "score": 4 + }, + { + "component": "SwimlaneChart", + "score": 3 + }, + { + "component": "StackedBarChart", + "variantKey": "absolute", + "score": 2 + } + ], + "fittingCount": 17, + "rejectedCount": 28, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "sales by region and product, intent=part-to-whole", + "shape": "12 rows = 4 products × 3 regions", + "intent": "part-to-whole", + "expected": [ + 
"StackedBarChart", + "PieChart" + ], + "topPick": { + "component": "StackedBarChart", + "variantKey": "normalized", + "score": 5 + }, + "topThree": [ + { + "component": "StackedBarChart", + "variantKey": "normalized", + "score": 5 + }, + { + "component": "StackedBarChart", + "variantKey": "absolute", + "score": 4 + }, + { + "component": "PieChart", + "variantKey": "pie", + "score": 4 + } + ], + "fittingCount": 17, + "rejectedCount": 28, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "revenue vs expenses, intent=compare-series", + "shape": "48 rows = 24 months × 2 series", + "intent": "compare-series", + "expected": [ + "DifferenceChart", + "LineChart", + "GroupedBarChart" + ], + "topPick": { + "component": "DifferenceChart", + "score": 5 + }, + "topThree": [ + { + "component": "DifferenceChart", + "score": 5 + }, + { + "component": "LineChart", + "variantKey": "linear", + "score": 4 + }, + { + "component": "LineChart", + "variantKey": "smooth", + "score": 4 + } + ], + "fittingCount": 16, + "rejectedCount": 31, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "stock OHLC prices, intent=change-detection", + "shape": "30 days × open/high/low/close", + "intent": "change-detection", + "expected": [ + "CandlestickChart", + "LineChart" + ], + "topPick": { + "component": "LineChart", + "variantKey": "stepped-with-points", + "score": 5 + }, + "topThree": [ + { + "component": "LineChart", + "variantKey": "stepped-with-points", + "score": 5 + }, + { + "component": "LineChart", + "variantKey": "linear", + "score": 4 + }, + { + "component": "LineChart", + "variantKey": "smooth", + "score": 4 + } + ], + "fittingCount": 16, + "rejectedCount": 29, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "unemployment vs inflation by year, intent=correlation", + "shape": "20 years × 2 measures, ordered by year", + "intent": "correlation", + "expected": [ + "ConnectedScatterplot", + "Scatterplot" + ], + "topPick": { + 
"component": "Scatterplot", + "variantKey": "points", + "score": 5 + }, + "topThree": [ + { + "component": "Scatterplot", + "variantKey": "points", + "score": 5 + }, + { + "component": "Scatterplot", + "variantKey": "with-trend", + "score": 5 + }, + { + "component": "ConnectedScatterplot", + "score": 4 + } + ], + "fittingCount": 14, + "rejectedCount": 31, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "flat single column", + "shape": "50 rows, one numeric column", + "expected": [ + "Histogram" + ], + "topPick": { + "component": "Histogram", + "variantKey": "count-bins", + "score": 3 + }, + "topThree": [ + { + "component": "Histogram", + "variantKey": "count-bins", + "score": 3 + }, + { + "component": "Histogram", + "variantKey": "share-bins", + "score": 3 + } + ], + "fittingCount": 2, + "rejectedCount": 38, + "expertAgreement": true, + "noFitHonored": null + }, + { + "fixture": "sparse 3-row data, intent=rank", + "shape": "3 rows total", + "intent": "rank", + "expected": [ + "BarChart", + "DotPlot" + ], + "topPick": { + "component": "BarChart", + "variantKey": "sorted-desc", + "score": 5 + }, + "topThree": [ + { + "component": "BarChart", + "variantKey": "sorted-desc", + "score": 5 + }, + { + "component": "BarChart", + "variantKey": "horizontal", + "score": 5 + }, + { + "component": "DotPlot", + "score": 5 + } + ], + "fittingCount": 7, + "rejectedCount": 35, + "expertAgreement": true, + "noFitHonored": null + } + ], + "summary": { + "fixtureCount": 23, + "capabilityCount": 39, + "expertAgreementRate": 1, + "overallCaveatCoverage": 0.24596774193548387, + "overallVariantUtilization": 0.7056451612903226 + } +} \ No newline at end of file diff --git a/ai/dist/mcp-server.js b/ai/dist/mcp-server.js index f42e155d..a764a372 100755 --- a/ai/dist/mcp-server.js +++ b/ai/dist/mcp-server.js @@ -32743,6 +32743,166 @@ Dark-mode presets: ${THEME_PRESET_NAMES.filter((n) => n.includes("dark")).join(" content: [{ type: "text", text: usage.join("\n") }] }; } 
+async function suggestChartsHandler(args) { + const { data, intent, maxResults, allow, deny, audience } = args; + const intentArg = Array.isArray(intent) ? intent : intent ? [intent] : void 0; + const suggestions = (0, import_ai3.suggestCharts)(data, { + intent: intentArg, + allow, + deny, + maxResults: maxResults ?? 8, + audience + }); + const lines = [ + `${suggestions.length} suggestion${suggestions.length === 1 ? "" : "s"} for ${data.length} rows${intentArg ? ` (intent: ${intentArg.join(", ")})` : ""}:`, + "", + ...suggestions.map((s, i) => { + const variantTag = s.variant ? ` / ${s.variant.label}` : ""; + const reasons = s.reasons.length ? ` \u2014 ${s.reasons.join("; ")}` : ""; + const caveats = s.caveats.length ? ` + caveats: ${s.caveats.join("; ")}` : ""; + return `${i + 1}. ${s.component}${variantTag} (score ${s.score.toFixed(1)}/5, familiarity ${s.rubric.familiarity}, accuracy ${s.rubric.accuracy})${reasons}${caveats}`; + }) + ]; + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: { suggestions } + }; +} +async function suggestStreamChartsHandler(args) { + const { schema: schema2, intent, maxResults } = args; + const intentArg = Array.isArray(intent) ? intent : intent ? [intent] : void 0; + const suggestions = (0, import_ai3.suggestStreamCharts)(schema2, { + intent: intentArg, + maxResults: maxResults ?? 8 + }); + const lines = [ + `${suggestions.length} stream chart suggestion${suggestions.length === 1 ? "" : "s"}${intentArg ? ` (intent: ${intentArg.join(", ")})` : ""}`, + ...schema2.throughput ? [`throughput: ${schema2.throughput}`] : [], + ...schema2.retention ? [`retention: ${schema2.retention}`] : [], + "", + ...suggestions.map((s, i) => { + const reasons = s.reasons.length ? ` \u2014 ${s.reasons.join("; ")}` : ""; + const caveats = s.caveats.length ? ` + caveats: ${s.caveats.join("; ")}` : ""; + return `${i + 1}. 
${s.component} (score ${s.score.toFixed(1)}/5)${reasons}${caveats}`; + }) + ]; + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: { suggestions, schema: schema2 } + }; +} +async function suggestDashboardHandler(args) { + const { data, intents, maxPanels, diversifyByFamily, audience } = args; + const dashboard = (0, import_ai3.suggestDashboard)(data, { + intents, + maxPanels: maxPanels ?? 6, + diversifyByFamily: diversifyByFamily !== false, + audience + }); + const lines = []; + lines.push(`Dashboard: ${dashboard.panels.length} panels covering ${dashboard.intentsCovered.join(", ") || "\u2014"}`); + if (dashboard.intentsMissing.length) { + lines.push(`Intents this data couldn't fill: ${dashboard.intentsMissing.join(", ")}`); + } + lines.push(""); + for (let i = 0; i < dashboard.panels.length; i++) { + const { intent, suggestion } = dashboard.panels[i]; + const variantTag = suggestion.variant ? ` / ${suggestion.variant.label}` : ""; + lines.push(`${i + 1}. [${intent}] ${suggestion.component}${variantTag} (score ${suggestion.score.toFixed(1)}/5)`); + if (suggestion.reasons.length) lines.push(` ${suggestion.reasons.join("; ")}`); + } + if (dashboard.stretchPanels.length > 0) { + lines.push(""); + lines.push(`Stretch picks (audience-unfamiliar but fitting):`); + for (const stretch of dashboard.stretchPanels) { + const variantTag = stretch.suggestion.variant ? ` / ${stretch.suggestion.variant.label}` : ""; + lines.push(` ${stretch.suggestion.component}${variantTag} (familiarity ${stretch.familiarity}) \u2014 ${stretch.rationale}`); + } + } + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: dashboard + }; +} +async function suggestStretchChartsHandler(args) { + const { data, audience, intent, maxResults } = args; + const intentArg = Array.isArray(intent) ? intent : intent ? 
[intent] : void 0; + const stretches = (0, import_ai3.suggestStretchCharts)(data, { + audience, + intent: intentArg, + maxResults: maxResults ?? 5 + }); + const lines = [ + `${stretches.length} stretch pick${stretches.length === 1 ? "" : "s"} for "${audience.name ?? "audience"}":`, + "", + ...stretches.map((s, i) => { + const variantTag = s.suggestion.variant ? ` / ${s.suggestion.variant.label}` : ""; + const replacing = s.replacing ? ` (could replace ${s.replacing})` : ""; + return `${i + 1}. ${s.suggestion.component}${variantTag} (familiarity ${s.familiarity}/5)${replacing} + ${s.rationale}`; + }) + ]; + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: { stretches, audience: audience.name ?? null } + }; +} +async function repairChartConfigHandler(args) { + const { component, data, intent, maxAlternatives } = args; + const intentArg = Array.isArray(intent) ? intent : intent ? [intent] : void 0; + const result = (0, import_ai3.repairChartConfig)(component, data, { + intent: intentArg, + maxAlternatives: maxAlternatives ?? 3 + }); + const lines = []; + if (result.status === "ok") { + lines.push(`\u2705 ${component} fits this dataset \u2014 no repair needed.`); + } else if (result.status === "alternative") { + lines.push(`\u26A0 ${component} doesn't fit: ${result.reason}`); + lines.push(""); + lines.push(`Alternatives that fit${intentArg ? ` (ranked by intent: ${intentArg.join(", ")})` : ""}:`); + for (let i = 0; i < result.alternatives.length; i++) { + const s = result.alternatives[i]; + const variantTag = s.variant ? ` / ${s.variant.label}` : ""; + const reasons = s.reasons.length ? ` \u2014 ${s.reasons.join("; ")}` : ""; + lines.push(`${i + 1}. ${s.component}${variantTag} (score ${s.score.toFixed(1)}/5)${reasons}`); + } + } else { + lines.push(`\u2753 No capability registered for "${component}". 
Closest matches:`); + for (let i = 0; i < result.alternatives.length; i++) { + const s = result.alternatives[i]; + lines.push(`${i + 1}. ${s.component} (${s.family}, score ${s.score.toFixed(1)}/5)`); + } + } + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: result + }; +} +async function interrogateChartHandler(args) { + const { component, props, query } = args; + const data = props.data || props.nodes || []; + const summary = (0, import_ai3.summarizeData)(data); + const content = [ + { type: "text", text: `Statistical summary for ${component}: +${JSON.stringify(summary, null, 2)}` } + ]; + if (query) { + content.push({ + type: "text", + text: `User Question: "${query}" + +Contextual instructions: +1. Analyze the statistical summary to answer the question. +2. Return a natural language response. +3. Optionally suggest a JSON array of Semiotic annotations to visually highlight the answer on the chart (e.g. { type: "callout", x: "Mar", y: 1500, label: "Peak month" }). +4. Use the accessor names from the provided props (e.g. xAccessor, yAccessor).` + }); + } + return { content, structuredContent: { summary, component, props } }; +} function createServer2() { const srv = new McpServer({ name: "semiotic", @@ -32927,6 +33087,93 @@ function createServer2() { }, applyThemeHandler ); + srv.tool( + "interrogateChart", + "Conversational interrogation of a Semiotic chart. Extract a statistical summary and answer natural language questions about the data, trends, and outliers. Returns a summary and guidance for an AI to generate a textual answer and visual annotations.", + { + component: external_exports3.string().describe("Chart component name, e.g. 
'LineChart'"), + props: external_exports3.record(external_exports3.string(), external_exports3.unknown()).describe("The full chart props including data"), + query: external_exports3.string().optional().describe("A natural language question about the chart data") + }, + interrogateChartHandler + ); + srv.tool( + "suggestStreamCharts", + "Recommend realtime/streaming Semiotic charts for a schema (not row data). Pass a schema describing field types plus optional throughput ('low'|'medium'|'high') and retention ('windowed'|'cumulative') hints; the engine ranks realtime charts (RealtimeLineChart, RealtimeHistogram, RealtimeHeatmap, RealtimeWaterfallChart, RealtimeSwarmChart, TemporalHistogram) by their fit. Use when the user is wiring up a live dashboard or monitoring view rather than visualizing a bounded dataset.", + { + schema: external_exports3.object({ + fields: external_exports3.array( + external_exports3.object({ + name: external_exports3.string(), + kind: external_exports3.enum(["numeric", "categorical", "date", "boolean"]), + role: external_exports3.enum(["x", "y", "value", "category", "series", "size"]).optional() + }) + ), + throughput: external_exports3.enum(["low", "medium", "high"]).optional(), + retention: external_exports3.enum(["windowed", "cumulative"]).optional() + }).describe("Stream schema \u2014 fields plus throughput/retention hints. No row data."), + intent: external_exports3.union([external_exports3.string(), external_exports3.array(external_exports3.string())]).optional().describe("Ranking intent."), + maxResults: external_exports3.number().int().min(1).max(20).optional() + }, + suggestStreamChartsHandler + ); + srv.tool( + "suggestDashboard", + "Generate a dashboard of complementary chart panels for a dataset \u2014 each panel answers a distinct analytical intent (trend, rank, distribution, correlation, etc.) and the engine diversifies by chart family by default. Heuristic only; no LLM call. 
Use when the user asks 'show me this data' or 'build me a dashboard' rather than picking one chart.", + { + data: external_exports3.array(external_exports3.record(external_exports3.string(), external_exports3.unknown())).describe("Row data \u2014 array of objects."), + intents: external_exports3.array(external_exports3.string()).optional().describe("Intents to cover. Omit to let the engine pick based on the data shape."), + maxPanels: external_exports3.number().int().min(1).max(12).optional().describe("Maximum panels (default 6)."), + diversifyByFamily: external_exports3.boolean().optional().describe("Prefer not to repeat chart families across panels (default true).") + }, + suggestDashboardHandler + ); + srv.tool( + "suggestStretchCharts", + "Recommend literacy-growth chart picks for a dataset given an AudienceProfile. Returns charts the data supports but the audience is unfamiliar with (familiarity \u2264 3, or \u2264 4 at exposureLevel 2), each paired with the familiar chart it could substitute for and a rationale. 
Use when the consumer wants to gently expose users to less familiar but more analytically appropriate visualizations.", + { + data: external_exports3.array(external_exports3.record(external_exports3.string(), external_exports3.unknown())).describe("Row data."), + audience: external_exports3.object({ + name: external_exports3.string().optional(), + familiarity: external_exports3.record(external_exports3.string(), external_exports3.number()).optional(), + targets: external_exports3.record( + external_exports3.string(), + external_exports3.object({ + direction: external_exports3.enum(["increase", "decrease"]), + weight: external_exports3.number().int().min(1).max(3).optional(), + reason: external_exports3.string().optional() + }) + ).optional(), + exposureLevel: external_exports3.union([external_exports3.literal(0), external_exports3.literal(1), external_exports3.literal(2)]).optional() + }).describe("Audience profile \u2014 familiarity, targets, exposure level."), + intent: external_exports3.union([external_exports3.string(), external_exports3.array(external_exports3.string())]).optional(), + maxResults: external_exports3.number().int().min(1).max(20).optional() + }, + suggestStretchChartsHandler + ); + srv.tool( + "repairChartConfig", + "Validate that a chart component is a sensible choice for a dataset, and if not, propose alternatives that fit. Use when a user asks for a specific chart and you want to confirm it's appropriate, or when you've drafted a config and want to verify it. Returns either ok (no change needed), alternative (chart doesn't fit; here are ranked replacements with rationale), or unknown (no capability registered).", + { + component: external_exports3.string().describe("Chart component name to validate, e.g. 
'PieChart'"), + data: external_exports3.array(external_exports3.record(external_exports3.string(), external_exports3.unknown())).describe("Row data \u2014 array of objects."), + intent: external_exports3.union([external_exports3.string(), external_exports3.array(external_exports3.string())]).optional().describe("User intent \u2014 informs ranking of alternatives when the chart doesn't fit."), + maxAlternatives: external_exports3.number().int().min(1).max(10).optional().describe("Cap on alternatives returned (default 3).") + }, + repairChartConfigHandler + ); + srv.tool( + "suggestCharts", + "Recommend Semiotic charts for a dataset using heuristic capability descriptors. Each chart declares which data shapes it serves and which intents (trend, compare-categories, distribution, correlation, part-to-whole, etc.) it answers \u2014 the engine returns a ranked list with scores, reasons, caveats, and ready-to-use props. Heuristic only; no LLM call. Use the result as structured context when answering 'what chart should I use?' or generating chart code.", + { + data: external_exports3.array(external_exports3.record(external_exports3.string(), external_exports3.unknown())).describe("Row data \u2014 array of objects."), + intent: external_exports3.union([external_exports3.string(), external_exports3.array(external_exports3.string())]).optional().describe("Ranking intent. One of: trend, compare-series, compare-categories, rank, part-to-whole, distribution, correlation, flow, hierarchy, geo, outlier-detection, composition-over-time, change-detection. 
Custom intents accepted."), + maxResults: external_exports3.number().int().min(1).max(40).optional().describe("Cap on suggestions returned (default 8)."), + allow: external_exports3.array(external_exports3.string()).optional().describe("Restrict to these component names."), + deny: external_exports3.array(external_exports3.string()).optional().describe("Exclude these component names.") + }, + suggestChartsHandler + ); return srv; } var cliArgs = process.argv.slice(2); diff --git a/ai/mcp-server.ts b/ai/mcp-server.ts index a4cfbfa7..8dc309a1 100644 --- a/ai/mcp-server.ts +++ b/ai/mcp-server.ts @@ -32,7 +32,16 @@ import * as path from "path" import * as http from "http" import { renderHOCToSVG } from "./renderHOCToSVG" import { COMPONENT_REGISTRY } from "./componentRegistry" -import { diagnoseConfig } from "semiotic/ai" +import { + diagnoseConfig, + summarizeData, + suggestCharts as suggestChartsFromCapabilities, + repairChartConfig as repairChartConfigFromCapabilities, + suggestDashboard as suggestDashboardFromCapabilities, + suggestStreamCharts as suggestStreamChartsFromCapabilities, + suggestStretchCharts as suggestStretchChartsFromCapabilities, +} from "semiotic/ai" +import type { IntentId, StreamSchema, AudienceProfile } from "semiotic/ai" const { componentIndexFromSchema, @@ -432,6 +441,218 @@ async function applyThemeHandler(args: { name?: string }): Promise { } } +async function suggestChartsHandler(args: { + data: unknown[] + intent?: string | string[] + maxResults?: number + allow?: string[] + deny?: string[] + audience?: AudienceProfile +}): Promise { + const { data, intent, maxResults, allow, deny, audience } = args + const intentArg = (Array.isArray(intent) ? intent : intent ? [intent] : undefined) as + | IntentId[] + | undefined + + const suggestions = suggestChartsFromCapabilities(data as Record[], { + intent: intentArg, + allow, + deny, + maxResults: maxResults ?? 
8, + audience, + }) + + const lines: string[] = [ + `${suggestions.length} suggestion${suggestions.length === 1 ? "" : "s"} for ${(data as unknown[]).length} rows${intentArg ? ` (intent: ${intentArg.join(", ")})` : ""}:`, + "", + ...suggestions.map((s, i) => { + const variantTag = s.variant ? ` / ${s.variant.label}` : "" + const reasons = s.reasons.length ? ` — ${s.reasons.join("; ")}` : "" + const caveats = s.caveats.length ? `\n caveats: ${s.caveats.join("; ")}` : "" + return `${i + 1}. ${s.component}${variantTag} (score ${s.score.toFixed(1)}/5, familiarity ${s.rubric.familiarity}, accuracy ${s.rubric.accuracy})${reasons}${caveats}` + }), + ] + + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: { suggestions }, + } +} + +async function suggestStreamChartsHandler(args: { + schema: StreamSchema + intent?: string | string[] + maxResults?: number +}): Promise { + const { schema, intent, maxResults } = args + const intentArg = (Array.isArray(intent) ? intent : intent ? [intent] : undefined) as + | IntentId[] + | undefined + + const suggestions = suggestStreamChartsFromCapabilities(schema, { + intent: intentArg, + maxResults: maxResults ?? 8, + }) + + const lines: string[] = [ + `${suggestions.length} stream chart suggestion${suggestions.length === 1 ? "" : "s"}${intentArg ? ` (intent: ${intentArg.join(", ")})` : ""}`, + ...(schema.throughput ? [`throughput: ${schema.throughput}`] : []), + ...(schema.retention ? [`retention: ${schema.retention}`] : []), + "", + ...suggestions.map((s, i) => { + const reasons = s.reasons.length ? ` — ${s.reasons.join("; ")}` : "" + const caveats = s.caveats.length ? `\n caveats: ${s.caveats.join("; ")}` : "" + return `${i + 1}. 
${s.component} (score ${s.score.toFixed(1)}/5)${reasons}${caveats}` + }), + ] + + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: { suggestions, schema }, + } +} + +async function suggestDashboardHandler(args: { + data: unknown[] + intents?: string[] + maxPanels?: number + diversifyByFamily?: boolean + audience?: AudienceProfile +}): Promise { + const { data, intents, maxPanels, diversifyByFamily, audience } = args + const dashboard = suggestDashboardFromCapabilities(data as Record[], { + intents: intents as IntentId[] | undefined, + maxPanels: maxPanels ?? 6, + diversifyByFamily: diversifyByFamily !== false, + audience, + }) + + const lines: string[] = [] + lines.push(`Dashboard: ${dashboard.panels.length} panels covering ${dashboard.intentsCovered.join(", ") || "—"}`) + if (dashboard.intentsMissing.length) { + lines.push(`Intents this data couldn't fill: ${dashboard.intentsMissing.join(", ")}`) + } + lines.push("") + for (let i = 0; i < dashboard.panels.length; i++) { + const { intent, suggestion } = dashboard.panels[i] + const variantTag = suggestion.variant ? ` / ${suggestion.variant.label}` : "" + lines.push(`${i + 1}. [${intent}] ${suggestion.component}${variantTag} (score ${suggestion.score.toFixed(1)}/5)`) + if (suggestion.reasons.length) lines.push(` ${suggestion.reasons.join("; ")}`) + } + if (dashboard.stretchPanels.length > 0) { + lines.push("") + lines.push(`Stretch picks (audience-unfamiliar but fitting):`) + for (const stretch of dashboard.stretchPanels) { + const variantTag = stretch.suggestion.variant ? 
` / ${stretch.suggestion.variant.label}` : "" + lines.push(` ${stretch.suggestion.component}${variantTag} (familiarity ${stretch.familiarity}) — ${stretch.rationale}`) + } + } + + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: dashboard as unknown as Record, + } +} + +async function suggestStretchChartsHandler(args: { + data: unknown[] + audience: AudienceProfile + intent?: string | string[] + maxResults?: number +}): Promise { + const { data, audience, intent, maxResults } = args + const intentArg = (Array.isArray(intent) ? intent : intent ? [intent] : undefined) as + | IntentId[] + | undefined + + const stretches = suggestStretchChartsFromCapabilities(data as Record[], { + audience, + intent: intentArg, + maxResults: maxResults ?? 5, + }) + + const lines: string[] = [ + `${stretches.length} stretch pick${stretches.length === 1 ? "" : "s"} for "${audience.name ?? "audience"}":`, + "", + ...stretches.map((s, i) => { + const variantTag = s.suggestion.variant ? ` / ${s.suggestion.variant.label}` : "" + const replacing = s.replacing ? ` (could replace ${s.replacing})` : "" + return `${i + 1}. ${s.suggestion.component}${variantTag} (familiarity ${s.familiarity}/5)${replacing}\n ${s.rationale}` + }), + ] + + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: { stretches, audience: audience.name ?? null }, + } +} + +async function repairChartConfigHandler(args: { + component: string + data: unknown[] + intent?: string | string[] + maxAlternatives?: number +}): Promise { + const { component, data, intent, maxAlternatives } = args + const intentArg = (Array.isArray(intent) ? intent : intent ? [intent] : undefined) as + | IntentId[] + | undefined + + const result = repairChartConfigFromCapabilities(component, data as Record[], { + intent: intentArg, + maxAlternatives: maxAlternatives ?? 
3, + }) + + const lines: string[] = [] + if (result.status === "ok") { + lines.push(`✅ ${component} fits this dataset — no repair needed.`) + } else if (result.status === "alternative") { + lines.push(`⚠ ${component} doesn't fit: ${result.reason}`) + lines.push("") + lines.push(`Alternatives that fit${intentArg ? ` (ranked by intent: ${intentArg.join(", ")})` : ""}:`) + for (let i = 0; i < result.alternatives.length; i++) { + const s = result.alternatives[i] + const variantTag = s.variant ? ` / ${s.variant.label}` : "" + const reasons = s.reasons.length ? ` — ${s.reasons.join("; ")}` : "" + lines.push(`${i + 1}. ${s.component}${variantTag} (score ${s.score.toFixed(1)}/5)${reasons}`) + } + } else { + lines.push(`❓ No capability registered for "${component}". Closest matches:`) + for (let i = 0; i < result.alternatives.length; i++) { + const s = result.alternatives[i] + lines.push(`${i + 1}. ${s.component} (${s.family}, score ${s.score.toFixed(1)}/5)`) + } + } + + return { + content: [{ type: "text", text: lines.join("\n") }], + structuredContent: result as unknown as Record, + } +} + +async function interrogateChartHandler(args: { + component: string + props: Record + query?: string +}): Promise { + const { component, props, query } = args + const data = (props.data as unknown[]) || (props.nodes as unknown[]) || [] + const summary = summarizeData(data as Record[]) + + const content: Array<{ type: "text"; text: string }> = [ + { type: "text", text: `Statistical summary for ${component}:\n${JSON.stringify(summary, null, 2)}` }, + ] + + if (query) { + content.push({ + type: "text", + text: `User Question: "${query}"\n\nContextual instructions:\n1. Analyze the statistical summary to answer the question.\n2. Return a natural language response.\n3. Optionally suggest a JSON array of Semiotic annotations to visually highlight the answer on the chart (e.g. { type: "callout", x: "Mar", y: 1500, label: "Peak month" }).\n4. Use the accessor names from the provided props (e.g. 
xAccessor, yAccessor).`, + }) + } + + return { content, structuredContent: { summary, component, props } } +} + // ── Server factory ─────────────────────────────────────────────────────── // Creates a fresh McpServer with all tools registered. // HTTP mode needs one instance per session (McpServer can only connect to one transport). @@ -635,6 +856,114 @@ function createServer(): McpServer { applyThemeHandler ) + srv.tool( + "interrogateChart", + "Conversational interrogation of a Semiotic chart. Extract a statistical summary and answer natural language questions about the data, trends, and outliers. Returns a summary and guidance for an AI to generate a textual answer and visual annotations.", + { + component: z.string().describe("Chart component name, e.g. 'LineChart'"), + props: z.record(z.string(), z.unknown()).describe("The full chart props including data"), + query: z.string().optional().describe("A natural language question about the chart data"), + }, + interrogateChartHandler + ) + + srv.tool( + "suggestStreamCharts", + "Recommend realtime/streaming Semiotic charts for a schema (not row data). Pass a schema describing field types plus optional throughput ('low'|'medium'|'high') and retention ('windowed'|'cumulative') hints; the engine ranks realtime charts (RealtimeLineChart, RealtimeHistogram, RealtimeHeatmap, RealtimeWaterfallChart, RealtimeSwarmChart, TemporalHistogram) by their fit. Use when the user is wiring up a live dashboard or monitoring view rather than visualizing a bounded dataset.", + { + schema: z + .object({ + fields: z.array( + z.object({ + name: z.string(), + kind: z.enum(["numeric", "categorical", "date", "boolean"]), + role: z.enum(["x", "y", "value", "category", "series", "size"]).optional(), + }), + ), + throughput: z.enum(["low", "medium", "high"]).optional(), + retention: z.enum(["windowed", "cumulative"]).optional(), + }) + .describe("Stream schema — fields plus throughput/retention hints. 
No row data."), + intent: z + .union([z.string(), z.array(z.string())]) + .optional() + .describe("Ranking intent."), + maxResults: z.number().int().min(1).max(20).optional(), + }, + suggestStreamChartsHandler + ) + + srv.tool( + "suggestDashboard", + "Generate a dashboard of complementary chart panels for a dataset — each panel answers a distinct analytical intent (trend, rank, distribution, correlation, etc.) and the engine diversifies by chart family by default. Heuristic only; no LLM call. Use when the user asks 'show me this data' or 'build me a dashboard' rather than picking one chart.", + { + data: z.array(z.record(z.string(), z.unknown())).describe("Row data — array of objects."), + intents: z.array(z.string()).optional().describe("Intents to cover. Omit to let the engine pick based on the data shape."), + maxPanels: z.number().int().min(1).max(12).optional().describe("Maximum panels (default 6)."), + diversifyByFamily: z.boolean().optional().describe("Prefer not to repeat chart families across panels (default true)."), + }, + suggestDashboardHandler + ) + + srv.tool( + "suggestStretchCharts", + "Recommend literacy-growth chart picks for a dataset given an AudienceProfile. Returns charts the data supports but the audience is unfamiliar with (familiarity ≤ 3, or ≤ 4 at exposureLevel 2), each paired with the familiar chart it could substitute for and a rationale. 
Use when the consumer wants to gently expose users to less familiar but more analytically appropriate visualizations.", + { + data: z.array(z.record(z.string(), z.unknown())).describe("Row data."), + audience: z + .object({ + name: z.string().optional(), + familiarity: z.record(z.string(), z.number()).optional(), + targets: z + .record( + z.string(), + z.object({ + direction: z.enum(["increase", "decrease"]), + weight: z.number().int().min(1).max(3).optional(), + reason: z.string().optional(), + }), + ) + .optional(), + exposureLevel: z.union([z.literal(0), z.literal(1), z.literal(2)]).optional(), + }) + .describe("Audience profile — familiarity, targets, exposure level."), + intent: z.union([z.string(), z.array(z.string())]).optional(), + maxResults: z.number().int().min(1).max(20).optional(), + }, + suggestStretchChartsHandler + ) + + srv.tool( + "repairChartConfig", + "Validate that a chart component is a sensible choice for a dataset, and if not, propose alternatives that fit. Use when a user asks for a specific chart and you want to confirm it's appropriate, or when you've drafted a config and want to verify it. Returns either ok (no change needed), alternative (chart doesn't fit; here are ranked replacements with rationale), or unknown (no capability registered).", + { + component: z.string().describe("Chart component name to validate, e.g. 'PieChart'"), + data: z.array(z.record(z.string(), z.unknown())).describe("Row data — array of objects."), + intent: z + .union([z.string(), z.array(z.string())]) + .optional() + .describe("User intent — informs ranking of alternatives when the chart doesn't fit."), + maxAlternatives: z.number().int().min(1).max(10).optional().describe("Cap on alternatives returned (default 3)."), + }, + repairChartConfigHandler + ) + + srv.tool( + "suggestCharts", + "Recommend Semiotic charts for a dataset using heuristic capability descriptors. 
Each chart declares which data shapes it serves and which intents (trend, compare-categories, distribution, correlation, part-to-whole, etc.) it answers — the engine returns a ranked list with scores, reasons, caveats, and ready-to-use props. Heuristic only; no LLM call. Use the result as structured context when answering 'what chart should I use?' or generating chart code.", + { + data: z.array(z.record(z.string(), z.unknown())).describe("Row data — array of objects."), + intent: z + .union([z.string(), z.array(z.string())]) + .optional() + .describe("Ranking intent. One of: trend, compare-series, compare-categories, rank, part-to-whole, distribution, correlation, flow, hierarchy, geo, outlier-detection, composition-over-time, change-detection. Custom intents accepted."), + maxResults: z.number().int().min(1).max(40).optional().describe("Cap on suggestions returned (default 8)."), + allow: z.array(z.string()).optional().describe("Restrict to these component names."), + deny: z.array(z.string()).optional().describe("Exclude these component names."), + }, + suggestChartsHandler + ) + return srv } diff --git a/ai/schema.json b/ai/schema.json index 2fee5f24..d118c38c 100644 --- a/ai/schema.json +++ b/ai/schema.json @@ -1,7 +1,7 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "name": "semiotic", - "version": "3.5.4", + "version": "3.6.0", "description": "React data visualization library for charts, networks, and beyond", "tools": [ { diff --git a/ai/system-prompt.md b/ai/system-prompt.md index 2788cb4e..d909f553 100644 --- a/ai/system-prompt.md +++ b/ai/system-prompt.md @@ -2,7 +2,7 @@ -**Use sub-path imports** — `semiotic/xy` (85KB gz), `semiotic/ordinal` (69KB gz), `semiotic/network` (63KB gz), `semiotic/geo` (52KB gz), `semiotic/realtime` (90KB gz), `semiotic/server` (122KB gz), `semiotic/utils` (22KB gz), `semiotic/recipes` (5KB gz), `semiotic/themes` (4KB gz), `semiotic/data` (3KB gz), `semiotic/ai` (189KB gz). Full `semiotic` is 188KB gz. 
+**Use sub-path imports** — `semiotic/xy` (86KB gz), `semiotic/ordinal` (70KB gz), `semiotic/network` (64KB gz), `semiotic/geo` (52KB gz), `semiotic/realtime` (91KB gz), `semiotic/server` (122KB gz), `semiotic/utils` (22KB gz), `semiotic/recipes` (5KB gz), `semiotic/themes` (4KB gz), `semiotic/data` (3KB gz), `semiotic/ai` (211KB gz). Full `semiotic` is 190KB gz. ## Flat Array Data (`data: object[]`) diff --git a/docs/public/blog/feed.xml b/docs/public/blog/feed.xml index cc6a5857..155eb52d 100644 --- a/docs/public/blog/feed.xml +++ b/docs/public/blog/feed.xml @@ -5,8 +5,30 @@ https://semiotic3.nteract.io/blog/ - 2026-05-18T00:00:00Z + 2026-05-24T00:00:00Z Semiotic + + Charts that know what they're for + https://semiotic3.nteract.io/blog/charts-that-know-what-theyre-for + + + 2026-05-24T00:00:00Z + 2026-05-24T00:00:00Z + Elijah Meeks + + Semiotic 3.6.0 ships a chart recommendation engine that's heuristic-first, LLM-optional, and audience-aware. Charts now carry descriptors that declare what data shapes they serve and which questions they answer; an AudienceProfile layers per-org familiarity and adoption targets on top; a separate 'stretch' surface grows literacy without forcing it. + + + Semiotic 3.5.4 + https://semiotic3.nteract.io/blog/release-3-5-4 + + + 2026-05-21T00:00:00Z + 2026-05-21T00:00:00Z + AI-Generated + + 3.5.4 adds a first-class asymmetric band encoding (with percentile fans) to LineChart and AreaChart, sharpens the axis surface with edge-anchored ticks, CSS-variable font sizes, and per-axis class names, ships loadingContent across every HOC, and collapses bounds + band into a single shared ribbon primitive. 
+ Semiotic 3.5.3 https://semiotic3.nteract.io/blog/release-3-5-3 @@ -14,7 +36,7 @@ 2026-05-18T00:00:00Z 2026-05-18T00:00:00Z - Elijah Meeks + AI-Generated 3.5.3 adds DifferenceChart, exact axis ticks, Swimlane rounded ends, and ProcessSankey lifecycle timing; it also launches the docs blog, refreshes AI capabilities to 45 chart schemas, and wires new release gates for capability and blog metadata drift. @@ -50,7 +72,7 @@ 2026-05-10T00:00:00Z 2026-05-10T00:00:00Z - Elijah Meeks + AI-Generated 3.5.2 is mostly a factor-and-extend release: useSeriesFeatures / useEncodingDomain / useStreamStatus / useXYLineStyle hooks land, ProcessSankey inherits SankeyDiagram's canvas particle pipeline, regression-line sugar extends to five more charts, FlowMap joins the push family, and ai/capabilities.json indexes all 44 charts. diff --git a/docs/public/blog/og/charts-that-know-what-theyre-for.png b/docs/public/blog/og/charts-that-know-what-theyre-for.png new file mode 100644 index 00000000..5cf94ce7 Binary files /dev/null and b/docs/public/blog/og/charts-that-know-what-theyre-for.png differ diff --git a/docs/public/blog/og/release-3-5-4.png b/docs/public/blog/og/release-3-5-4.png new file mode 100644 index 00000000..4a5237d1 Binary files /dev/null and b/docs/public/blog/og/release-3-5-4.png differ diff --git a/docs/public/blog/og/release-3-6-0.png b/docs/public/blog/og/release-3-6-0.png new file mode 100644 index 00000000..0b1e052d Binary files /dev/null and b/docs/public/blog/og/release-3-6-0.png differ diff --git a/docs/public/llms-full.txt b/docs/public/llms-full.txt index b333fda8..fd75036b 100644 --- a/docs/public/llms-full.txt +++ b/docs/public/llms-full.txt @@ -267,6 +267,64 @@ Canvas scene builders read CSS variables via `getComputedStyle` on the canvas DO ## AI Features `onObservation`/`useChartObserver`, `toConfig`/`fromConfig`/`toURL`/`fromURL`/`copyConfig`/`configToJSX`, `validateProps(component, props)`, `diagnoseConfig(component, props)`, `exportChart(div, { format 
})`, `npx semiotic-ai --doctor` +### Conversational Interrogation (`semiotic/ai`) +Headless hook for "chat with the chart" interactions. The library ships no UI — bring your own chat surface. +- **`useChartInterrogation({ data, onQuery, componentName?, props?, initialAnnotations? })`** → `{ ask(query), history, summary, annotations, loading, error, reset }` +- **`onQuery: (query, context) => Promise<{ answer, annotations? }>`** — call your LLM here. `context` is `{ data, summary, componentName?, props? }`. +- **`summary`**: LLM-friendly statistical summary (`rowCount`, per-field `{ min, max, mean, median }` for numerics, top-k for categoricals, ISO range for dates). Available before any ask(). +- **`annotations`**: Merged `initialAnnotations` + latest AI response. Wire to the chart's `annotations` prop for visual highlighting. +- **`summarizeData(data, options?)`**: Standalone for server-side prompting or batch jobs. +- **MCP Tool**: `interrogateChart(component, props, query)` returns the same statistical summary and AI-facing instructions. + +```jsx +import { LineChart, useChartInterrogation } from "semiotic/ai" + +function InterrogatableChart({ data }) { + const { ask, history, annotations, loading } = useChartInterrogation({ + data, + componentName: "LineChart", + props: { xAccessor: "month", yAccessor: "revenue" }, + onQuery: async (query, { summary }) => { + const res = await myLLMCall(query, summary) + return { answer: res.text, annotations: res.highlights } + }, + }) + return ( + <> + + + + ) +} +``` + +### Chart Capability Layer (`semiotic/ai`) +Heuristic chart-suggestion engine. Charts ship capability descriptors next to their TSX files; the engine ranks them against a profiled dataset by intent. No LLM call required. + +- **`profileData(data, { rawInput?, seriesField? })`** → `ChartDataProfile` (extends `DataSummary`): candidate fields per role (x/y/series/category/size/time), distinct counts, monotonicity, structure detection (hierarchy/network/geo). 
+- **`suggestCharts(data, { intent?, allow?, deny?, maxResults?, includeVariants?, minScore? })`** → ranked `Suggestion[]` with `{ component, family, importPath, variant?, score, intentScores, rubric, reasons, caveats, props }`. `props` is spreadable directly into the matching chart. +- **`scoreChart(component, data, { intent?, variantKey? })`** → evaluate a specific chart for a dataset (does it fit, how well, why/why not). +- **`useChartSuggestions(data, options)`** → memoized React hook returning `{ suggestions, profile }`. +- **`registerChartCapability(capability)`** / **`unregisterChartCapability(name)`** — runtime registration for custom charts. +- **Intent taxonomy**: 13 built-in intents (`trend`, `compare-series`, `compare-categories`, `rank`, `part-to-whole`, `distribution`, `correlation`, `flow`, `hierarchy`, `geo`, `outlier-detection`, `composition-over-time`, `change-detection`). Extend via `registerIntent(descriptor)`. +- **Capability authoring**: create `Foo.capability.ts` next to `Foo.tsx`, then append to the registry in `src/components/ai/chartCapabilities.ts`. Each capability declares `family`, `rubric` (familiarity/accuracy/precision 1-5), `fits(profile)` gate, `intentScores`, optional `variants` with `intentDeltas`, and `buildProps(profile, variant)`. +- **Variants encode that settings change what a chart is good for**: e.g. `StackedAreaChart`'s `streamgraph` variant boosts trend but penalizes part-to-whole. +- **Interrogation tie-in**: pass `includeSuggestions: true` to `useChartInterrogation` and the same ranked list lands in `context.suggestions` for the LLM. +- **MCP tool**: `suggestCharts(data, intent?)` returns the ranked list as structured content. + +```jsx +import { useChartSuggestions, LineChart, BarChart, /* ... */ } from "semiotic/ai" + +const COMPONENT_MAP = { LineChart, BarChart, /* ... 
*/ } +function SuggestedChart({ data, intent }) { + const { suggestions } = useChartSuggestions(data, { intent }) + const top = suggestions[0] + if (!top) return null + const Component = COMPONENT_MAP[top.component] + return +} +``` + ## AI Behavior Contracts diff --git a/docs/src/App.js b/docs/src/App.js index f35e4de2..5a7ef6e3 100644 --- a/docs/src/App.js +++ b/docs/src/App.js @@ -94,6 +94,8 @@ import PerformancePage from "./pages/features/PerformancePage" import PushApiPage from "./pages/features/PushApiPage" import CustomChartsPage from "./pages/features/CustomChartsPage" import CapabilitiesPage from "./pages/features/CapabilitiesPage" +import InterrogationPage from "./pages/features/InterrogationPage" +import SuggestionsPage from "./pages/features/SuggestionsPage" // New cookbook pages import HomerunMapPage from "./pages/cookbook/HomerunMapPage" @@ -383,16 +385,32 @@ export default function DocsApp() { } /> } /> } /> - } /> - } /> - } /> } /> } /> } /> } /> + + + {/* Intelligence — AI/recommendation surface, separated from generic Features + in 3.5.x. Old /features/ paths redirect to /intelligence/ + via dedicated routes below. */} + }> + } /> } /> + } /> + } /> + } /> + } /> + {/* Redirects from old /features/ paths for the Intelligence pages */} + } /> + } /> + } /> + } /> + } /> + } /> + {/* Using Server-Side Rendering */} } /> } /> diff --git a/docs/src/blog/components/BlogEntryView.js b/docs/src/blog/components/BlogEntryView.js index 678fd8eb..afacf494 100644 --- a/docs/src/blog/components/BlogEntryView.js +++ b/docs/src/blog/components/BlogEntryView.js @@ -26,6 +26,15 @@ export default function BlogEntryView({ entry }) { (it owns the docs top bar). That collapsed the entry's title onto a 235-px column inside the flex header. */}
+ {entry.draft && ( +
+ DRAFT + + Unlisted — not in the blog index, RSS, or search engines. Flip{" "} + draft: true off in the entry's registry to publish. + +
+ )}

{entry.title}

{entry.subtitle}

@@ -126,4 +135,29 @@ const styles = { color: "var(--text-primary, #e5e7eb)", maxWidth: 860, }, + draftBanner: { + display: "flex", + alignItems: "center", + gap: 12, + background: "rgba(251, 191, 36, 0.12)", + border: "1px solid rgba(251, 191, 36, 0.35)", + borderRadius: 8, + padding: "10px 14px", + marginBottom: 24, + fontSize: 13, + color: "var(--text-secondary, #94a3b8)", + }, + draftBadge: { + background: "rgb(217, 119, 6)", + color: "white", + fontSize: 11, + fontWeight: 700, + letterSpacing: "0.08em", + padding: "3px 8px", + borderRadius: 4, + flexShrink: 0, + }, + draftNote: { + lineHeight: 1.5, + }, } diff --git a/docs/src/blog/entries-meta.js b/docs/src/blog/entries-meta.js index 511178a5..057c6ff1 100644 --- a/docs/src/blog/entries-meta.js +++ b/docs/src/blog/entries-meta.js @@ -15,7 +15,68 @@ * scripts/check-blog-entry-sync.mjs). */ -export const blogEntriesMeta = [ +// Same shape as `entries.js`'s `allBlogEntries`. Drafts (entries with +// `draft: true`) are included here so the sync check and per-entry +// inspection still work; the build scripts filter at consumption time. +export const allBlogEntriesMeta = [ + { + slug: "release-3-6-0", + title: "Semiotic 3.6.0", + subtitle: + "The AI release. A heuristic chart recommender, audience-aware ranking, focus + interrogation hooks for two-way anchored conversation, an MCP server, and a per-chart capability layer that makes the library itself a structured catalog.", + author: "AI-Generated", + date: "2026-05-31", + tags: ["release"], + excerpt: + "3.6.0 turns Semiotic's observation hooks, native annotations, and streaming runtime into an explicit AI-facing surface. Charts declare what they're for; datasets get profiled and ranked; audiences get calibrated; conversations anchor back to the chart instead of stopping at a chat bubble. 
Three case-study posts published alongside the release walk through what the new shape makes possible.", + }, + { + slug: "live-conversational-dashboard", + title: "Live conversational dashboards", + subtitle: + "Streaming data + an AI watching alongside you + anchored annotations + a conversational follow-up surface. The class of product Semiotic's streaming-first runtime makes possible.", + author: "Elijah Meeks", + date: "2026-05-31", + tags: ["case-study", "realtime"], + excerpt: + "Static dashboards show the past; chat-with-chart makes the past interrogable. Live conversational dashboards add what's missing: an AI watching the stream as it arrives, narrating events anchored to the chart, with a chat surface for human follow-ups. Draft post on composing Semiotic's streaming runtime, interrogation hook, and annotation model into a single product.", + draft: true, + }, + { + slug: "anchored-conversations", + title: "Anchored conversations: when the AI knows which point you're asking about", + subtitle: + "Two-way point-anchored AI conversation: the user clicks, the AI answers about that specific point, and the answer lives on the chart as a clickable note.", + author: "Elijah Meeks", + date: "2026-05-31", + tags: ["case-study"], + excerpt: + "Chat-with-chart works, but the user has to verbalize which point they care about and the AI has to verbalize where the answer applies. Both steps lose the spatial information that's already on screen. Bidirectional point-anchored AI conversation, with useChartFocus + useChartInterrogation as the building blocks.", + draft: true, + }, + { + slug: "multimodal-response", + title: "Multimodal response: chart as output channel", + subtitle: + "Text is half the answer. 
The other half — callouts, thresholds, bands, selections — lives on the chart, and LLMs already know how to ask for it.", + author: "Elijah Meeks", + date: "2026-05-24", + tags: ["case-study"], + excerpt: + "Modern LLM assistants treat text as the only output channel. When the question is about a chart, charts give us a parallel surface — callouts, threshold lines, bands, selections — that's both more honest and easier to read. Drafted exploration of what multimodal response means in practice.", + draft: true, + }, + { + slug: "charts-that-know-what-theyre-for", + title: "Charts that know what they're for", + subtitle: + "A heuristic-first chart recommendation engine with per-audience calibration, a literacy-growth surface, and ready-to-render props.", + author: "Elijah Meeks", + date: "2026-05-24", + tags: ["case-study"], + excerpt: + "Semiotic 3.6.0 ships a chart recommendation engine that's heuristic-first, LLM-optional, and audience-aware. Charts now carry descriptors that declare what data shapes they serve and which questions they answer; an AudienceProfile layers per-org familiarity and adoption targets on top; a separate 'stretch' surface grows literacy without forcing it.", + }, { slug: "release-3-5-4", title: "Semiotic 3.5.4", @@ -122,3 +183,7 @@ export const blogEntriesMeta = [ ogChart: { component: "OrbitDiagram" }, }, ] + +// Published-only mirror — what RSS, prerender, and OG-card emit. Anything +// marked `draft: true` in `allBlogEntriesMeta` is dropped. +export const blogEntriesMeta = allBlogEntriesMeta.filter((entry) => !entry.draft) diff --git a/docs/src/blog/entries.js b/docs/src/blog/entries.js index 1780d23c..548ea919 100644 --- a/docs/src/blog/entries.js +++ b/docs/src/blog/entries.js @@ -23,6 +23,11 @@ * // SSR-renderable spec via `ogChart` if you want a different * // chart than the first one shown in the entry. 
* ogChart?: { component: string, props: Record } + * + * // Set `draft: true` to keep an entry out of the index list, the + * // RSS feed, and SEO prerender meta. The route still resolves at + * // /blog// so authors can preview before publishing. + * draft?: boolean * } * * Tags vocabulary (additive — don't be precious): @@ -42,8 +47,23 @@ import DifferenceChartExplainer from "./entries/difference-chart.js" import QuadrantChartExplainer from "./entries/quadrant-chart.js" import FunnelChartExplainer from "./entries/funnel-chart.js" import OrbitDiagramExplainer from "./entries/orbit-diagram.js" +import ChartsThatKnow from "./entries/charts-that-know-what-theyre-for.js" +import MultimodalResponse from "./entries/multimodal-response.js" +import AnchoredConversations from "./entries/anchored-conversations.js" +import LiveDashboard from "./entries/live-conversational-dashboard.js" +import Release360 from "./entries/release-3-6-0.js" -export const blogEntries = [ +/** + * Every entry, drafts included. Consumers that need the full list (direct + * URL access, sync check) read this. Consumers that should NEVER surface + * drafts (index listing, RSS, SEO prerender) read `blogEntries` below. + */ +export const allBlogEntries = [ + Release360, + LiveDashboard, + AnchoredConversations, + MultimodalResponse, + ChartsThatKnow, Release354, Release353, ProcessSankeyVsClassicSankey, @@ -55,8 +75,19 @@ export const blogEntries = [ OrbitDiagramExplainer, ] +/** + * Published entries — the canonical reader-facing list. Filters out anything + * marked `draft: true`. Used by the blog index, RSS feed, and prerender. + */ +export const blogEntries = allBlogEntries.filter((entry) => !entry.draft) + +/** + * Slug lookup intentionally returns drafts too. Drafts must be routable so + * authors can preview them before publishing — the listings and feeds are + * the surfaces that filter, not the URL space. 
+ */ export function getEntry(slug) { - return blogEntries.find((e) => e.slug === slug) + return allBlogEntries.find((e) => e.slug === slug) } export function entriesByDateDesc() { diff --git a/docs/src/blog/entries/anchored-conversations.js b/docs/src/blog/entries/anchored-conversations.js new file mode 100644 index 00000000..4f2ede85 --- /dev/null +++ b/docs/src/blog/entries/anchored-conversations.js @@ -0,0 +1,715 @@ +/* eslint-disable react/no-unescaped-entities */ +import React, { useMemo, useState } from "react" +import { Link } from "react-router-dom" +import { LineChart } from "semiotic" + +// ─── Shared blog styling ────────────────────────────────────────────────── +const chartFrame = { + background: "var(--surface-1)", + borderRadius: 8, + padding: 16, + border: "1px solid var(--surface-3)", + overflow: "hidden", + margin: "20px 0", + position: "relative", +} + +const chatPanel = { + background: "var(--surface-2)", + borderRadius: 8, + padding: 12, + marginTop: 12, + fontSize: 13, + lineHeight: 1.5, + minHeight: 100, +} + +const userBubble = { + display: "inline-block", + background: "var(--accent)", + color: "white", + padding: "6px 12px", + borderRadius: "12px 12px 2px 12px", + marginBottom: 6, + maxWidth: "85%", +} + +const aiBubble = { + display: "inline-block", + background: "var(--surface-3)", + color: "var(--text)", + padding: "6px 12px", + borderRadius: "12px 12px 12px 2px", + marginBottom: 6, + maxWidth: "85%", + whiteSpace: "pre-wrap", +} + +const inputRow = { display: "flex", gap: 6, marginTop: 10 } +const inputStyle = { + flex: 1, + padding: "6px 10px", + borderRadius: 6, + border: "1px solid var(--surface-3)", + background: "var(--background)", + color: "var(--text)", + fontSize: 13, +} + +const buttonStyle = { + padding: "6px 14px", + borderRadius: 6, + border: "none", + background: "var(--accent)", + color: "white", + fontSize: 13, + fontWeight: 600, + cursor: "pointer", +} + +const focusBadge = { + display: "inline-block", + background: 
"rgba(94,234,212,0.15)", + color: "var(--accent)", + padding: "2px 8px", + borderRadius: 999, + fontSize: 11, + fontWeight: 700, + letterSpacing: "0.04em", + marginLeft: 8, +} + +// ─── Demo data ──────────────────────────────────────────────────────────── +const SALES_DATA = [ + { month: 1, revenue: 1100, label: "Jan" }, + { month: 2, revenue: 1180, label: "Feb" }, + { month: 3, revenue: 1320, label: "Mar" }, + { month: 4, revenue: 1450, label: "Apr" }, + { month: 5, revenue: 2200, label: "May" }, + { month: 6, revenue: 1610, label: "Jun" }, + { month: 7, revenue: 1720, label: "Jul" }, + { month: 8, revenue: 1830, label: "Aug" }, + { month: 9, revenue: 1950, label: "Sep" }, + { month: 10, revenue: 1380, label: "Oct" }, + { month: 11, revenue: 2080, label: "Nov" }, + { month: 12, revenue: 2240, label: "Dec" }, +] + +// Canned LLM stand-in. A real implementation calls a model with: +// { question, focus.datum, summary, profile } +// and the model returns the same shape. +function cannedAnchoredResponder(question, focus) { + const q = question.toLowerCase() + // No focus: encourage the user to point at something + if (!focus) { + return { + text: "Hover or click a point on the chart first and I'll answer about that specific point.", + annotation: null, + } + } + const { month, revenue, label } = focus.datum + // Specific known-shape questions get rich answers anchored to the point. + if (q.includes("why") || q.includes("explain")) { + if (month === 5) { + return { + text: `May's $2,200 was driven by a spring promotion. It's well above the smooth trend the rest of the year follows. Removing it, the trajectory is almost monotonic.`, + annotation: { + type: "callout", + month, + revenue, + label: "Promo-driven spike", + note: "Spring 2024 product launch + 15% sitewide discount. 
Not repeatable; treat as one-off in forecasts.", + dx: 30, + dy: -30, + }, + } + } + if (month === 10) { + return { + text: `October's $1,380 is the year's dip is a four-day outage at the start of the month is the likely cause. The Nov/Dec recovery suggests no lasting impact.`, + annotation: { + type: "callout", + month, + revenue, + label: "Outage week", + note: "Oct 2 - 5 platform outage. Recovered by mid-month; Nov/Dec returned to trend.", + dx: -30, + dy: 30, + }, + } + } + return { + text: `${label} (${revenue}) sits ${revenue > 1670 ? "above" : "below"} the year's $1,670 average. Without a known incident here, this looks like ordinary variance.`, + annotation: { + type: "callout", + month, + revenue, + label: `${label}: ${revenue > 1670 ? "above avg" : "below avg"}`, + note: `${revenue > 1670 ? "+" : ""}${revenue - 1670} vs. $1,670 average.`, + }, + } + } + if (q.includes("compare")) { + return { + text: `${label} ($${revenue}) compared to the year average of $1,670: a difference of ${revenue > 1670 ? "+" : ""}$${revenue - 1670}. Among ${SALES_DATA.length} months, ${SALES_DATA.filter((d) => d.revenue > revenue).length} were higher and ${SALES_DATA.filter((d) => d.revenue < revenue).length} were lower.`, + annotation: { + type: "callout", + month, + revenue, + label: `${label}`, + note: `Rank: ${ + SALES_DATA.slice() + .sort((a, b) => b.revenue - a.revenue) + .findIndex((d) => d.month === month) + 1 + } of ${SALES_DATA.length}.`, + }, + } + } + // Default: small, factual answer about the focused point + return { + text: `${label}: revenue $${revenue}. ${revenue > 1670 ? "Above" : "Below"} the $1,670 yearly average.`, + annotation: { + type: "callout", + month, + revenue, + label, + note: `${revenue > 1670 ? "Above" : "Below"} average month.`, + }, + } +} + +// ─── Comment marker overlay ────────────────────────────────────────────── +// Renders interactive markers on top of the chart for AI-anchored comments. 
+// Reads annotation entries that carry a `note` field (the AI's narrative +// rationale) and renders a hoverable dot positioned at the same x/y as the +// callout. This is the reusable pattern the post documents copy it into +// your own consumer code. +function CommentOverlay({ annotations, scales }) { + const [openId, setOpenId] = useState(null) + if (!annotations || !scales) return null + const comments = annotations.filter((a) => a.note) + return ( + <> + {comments.map((c, i) => { + const key = `${c.month}-${i}` + const x = scales.x(c.month) + const y = scales.y(c.revenue) + return ( +
setOpenId(openId === key ? null : key)} + title="AI comment" + /> + ) + })} + {openId && + comments.map((c, i) => { + const key = `${c.month}-${i}` + if (key !== openId) return null + const x = scales.x(c.month) + const y = scales.y(c.revenue) + return ( +
+
+ AI note · {c.label || `month ${c.month}`} +
+
{c.note}
+ +
+ ) + })} + + ) +} + +function AnchoredDemo() { + const [focusIndex, setFocusIndex] = useState(null) + const [transcript, setTranscript] = useState([]) + const [annotations, setAnnotations] = useState([]) + const [input, setInput] = useState("Why is this point so different?") + + const focus = + focusIndex == null + ? null + : { + datum: SALES_DATA[focusIndex], + source: "click", + } + + // The chart's internal linear scales mapped to the rendered pixel + // dimensions. In production code this comes from the chart's ref + // (`chart.current.getScales()`); here we hardcode them to keep the demo + // self-contained. + const PLOT = { left: 60, right: 30, top: 30, bottom: 40, width: 600, height: 280 } + const scales = useMemo(() => { + const innerW = PLOT.width - PLOT.left - PLOT.right + const innerH = PLOT.height - PLOT.top - PLOT.bottom + return { + x: (m) => PLOT.left + ((m - 1) / 11) * innerW, + y: (r) => PLOT.top + innerH - ((r - 800) / (2400 - 800)) * innerH, + } + }, []) + + const handleClick = (datum) => { + const idx = SALES_DATA.findIndex((d) => d.month === datum.month) + setFocusIndex(idx === focusIndex ? null : idx) + } + + const send = () => { + if (!input.trim()) return + const userText = input + const { text, annotation } = cannedAnchoredResponder(userText, focus) + setTranscript((t) => [...t, { role: "user", text: userText }, { role: "assistant", text }]) + if (annotation) { + setAnnotations((a) => { + // Replace any existing annotation for the same datum so re-asking + // about the same point updates the marker rather than stacking. + const keep = a.filter( + (x) => !(x.month === annotation.month && x.revenue === annotation.revenue), + ) + return [...keep, annotation] + }) + } + setInput("") + } + + const reset = () => { + setFocusIndex(null) + setTranscript([]) + setAnnotations([]) + } + + // Highlight ring on the currently focused point + const focusRing = + focusIndex == null + ? 
null + : (() => { + const datum = SALES_DATA[focusIndex] + const x = scales.x(datum.month) + const y = scales.y(datum.revenue) + return ( +
+ ) + })() + + return ( +
+
+ + {focusRing} + +
+ +
+
+ + Anchored conversation + + {focus ? ( + + focused: {focus.datum.label} (${focus.datum.revenue}) + + ) : ( + + This is no focus, please click a point + + )} +
+ {transcript.length === 0 && ( +
+ Click a chart point to focus on it, then ask a question. The answer comes back both as + text here AND as a clickable AI note anchored to that point. +
+ )} + {transcript.map((m, i) => ( +
+
{m.text}
+
+ ))} +
+ setInput(e.target.value)} + onKeyDown={(e) => e.key === "Enter" && send()} + placeholder='Try: "Why is this so high?", "Compare this to the average"' + style={inputStyle} + /> + + {(transcript.length > 0 || annotations.length > 0) && ( + + )} +
+
+ +
+ ) +} + +function Body() { + return ( + <> +

+ The most common AI-on-a-chart pattern today is "ask the chart". Type a question, get a + paragraph back. It works, but it's lossy in both directions: the user has to verbalize which + point they care about, and the AI has to verbalize where the answer applies. Both steps lose + the spatial information that's already on screen. There's a better shape: let the user{" "} + point at a data point, and let the AI annotate the answer back onto it. In + other words: a two-way anchored conversation. +

+ +

The loop in three frames

+
    +
  1. + User hovers or clicks a data point. The chart fires an observation event; + we capture which datum the user is looking at and pass it into the chat as the "focus." +
  2. +
  3. + User asks a question. The LLM receives both the question AND the focused + datum. The prompt is "answer this question about this specific row" and not + "about the chart in general." +
  4. +
  5. + AI responds in two channels. Text in the transcript, anchored note back + on the chart at the same point. Future hover over that point shows the AI's rationale; + future questions in the same conversation can reference earlier comments. +
  6. +
+ +

Try it

+

+ Click any month's data point to focus on it. The dashed ring marks the focus; the chat shows + what's currently selected. Ask a question and the AI's answer arrives as a text bubble AND a + small turquoise dot on the chart. Click the dot to see the AI's anchored note. Stack + questions to build up a multi-point conversation. +

+ +

+ The interesting moves: click May and ask "why is this so high?" and the AI + cites the spring promotion. Click October and ask "why is this so low?"{" "} + and it cites the outage. Both rationales then live on the chart as clickable notes that + survive the rest of the conversation. The chart accumulates institutional knowledge about + itself. These are canned responses, of course. In the wild you'd need to wire this up to an + LLM but in this case there's more than enough context that you would not need a frontier + model. +

+ +

Why anchoring matters

+

Three things change once the conversation has a spatial anchor:

+
    +
  • + Pronouns work. "Why is this one higher?" becomes a well-formed + question instead of a guessing game. The LLM doesn't have to triangulate from prose what + point you meant. +
  • +
  • + Comparisons get cheap. Click two points in succession and ask "what + changed between these?" The AI compares them directly because both are explicit in the + context. +
  • +
+ Answers persist where they're useful. The AI's rationale lives next to + the point it explains. When someone else looks at this chart next week, they hover over + October's dip and the explanation is right there. No re-asking, no re-discovering, which + means no rework. None of us like rework. Charts become accumulating notebooks of + why-the-data-looks-this-way, and that's a layer that can itself be mined for insights + later.
  • +
+ +

Building it

+

+ Semiotic ships the two primitives this needs: useChartInterrogation for the + conversation, useChartFocus for the point-of-interest signal. Wiring them + together is one component: +

+
+        {`import { LineChart, ObservationProvider } from "semiotic"
+import { useChartFocus, useChartInterrogation } from "semiotic/ai"
+
+function AnchoredChart({ data }) {
+  // useChartFocus subscribes to the chart's observation store and returns
+  // the latest hover/click as { datum, x, y, source }. Returns null when
+  // the user has moved away or hasn't engaged yet.
+  const focus = useChartFocus({ chartId: "sales" })
+
+  const { ask, history, annotations } = useChartInterrogation({
+    data,
+    focus,                              // ← context.focus inside onQuery
+    onQuery: async (question, ctx) => {
+      // ctx.focus.datum is the row the user is asking about
+      const response = await yourLLMCall({
+        question,
+        focus:   ctx.focus,
+        summary: ctx.summary,
+      })
+      return {
+        answer: response.text,
+        // Return annotations with a \`note\` field your overlay renders
+        // them as clickable AI-anchored comments on the chart.
+        annotations: response.highlights,
+      }
+    },
+  })
+
+  return (
+    
+       {}}        // any handler enables the store
+      />
+      
+    
+  )
+}`}
+      
+

+ The useChartFocus hook is opinionated about what counts as focus: hover, click, + and selection by default; hover-end and click-end clear it. For a + sticky-focus UI where hover doesn't count, pass{" "} + {`{ types: ["click", "click-end"] }`} and only clicks update the AI's reference + point. +

+ +

The other direction: AI comments anchored back

+

+ The interrogation hook already returns annotations to the chart's standard{" "} + annotations prop. The new piece is what those annotations can carry: + not just a label, but a note. An annotation like:

+
+        {`{
+  type: "callout",
+  month: 5,
+  revenue: 2200,
+  label: "Promo-driven spike",
+  note: "Spring 2024 product launch + 15% sitewide discount. Not repeatable; treat as one-off in forecasts."
+}`}
+      
+

+ The chart renders the callout natively. A small overlay (~30 lines, copyable from this page) + finds annotations with a note field and renders a clickable marker that reveals + the note on demand. The rationale lives on the chart, but it doesn't crowd the chart + unless someone asks for it.

+

+ This is exactly the pattern{" "} + Advanced Annotations demonstrates with + human-authored comment threads: the same UI shape, but populated by an LLM instead of + typed by a teammate. The chart doesn't care where the comments came from.

+ +

Where to use this

+
    +
  • + Operations dashboards. An on-call engineer hovers over an anomaly spike, + asks "what happened here?" and the AI consults a runbook + incident history + deploy log + and leaves an anchored note. Next time someone sees the spike, the note is already there. +
  • +
  • + Financial models. An analyst clicks a forecast point that looks + surprising, asks "why does the model show this?" and the AI walks through which inputs + drove this value most, leaves a note explaining the dominant terms. +
  • +
  • + Scientific exploration. A researcher clicks an outlier observation, asks + "is this an artifact?" and the AI references the run log, the calibration history, similar + past observations, and leaves a note classifying it. +
  • +
  • + Customer support / sales review. A rep hovers over a usage dip for a + specific account, asks "what's going on with this customer?" and the AI consults the CRM + history and leaves an anchored explanation that the next rep also sees. +
  • +
+

+ The pattern across all four:{" "} + the chart is the primary surface, the AI is a teammate annotating it. Not a chat + window that happens to talk about charts; a chart that accumulates explanations. +

+ +

Failure modes worth thinking about

+
    +
  • + Stale notes. Yesterday's AI explanation may be wrong today. Treat + annotated notes as ephemeral by default and easy to dismiss. A note that hasn't been + refreshed in 30 days probably shouldn't be surfaced with full confidence. +
  • +
  • + Anchoring drift. If the dataset gets re-aggregated (weekly to monthly), + the annotation's coordinates may no longer match anything meaningful. Tie notes to a + stable identity (datum.id, a deterministic hash of the row), not pixel coordinates. That + way the chart re-positions them on data shape changes. +
  • +
  • + Authority confusion. Human comments and AI comments need visual + differentiation. The convention this post uses--turquoise for AI, default for human--is + one option; an author field on each annotation is the more rigorous one. The + audience needs to know which voice they're reading. +
  • +
+ + +
    +
  • + Interrogation - the{" "} + useChartInterrogation hook and its focus option. +
  • +
  • + Observation Hooks -{" "} + useChartObserver and useChartFocus, the source of the focus + signal. +
  • +
  • + Advanced Annotations - the original + comment-thread-on-a-data-point pattern this post extends to AI. +
  • +
  • + Multimodal response: chart as output channel - + the broader frame this fits into. Anchored conversation is one specific multimodal + pattern. +
  • +
+ + ) +} + +export default { + slug: "anchored-conversations", + title: "Anchored conversations: when the AI knows which point you're asking about", + subtitle: + "Two-way point-anchored AI conversation: the user clicks, the AI answers about that specific point, and the answer lives on the chart as a clickable note.", + author: "Elijah Meeks", + date: "2026-05-31", + tags: ["case-study"], + excerpt: + "Chat-with-chart works, but the user has to verbalize which point they care about and the AI has to verbalize where the answer applies. Both steps lose the spatial information that's already on screen. Bidirectional point-anchored AI conversation, with useChartFocus + useChartInterrogation as the building blocks.", + draft: true, + component: Body, +} diff --git a/docs/src/blog/entries/charts-that-know-what-theyre-for.js b/docs/src/blog/entries/charts-that-know-what-theyre-for.js new file mode 100644 index 00000000..5a761076 --- /dev/null +++ b/docs/src/blog/entries/charts-that-know-what-theyre-for.js @@ -0,0 +1,922 @@ +/* eslint-disable react/no-unescaped-entities */ +import React, { useMemo, useState } from "react" +import { Link } from "react-router-dom" +import { + AreaChart, + BarChart, + BoxPlot, + ConnectedScatterplot, + DonutChart, + DotPlot, + Histogram, + LineChart, + MultiAxisLineChart, + PieChart, + Scatterplot, + StackedAreaChart, + StackedBarChart, + SwarmPlot, + ViolinPlot, + DifferenceChart, +} from "semiotic" +import { + executivePersona, + dataScientistPersona, + inferIntent, + suggestCharts, + suggestStretchCharts, +} from "semiotic/ai" + +// ─── Styling shared with the rest of the blog ─── +const chartFrame = { + background: "var(--surface-1)", + borderRadius: 8, + padding: 16, + border: "1px solid var(--surface-3)", + overflow: "hidden", + margin: "20px 0", +} + +const playgroundFrame = { + ...chartFrame, + padding: 20, +} + +const controlsRow = { + display: "flex", + flexWrap: "wrap", + gap: 12, + alignItems: "flex-end", + marginBottom: 16, +} + 
+const controlGroup = { + display: "flex", + flexDirection: "column", + gap: 4, + fontSize: 12, + minWidth: 160, +} + +const labelStyle = { + textTransform: "uppercase", + letterSpacing: "0.06em", + fontSize: 10, + color: "var(--text-secondary)", + fontWeight: 700, +} + +const selectStyle = { + padding: "6px 10px", + borderRadius: 6, + border: "1px solid var(--surface-3)", + background: "var(--background)", + color: "var(--text)", + fontSize: 13, +} + +const inputStyle = { + ...selectStyle, + width: "100%", +} + +const intentBadge = { + display: "inline-block", + padding: "2px 10px", + borderRadius: 999, + background: "var(--accent)", + color: "white", + fontSize: 11, + fontWeight: 700, + letterSpacing: "0.04em", + textTransform: "uppercase", +} + +const cardGrid = { + display: "grid", + gridTemplateColumns: "repeat(auto-fit, minmax(280px, 1fr))", + gap: 12, +} + +const suggestionCard = { + background: "var(--background)", + border: "1px solid var(--surface-3)", + borderRadius: 8, + padding: 12, + display: "flex", + flexDirection: "column", + gap: 8, +} + +const stretchCard = { + ...suggestionCard, + background: "linear-gradient(180deg, rgba(123,97,255,0.08), transparent)", + border: "1px solid rgba(123,97,255,0.35)", +} + +const cardHeader = { + display: "flex", + justifyContent: "space-between", + alignItems: "center", + fontSize: 12, + fontWeight: 700, +} + +const sectionLabel = { + ...labelStyle, + marginTop: 24, + marginBottom: 8, + display: "flex", + alignItems: "center", + gap: 8, +} + +const stretchLabel = { + ...sectionLabel, + color: "rgb(123,97,255)", +} + +// ─── Sample datasets the playground rotates through ─── +const SAMPLE_DATASETS = { + "Quarterly revenue by region": Array.from({ length: 24 }, (_, i) => { + const region = ["EU", "NA", "APAC"][i % 3] + const quarter = Math.floor(i / 3) + 1 + const revenue = 800 + i * 60 + Math.sin(i / 2) * 90 + return { + quarter, + revenue, + profit: revenue - Math.random() * revenue * 0.9, + region, + } + }), + 
"Product sales": [ + { product: "Widget", units: 480 }, + { product: "Gadget", units: 620 }, + { product: "Sprocket", units: 290 }, + { product: "Whatsit", units: 740 }, + { product: "Doohickey", units: 410 }, + ], + "Survey ratings by cohort": Array.from({ length: 150 }, (_, i) => ({ + respondent: Math.max(1, Math.min(10, 6 + Math.sin((i % 5) / 7) * 2 + Math.random() * 3 - 1)), + satisfaction: ((i % 3) + 1) * Math.random(), + cohort: ["Beta", "GA", "Enterprise"][i % 3], + })), +} + +// Aggregated single-series time series for the fixed example: one row per +// quarter, two correlated numerics (revenue, profit). Picked specifically +// so the engine produces a canonical ConnectedScatterplot revenue on x, +// profit on y, quarter as the order axis when given correlation intent. +const QUARTERLY_KPIS = Array.from({ length: 8 }, (_, i) => { + const quarter = i + 1 + const revenue = 2400 + i * 220 + Math.sin(i / 2) * 180 + return { + quarter, + revenue, + profit: revenue * (0.16 + i * 0.015) + Math.cos(i / 2) * 60, + } +}) + +// Map Suggestion.component → renderable React component. Limited to the +// HOCs this post's sample datasets can produce keeps the bundle tight. +const COMPONENT_MAP = { + LineChart, + AreaChart, + StackedAreaChart, + Scatterplot, + ConnectedScatterplot, + BarChart, + StackedBarChart, + DotPlot, + PieChart, + DonutChart, + Histogram, + BoxPlot, + ViolinPlot, + SwarmPlot, + MultiAxisLineChart, + DifferenceChart, +} + +const AUDIENCES = { + Default: undefined, + Executive: executivePersona, + "Data scientist": dataScientistPersona, +} + +// Primary-mode chart defaults assume a 600×400 canvas, so their built-in +// margin (~70/60/50/40) eats most of a small preview tile. Override top/left/ +// right with compact values; leave bottom unset so the chart's legend-aware +// auto-reserve (80px for bottom legend) still kicks in. 
+const PREVIEW_MARGIN = { top: 16, left: 40, right: 16 } + +function renderSuggestion(suggestion, width = 280, height = 220) { + const Component = COMPONENT_MAP[suggestion.component] + if (!Component) { + return ( +
+ {suggestion.component} - preview not embedded +
+ ) + } + return ( + + ) +} + +// ─── Fixed before-playground example ─── +// One dataset, two intents, two ranked answers each with the verbatim +// reasons string the engine emits. This is the "audit trail" claim made +// concrete before the freeform playground. +const FIXED_INTENTS = [ + { + intent: "trend", + question: '"how is revenue moving over time?"', + }, + { + intent: "correlation", + question: '"how do revenue and profit move together?"', + }, +] + +function SameDataDifferentIntent() { + const data = QUARTERLY_KPIS + const picks = useMemo( + () => + FIXED_INTENTS.map(({ intent, question }) => ({ + intent, + question, + suggestion: suggestCharts(data, { intent, maxResults: 1, includeVariants: false })[0], + })), + [data], + ) + + return ( +
+
+ Same dataset (eight quarters of revenue and profit), two different questions, two different + chart picks. The reasons string below each chart is what the engine emitted: the + same string the LLM, the logs, and a snapshot test would see. +
+
+ {picks.map(({ intent, question, suggestion }) => + suggestion ? ( +
+
+ intent: {intent} + + → {suggestion.component} + +
+
+ {question} +
+
{renderSuggestion(suggestion)}
+ {suggestion.reasons.length > 0 && ( +
+ reasons: {suggestion.reasons.slice(0, 2).join("; ")} +
+ )} +
+ ) : null, + )} +
+
+ ) +} + +function Playground() { + const [datasetName, setDatasetName] = useState("Quarterly revenue by region") + const [audienceName, setAudienceName] = useState("Default") + const [query, setQuery] = useState("show me the trend") + + const data = SAMPLE_DATASETS[datasetName] + const audience = AUDIENCES[audienceName] + const inferred = useMemo(() => inferIntent(query), [query]) + const intent = inferred?.intent + + const top = useMemo( + () => + suggestCharts(data, { + intent, + audience, + maxResults: 3, + includeVariants: false, + }), + [data, intent, audience], + ) + + const stretches = useMemo( + () => + audience + ? suggestStretchCharts(data, { + audience, + intent, + maxResults: 3, + }) + : [], + [data, intent, audience], + ) + + return ( +
+
+
+ Dataset + +
+
+ Audience + +
+
+ Type a question + setQuery(e.target.value)} + placeholder='e.g. "how is the distribution" or "which is biggest"' + style={inputStyle} + /> +
+
+ +
+ {intent ? ( + <> + intent: {intent} + + detected by inferIntent from your question. + + + ) : ( + + Type a phrase like "trend over time", "which is biggest", "show the distribution", or + "is there a correlation" and inferIntent will classify it. + + )} +
+ +
+ Top picks + {audience ? ( + + (familiar to {audienceName.toLowerCase()}) + + ) : null} +
+
+ {top.length === 0 && ( +
+ No charts fit this dataset for that intent. Try a different question or audience. +
+ )} + {top.map((s) => ( +
+
+ {s.component} + + {s.score.toFixed(1)}/5 · fam {s.rubric.familiarity} + +
+
{renderSuggestion(s)}
+ {s.reasons.length > 0 && ( +
+ {s.reasons.slice(0, 2).join("; ")} +
+ )} + {s.caveats.length > 0 && ( +
+ ⚠ {s.caveats[0]} +
+ )} +
+ ))} +
+ + {stretches.length > 0 && ( + <> +
+ 🎓 Stretch your literacy + + (charts {audienceName.toLowerCase()} doesn't use often, but the data supports) + +
+
+ {stretches.map((s) => ( +
+
+ {s.suggestion.component} + + fam {s.familiarity}/5 + {s.replacing ? ` · vs ${s.replacing}` : ""} + +
+
{renderSuggestion(s.suggestion)}
+
+ {s.rationale} +
+
+ ))} +
+ + )} +
+ ) +} + +function Body() { + return ( + <> +

+ Chart libraries have historically been told how to render: props in, pixels out. + Picking which chart to render is someone else's problem: a designer, a BI tool, the + user, now very often an LLM. Semiotic 3.6.0 shipped a different approach: every chart now + knows what data shapes it serves,{" "} + which questions it answers well, and{" "} + how settings change those answers. Apply a profile of your data and you get + a ranked list of charts, each with a config and an auditable reason for that chart. Pair it + with a profile of your audience and the ranking calibrates to the needs of who is + actually reading. +

+ +

Why a recommendation engine, and why now

+

+ "What chart should I use?" has been answered three ways for the last decade, and none of + them have landed well: +

+
    +
+ Statistical heuristics (Voyager, Lux, Vega-Lite's auto-encodings). Picks + "interesting" axes through statistical tests. Doesn't model human comprehension and + doesn't recognize that the same chart with different settings answers different + questions. These tools are also so tightly wedded to libraries with just a handful of charts that + they completely ignore the value of increasing data literacy through exposure. +
  • +
  • + Let the LLM decide. Plausible-looking recommendations, occasionally + correct, no offline mode, no diagnostic surface, no way to disagree without rewriting the + prompt. +
  • +
  • + Schema lookup. Tells you what's valid ("LineChart needs + xAccessor + yAccessor") but says nothing about whether a line chart is the right answer + for what you're trying to show. +
  • +
+

+ The new layer takes a fourth position:{" "} + + charts know what they're good for, and we make that knowledge inspectable, composable, and + overridable + + . The output is an ordinary array of suggestions you can render, log, snapshot-test, diff + against a previous version, or hand to an LLM as structured context. The engine never calls + an LLM itself; an LLM can sit on top of the engine but can't replace it. +

+ +

Same data, different question, different chart

+

+ Concretely, here's what "auditable reason" buys you. One dataset of quarterly revenue and + profit, fed through suggestCharts twice with different intents. The component + the engine picks changes, the props it emits change, and the reasons string + explains why. This is the same output string an LLM or a snapshot test or a log line would + consume. This is a key point: the things we build for human users (aggregations, hints, + suggestions) are useful for AI and vice versa, and they are also useful for traditional + observability and analytics. +

+ + +

A playground for the impatient

+

+ That fixed example only scratches the surface. Pick a dataset, pick an audience, and type a + natural-language question. Each change re-ranks the suggestions live. The "stretch your + literacy" row shows charts the audience is unfamiliar with but the data actually supports, + and it only appears when you've selected an audience that has growth targets. +

+ +

+ Notice what changes as you switch audience: under Executive, BoxPlot and ViolinPlot + drop out of the top picks even when the data favors them, because the descriptor's{" "} + rubric.familiarity for those charts has been replaced by the executive + profile's familiarity number ("not familiar"). The same charts then surface in the stretch + row alongside the rationale "growing distribution literacy" and labeled as opt-in, not + pushed as defaults. Under Data scientist, the same charts move up + the main ranking, and PieChart drops because the persona ships a decrease target. +

+ +

Three primitives compose the whole thing

+

+ The runtime entry points are all in semiotic/ai. They share the same data + contract (rows in, structured suggestions out) so consumers can pick which surface fits + their UI. +

+

suggestCharts - ranked single recommendations

+

Given a dataset and an optional intent, returns the top-ranked charts that fit.

+
+        {`import { suggestCharts } from "semiotic/ai"
+
+const suggestions = suggestCharts(data, { intent: "trend" })
+// → [
+//   { component: "LineChart", variant: { key: "smooth" },
+//     score: 4.8, intentScores: { trend: 5, "compare-series": 4, ... },
+//     rubric: { familiarity: 5, accuracy: 4, precision: 4 },
+//     reasons: ["Strong fit for trend (5/5)", "x = month, y = revenue"],
+//     caveats: [],
+//     props: { data, xAccessor: "month", yAccessor: "revenue" }
+//   },
+//   { component: "AreaChart", ... },
+// ]`}
+      
+

+ Every suggestion has a runnable props object. Drop it into the matching chart + and it renders. No second pass to derive accessors from the profile. +

+ +

suggestDashboard - composite, multi-intent views

+

+ Given a dataset, returns a set of complementary panels, each covering a distinct analytical + intent, diversified by chart family by default. This is the "show me a dashboard" function call. +

+
+        {`import { suggestDashboard } from "semiotic/ai"
+
+const { panels, intentsCovered, intentsMissing, stretchPanels } =
+  suggestDashboard(data, { maxPanels: 6 })
+
+// panels: [
+//   { intent: "trend", suggestion: { component: "LineChart", ... } },
+//   { intent: "rank", suggestion: { component: "BarChart", ... } },
+//   { intent: "distribution", suggestion: { component: "BoxPlot", ... } },
+//   ...
+// ]
+// intentsMissing: ["geo"]   // honest about what the data can't show`}
+      
+

+ Intents the dataset can't honestly cover land in intentsMissing rather than + getting a forced low-scoring suggestion. Better to say "this data doesn't support geo" than + to ship a misleading map. +

+ +

useChartInterrogation - the chat surface

+

+ A headless React hook that lets users ask natural-language questions about a chart and get + back annotations the chart can render. Bring your own LLM via the onQuery{" "} + callback; the hook supplies the LLM with the same structured suggestion context as the + library APIs. +

+
+        {`import { useChartInterrogation } from "semiotic/ai"
+
+const { ask, history, annotations, loading } = useChartInterrogation({
+  data,
+  componentName: "LineChart",
+  props: { xAccessor: "month", yAccessor: "revenue" },
+  includeSuggestions: true,      // engine context lands in onQuery
+  onQuery: async (query, ctx) => {
+    // ctx.summary, ctx.profile, ctx.suggestions are all there
+    const response = await callYourLLM({
+      question: query,
+      summary: ctx.summary,
+      alternatives: ctx.suggestions,
+    })
+    return { answer: response.text, annotations: response.highlights }
+  },
+})
+
+return (
+  <>
+    
+    
+  
+)`}
+      
+ +

The audience layer - where this gets interesting

+

+ Every chart's descriptor carries a rubric.familiarity number (1 - 5). That + number has always been a guess at "what a generic data-literate reader recognizes." In + practice it's nonsense. A quant fund and a marketing org have completely different + familiarity baselines. So 3.6.0 adds AudienceProfile: a serializable + artifact your organization produces (through surveys, telemetry, training records, manager + judgment) and the library consumes: +

+
+        {`const acmeFinanceTeam = {
+  name: "Acme Finance",
+  familiarity: {
+    BarChart: 5, LineChart: 5, PieChart: 5, Histogram: 4,
+    BoxPlot: 2, ViolinPlot: 1, Heatmap: 3,
+    // ...anything not listed falls back to the descriptor default
+  },
+  targets: {
+    PieChart: {
+      direction: "decrease",
+      weight: 1,
+      reason: "moving from share-by-angle to share-by-length for accuracy",
+    },
+    BoxPlot: {
+      direction: "increase",
+      weight: 2,
+      reason: "we want the team reading distributions, not just means",
+    },
+  },
+  exposureLevel: 1,  // include stretch picks in a separate surface
+}
+
+suggestCharts(data, { audience: acmeFinanceTeam, intent: "rank" })
+suggestDashboard(data, { audience: acmeFinanceTeam })
+suggestStretchCharts(data, { audience: acmeFinanceTeam })`}
+      
+

+ The library does not measure familiarity. That's not its job, and it would invite + feature creep that's hostile to embedded use. Your organization owns the measurement, using + whatever survey, telemetry, or judgment tool produced the numbers, and the library consumes + the result as data. +

+

+ The bias is meaningful, not cosmetic. A target with weight 2 adds ±2.0 to the + chart's composite score, on a scale that normally tops out around 5. Strong enough to + reorder rankings; small enough that a clearly-wrong chart still loses on data fit. When a + target fires, the suggestion's reasons[] gains the verbatim rationale string so + the audience's policy is visible in the UI:{" "} + "Acme Finance: we want the team reading distributions, not just means." +

+ +

Stretch picks - the "yes, and" of data visualization

+

+ You should always give your stakeholders what they want but you can build literacy by giving + them more complex charts alongside it. This is the literacy-growth mechanic the audience + layer enables. suggestStretchCharts(data, { audience }) returns + charts where: +

+
    +
  1. + The data actually supports it (the chart's fits() gate passes). +
  2. +
  3. The audience's effective familiarity is at or below 3.
  4. +
  5. + Either the audience has flagged it as an increase target, OR its score is within + reach of the top familiar pick. +
  6. +
+

+ Each stretch carries a replacing field (which familiar chart it could + substitute for) and a rationale string. If you render them in their own labeled + surface, not inline with the default recommendations, then the user gets to see "here's what + you'd normally pick" alongside "here's a vocabulary expansion opportunity." The playground + above splits them into two rows for exactly this reason. +

+

+ We deliberately did not collapse stretches into the main ranking. A stretch pick is{" "} + intentionally not the best familiar choice so surfacing it as "the recommendation" + would mislead. But it is a viable option that a team or organization might find useful to + deploy for other reasons in place of the higher-ranked chart. +

+ +

When to reach for this, and when not

+

+ Reach for it when: +

+
    +
  • + You're building any UI that needs to answer "what chart should I use?" (including + chart-picker dropdowns, dashboard generators, AI assistant plumbing, or any internal-tools + surface where the user knows their data shape but not the canonical rendering). +
  • +
  • + You want recommendations that work without an LLM and get richer with one. The + structured context (reasons, caveats, profile, intent scores) is straight prompt input. +
  • +
+ You're shipping the library to a specific audience whose chart literacy is meaningfully + different from the "generic data-literate user", such as the executive view of an enterprise + dashboard, a scientific notebook environment, or a teaching tool for students. +
  • +
  • + You want to nudge audience adoption toward more analytically appropriate charts over time. + The stretch surface gives you a place to surface charts you'd like to see used more, + without forcing them into defaults. This is key. Your organization might only be + comfortable with a few charts but you are failing them if you do not help them to grow + their data visualization literacy further by exposing them to the new patterns (and + therefore new opportunities) that other charts afford. +
  • +
+

+ Don't reach for it when: +

+
    +
  • + You already know exactly what chart you want. The suggestion engine is for open{" "} + questions; if you've decided on a BarChart, just render a BarChart. +
  • +
  • + Your data shape doesn't change. The engine's value is recomputing recommendations across + different data; on a static fixture, you can hardcode the answer. +
  • +
  • + You'd be tempted to use it as a wrapper that replaces user choice. The point of the + stretch surface is that the user sees both. A default-only recommender that hides the + familiar pick is the wrong shape. +
  • +
+ +

Wiring it up

+

Single recommendation

+
+        {`import { suggestCharts, LineChart, BarChart, /* ... */ } from "semiotic/ai"
+
+const COMPONENT_MAP = { LineChart, BarChart, /* ... */ }
+
+function SuggestedChart({ data, intent }) {
+  const [top] = suggestCharts(data, { intent, maxResults: 1 })
+  if (!top) return 

No fitting chart.

+ const Component = COMPONENT_MAP[top.component] + return +}`} +
+ +

Dashboard mode

+
+        {`function GeneratedDashboard({ data, audience }) {
+  const { panels, intentsMissing, stretchPanels } = suggestDashboard(data, { audience })
+  return (
+    <>
+      
+ {panels.map(({ intent, suggestion }) => { + const Component = COMPONENT_MAP[suggestion.component] + return ( + + + + ) + })} +
+ {stretchPanels.length > 0 && ( + + )} + {intentsMissing.length > 0 && ( +

Not covered: {intentsMissing.join(", ")}

+ )} + + ) +}`} +
+ +

Natural-language intent inference

+
+        {`import { inferIntent, suggestCharts } from "semiotic/ai"
+
+function AskTheData({ data, question }) {
+  const inferred = inferIntent(question)
+  const top = suggestCharts(data, { intent: inferred?.intent, maxResults: 1 })[0]
+  if (!top) return null
+  const Component = COMPONENT_MAP[top.component]
+  return (
+    <>
+      

Detected intent: {inferred?.intent ?? "(none)"}

+ + + ) +}`} +
+

+ inferIntent is a zero-dependency regex-pattern heuristic. It never calls out to an external service. + It wraps cleanly with an LLM-backed alternative if your audience uses jargon the defaults don't + cover. +

+ +

Where this pattern shows up next

+

Three near-term applications stand out:

+
    +
  • + Authoring assistants. A natural-language chart editor sitting on top of + the engine. User types "compare regions over time"; the editor uses{" "} + inferIntent + suggestCharts to produce a starting config, and + the user iterates. +
  • +
  • + Auto-dashboards. suggestDashboard + a templated panel + renderer = "drop in a CSV, get a sensible dashboard." Pair with audience profiles and the + dashboard adapts to who's logged in. +
  • +
  • + Data-product onboarding. An organization with a literacy growth program + can ship two views of the same data: the familiar one as default, the stretch one as + opt-in, both rendered by the same engine with the same data, audited against the same + adoption targets. +
  • +
+ + +
    +
  • + Chart Suggestions - full reference for{" "} + suggestCharts, intents, capability descriptors. +
  • +
  • + Interrogation -{" "} + useChartInterrogation with annotation-returning onQuery. +
  • +
  • + Capability Matrix - the AI-readable inventory + of which charts support which features (SSR, push, linked hover, etc.). +
  • +
  • + Strategy memos in docs/strategy/: chart-capability-layer.md{" "} + (design rationale), authoring-capabilities.md (writing your own descriptor), + and audience-profiles.md (the calibration layer). +
  • +
+ + ) +} + +export default { + slug: "charts-that-know-what-theyre-for", + title: "Charts that know what they're for", + subtitle: + "A heuristic-first chart recommendation engine with per-audience calibration, a literacy-growth surface, and ready-to-render props.", + author: "Elijah Meeks", + date: "2026-05-24", + tags: ["case-study"], + excerpt: + "Semiotic 3.6.0 ships a chart recommendation engine that's heuristic-first, LLM-optional, and audience-aware. Charts now carry descriptors that declare what data shapes they serve and which questions they answer; an AudienceProfile layers per-org familiarity and adoption targets on top; a separate 'stretch' surface grows literacy without forcing it.", + component: Body, +} diff --git a/docs/src/blog/entries/live-conversational-dashboard.js b/docs/src/blog/entries/live-conversational-dashboard.js new file mode 100644 index 00000000..ec304931 --- /dev/null +++ b/docs/src/blog/entries/live-conversational-dashboard.js @@ -0,0 +1,632 @@ +/* eslint-disable react/no-unescaped-entities */ +import React, { useEffect, useMemo, useRef, useState } from "react" +import { Link } from "react-router-dom" +import { LineChart } from "semiotic" +import { useChartInterrogation } from "semiotic/ai" + +// ─── Styling ────────────────────────────────────────────────────────────── +const chartFrame = { + background: "var(--surface-1)", + borderRadius: 8, + padding: 16, + border: "1px solid var(--surface-3)", + overflow: "hidden", + margin: "20px 0", +} + +const dashboardGrid = { + display: "grid", + gridTemplateColumns: "minmax(0, 1fr) 320px", + gap: 16, +} + +const chatPanel = { + background: "var(--surface-2)", + borderRadius: 8, + padding: 12, + display: "flex", + flexDirection: "column", + height: 360, + fontSize: 12, +} + +const transcriptBox = { + flex: 1, + overflowY: "auto", + paddingRight: 4, + marginBottom: 8, +} + +const watcherBubble = { + background: "rgba(251, 191, 36, 0.18)", + border: "1px solid rgba(251, 191, 36, 0.45)", + 
borderRadius: 8, + padding: "6px 10px", + marginBottom: 8, + fontSize: 11, + lineHeight: 1.45, +} + +const userBubble = { + background: "var(--accent)", + color: "white", + borderRadius: "10px 10px 2px 10px", + padding: "6px 10px", + marginBottom: 6, + fontSize: 11, + alignSelf: "flex-end", + maxWidth: "85%", + display: "inline-block", +} + +const aiBubble = { + background: "var(--surface-3)", + color: "var(--text)", + borderRadius: "10px 10px 10px 2px", + padding: "6px 10px", + marginBottom: 6, + fontSize: 11, + maxWidth: "85%", + display: "inline-block", + whiteSpace: "pre-wrap", +} + +const inputRow = { display: "flex", gap: 4 } +const inputStyle = { + flex: 1, + padding: "5px 8px", + borderRadius: 4, + border: "1px solid var(--surface-3)", + background: "var(--background)", + color: "var(--text)", + fontSize: 11, +} +const buttonStyle = { + padding: "5px 10px", + borderRadius: 4, + border: "none", + background: "var(--accent)", + color: "white", + fontSize: 11, + fontWeight: 600, + cursor: "pointer", +} + +const controlBar = { + display: "flex", + alignItems: "center", + gap: 12, + marginBottom: 12, + fontSize: 12, +} + +const statusDot = { + display: "inline-block", + width: 8, + height: 8, + borderRadius: 999, + background: "var(--accent)", + animation: "pulse-dot 1.4s ease-in-out infinite", + marginRight: 6, + verticalAlign: "middle", +} + +// ─── Streaming demo ─────────────────────────────────────────────────────── +// Synthetic latency stream. Most values land in 80-180 ms; ~5% are spikes, +// ~2% are dips. The z-score watcher catches both. 
+function generateNext(tick) { + const base = 130 + Math.sin(tick / 8) * 20 + (Math.random() - 0.5) * 30 + const roll = Math.random() + if (roll > 0.95) return base + 350 + Math.random() * 300 // spike + if (roll < 0.03) return Math.max(30, base - 80 - Math.random() * 40) // dip + return base +} + +function rollingStats(values) { + if (values.length < 2) return { mean: 0, std: 0 } + const mean = values.reduce((a, b) => a + b, 0) / values.length + const variance = values.reduce((a, b) => a + (b - mean) ** 2, 0) / values.length + return { mean, std: Math.sqrt(variance) } +} + +// Canned follow-up responder. In production this calls an LLM with the +// recent transcript + the focused event + the current rolling stats. +async function cannedFollowup(query, context) { + await new Promise((r) => setTimeout(r, 250)) + const q = query.toLowerCase() + const focus = context.focus + if (q.includes("baseline") || q.includes("normal")) { + return { + answer: `Current rolling baseline: ~130ms ±30ms. Watcher flags anything beyond 2.5σ. That's roughly under 50ms or over 220ms.`, + } + } + if (q.includes("why") && focus) { + return { + answer: `Most ${focus.datum.value > 300 ? "spikes" : "dips"} of this magnitude correlate with one of: a slow downstream call, a GC pause on the app server, or transient network congestion. Without trace IDs I can't be more specific. Recommend cross-referencing the app log at that timestamp.`, + } + } + if (q.includes("trend") || q.includes("worsen") || q.includes("getting")) { + return { + answer: `Looking at the last ~30 seconds, latency is ${Math.random() > 0.5 ? "stable" : "drifting up slightly"} but a streaming window this short makes trend claims unreliable. Recommend a longer history before declaring a trend.`, + } + } + if (q.includes("how many") || q.includes("count")) { + return { + answer: `Since you started watching, I've flagged ${context.summary.rowCount > 0 ? "several" : "no"} anomalies. 
The transcript above is your audit trail.`, + } + } + return { + answer: `I can riff on anomalies the watcher already flagged, compare to baseline, or describe recent trend. Ask "why" about a specific event, "what's the baseline?", or "is it getting worse?"`, + } +} + +function LiveDashboardDemo() { + const [points, setPoints] = useState([]) + const [paused, setPaused] = useState(false) + const [input, setInput] = useState("") + const tickRef = useRef(0) + const recentRef = useRef([]) + const lastFlagRef = useRef(-Infinity) + + const { ask, announce, history, annotations, reset } = useChartInterrogation({ + data: points, + onQuery: cannedFollowup, + }) + + // Auto-scroll the transcript as new messages arrive + const transcriptRef = useRef(null) + useEffect(() => { + if (transcriptRef.current) { + transcriptRef.current.scrollTop = transcriptRef.current.scrollHeight + } + }, [history]) + + // The streaming loop. Generates one point per tick, runs the rolling + // z-score detector, and fires announce() when something deviates. + useEffect(() => { + if (paused) return undefined + const id = setInterval(() => { + const tick = (tickRef.current += 1) + const value = generateNext(tick) + const next = { ts: tick, value } + + setPoints((prev) => { + const updated = [...prev, next] + // Keep at most 120 points visible about 50 seconds at 400ms cadence + return updated.length > 120 ? updated.slice(-120) : updated + }) + + // Z-score detector on the trailing 30 points + const buf = recentRef.current + buf.push(value) + if (buf.length > 30) buf.shift() + if (buf.length >= 15) { + const { mean, std } = rollingStats(buf.slice(0, -1)) // exclude the new point itself + if (std > 0) { + const z = (value - mean) / std + // Debounce: don't flag again within 5 ticks of the last flag + if (Math.abs(z) > 2.4 && tick - lastFlagRef.current > 5) { + lastFlagRef.current = tick + const direction = z > 0 ? "spike" : "dip" + const text = + `${direction === "spike" ? 
"⚠" : "⚡"} ${direction} at t=${tick}: ` + + `${Math.round(value)}ms (${z > 0 ? "+" : ""}${z.toFixed(1)}σ vs ${Math.round(mean)}ms baseline)` + const note = + z > 2.4 + ? "Sharp upward deviation. Likely candidates: a slow downstream call, GC pause, or congested network. Worth investigating if it recurs in this window." + : "Downward deviation. Often spurious because of caching effects, fewer concurrent requests, or under-counted samples. Less actionable than spikes." + announce({ + text, + annotations: [ + { + type: "callout", + ts: tick, + value, + label: `${direction === "spike" ? "↑" : "↓"} ${Math.round(value)}ms`, + note, + source: "ai-watcher", + dx: direction === "spike" ? 20 : -20, + dy: direction === "spike" ? -30 : 30, + }, + ], + }) + } + } + } + }, 400) + return () => clearInterval(id) + }, [paused, announce]) + + // Visible window only points already in state. We compute the visible + // chart domain from the buffer so the chart doesn't try to render an + // x-axis from 0 to infinity. + const xExtent = useMemo(() => { + if (points.length === 0) return [0, 1] + return [points[0].ts, points[points.length - 1].ts] + }, [points]) + + const submit = () => { + const trimmed = input.trim() + if (!trimmed) return + setInput("") + void ask(trimmed) + } + + const handleReset = () => { + setPaused(true) + setPoints([]) + recentRef.current = [] + lastFlagRef.current = -Infinity + tickRef.current = 0 + reset() + } + + return ( +
+
+ + + {paused ? "Paused" : "Watching"} — stream + z-score detector live + + + + + {points.length} points · {history.filter((m) => m.role === "assistant").length}{" "} + announcements + +
+

Request latency (ms) — synthetic stream

+ +
+
+ +
+ +
+
+ {history.length === 0 && ( +
+ Watcher will announce anomalies here in real-time. You can also ask follow-ups + ("why?", "what's baseline?", "trend?"). +
+ )} + {history.map((m, i) => { + // Distinguish AI-watcher proactive announcements from + // user-question responses. Convention: watcher messages start + // with the ⚠ or ⚡ glyph emitted above. + const isWatcher = + m.role === "assistant" && (m.text.startsWith("⚠") || m.text.startsWith("⚡")) + if (isWatcher) + return ( +
+ {m.text} +
+ ) + if (m.role === "user") { + return ( +
+
{m.text}
+
+ ) + } + return ( +
+
{m.text}
+
+ ) + })} +
+
+ setInput(e.target.value)} + onKeyDown={(e) => e.key === "Enter" && submit()} + placeholder="Ask a follow-up…" + style={inputStyle} + /> + +
+
+
+ +
+ ) +} + +function Body() { + return ( + <> +

+ Static dashboards show you the past. Conversational dashboards (the chat-with-a-chart + pattern) make the past interrogable. Live conversational dashboards add the missing + piece: an AI watching the stream alongside you, proactively narrating events as they happen + and anchoring its narration back onto the chart. The chart accumulates context the moment + something interesting occurs. There's no waiting for someone to ask the question, no losing + the moment to scroll. +

+

Three pieces composed into one product

+

+ This pattern is buildable only because Semiotic ships the three primitives it needs as + separate, composable things: +

+
    +
  • + A streaming runtime. Push API, observation hooks, decay encoding — the + chart is designed for data that arrives over time. +
  • +
  • + An interrogation hook with proactive announcements. The new{" "} + announce() method appends AI-initiated messages to the transcript and adds + annotations to the chart without going through a user question. A watcher can call it as + freely as a user can call ask(). +
  • +
  • + Anchored annotations. Every announcement can carry a callout, a + threshold, or a band — visual provenance for the AI's claims, attached to the coordinates + the claim is about. +
  • +
+

+ Compose them and you get a dashboard where the AI's "I saw that" is structurally identical + to the human's "ask about that" — both write to the same transcript, both leave traces on + the same chart, both feed the same conversation. +

+ +

Try it

+

+ Synthetic request-latency stream — a value arrives every 400ms. A rolling z-score detector + watches the last 30 points; anything beyond ±2.4σ gets announced. Each announcement carries + a callout on the chart (with a note explaining the deviation category) and an entry in the + transcript. Ask a follow-up like "why?", "what's baseline?", or{" "} + "is it getting worse?" and you'll get the AI's response in the same transcript. + Pause to inspect; reset to start over. +

+ +

+ The demo uses canned responders for the LLM side. In production you'd wire{" "} + onQuery to a real model and the announcement note field would be + the model's actual narrative — generated when the watcher fires, cached on the annotation, + displayed on hover. +

+ +

The watcher pattern

+

+ The detector here is intentionally simple: a rolling-window z-score with a debounce. That's + the right starting point for most monitoring workflows because it has zero configuration, + runs in O(window) per tick, and catches both spikes and dips. Stronger detectors layer on + top: +

+
    +
  • + Median absolute deviation (MAD) instead of stddev for non-Gaussian + streams — heavy-tailed metrics (request latency, error counts) often have outliers that + pull stddev around. MAD is robust. +
  • +
  • + Multi-window comparison. Compare the trailing 30 seconds to the trailing + 5 minutes. Flag when they diverge. Catches drift the rolling-window detector misses. +
  • +
  • + Domain-aware thresholds. Latency over 1000ms is interesting at any time, + even if the rolling mean was 950ms. Add absolute thresholds on top of the statistical + ones. +
  • +
  • + Capability-layer-driven detection. The same chart that's currently + showing latency could be rendered as a Histogram instead — and the histogram-based watcher + would flag distribution-shape changes (bimodal becoming unimodal, tail fattening). Pair{" "} + + suggestStreamCharts + {" "} + with watcher logic specific to each chart family. +
  • +
+

+ Whatever the detector, the pattern is the same: when it fires, call announce(){" "} + with text and annotations. The interrogation hook handles the rest. +

+ +

The conversational side

+

+ Half of the dashboard is autonomous (watcher → announce). The other half is reactive: the + user reads an announcement, has a follow-up question, and asks. That question lands in the + same transcript with full context — recent announcements, the statistical summary of the + visible window, the user's currently-focused point if any. +

+

+ The asymmetry is the feature. The watcher narrates broadly ("⚠ spike at t=42, 3.1σ above + baseline"); the user drills in ("which downstream call?"). The LLM gets both signals on + every turn — it knows what the watcher already said and what the user wants to know now. +

+ +

When to deploy this

+
    +
+ Production monitoring with on-call rotation. The AI is essentially + writing real-time handoff notes. When the next on-call engineer takes over, the transcript plus the + chart's anchored notes are a complete record of "what happened during my shift." +
  • +
  • + Financial trading desks. A watcher monitors instrument moves; the AI + annotates breakouts and breakdowns the moment they happen. Traders ask follow-ups without + leaving the chart. +
  • +
  • + IoT / industrial telemetry. Sensor streams from a factory floor. The + watcher flags pressure drops, vibration anomalies, temperature drift. Each gets a + timestamped anchored note that becomes the maintenance log. +
  • +
  • + Live experiments / lab readings. Researcher running an experiment; the + watcher flags when readings deviate from expected. The AI's anchored notes become a + real-time lab notebook. +
  • +
  • + Live data exploration sessions. Analyst exploring a new dataset with + streaming updates (a query that produces results progressively). The AI narrates what it + sees as the data arrives. +
  • +
+ +

Production considerations

+

The demo cuts corners for clarity. Real deployment needs to handle:

+
    +
  • + + Use RealtimeLineChart. + {" "} + The demo uses plain LineChart with state-managed buffer because it's easier to read. In + production, swap in Semiotic's RealtimeLineChart — it has an imperative push API that + bypasses React re-renders, supports decay encoding, and handles particles. 30+ Hz streams + are comfortable. +
  • +
  • + Debounce the LLM call. The demo's announce() happens + synchronously when the detector fires — the "note" is canned. In production, calling the + LLM inside the detector loop will blow your budget. The right pattern: announce + immediately with a placeholder note ("detected, analyzing…"), then call the LLM + asynchronously, then update the annotation's note when the response lands. +
  • +
  • + Rate-limit announcements. A cascading-failure incident can fire the + detector dozens of times in seconds. The demo debounces by 5 ticks; production needs + adaptive backoff (collapse repeat announcements into "10 spikes in 30s"). +
  • +
  • + Sliding-window annotation lifecycle. When the chart's data window slides, + annotations referencing data that's been evicted should either age out or migrate to a + separate "recent events" panel. The demo lets them slide off — fine for monitoring, wrong + for a forensic timeline. +
  • +
  • + Persist the conversation. The transcript is in-memory. If the on-call + handoff is the use case, write it to durable storage. Semiotic doesn't ship that path; + bring your own. +
  • +
+ +

Failure modes worth thinking about

+
    +
  • + The watcher cries wolf. A misconfigured detector floods the transcript + with non-events. Users learn to ignore announcements. The fix is upstream — tighter + detectors, multi-signal confirmation — not "make the AI better at phrasing the false + alarms." +
  • +
  • + The watcher misses real events. A z-score detector misses gradual drift. + The transcript looks calm while the underlying system is slowly burning down. Pair it with + longer-window detectors and absolute thresholds. +
  • +
  • + The AI hallucinates causes. The watcher detected the spike; the LLM is + guessing what caused it. Make the AI's note explicitly tentative ("likely candidates: …") + and surface links to actual evidence (logs, traces) when available. Never let the AI claim + certainty it doesn't have. +
  • +
  • + Operator desensitization. Anything blinking and announcing constantly + gets tuned out. The watcher should be quiet most of the time. Better to flag fewer real + events than many maybe-events. +
  • +
+ +

Why this is hard to build outside Semiotic

+

The pattern requires three things that other chart libraries don't put together:

+
    +
  1. + A streaming chart runtime that handles incremental data without + re-mounting (Semiotic's push API + decay). +
  2. +
  3. + An interrogation surface that accepts proactive AI input, not just user + queries (the announce() method). +
  4. +
  5. + An annotation model where AI-generated annotations are first-class + (callouts, thresholds, bands all work the same whether the human or the AI added them). +
  6. +
+

+ Other libraries can be made to do this, but only with enough custom plumbing — because they + treat each of the three concerns as out-of-scope. With Semiotic, all three are in-scope, + individually testable, and composable. The streaming-first runtime is the load-bearing + piece; everything else assembles around it. +

+ + +
    +
  • + Interrogation — the{" "} + useChartInterrogation hook, with the announce() method added in + this release. +
  • +
  • + Anchored conversations — the user-side + counterpart: point-of-focus + annotation-as-response. +
  • +
  • + Multimodal response: chart as output channel — + the broader frame this fits into. +
  • +
  • + RealtimeLineChart — the production chart for + streaming. Drop-in replacement for the demo's static buffer. +
  • +
+ + ) +} + +export default { + slug: "live-conversational-dashboard", + title: "Live conversational dashboards", + subtitle: + "Streaming data + an AI watching alongside you + anchored annotations + a conversational follow-up surface. The class of product Semiotic's streaming-first runtime makes possible.", + author: "Elijah Meeks", + date: "2026-05-31", + tags: ["case-study", "realtime"], + excerpt: + "Static dashboards show the past; chat-with-chart makes the past interrogable. Live conversational dashboards add what's missing: an AI watching the stream as it arrives, narrating events anchored to the chart, with a chat surface for human follow-ups. Draft post on composing Semiotic's streaming runtime, interrogation hook, and annotation model into a single product.", + draft: true, + component: Body, +} diff --git a/docs/src/blog/entries/multimodal-response.js b/docs/src/blog/entries/multimodal-response.js new file mode 100644 index 00000000..26aa1cfd --- /dev/null +++ b/docs/src/blog/entries/multimodal-response.js @@ -0,0 +1,418 @@ +import React, { useMemo, useState } from "react" +import { Link } from "react-router-dom" +import { LineChart } from "semiotic" + +// ─── Shared blog styling ────────────────────────────────────────────────── +const chartFrame = { + background: "var(--surface-1)", + borderRadius: 8, + padding: 16, + border: "1px solid var(--surface-3)", + overflow: "hidden", + margin: "20px 0", +} + +const controlsRow = { + display: "flex", + flexWrap: "wrap", + gap: 8, + margin: "12px 0 16px", +} + +const buttonStyle = { + padding: "6px 12px", + borderRadius: 999, + border: "1px solid var(--surface-3)", + background: "var(--background)", + color: "var(--text)", + fontSize: 12, + cursor: "pointer", + fontWeight: 600, +} + +const buttonActiveStyle = { + ...buttonStyle, + background: "var(--accent)", + color: "white", + borderColor: "var(--accent)", +} + +const transcriptStyle = { + background: "var(--surface-2)", + borderRadius: 8, + padding: 12, + 
fontSize: 13, + lineHeight: 1.5, + minHeight: 80, + marginTop: 12, +} + +const userBubble = { + display: "inline-block", + background: "var(--accent)", + color: "white", + padding: "6px 12px", + borderRadius: "12px 12px 2px 12px", + marginBottom: 8, + maxWidth: "85%", +} + +const aiBubble = { + display: "inline-block", + background: "var(--surface-3)", + color: "var(--text)", + padding: "6px 12px", + borderRadius: "12px 12px 12px 2px", + marginBottom: 8, + maxWidth: "85%", + whiteSpace: "pre-wrap", +} + +const aiSide = { display: "flex", justifyContent: "flex-start" } +const userSide = { display: "flex", justifyContent: "flex-end" } + +// ─── The multimodal-response demo ───────────────────────────────────────── +// Synthetic dataset — 12 months of revenue + visits, with two visible +// anomalies (a late-spring spike, an autumn dip) and an obvious trend. +const SALES_DATA = [ + { month: 1, revenue: 1100, visits: 8200, label: "Jan" }, + { month: 2, revenue: 1180, visits: 8700, label: "Feb" }, + { month: 3, revenue: 1320, visits: 9500, label: "Mar" }, + { month: 4, revenue: 1450, visits: 10100, label: "Apr" }, + { month: 5, revenue: 2200, visits: 14000, label: "May" }, // promo spike + { month: 6, revenue: 1610, visits: 11200, label: "Jun" }, + { month: 7, revenue: 1720, visits: 11800, label: "Jul" }, + { month: 8, revenue: 1830, visits: 12400, label: "Aug" }, + { month: 9, revenue: 1950, visits: 13100, label: "Sep" }, + { month: 10, revenue: 1380, visits: 9600, label: "Oct" }, // outage dip + { month: 11, revenue: 2080, visits: 13600, label: "Nov" }, + { month: 12, revenue: 2240, visits: 14400, label: "Dec" }, +] + +// Five pre-baked questions, each with the text answer AND the chart +// annotations the response renders. A real LLM-backed version would +// generate both; this demo is a stand-in to show the round trip. 
+const CANNED_RESPONSES = { + "When did revenue peak?": { + text: "Revenue peaked at $2,240 in December — the year's high point.", + annotations: [ + { type: "callout", month: 12, revenue: 2240, label: "Peak: $2,240", dx: -40, dy: -30 }, + ], + }, + "Were there any unusual months?": { + text: + "Two stand out: May ($2,200) was a promotion-driven spike well above the underlying trend, and October ($1,380) is the inverse — a sharp dip below where the trend was tracking. Both deserve a closer look.", + annotations: [ + { type: "callout", month: 5, revenue: 2200, label: "May spike", dx: 30, dy: -30 }, + { type: "callout", month: 10, revenue: 1380, label: "Oct dip", dx: -30, dy: 30 }, + ], + }, + "What's the overall trend?": { + text: + "Revenue is on a steady upward trend across the year, climbing from $1,100 in January to $2,240 in December — roughly doubling. Removing the May spike and October dip, the trend line is almost monotonic.", + annotations: [ + { type: "y-threshold", value: 1670, label: "Year average", color: "var(--accent)" }, + { type: "trend", lineBy: "all", color: "var(--semiotic-info)", label: "Trend" }, + ], + }, + "Which months were below average?": { + text: + "Six months sat below the $1,670 yearly average: January through April, June (just barely), and October. The first half of the year was the slower stretch.", + annotations: [ + { type: "y-threshold", value: 1670, label: "Average ($1,670)", color: "var(--text-secondary)" }, + { type: "band", y0: 0, y1: 1670, color: "rgba(94,234,212,0.06)" }, + ], + }, + "Compare May and December.": { + text: + "December ($2,240) edged out May ($2,200) by just $40 — but they're qualitatively different. May was a one-month promotion spike; December is the natural endpoint of a sustained climb. 
The same revenue, two different stories.", + annotations: [ + { type: "callout", month: 5, revenue: 2200, label: "May $2,200 (promo)", dx: 30, dy: -30 }, + { type: "callout", month: 12, revenue: 2240, label: "Dec $2,240 (trend)", dx: -50, dy: -30 }, + ], + }, +} + +function MultimodalDemo() { + const [askedQuestions, setAskedQuestions] = useState([]) + + const annotations = useMemo(() => { + return askedQuestions.flatMap((q) => CANNED_RESPONSES[q]?.annotations ?? []) + }, [askedQuestions]) + + const ask = (q) => { + setAskedQuestions((prev) => (prev.includes(q) ? prev : [...prev, q])) + } + + const reset = () => setAskedQuestions([]) + + return ( +
+ +
+ {Object.keys(CANNED_RESPONSES).map((q) => ( + + ))} + {askedQuestions.length > 0 && ( + + )} +
+
+ {askedQuestions.length === 0 && ( +
+ Click any question above. The text answer appears here; the visual answer appears on the + chart simultaneously. Stack multiple questions to see the annotations compose. +
+ )} + {askedQuestions.map((q, i) => ( +
+
+
{q}
+
+
+
{CANNED_RESPONSES[q].text}
+
+
+ ))} +
+
+ ) +} + +function Body() { + return ( + <> +

+ Modern LLMs are interfaces, not just text generators. When an assistant answers a question + about data, the answer can — and increasingly should — include visual artifacts: + highlights on the chart the user is looking at, regions of interest, threshold lines, + sub-selections, even a different chart entirely. We've been optimizing chat interfaces for + text output for two years. Charts give us a parallel output channel that's underused. +

+ +

Text is half the answer

+

+ The dominant LLM response pattern is a wall of prose. Even when the question is{" "} + "where's the peak in this chart?" the answer comes back as a paragraph: "The peak + appears to be around month 12 at approximately $2,240, which represents a notable + increase from..." — and the reader's eye has to leave the chart, parse the paragraph, find + the relevant month, look back at the chart, and locate the point. +

+

+ Every step of that loop is friction. The peak is in the chart. The model has + access to the chart's data. It can answer "where's the peak?" by drawing a circle around + the peak, with the prose as supporting detail. +

+ +

Demo: ask, see the chart respond

+

+ This is a canned version of the round trip. Each question button below pretends to ask a + small local LLM; the model's response is a { text, annotations }{" "} + object. The text goes into the transcript; the annotations land on the chart. You can + stack questions to see how multiple annotations compose. +

+ +

+ Click Were there any unusual months? first — that's the canonical version of + the example. The text names May and October as outliers; the chart simultaneously gets + callouts on those two points. Reading the text confirms what the chart already showed. + Reading the chart confirms what the text says. The two channels reinforce instead of + duplicating. +

+ +

Why this works, and why it doesn't break the chat metaphor

+

+ The chat surface stays familiar — there's a question and an answer in a transcript. What's + new is that the answer has two faces: +

+
    +
  • + Text in the transcript, for the parts that need words: nuance, + comparison, context, the "why" behind a value. +
  • +
  • + Annotations on the chart, for the parts that need pixels:{" "} + where the peak is, which months are below average, which two{" "} + observations the question is about. +
  • +
+

+ The split is not arbitrary. Some claims compress better as text ("revenue doubled"); + others compress better as space ("here's the threshold and here are the six months below + it"). When the model gets to choose, the answer fits the question's natural shape. +

+ +

The contract

+

+ Concretely, this is what an LLM-backed answer looks like with a chart library that can + render annotations: +

+
+{`async function onQuery(question, context) {
+  const response = await callYourLLM({
+    question,
+    chartSummary: context.summary,   // min/max/mean/median per field
+    chartData:    context.data,      // raw rows
+    intent:       inferIntent(question)?.intent,
+  })
+  return {
+    answer: response.text,
+    annotations: response.highlights,  // [{type: "callout", month: 5, revenue: 2200, label: "..."}, ...]
+  }
+}`}
+      
+

+ The LLM is asked for two things and returns two things. The text is rendered in the chat + transcript like any other LLM response; the annotations are passed through to the + chart's annotations prop. No extra plumbing — both already exist as + first-class chart concepts (callouts, thresholds, bands, trend lines, region highlights). +

+ +

A small annotation vocabulary the model can use

+

+ The chart library defines the vocabulary; the LLM picks from it. A useful starting set: +

+
    +
  • + callout — point a label at a specific observation. Use for{" "} + "this is the peak", "this is the outlier". +
  • +
  • + y-threshold / x-threshold — a horizontal or vertical + reference line. Use for "the average is here", "before this date". +
  • +
  • + band — a shaded region between two values. Use for "below target",{" "} + "within tolerance". +
  • +
  • + trend / envelope — a statistical overlay. Use for{" "} + "if we remove these outliers, the trend is...". +
  • +
  • + enclose / rect-enclose — wrap a set of observations in a hull + or rectangle. Use for "these three points form a cluster". +
  • +
+

+ Each is JSON-serializable. The LLM doesn't draw pixels — it emits structured intent and + the chart library handles the geometry. That's the right division of labor: language + models are good at saying which observations matter and why; chart + runtimes are good at converting that into pixels. +

+ +

Beyond annotations — the broader pattern

+

+ Annotations are the entry point. Once you accept that LLM responses can have a visual + face, the pattern extends: +

+
    +
  • + Selection responses. "Show me only the Q3 data" — the model returns a + filter the chart applies. Same brushing surface used by humans. +
  • +
  • + Chart-type swaps. "This isn't the right chart for that question" — the + model returns a new { component, props } spec the runtime mounts + in place of the current chart. The Semiotic capability layer can power this: the model + consults suggestCharts and picks the best alternative. +
  • +
  • + Linked follow-ups. "What about by region?" — the model returns a + companion chart that gets rendered alongside the current one, with its hover state + linked to the first. +
  • +
  • + Audience-calibrated responses. The same question to the same data + could return a BoxPlot for a data-science audience and a BarChart for an executive — the + model reads the audience profile and adjusts. (See{" "} + Chart Suggestions for how that calibration + works.) +
  • +
+

+ All of these are extensions of the same idea: the chart library is an output channel. + LLMs that ignore it are leaving the most expressive part of the surface dark. +

+ +

What to watch out for

+

+ Multimodal output isn't free of failure modes. Three to watch: +

+
    +
  • + Hallucinated annotations. A model that's wrong about the peak's + location is now visibly wrong, with a callout pointing at the wrong dot. The + fix is upstream: give the model the data summary and statistical context, not just the + chart props, so its claims are grounded. +
  • +
  • + Annotation clutter. A model that surfaces an annotation for every + question accumulates noise. Either give the model a reset signal or accept that the + chart needs a "clear annotations" affordance in the chat UI. +
  • +
  • + Mode confusion. Users will eventually ask follow-up questions about + the annotations themselves ("why did you highlight October?"). The chat history needs + to include the annotations alongside the text so the next turn has full context. +
  • +
+ + +
    +
  • + Interrogation — the{" "} + useChartInterrogation hook that ships this pattern as a first-class + surface. The annotation-return contract is exactly what powers the demo above. +
  • +
  • + Annotations — the chart-library side of the + vocabulary: every annotation type the LLM can emit. +
  • +
  • + Chart Suggestions — what powers the + chart-type-swap response mode mentioned above. +
  • +
+ + ) +} + +export default { + slug: "multimodal-response", + title: "Multimodal response: chart as output channel", + subtitle: + "Text is half the answer. The other half — callouts, thresholds, bands, selections — lives on the chart, and LLMs already know how to ask for it.", + author: "Elijah Meeks", + date: "2026-05-24", + tags: ["case-study"], + excerpt: + "Modern LLM assistants treat text as the only output channel. When the question is about a chart, charts give us a parallel surface — callouts, threshold lines, bands, selections — that's both more honest and easier to read. Drafted exploration of what multimodal response means in practice.", + draft: true, + component: Body, +} diff --git a/docs/src/blog/entries/process-sankey-vs-classic-sankey.js b/docs/src/blog/entries/process-sankey-vs-classic-sankey.js index d8fd318d..9a459b1c 100644 --- a/docs/src/blog/entries/process-sankey-vs-classic-sankey.js +++ b/docs/src/blog/entries/process-sankey-vs-classic-sankey.js @@ -1,3 +1,4 @@ +/* eslint-disable react/no-unescaped-entities */ import React from "react" import { Link } from "react-router-dom" import WaterCycleFlow from "../../examples/recipes/WaterCycleFlow.js" diff --git a/docs/src/blog/entries/release-3-5-2.js b/docs/src/blog/entries/release-3-5-2.js index 4f8e2a8b..9d5ef2cf 100644 --- a/docs/src/blog/entries/release-3-5-2.js +++ b/docs/src/blog/entries/release-3-5-2.js @@ -1,3 +1,4 @@ +/* eslint-disable react/no-unescaped-entities */ import React from "react" import { Link } from "react-router-dom" diff --git a/docs/src/blog/entries/release-3-5-3.js b/docs/src/blog/entries/release-3-5-3.js index db292454..3a0734c7 100644 --- a/docs/src/blog/entries/release-3-5-3.js +++ b/docs/src/blog/entries/release-3-5-3.js @@ -1,3 +1,4 @@ +/* eslint-disable react/no-unescaped-entities */ import React from "react" import { Link } from "react-router-dom" diff --git a/docs/src/blog/entries/release-3-5-4.js b/docs/src/blog/entries/release-3-5-4.js index c7dea717..0893b53f 
100644 --- a/docs/src/blog/entries/release-3-5-4.js +++ b/docs/src/blog/entries/release-3-5-4.js @@ -1,3 +1,4 @@ +/* eslint-disable react/no-unescaped-entities */ import React from "react" import { Link } from "react-router-dom" @@ -5,14 +6,12 @@ function Body() { return ( <>

- 3.5.4 lands a real envelope encoding on{" "} - LineChart and{" "} - AreaChart, sharpens the axis surface - (edge-anchored ticks, CSS-variable font sizes, per-axis class names), and gives every - HOC a sibling to emptyContent with the new loadingContent{" "} - slot. Under the hood, boundsAccessor and band now share a - single ribbon primitive — one scene builder, one y-extent pass, one style cascade. - Full release notes are on{" "} + 3.5.4 lands a real envelope encoding on LineChart and{" "} + AreaChart, sharpens the axis surface (edge-anchored + ticks, CSS-variable font sizes, per-axis class names), and gives every HOC a sibling to{" "} + emptyContent with the new loadingContent slot. Under the hood,{" "} + boundsAccessor and band now share a single ribbon primitive — one + scene builder, one y-extent pass, one style cascade. Full release notes are on{" "} The new band prop on LineChart and AreaChart draws an asymmetric min/max envelope under the line/area, driven by independent y0Accessor and{" "} - y1Accessor. That's distinct from the existing{" "} - boundsAccessor (which is symmetric ±offset) and from{" "} - AreaChart.y0Accessor (which replaces the area baseline). Pass a single{" "} - BandConfig for one envelope or an array for percentile fans — p25/p75 - stacked on top of p10/p90 is the canonical shape. + y1Accessor. That's distinct from the existing boundsAccessor{" "} + (which is symmetric ±offset) and from AreaChart.y0Accessor (which replaces the + area baseline). Pass a single BandConfig for one envelope or an array for + percentile fans — p25/p75 stacked on top of p10/p90 is the canonical shape.

- Per-series by default: one ribbon per lineBy / colorBy{" "} - group, colored from the parent line at 0.2 fillOpacity. Pass{" "} - perSeries: false for an aggregate min/max envelope across all series. - Bands are non-interactive by default (hovers pass through to the line on top); set{" "} - interactive: true if the band should participate in hit testing. Band - y0/y1 values feed yExtent auto-derivation so a tall envelope can never - clip; explicit yExtent still wins. Live demo at{" "} + Per-series by default: one ribbon per lineBy / colorBy group, + colored from the parent line at 0.2 fillOpacity. Pass{" "} + perSeries: false for an aggregate min/max envelope across all series. Bands are + non-interactive by default (hovers pass through to the line on top); set{" "} + interactive: true if the band should participate in hit testing. Band y0/y1 + values feed yExtent auto-derivation so a tall envelope can never clip; explicit{" "} + yExtent still wins. Live demo at{" "} /charts/line-chart#band.

Tooltip enrichment covers every interaction surface: the hovered datum carries{" "} - band: {`{ y0, y1 }`} (first band) and bands: [...] (all - bands) on the pointer hover path, each allSeries[i].datum in multi-mode, - and the keyboard-navigation datum. The default tooltip auto-surfaces band rows when{" "} - band is configured without a custom tooltip — string accessors become - labels; function accessors fall back to low / high. + band: {`{ y0, y1 }`} (first band) and bands: [...] (all bands) on + the pointer hover path, each allSeries[i].datum in multi-mode, and the + keyboard-navigation datum. The default tooltip auto-surfaces band rows when{" "} + band is configured without a custom tooltip — string accessors become labels; + function accessors fall back to low / high.

Axis surface: edge anchors, CSS vars, per-axis targeting

@@ -58,27 +56,26 @@ function Body() { tickAnchor: "edges" on frameProps.axes[i] {" "} - — flips the leftmost tick's text-anchor to start and - the rightmost to end on horizontal axes (and{" "} - dominant-baseline to hanging / auto on - vertical axes) so edge labels can't overflow the plot. Pairs naturally with{" "} - axisExtent: "exact": exact pins the domain to the literal data - min/max; edges keeps the labels readable at those bounds. Edge detection is - pixel-based, so inverted y scales and reversed-x streaming charts anchor + — flips the leftmost tick's text-anchor to start and the + rightmost to end on horizontal axes (and dominant-baseline to{" "} + hanging / auto on vertical axes) so edge labels can't + overflow the plot. Pairs naturally with axisExtent: "exact": exact pins the + domain to the literal data min/max; edges keeps the labels readable at those bounds. Edge + detection is pixel-based, so inverted y scales and reversed-x streaming charts anchor correctly.
  • - --semiotic-tick-font-size and{" "} - --semiotic-axis-label-font-size CSS variables + --semiotic-tick-font-size and --semiotic-axis-label-font-size{" "} + CSS variables {" "} — emitted from the canonical theme typography fields (tickSize,{" "} - labelSize) alongside the existing tick/title font-family/size - variables. Both themeToCSS and ThemeProvider write them;{" "} - themeToTokens exports them as DTCG dimension tokens. SVG - axes consume the vars via inline style, so an override on any - ancestor ({`
    `}) - flows down without consumers needing !important. + labelSize) alongside the existing tick/title font-family/size variables. Both{" "} + themeToCSS and ThemeProvider write them;{" "} + themeToTokens exports them as DTCG dimension tokens. SVG axes + consume the vars via inline style, so an override on any ancestor ( + {`
    `}) flows down without + consumers needing !important.
  • @@ -88,39 +85,36 @@ function Body() { {``}{" "} inside .stream-axes. Style one axis at a time from external CSS:{" "} {`[data-orient='left'] text { font-size: 14px }`}. Tick text carries{" "} - semiotic-axis-tick, axis labels{" "} - semiotic-axis-label, and chart titles semiotic-chart-title{" "} - for class-based targeting. + semiotic-axis-tick, axis labels semiotic-axis-label, and chart + titles semiotic-chart-title for class-based targeting.
  • loadingContent on every HOC

    Sibling to emptyContent. When loading is true and{" "} - loadingContent is set, it renders in place of the default shimmer-bar - skeleton (wrapped in the same sized container so the chart slot stays reserved). - Pass loadingContent={`{false}`} to suppress the loading UI entirely — - the early-return becomes null and a consumer's outer loading state - takes over. Threaded through useChartSetup,{" "} - useNetworkChartSetup, and useCustomChartSetup; all 47 HOCs - accept it via BaseChartProps. + loadingContent is set, it renders in place of the default shimmer-bar skeleton + (wrapped in the same sized container so the chart slot stays reserved). Pass{" "} + loadingContent={`{false}`} to suppress the loading UI entirely — the + early-return becomes null and a consumer's outer loading state takes over. + Threaded through useChartSetup, useNetworkChartSetup, and{" "} + useCustomChartSetup; all 47 HOCs accept it via BaseChartProps.

    One ribbon primitive for bounds and band

    - Both public envelope APIs (boundsAccessor and band) now - normalize to a single resolvedRibbons: ResolvedRibbon[] array at the - PipelineStore layer, then flow through xySceneBuilders/ribbonScene.ts — - one scene builder, one y-extent expansion pass, one style cascade. The dedicated{" "} - boundsScene.ts and bandScene.ts modules are gone. Public - prop surfaces stay distinct (asymmetric pairs read better as band than - as a boundsAccessor union return type), but the implementation is no - longer duplicated. + Both public envelope APIs (boundsAccessor and band) now normalize + to a single resolvedRibbons: ResolvedRibbon[] array at the PipelineStore layer, + then flow through xySceneBuilders/ribbonScene.ts — one scene builder, one + y-extent expansion pass, one style cascade. The dedicated boundsScene.ts and{" "} + bandScene.ts modules are gone. Public prop surfaces stay distinct (asymmetric + pairs read better as band than as a boundsAccessor union return + type), but the implementation is no longer duplicated.

    - Two correctness wins fell out of the unification: bounds ribbons now skip datums - with null/NaN y (the coerced +null === 0 previously - rendered a ribbon around the implicit-zero "value" of a missing row), and a{" "} + Two correctness wins fell out of the unification: bounds ribbons now skip datums with + null/NaN y (the coerced +null === 0 previously rendered a ribbon + around the implicit-zero "value" of a missing row), and a{" "} kind: "bounds" | "band" discriminator on each ribbon restricts{" "} datum.band / datum.bands tooltip enrichment to band-sourced envelopes — bounds stays decorative-only, matching its prior contract. @@ -129,10 +123,10 @@ function Body() {

    Upgrade notes

    This release is additive. Consumers already using boundsAccessor get the - null/NaN-row fix for free; anything that relied on the implicit-zero ribbon behavior - should switch to filtering at the data layer. The website build now injects the Atom - feed {``} via the prerender step instead of source - HTML, which closes a parcel resolution failure on nested prerendered routes. + null/NaN-row fix for free; anything that relied on the implicit-zero ribbon behavior should + switch to filtering at the data layer. The website build now injects the Atom feed{" "} + {``} via the prerender step instead of source HTML, which + closes a parcel resolution failure on nested prerendered routes.

    ) diff --git a/docs/src/blog/entries/release-3-6-0.js b/docs/src/blog/entries/release-3-6-0.js new file mode 100644 index 00000000..87d2a1c8 --- /dev/null +++ b/docs/src/blog/entries/release-3-6-0.js @@ -0,0 +1,208 @@ +/* eslint-disable react/no-unescaped-entities */ +import React from "react" +import { Link } from "react-router-dom" + +function Body() { + return ( + <> +

    + 3.6.0 is the AI release. The library has carried{" "} + observation hooks,{" "} + native annotations, and a{" "} + streaming-first runtime for a while; this + version turns those pieces into an explicit AI-facing surface. Charts now declare what + they're for, datasets get profiled and ranked, audiences get calibrated, and conversations + anchor back onto the chart instead of stopping at a chat bubble. Three case-study posts + published alongside this release walk through what that makes possible. Full release notes + are on{" "} + + GitHub + + . +

    + +

    Why this matters

    +

    + The default pattern for "AI on a chart" today is a chat box next to the visualization. The + user types a question in prose, the model answers in prose, and the chart is decorative. + Both ends lose information: the user has to verbalize which point they meant; the model has + to verbalize where the answer applies. The chart already encodes those spatial signals on + screen. The 3.6.0 surface is a bet that the right shape isn't "chat with a chart" but{" "} + two-way structured context — charts emit profiled data and capability descriptors, + models consume those and return annotations the chart natively renders. +

    +

    + Three things compose under that frame: a heuristic recommendation engine that ranks charts + for a dataset (so any agent can answer "which chart?"), a focus + interrogation pair that + gives a chat surface a point-of-anchor (so any agent can answer "about which row?"), and a + capability descriptor per chart that turns the library itself into a structured catalog (so + an LLM can reason about the visualization options without prompt-stuffing the entire + reference docs). None of these primitives require an LLM to be useful — the recommender is + offline-deterministic; the interrogation hook is headless — but each one produces the kind + of structured context that lands cleanly when a model is on the other side. +

    + +

    A chart recommendation engine — heuristic-first, LLM-optional

    +

    + Every chart in the library now ships a capability descriptor: what data shapes it serves, + what intents (`trend`, `correlation`, `distribution`, `part-to-whole`, nine more) it + answers well, what settings change those answers, and what `buildProps` would look like + against a given profile. suggestCharts(data, options?) returns a ranked list + of suggestions with runnable props, an audit trail of reasons, and caveats. Pair it with an{" "} + AudienceProfile — a serializable per-organization config of familiarity + numbers and adoption targets — and the ranking calibrates to who is actually reading.

    +

    + The deeper architectural move is that the same descriptors feed{" "} + suggestDashboard (composite multi-intent views with honest{" "} + intentsMissing reporting),{" "} + suggestStretchCharts (a literacy-growth surface that shows charts the audience + is unfamiliar with but the data actually supports), scoreChart (single-chart + introspection), useChartSuggestions (the React hook), and the MCP server's{" "} + suggestCharts tool. One catalog, many surfaces. The post on{" "} + + Charts that know what they're for + {" "} + walks through the design, the audience layer, and the stretch surface in detail. +

    + +

    Anchored conversation — focus + interrogation + annotation

    +

    + The other half of "AI on a chart" is what happens when the user wants to ask about{" "} + this point. Two new hooks compose into that pattern:{" "} + useChartFocus subscribes to the chart's observation store and returns the + latest hover/click as {`{ datum, x, y, source }`};{" "} + useChartInterrogation gives consumers a{" "} + {`{ ask, history, summary, annotations, loading, error, reset }`} surface + where the consumer brings their own LLM via onQuery. The hook supplies the + model with the profiled data summary, the suggestion list (when includeSuggestions is enabled), and the current focus datum as + structured context; the model returns annotations the chart natively renders.

    +

    + The detail post —{" "} + Anchored conversations — works through the + bidirectional loop: the user points at a data point, the AI answers about that specific + point, and the answer lives on the chart as a clickable note. Pronouns work. Comparisons + get cheap. Answers persist where they're useful. The chart accumulates institutional + knowledge about itself. +

    +

    + Compose that with the realtime runtime and the chat surface flips from passive observer to + active narrator:{" "} + Live conversational dashboards{" "} + sketches the product shape — streaming data + an AI watching alongside you + anchored + annotations + a conversational follow-up surface — and walks through the pieces that + compose it. +

    + +

    Capability descriptor refinements

    +

    + Authoring the per-chart descriptors surfaced a few cases where the chart family's + recommendation behavior was wrong on its face. Those are tightened in this release: +

    +
      +
    • + + AreaChart is now a single-series chart. + {" "} + Multi-series area overlays are an occlusion nightmare; the capability subselects to the + leading series (largest cumulative y) when the input has 2+ groups and surfaces a caveat. + Gradient fill is the baseline default. AreaChart now outranks LineChart on single-series + trend (the gradient is more visually arresting than a thin line); LineChart still wins on + multi-series because it shows the whole dataset instead of one slice. +
    • +
    • + + DifferenceChart accepts 2+ series via top-2 subselection. + {" "} + Previously rejected anything other than exactly two series; now picks the two series with + the highest cumulative y and emits a caveat when subselecting from 3+. Makes the chart a + real alternative on multi-series data where the comparison-between-two story is the + interesting one. +
    • +
    • + + Scatterplot and ConnectedScatterplot prefer the canonical + 2-numeric form when a sequence axis is present. + {" "} + On {`{quarter, revenue, profit}`} data both charts now plot revenue × + profit (the canonical correlation form) instead of recapitulating a line chart on + quarter. ConnectedScatterplot threads the sequence as orderAccessor so the + path encodes temporal progression — Hans Rosling's "income vs life expectancy over + years" shape, served automatically when the data supports it. +
    • +
    • + + X_FIELD_HINT recognizes calendar-segment field names. + {" "} + The profiler's x-axis name regex now matches quarter, qtr,{" "} + fiscal, and week. Without this,{" "} + {`{quarter, revenue, region}`} data fell into scatter-fallback provenance + and series detection never fired — lineBy / areaBy were + silently dropped and multi-series time-series charts zigzagged across regions. +
    • +
    + +

    For agents — the MCP server and the CLI

    +

    + npx semiotic-mcp launches a Model Context Protocol server that exposes{" "} + renderChart, interrogateChart, suggestCharts, and{" "} + diagnoseConfig as MCP tools. Agents inside Claude Code, Cursor, Windsurf, and + other MCP-aware environments can drive Semiotic directly — render a static SVG, profile a + dataset, ask the recommender for a ranked list, repair a config that doesn't validate.{" "} + npx semiotic-ai --doctor covers the CLI variant: pass a{" "} + {`{component, props, data}`} JSON spec and get back a validated config (or a + ranked list of alternatives if the requested chart doesn't fit the data). +

    + +

    Upgrade notes

    +

    + Most of 3.6.0 is additive. The capability-descriptor refinements above are the only + behavior changes worth flagging:

    +
      +
    • + AreaChart on multi-series data. If you were passing + multi-series data to AreaChart directly (not via suggestCharts) + and relying on the chart to render overlapping multi-area output, that path still works + at the chart level — the capability change affects what the recommender suggests, not the + chart's prop surface. The chart's areaBy prop is untouched. The change is + about{" "} + suggestCharts output: AreaChart suggestions now subselect their data. +
    • +
    • + Scatterplot's x/y on sequence-shaped data. Same caveat — the chart still + plots whatever you pass it; the recommender's buildProps output changes. Any + code reading suggestion.props.xAccessor / yAccessor for charts + with {`{sequence, num1, num2}`} shape will now see the two numerics in + place of the sequence. +
    • +
    • + + DifferenceChart data pivoting. + {" "} + The chart's wide-form data contract (`{`{x, a, b}`}`) is unchanged. The recommender now + pivots long-form input automatically and emits the wide form on{" "} + suggestion.props.data. +
    • +
    + + ) +} + +export default { + slug: "release-3-6-0", + title: "Semiotic 3.6.0", + subtitle: + "The AI release. A heuristic chart recommender, audience-aware ranking, focus + interrogation hooks for two-way anchored conversation, an MCP server, and a per-chart capability layer that makes the library itself a structured catalog.", + author: "AI-Generated", + date: "2026-05-31", + tags: ["release"], + excerpt: + "3.6.0 turns Semiotic's observation hooks, native annotations, and streaming runtime into an explicit AI-facing surface. Charts declare what they're for; datasets get profiled and ranked; audiences get calibrated; conversations anchor back to the chart instead of stopping at a chat bubble. Three case-study posts published alongside the release walk through what the new shape makes possible.", + component: Body, +} diff --git a/docs/src/components/navData.js b/docs/src/components/navData.js index d24f377d..5256c03c 100644 --- a/docs/src/components/navData.js +++ b/docs/src/components/navData.js @@ -115,14 +115,22 @@ const navData = [ { title: "Chart Container", path: "/features/chart-container" }, { title: "Chart States", path: "/features/chart-states" }, { title: "Chart Modes", path: "/features/chart-modes" }, - { title: "AI Observation Hooks", path: "/features/observation-hooks" }, - { title: "Serialization", path: "/features/serialization" }, - { title: "Vega-Lite Translator", path: "/features/vega-lite" }, { title: "Streaming System Model", path: "/features/streaming-system-model" }, { title: "Performance", path: "/features/performance" }, { title: "Push API", path: "/features/push-api" }, - { title: "Custom Charts", path: "/features/custom-charts" }, - { title: "Capability Matrix", path: "/features/capabilities" } + { title: "Custom Charts", path: "/features/custom-charts" } + ] + }, + { + title: "Intelligence", + path: "/intelligence", + children: [ + { title: "Observation Hooks", path: "/intelligence/observation-hooks" }, + { title: "Capability 
Matrix", path: "/intelligence/capabilities" }, + { title: "Chart Suggestions", path: "/intelligence/suggestions" }, + { title: "Interrogation", path: "/intelligence/interrogation" }, + { title: "Serialization", path: "/intelligence/serialization" }, + { title: "Vega-Lite Translator", path: "/intelligence/vega-lite" } ] }, { diff --git a/docs/src/pages/features/CapabilitiesPage.js b/docs/src/pages/features/CapabilitiesPage.js index f087e539..8fea9af5 100644 --- a/docs/src/pages/features/CapabilitiesPage.js +++ b/docs/src/pages/features/CapabilitiesPage.js @@ -123,11 +123,11 @@ export default function CapabilitiesPage() {

    Every Semiotic chart declares a fixed set of capabilities — does it diff --git a/docs/src/pages/features/InterrogationPage.js b/docs/src/pages/features/InterrogationPage.js new file mode 100644 index 00000000..5a27d232 --- /dev/null +++ b/docs/src/pages/features/InterrogationPage.js @@ -0,0 +1,260 @@ +import React, { useState } from "react" +import { LineChart, useChartInterrogation } from "semiotic/ai" +import PageLayout from "../../components/PageLayout" +import CodeBlock from "../../components/CodeBlock" + +const MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun"] +const salesData = [ + ...[1200, 2100, 1800, 3200, 2800, 4500].map((revenue, i) => ({ + month: i + 1, + monthLabel: MONTHS[i], + revenue, + category: "Software", + })), + ...[800, 1200, 1500, 1100, 1900, 2200].map((revenue, i) => ({ + month: i + 1, + monthLabel: MONTHS[i], + revenue, + category: "Hardware", + })), +] +const monthFormat = (m) => MONTHS[m - 1] ?? "" + +// Stand-in for a real LLM call. In production this would POST to your AI endpoint +// with the user's question and `context.summary`. The shape of the return value is +// the contract: `{ answer, annotations }`. 
+async function simulatedQuery(query, context) { + await new Promise((r) => setTimeout(r, 500)) + const q = query.toLowerCase() + const rev = context.summary.fields.revenue + if (q.includes("peak") || q.includes("highest")) { + return { + answer: `The peak revenue was $${rev?.max?.toLocaleString()} in June, driven by Software.`, + annotations: [{ type: "callout", month: 6, revenue: 4500, label: "Peak" }], + } + } + if (q.includes("software")) { + return { + answer: "Software more than tripled from Jan to Jun — a strong upward trend.", + annotations: [{ type: "trend", lineBy: "Software", label: "Software trend" }], + } + } + if (q.includes("hardware")) { + return { + answer: "Hardware grew steadily, peaking at $2,200 in June.", + annotations: [{ type: "callout", month: 6, revenue: 2200, label: "Hardware peak" }], + } + } + return { + answer: `Across ${context.summary.rowCount} rows, mean revenue is $${rev?.mean?.toFixed(0)}. Try asking about the peak, software, or hardware.`, + annotations: [], + } +} + +function ChatPanel({ history, loading, onAsk, placeholder }) { + const [input, setInput] = useState("") + const submit = (e) => { + e.preventDefault() + onAsk(input) + setInput("") + } + return ( +

    +
    + {history.length === 0 && ( +
    + Ask about trends, outliers, or specific data points. +
    + )} + {history.map((m, i) => ( +
    {m.text}
    + ))} + {loading &&
    Analyzing…
    } +
    +
    + setInput(e.target.value)} + placeholder={placeholder} + disabled={loading} + style={{ + flex: 1, + padding: "8px 12px", + borderRadius: 16, + border: "1px solid var(--surface-3)", + background: "var(--background)", + color: "var(--text)", + }} + /> + +
    +
    + ) +} + +function InterrogationDemo() { + const { ask, history, annotations, loading } = useChartInterrogation({ + data: salesData, + onQuery: simulatedQuery, + componentName: "LineChart", + props: { xAccessor: "month", yAccessor: "revenue", lineBy: "category" }, + }) + return ( +
    + + +
    + ) +} + +export default function InterrogationPage() { + return ( + +

    + Semiotic ships a headless hook, useChartInterrogation, that lets users + ask natural-language questions about a chart. It pairs an LLM-friendly{" "} + statistical summary of your data with a contract for{" "} + visual highlighting: your AI returns annotations, the chart renders them. +

    + +

    + The hook owns no UI. You bring your own chat surface — input box, transcript, panel, + whatever fits your product. The demo below is ~70 lines of plain React for context. +

    + +

    Interactive Demo

    +

    + The demo uses a canned onQuery in place of a real LLM. Try{" "} + "where is the peak?", "tell me about software", or{" "} + "hardware growth". +

    + +
    + +
    + +

    How it works

    +
      +
    1. Summarize: useChartInterrogation runs summarizeData on your data — min, max, mean, median, top categorical values, date ranges.
    2. +
    3. Ask: Your onQuery receives the question plus the summary and any props you passed. Call your LLM, return {`{ answer, annotations }`}.
    4. +
    5. Render: The hook merges your initial annotations with the AI's response and exposes the combined array — wire it to the chart's annotations prop.
    6. +
    + +

    Implementation

    + +{`import { LineChart, useChartInterrogation } from "semiotic/ai" + +function InterrogatableChart({ data }) { + const { ask, history, annotations, loading } = useChartInterrogation({ + data, + componentName: "LineChart", + props: { xAccessor: "month", yAccessor: "revenue" }, + onQuery: async (query, context) => { + const res = await fetch("/api/chat", { + method: "POST", + body: JSON.stringify({ query, summary: context.summary }), + }).then((r) => r.json()) + return { answer: res.text, annotations: res.highlights } + }, + }) + + return ( + <> + + + + ) +}`} + + +

    The statistical summary

    +

    + context.summary is the payload to send to an LLM. It's compact, typed, and + avoids shipping raw rows: +

    + +{`{ + "rowCount": 12, + "fields": { + "revenue": { + "type": "numeric", + "min": 800, + "max": 4500, + "mean": 2025, + "median": 1850 + }, + "category": { + "type": "categorical", + "distinctCount": 2, + "topValues": [ + { "value": "Software", "count": 6 }, + { "value": "Hardware", "count": 6 } + ] + } + } +}`} + + +

    + Use summarizeData directly if you want the summary without the hook — + for server-side prompting, batch jobs, or the interrogateChart MCP tool. +

    +
    + ) +} diff --git a/docs/src/pages/features/ObservationHooksPage.js b/docs/src/pages/features/ObservationHooksPage.js index 25ed8a89..d9eee449 100644 --- a/docs/src/pages/features/ObservationHooksPage.js +++ b/docs/src/pages/features/ObservationHooksPage.js @@ -219,13 +219,13 @@ function LinkedObserverDemo() { export default function ObservationHooksPage() { return (

    Every Semiotic chart accepts an onObservation callback that diff --git a/docs/src/pages/features/PushApiPage.js b/docs/src/pages/features/PushApiPage.js index 4821d78d..636bd027 100644 --- a/docs/src/pages/features/PushApiPage.js +++ b/docs/src/pages/features/PushApiPage.js @@ -250,7 +250,7 @@ export default function PushApiPage() { { label: "Push API", path: "/features/push-api" }, ]} prevPage={{ title: "Performance", path: "/features/performance" }} - nextPage={{ title: "Styling", path: "/theming/styling" }} + nextPage={{ title: "Observation Hooks", path: "/intelligence/observation-hooks" }} >

    The push API lets you imperatively add, remove, and update data on a chart diff --git a/docs/src/pages/features/SerializationPage.js b/docs/src/pages/features/SerializationPage.js index a9c88844..7a7092d2 100644 --- a/docs/src/pages/features/SerializationPage.js +++ b/docs/src/pages/features/SerializationPage.js @@ -308,11 +308,11 @@ export default function SerializationPage() {

    Serialize any chart's configuration to JSON, encode it as a URL for diff --git a/docs/src/pages/features/SuggestionsPage.js b/docs/src/pages/features/SuggestionsPage.js new file mode 100644 index 00000000..b5ead0d0 --- /dev/null +++ b/docs/src/pages/features/SuggestionsPage.js @@ -0,0 +1,348 @@ +import React, { useState } from "react" +import { + useChartSuggestions, + LineChart, + AreaChart, + StackedAreaChart, + Scatterplot, + ConnectedScatterplot, + BubbleChart, + QuadrantChart, + MultiAxisLineChart, + MinimapChart, + DifferenceChart, + CandlestickChart, + Heatmap, + BarChart, + GroupedBarChart, + StackedBarChart, + DotPlot, + Histogram, + BoxPlot, + SwarmPlot, + ViolinPlot, + RidgelinePlot, + PieChart, + DonutChart, + FunnelChart, + GaugeChart, + LikertChart, + SwimlaneChart, +} from "semiotic/ai" +import PageLayout from "../../components/PageLayout" +import CodeBlock from "../../components/CodeBlock" + +// Comprehensive map of HOC chart names → React components. Realtime, +// network, and geo families are intentionally omitted — the SuggestionsPage +// demo datasets are all row-shaped tabular data that won't trigger those. +// If the engine recommends a chart not listed here, the demo falls back to +// the next renderable suggestion (with a note that the top pick wasn't +// available in this surface). +const COMPONENT_MAP = { + LineChart, + AreaChart, + StackedAreaChart, + Scatterplot, + ConnectedScatterplot, + BubbleChart, + QuadrantChart, + MultiAxisLineChart, + MinimapChart, + DifferenceChart, + CandlestickChart, + Heatmap, + BarChart, + GroupedBarChart, + StackedBarChart, + DotPlot, + Histogram, + BoxPlot, + SwarmPlot, + ViolinPlot, + RidgelinePlot, + PieChart, + DonutChart, + FunnelChart, + GaugeChart, + LikertChart, + SwimlaneChart, +} + +const DATASETS = { + temporal: { + label: "Temporal multi-series", + description: "Two regions, six months of revenue. 
Time x-axis, categorical series.", + data: [ + ...[1200, 1400, 1100, 1700, 1900, 2200].map((revenue, i) => ({ month: i + 1, revenue, region: "EU" })), + ...[900, 1100, 1500, 1300, 1700, 2000].map((revenue, i) => ({ month: i + 1, revenue, region: "NA" })), + ], + }, + categorical: { + label: "Categorical totals", + description: "Four products, one numeric. Classic bar-chart shape.", + data: [ + { product: "Widget", units: 30 }, + { product: "Gadget", units: 50 }, + { product: "Sprocket", units: 20 }, + { product: "Whatsit", units: 45 }, + ], + }, + distribution: { + label: "Distribution", + description: "100 numeric observations — best read as a distribution.", + data: Array.from({ length: 100 }, (_, i) => ({ + observation: 50 + Math.sin(i / 7) * 18 + (i % 5 === 0 ? 25 : 0) + Math.random() * 6, + })), + }, + scatter: { + label: "Two-numeric relationship", + description: "x and y are both numeric without time semantics.", + data: Array.from({ length: 60 }, () => { + const x = Math.random() * 100 + return { x, y: x * 0.6 + Math.random() * 25 } + }), + }, +} + +const INTENTS = [ + { id: "", label: "Any intent" }, + { id: "trend", label: "Trend" }, + { id: "compare-categories", label: "Compare categories" }, + { id: "rank", label: "Rank" }, + { id: "part-to-whole", label: "Part to whole" }, + { id: "distribution", label: "Distribution" }, + { id: "correlation", label: "Correlation" }, + { id: "composition-over-time", label: "Composition over time" }, +] + +function SuggestionsDemo() { + const [datasetKey, setDatasetKey] = useState("temporal") + const [intent, setIntent] = useState("") + const dataset = DATASETS[datasetKey] + + const { suggestions, profile } = useChartSuggestions(dataset.data, { + intent: intent || undefined, + maxResults: 6, + includeVariants: true, + }) + + // Find the highest-ranked suggestion this surface can render. 
The engine's + // actual top pick is shown in the "All suggestions" sidebar regardless; + // the rendered preview falls back to the next renderable one if the very + // top isn't in this demo's COMPONENT_MAP. + const Top = suggestions.find((s) => COMPONENT_MAP[s.component]) ?? null + const Component = Top && COMPONENT_MAP[Top.component] + const trueTop = suggestions[0] + const topNotRenderable = trueTop && Top && trueTop.component !== Top.component + + return ( +

    +
    + + +
    + +

    {dataset.description}

    + +
    +
    + {Component && Top ? ( + <> +
    + {topNotRenderable ? "Top renderable suggestion: " : "Top suggestion: "} + {Top.component}{Top.variant ? ` · ${Top.variant.label}` : ""} +
    + {topNotRenderable && ( +
    + Engine's actual top pick was {trueTop.component} — not included + in this demo's render map. See the all-suggestions sidebar for the full ranking. +
    + )} + + + ) : ( +
    No fitting chart for this profile.
    + )} +
    + +
    +
    + All suggestions (ranked) +
    +
    + {suggestions.map((s, i) => ( +
    +
    + {s.component}{s.variant ? ` · ${s.variant.label}` : ""} + {s.score.toFixed(1)}/5 +
    +
    + fam {s.rubric.familiarity} · acc {s.rubric.accuracy} · prec {s.rubric.precision} +
    + {s.reasons.length > 0 && ( +
    + {s.reasons.join("; ")} +
    + )} + {s.caveats.length > 0 && ( +
    + {s.caveats.join("; ")} +
    + )} +
    + ))} +
    +
    +
    + +
    + Shape profile +
    {JSON.stringify({
    +          rowCount: profile.rowCount,
    +          primary: profile.primary,
    +          categoryCount: profile.categoryCount,
    +          seriesCount: profile.seriesCount,
    +          uniqueXCount: profile.uniqueXCount,
    +          hasRepeatedX: profile.hasRepeatedX,
    +          monotonicX: profile.monotonicX,
    +          hasTimeAxis: profile.hasTimeAxis,
    +        }, null, 2)}
    +
    +
    + ) +} + +export default function SuggestionsPage() { + return ( + +

    + Semiotic charts ship capability descriptors alongside their components. + Each chart declares what data shapes it serves, which intents it answers, what variants + change those answers, and which props to use for a given dataset. The{" "} + useChartSuggestions hook walks the registry and returns a ranked, ready-to-render + list. Heuristic only — no LLM call. Pair with{" "} + useChartInterrogation to let an LLM re-rank or narrate. +

    + +

    Interactive demo

    +

    + Pick a dataset and (optionally) an intent. The same profile is evaluated against every + registered capability and its variants. The top suggestion's props drop straight + into the matching chart. +

    + + + +

    How it composes

    +
      +
    1. profileData(data) infers candidate x/y/series/category fields, distinct counts, monotonicity, and structure (hierarchy/network/geo).
    2. +
    3. For each capability: fits(profile) is a hard gate (returns null to pass).
    4. +
    5. intentScores are evaluated (numbers or profile-aware functions).
    6. +
    7. Variants apply additive intentDeltas and rubricDeltas.
    8. +
    9. Suggestions are sorted by the requested intent (or mean across intents).
    10. +
    11. buildProps(profile, variant) returns spreadable props for the chart.
    12. +
    + +

    Implementation

    + +{`import { useChartSuggestions, LineChart, BarChart, /* ... */ } from "semiotic/ai" + +const COMPONENT_MAP = { LineChart, BarChart, /* ... */ } + +function SuggestedChart({ data, intent }) { + const { suggestions } = useChartSuggestions(data, { intent }) + const top = suggestions[0] + if (!top) return

    No fitting chart for this data.

    + const Component = COMPONENT_MAP[top.component] + return +}`} +
    + +

    Charts know what they're good for

    +

    + Each chart's capability lives next to its TSX file (e.g.{" "} + LineChart.capability.ts). It declares fits,{" "} + intentScores, variants, caveats, and{" "} + buildProps. Variants encode the idea that{" "} + settings change what a chart is good for — a stacked area with the{" "} + streamgraph variant boosts trend readability but penalizes{" "} + part-to-whole (because totals become unreadable). Those tradeoffs surface in the + suggestion's intentScores, caveats, and{" "} + reasons. +

    + +

    Tying in interrogation

    +

    + Set includeSuggestions: true on useChartInterrogation and the same + ranked list lands in the LLM's context.suggestions. Use it to answer + questions like "would another chart show this better?" without re-deriving rules. +

    + +

    Adding a custom capability

    + +{`import { registerChartCapability } from "semiotic/ai" + +registerChartCapability({ + component: "MyDomainChart", + family: "categorical", + importPath: "semiotic", + rubric: { familiarity: 2, accuracy: 4, precision: 4 }, + fits: (profile) => profile.primary.category ? null : "needs a category field", + intentScores: { "compare-categories": 5, "rank": 4 }, + buildProps: (profile) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + }), +})`} + +
    + ) +} diff --git a/docs/src/pages/features/VegaLiteTranslatorPage.js b/docs/src/pages/features/VegaLiteTranslatorPage.js index c2f888e9..d28c0b37 100644 --- a/docs/src/pages/features/VegaLiteTranslatorPage.js +++ b/docs/src/pages/features/VegaLiteTranslatorPage.js @@ -236,10 +236,11 @@ export default function VegaLiteTranslatorPage() { {/* ── Why ────────────────────────────────────────────────────────── */}
    diff --git a/etc/api-surface/semiotic-ai.api.md b/etc/api-surface/semiotic-ai.api.md index 8a809ff5..d79b071c 100644 --- a/etc/api-surface/semiotic-ai.api.md +++ b/etc/api-surface/semiotic-ai.api.md @@ -4,6 +4,51 @@ _Auto-generated by `scripts/generate-api-surface.mjs` from `dist/semiotic-ai.d.t _Edit dist/semiotic-ai.d.ts's sources, then re-run `npm run docs:api-surface` to refresh._ ``` +const AreaChartCapability +const BUILT_IN_AUDIENCES +const BUILT_IN_INTENT_IDS +const BarChartCapability +const BoxPlotCapability +const BubbleChartCapability +const CANONICAL_FIXTURES +const CandlestickChartCapability +const ChordDiagramCapability +const ChoroplethMapCapability +const CirclePackCapability +const ConnectedScatterplotCapability +const DifferenceChartCapability +const DistanceCartogramCapability +const DonutChartCapability +const DotPlotCapability +const FlowMapCapability +const ForceDirectedGraphCapability +const FunnelChartCapability +const GaugeChartCapability +const GroupedBarChartCapability +const HeatmapCapability +const HistogramCapability +const LikertChartCapability +const LineChartCapability +const MinimapChartCapability +const MultiAxisLineChartCapability +const OrbitDiagramCapability +const PieChartCapability +const ProcessSankeyCapability +const ProportionalSymbolMapCapability +const QuadrantChartCapability +const RidgelinePlotCapability +const SankeyDiagramCapability +const ScatterplotCapability +const StackedAreaChartCapability +const StackedBarChartCapability +const SwarmPlotCapability +const SwimlaneChartCapability +const TreeDiagramCapability +const TreemapCapability +const ViolinPlotCapability +const analystPersona +const dataScientistPersona +const executivePersona function AreaChart function BarChart function BoxPlot @@ -55,55 +100,148 @@ function TooltipProvider function TreeDiagram function Treemap function ViolinPlot +function applyAudienceBias function configToJSX function copyConfig function deserializeSelections function 
diagnoseConfig +function diffProfile +function effectiveFamiliarity +function explainCapabilityFit function exportChart function fromConfig function fromURL function fromVegaLite +function getCapabilities +function getCapability +function getIntent +function getStreamCapabilities +function inferIntent +function listIntents +function profileData +function registerChartCapability +function registerIntent +function registerStreamChartCapability +function repairChartConfig +function runQualityScorecard +function scoreChart function serializeSelections +function stretchFamiliarityCeiling +function suggestCharts +function suggestDashboard +function suggestStreamCharts +function suggestStretchCharts +function summarizeData function toConfig function toURL +function unregisterChartCapability +function unregisterStreamChartCapability function useBrushSelection function useCategoryColors +function useChartFocus +function useChartInterrogation function useChartObserver +function useChartSuggestions function useFilteredData function useLinkedHover function useSelection function useTheme function validateProps interface AnomalyConfig +interface AudienceBiasResult +interface AudienceProfile +interface AudienceTarget interface BrushEndObservation interface BrushObservation +interface CategoricalFieldSummary interface CategoryColorProviderProps +interface ChartCapability interface ChartConfig interface ChartContainerHandle interface ChartContainerProps +interface ChartDataProfile interface ChartGridProps +interface ChartRubric +interface ChartVariant interface ClickEndObservation interface ClickObservation interface ContextLayoutProps +interface DashboardPanel +interface DashboardSuggestion +interface DataSummary +interface DateFieldSummary interface DetailsPanelProps interface Diagnosis interface DiagnosisResult +interface ExplainCapabilityFitResult +interface FieldCandidate +interface FieldTypeChange interface ForecastConfig interface HoverEndObservation interface 
HoverObservation +interface InferIntentResult +interface IntentDescriptor +interface InterrogationContext +interface InterrogationFocus +interface InterrogationMessage +interface InterrogationResult +interface NumericFieldSummary +interface PerCapabilityScore +interface PerFixtureScore +interface PrimaryRoleChange +interface ProfileDataOptions +interface ProfileDiff +interface RejectedCapability +interface RepairAlternativeResult +interface RepairOkResult +interface RepairOptions +interface RepairUnknownResult +interface ScorecardFixture +interface ScorecardReport interface SelectionEndObservation interface SelectionObservation interface SerializedSelection +interface StreamChartCapability +interface StreamFieldSchema +interface StreamSchema +interface StreamSuggestion +interface StretchSuggestion +interface SuggestChartsOptions +interface SuggestDashboardOptions +interface SuggestStreamChartsOptions +interface SuggestStretchChartsOptions +interface Suggestion +interface SummarizeOptions interface ToConfigOptions +interface UnknownFieldSummary +interface UseChartFocusOptions +interface UseChartInterrogationOptions +interface UseChartInterrogationResult interface UseChartObserverOptions interface UseChartObserverResult +interface UseChartSuggestionsOptions +interface UseChartSuggestionsResult interface ValidationResult interface VegaLiteEncoding interface VegaLiteSpec +type BuiltInIntentId type CategoryColorMap +type ChartFamily +type ChartImportPath type ChartObservation type CopyFormat +type FieldKind +type FieldSummary +type FieldType +type FitResult +type IntentId +type IntentScorer +type InterrogationQuery type OnObservationCallback +type PrimaryRole +type RepairResult type SerializedFieldSelection type SerializedSelections +type StreamFieldKind +type StreamIntentScorer ``` diff --git a/package-lock.json b/package-lock.json index 70d3830f..c22298f9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "semiotic", - "version": 
"3.5.4", + "version": "3.6.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "semiotic", - "version": "3.5.4", + "version": "3.6.0", "license": "Apache-2.0", "dependencies": { "d3-array": "^3.2.4", diff --git a/package.json b/package.json index 2f14f7ac..a0561c99 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "semiotic", - "version": "3.5.4", + "version": "3.6.0", "mcpName": "io.github.nteract/semiotic", "description": "React data visualization library with built-in MCP server for AI-assisted chart generation", "main": "dist/semiotic.min.js", @@ -155,6 +155,8 @@ "check:ai-contracts": "node scripts/generate-ai-behavior-contracts.mjs --check", "check:ssr": "node scripts/check-ssr-alignment.js", "check:capabilities": "node scripts/check-capabilities.mjs", + "check:capability-coverage": "node scripts/check-capability-coverage.mjs", + "scorecard": "node scripts/run-capability-scorecard.mjs", "docs:capabilities": "node scripts/generate-capabilities-md.mjs && node scripts/generate-capabilities-json.mjs", "check:chart-specs": "npx tsx scripts/check-chart-specs.ts", "docs:chart-specs:schema": "npx tsx scripts/regenerate-schema.ts", @@ -165,8 +167,8 @@ "check:blog-entries": "node scripts/check-blog-entry-sync.mjs", "check:bundle-sizes": "node scripts/sync-bundle-sizes.mjs --check", "docs:bundle-sizes": "node scripts/sync-bundle-sizes.mjs", - "release:check": "npm run lint && npm run typescript && npm run typescript:tests && npm run typescript:mcp && npm run test && npm run check:chart-specs && npm run check:capabilities && npm run check:blog-entries && npm run check:claude-md-coverage && npm run check:context7 && npm run check:mcp-registry && npm run check:surface && npm run check:ai-contracts && npm run check:ssr && npm run check:test-quality && npm run check:jsdoc-coverage && npm run check:ai-examples-coverage && npm run dist:prod && npm run check:bundle-sizes && npm run size && npm run check:pack && npm pack --dry-run", - 
"prepublishOnly": "npm run lint && npm run typescript && npm run typescript:tests && npm run typescript:mcp && npm run test && npm run check:chart-specs && npm run check:capabilities && npm run check:blog-entries && npm run check:claude-md-coverage && npm run check:context7 && npm run check:mcp-registry && npm run check:surface && npm run check:ai-contracts && npm run check:ssr && npm run check:test-quality && npm run check:jsdoc-coverage && npm run check:ai-examples-coverage && rm -rf dist && npm run dist:prod && npm run check:bundle-sizes && npm run size" + "release:check": "npm run lint && npm run typescript && npm run typescript:tests && npm run typescript:mcp && npm run test && npm run check:chart-specs && npm run check:capabilities && npm run check:capability-coverage && npm run check:blog-entries && npm run check:claude-md-coverage && npm run check:context7 && npm run check:mcp-registry && npm run check:surface && npm run check:ai-contracts && npm run check:ssr && npm run check:test-quality && npm run check:jsdoc-coverage && npm run check:ai-examples-coverage && npm run dist:prod && npm run check:bundle-sizes && npm run size && npm run check:pack && npm pack --dry-run", + "prepublishOnly": "npm run lint && npm run typescript && npm run typescript:tests && npm run typescript:mcp && npm run test && npm run check:chart-specs && npm run check:capabilities && npm run check:capability-coverage && npm run check:blog-entries && npm run check:claude-md-coverage && npm run check:context7 && npm run check:mcp-registry && npm run check:surface && npm run check:ai-contracts && npm run check:ssr && npm run check:test-quality && npm run check:jsdoc-coverage && npm run check:ai-examples-coverage && rm -rf dist && npm run dist:prod && npm run check:bundle-sizes && npm run size" }, "targets": { "website": { @@ -181,9 +183,16 @@ }, "alias": { "semiotic": "./src/components/semiotic.ts", + "semiotic/ai": "./src/components/semiotic-ai.ts", + "semiotic/data": 
/**
 * Read the boolean `draft` flag out of a blog-entry source snippet.
 *
 * The match is anchored on word boundaries so that only a standalone
 * `draft:` key followed by a bare `true` / `false` literal counts. Keys that
 * merely end in "draft" (e.g. `is_draft:`) and values that merely start with
 * a boolean (e.g. `trueish`) are ignored.
 *
 * @param source Text of an entry file, or of a single object-literal block.
 * @returns `true` / `false` when a `draft: <boolean>` pair is present,
 *   `undefined` when the field is absent.
 */
function readDraftFlag(source) {
  const match = source.match(/\bdraft:\s*(true|false)\b/)
  return match ? match[1] === "true" : undefined
}
`blogEntries` is + // derived via filter and so isn't a literal array — we always read the + // source-of-truth literal. + const arrayMatch = source.match(/export const allBlogEntries\s*=\s*\[([\s\S]*?)\]/m) + if (!arrayMatch) throw new Error("Could not find `export const allBlogEntries = [...]`") const names = [...arrayMatch[1].matchAll(/\b([A-Za-z_$][\w$]*)\b/g)].map((m) => m[1]) return names.map((name) => { const entryPath = imports.get(name) @@ -110,7 +121,9 @@ function objectBlocksFromArray(source, marker) { function parseMetaRegistry() { const source = readFileSync(META_JS, "utf8") - return objectBlocksFromArray(source, "blogEntriesMeta").map((block) => ({ + // Read `allBlogEntriesMeta` literal — `blogEntriesMeta` is the filtered + // alias and isn't an array literal at parse time. + return objectBlocksFromArray(source, "allBlogEntriesMeta").map((block) => ({ slug: readStringField(block, "slug"), title: readStringField(block, "title"), subtitle: readStringField(block, "subtitle"), @@ -119,6 +132,7 @@ function parseMetaRegistry() { tags: readTags(block), excerpt: readStringField(block, "excerpt"), ogChart: readOgChart(block), + draft: readDraftFlag(block), })) } @@ -167,6 +181,13 @@ for (let i = 0; i < max; i++) { if (fullOg !== mirrorOg) { fail(errors, `${full.name}.ogChart.component drift: entries.js=${JSON.stringify(fullOg)}, entries-meta.js=${JSON.stringify(mirrorOg)}`) } + // Treat absent and false as the same — `draft: false` and no `draft` field + // are equivalent (entry is published). 
+ const fullDraft = full.draft === true + const mirrorDraft = mirror.draft === true + if (fullDraft !== mirrorDraft) { + fail(errors, `${full.name}.draft drift: entries.js=${fullDraft}, entries-meta.js=${mirrorDraft}`) + } } if (errors.length > 0) { diff --git a/scripts/check-capability-coverage.mjs b/scripts/check-capability-coverage.mjs new file mode 100644 index 00000000..235f3206 --- /dev/null +++ b/scripts/check-capability-coverage.mjs @@ -0,0 +1,134 @@ +#!/usr/bin/env node +/** + * Capability-descriptor coverage check. + * + * Every HOC chart listed in `ai/capabilities.json` should either: + * (a) have a colocated `Foo.capability.ts` descriptor registered in + * `src/components/ai/chartCapabilities.ts`, or + * (b) appear in the deliberate-exclusion list at the bottom of this file + * (with a reason — realtime, custom-layout, multi-chart). + * + * Drift in either direction is a CI error. + * + * Rationale lives in `docs/strategy/chart-capability-layer.md` § + * "Phase 2.6 — Capability coverage CI". + */ + +import fs from "node:fs" +import path from "node:path" +import { fileURLToPath } from "node:url" + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) +const repoRoot = path.resolve(__dirname, "..") + +const errors = [] +const note = (msg) => errors.push(msg) + +// 1. Load the chart inventory from the existing capabilities.json +const capabilitiesPath = path.join(repoRoot, "ai", "capabilities.json") +const inventory = JSON.parse(fs.readFileSync(capabilitiesPath, "utf8")) +const allCharts = Object.keys(inventory.charts ?? {}).sort() + +// 2. Read the capability registry source and extract the components it imports. 
+const registryPath = path.join( + repoRoot, + "src", + "components", + "ai", + "chartCapabilities.ts" +) +const registrySrc = fs.readFileSync(registryPath, "utf8") +const importedCapabilities = new Set() +const importRe = + /import\s+\{\s*(\w+Capability)\s*\}\s+from\s+"[^"]+\/(\w+)\.capability"/g +let match +while ((match = importRe.exec(registrySrc)) !== null) { + const componentName = match[2] + importedCapabilities.add(componentName) +} + +// 3. Deliberate exclusions — kept in sync with the comment block in chartCapabilities.ts. +// Only includes charts that are in ai/capabilities.json. Custom-layout charts +// (XY/Ordinal/NetworkCustomChart) and LinkedCharts aren't in capabilities.json +// because they don't fit the standard chart-spec model. +const DELIBERATELY_EXCLUDED = new Map([ + [ + "RealtimeLineChart", + "realtime — streaming source, static suggestion engine doesn't apply" + ], + ["RealtimeHistogram", "realtime — streaming source"], + ["TemporalHistogram", "realtime sibling — streaming source"], + ["RealtimeSwarmChart", "realtime"], + ["RealtimeWaterfallChart", "realtime"], + ["RealtimeHeatmap", "realtime"], + ["ScatterplotMatrix", "multi-chart composition — data shape is a tuple"] +]) + +// 4. Cross-check +const missing = [] +const unexpectedExclusion = [] +for (const chart of allCharts) { + const hasCapability = importedCapabilities.has(chart) + const isExcluded = DELIBERATELY_EXCLUDED.has(chart) + if (!hasCapability && !isExcluded) { + missing.push(chart) + } + if (hasCapability && isExcluded) { + unexpectedExclusion.push(chart) + } +} + +// 5. Charts in exclusion list but not in inventory (typo guard) +const inventorySet = new Set(allCharts) +const phantomExclusions = [] +for (const chart of DELIBERATELY_EXCLUDED.keys()) { + if (!inventorySet.has(chart)) phantomExclusions.push(chart) +} + +// 6. 
Capability files that aren't imported (orphans) +const colocatedFiles = [] +const chartDirs = ["xy", "ordinal", "network", "geo"] +for (const dir of chartDirs) { + const dirPath = path.join(repoRoot, "src", "components", "charts", dir) + if (!fs.existsSync(dirPath)) continue + for (const file of fs.readdirSync(dirPath)) { + if (file.endsWith(".capability.ts")) { + const componentName = file.replace(".capability.ts", "") + colocatedFiles.push(componentName) + } + } +} +const orphanFiles = colocatedFiles.filter((c) => !importedCapabilities.has(c)) + +if (missing.length) { + note( + `Charts in ai/capabilities.json without a registered capability descriptor:\n ${missing.join(", ")}\n Either add a *.capability.ts file and register it in src/components/ai/chartCapabilities.ts, or add an entry to DELIBERATELY_EXCLUDED in this script with a reason.` + ) +} +if (unexpectedExclusion.length) { + note( + `Charts that have a registered capability AND appear in DELIBERATELY_EXCLUDED:\n ${unexpectedExclusion.join(", ")}\n Remove them from one or the other.` + ) +} +if (phantomExclusions.length) { + note( + `DELIBERATELY_EXCLUDED entries that don't match any chart in ai/capabilities.json (typo?):\n ${phantomExclusions.join(", ")}` + ) +} +if (orphanFiles.length) { + note( + `Capability descriptor files on disk but not imported by the registry:\n ${orphanFiles.join(", ")}` + ) +} + +if (errors.length) { + console.error("❌ Capability coverage check failed:\n") + for (const e of errors) console.error(" - " + e + "\n") + process.exit(1) +} + +const coveredCount = allCharts.length - DELIBERATELY_EXCLUDED.size +console.log( + `✅ Capability coverage: ${importedCapabilities.size} descriptors registered, ${coveredCount} covered charts, ${DELIBERATELY_EXCLUDED.size} deliberate exclusions, ${allCharts.length} charts total.` +) diff --git a/scripts/run-capability-scorecard.mjs b/scripts/run-capability-scorecard.mjs new file mode 100644 index 00000000..99862f5d --- /dev/null +++ 
b/scripts/run-capability-scorecard.mjs @@ -0,0 +1,69 @@ +#!/usr/bin/env node +/** + * Run the descriptor quality scorecard against the canonical fixture set. + * + * Prints a human-readable summary and writes the full report to + * `ai/capability-scorecard.json` for vizmart / tooling to consume. + * + * Not in `release:check` by default — the scorecard is a tuning tool, not + * a release gate. Run with `npm run scorecard`. + * + * Rationale: `docs/strategy/chart-capability-layer.md` § Phase 2.1 + V.8. + */ + +import fs from "node:fs" +import path from "node:path" +import { fileURLToPath } from "node:url" + +// Use the built dist — keeps the script Node-runnable without ts-node. +const { runQualityScorecard } = await import("../dist/semiotic-ai.module.min.js") +const { CANONICAL_FIXTURES } = await import("../dist/semiotic-ai.module.min.js") + +if (!runQualityScorecard || !CANONICAL_FIXTURES) { + console.error("❌ Scorecard helpers not found in dist/semiotic-ai.module.min.js — rebuild with `npm run dist`.") + process.exit(1) +} + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) +const repoRoot = path.resolve(__dirname, "..") + +const report = runQualityScorecard(CANONICAL_FIXTURES) + +// Write machine-readable copy +const outPath = path.join(repoRoot, "ai", "capability-scorecard.json") +fs.writeFileSync(outPath, JSON.stringify(report, null, 2)) + +// Human-readable summary +const fmtPct = (n) => `${(n * 100).toFixed(0)}%` +const fmtScore = (n) => n.toFixed(2) + +console.log("Capability Quality Scorecard") +console.log("============================") +console.log(`Fixtures evaluated: ${report.summary.fixtureCount}`) +console.log(`Capabilities tested: ${report.summary.capabilityCount}`) +console.log(`Expert agreement rate: ${fmtPct(report.summary.expertAgreementRate)}`) +console.log(`Overall caveat coverage: ${fmtPct(report.summary.overallCaveatCoverage)}`) +console.log(`Overall variant utilization: 
${fmtPct(report.summary.overallVariantUtilization)}`) +console.log("") + +console.log("Per-fixture results:") +for (const f of report.perFixture) { + const top = f.topPick ? `${f.topPick.component}${f.topPick.variantKey ? "/" + f.topPick.variantKey : ""} (${fmtScore(f.topPick.score)})` : "—" + const agreement = f.expertAgreement === null ? " " : f.expertAgreement ? "✓ " : "✗ " + const intent = f.intent ? ` [${f.intent}]` : "" + console.log(` ${agreement}${f.fixture}${intent}`) + console.log(` top: ${top}, fitting=${f.fittingCount}, rejected=${f.rejectedCount}`) + if (f.expected && f.expected.length) { + console.log(` expected: ${f.expected.join(", ")}`) + } +} +console.log("") + +console.log("Weakest descriptors (sorted by expert-agreement count, ascending):") +const weakest = report.perCapability.slice(0, 12) +for (const c of weakest) { + console.log(` ${c.component.padEnd(28)} fits=${String(c.fitsOn).padStart(2)} reject=${String(c.rejectedOn).padStart(2)} top3=${String(c.inTopThreeOn).padStart(2)} agree=${c.expertAgreementCount} avg=${fmtScore(c.averageScore)} caveat=${fmtPct(c.caveatCoverage)} variant=${fmtPct(c.variantUtilization)}`) +} +console.log("") +console.log(`Full report written to: ${path.relative(repoRoot, outPath)}`) diff --git a/scripts/scorecard-dev.ts b/scripts/scorecard-dev.ts new file mode 100644 index 00000000..6a64b250 --- /dev/null +++ b/scripts/scorecard-dev.ts @@ -0,0 +1,26 @@ +// Dev-only: run the scorecard against TS source so we can iterate without +// waiting for full dist rebuilds. Invoked via npx tsx. 
import { runQualityScorecard } from "../src/components/ai/qualityScorecard"
import { CANONICAL_FIXTURES } from "../src/components/ai/qualityFixtures"

// Formatting helpers: ratio → whole-number percentage, score → two decimals.
const asPercent = (ratio: number): string => `${(ratio * 100).toFixed(0)}%`
const asScore = (value: number): string => value.toFixed(2)

const { summary, perFixture, perCapability } = runQualityScorecard(CANONICAL_FIXTURES)

// Headline numbers.
console.log(`Expert agreement: ${asPercent(summary.expertAgreementRate)} across ${summary.fixtureCount} fixtures`)
console.log(`Caveat coverage: ${asPercent(summary.overallCaveatCoverage)}`)
console.log(`Variant util: ${asPercent(summary.overallVariantUtilization)}`)
console.log("")

// One line per fixture: agreement marker, fixture name, top-ranked chart.
console.log("Per-fixture:")
for (const result of perFixture) {
  const pick = result.topPick
  let topLabel = "—"
  if (pick) {
    topLabel = pick.variantKey ? `${pick.component}/${pick.variantKey}` : `${pick.component}`
  }

  let agreeMark = "✗"
  if (result.expertAgreement === null) {
    agreeMark = " "
  } else if (result.expertAgreement) {
    agreeMark = "✓"
  }

  console.log(` ${agreeMark} ${result.fixture.padEnd(60)} top=${topLabel}`)
}
console.log("")

// First twelve entries of the per-capability report.
console.log("Weakest descriptors:")
perCapability.slice(0, 12).forEach((cap) => {
  const fits = String(cap.fitsOn).padStart(2)
  const rejected = String(cap.rejectedOn).padStart(2)
  const topThree = String(cap.inTopThreeOn).padStart(2)
  console.log(` ${cap.component.padEnd(28)} fits=${fits} rej=${rejected} top3=${topThree} agree=${cap.expertAgreementCount} avg=${asScore(cap.averageScore)}`)
})
// Unit tests for applyAudienceBias: the familiarity override and the
// adoption-target bias, separately and combined.
describe("applyAudienceBias", () => {
  it("returns identity when no audience is supplied", () => {
    const { score, rubric, appliedReason } = applyAudienceBias(
      3.5,
      baseRubric,
      "BarChart",
      undefined
    )
    expect(score).toBe(3.5)
    expect(rubric).toEqual(baseRubric)
    expect(appliedReason).toBeUndefined()
  })

  it("overrides familiarity when audience specifies it", () => {
    const profile: AudienceProfile = { familiarity: { BarChart: 5 } }
    const biased = applyAudienceBias(3.5, baseRubric, "BarChart", profile)
    expect(biased.rubric.familiarity).toBe(5)
    // (5 - 3) * 0.5 = +1.0 on top of the 3.5 base
    expect(biased.score).toBeCloseTo(4.5)
  })

  it("applies increase target as positive score delta", () => {
    const profile: AudienceProfile = {
      targets: { BoxPlot: { direction: "increase", weight: 2 } }
    }
    const biased = applyAudienceBias(3.0, baseRubric, "BoxPlot", profile)
    // No familiarity override, so only the target term applies: +1.0 * 2
    expect(biased.score).toBe(5.0)
  })

  it("applies decrease target as negative score delta", () => {
    const profile: AudienceProfile = {
      targets: { PieChart: { direction: "decrease", weight: 3 } }
    }
    const biased = applyAudienceBias(4.5, baseRubric, "PieChart", profile)
    // -1.0 * 3 = -3.0
    expect(biased.score).toBeCloseTo(1.5)
  })

  it("combines familiarity + target", () => {
    const profile: AudienceProfile = {
      familiarity: { BoxPlot: 2 },
      targets: { BoxPlot: { direction: "increase", weight: 2 } }
    }
    const biased = applyAudienceBias(3.0, baseRubric, "BoxPlot", profile)
    // (2 - 3) * 0.5 = -0.5 from familiarity, +2.0 from the target → +1.5 net
    expect(biased.score).toBeCloseTo(4.5)
    expect(biased.rubric.familiarity).toBe(2)
  })

  it("clamps target weight to 1..3", () => {
    const profile: AudienceProfile = {
      targets: { X: { direction: "increase", weight: 10 } }
    }
    const biased = applyAudienceBias(0, baseRubric, "X", profile)
    // weight 10 is clamped to 3, so the delta is 1.0 * 3
    expect(biased.score).toBe(3)
  })

  it("includes appliedReason when target fires", () => {
    const profile: AudienceProfile = {
      name: "Acme",
      targets: {
        BoxPlot: { direction: "increase", reason: "we want distributions" }
      }
    }
    const { appliedReason } = applyAudienceBias(3.0, baseRubric, "BoxPlot", profile)
    expect(appliedReason).toContain("Acme")
    expect(appliedReason).toContain("distributions")
  })
})
// The stretch ceiling is the highest familiarity a chart may have and still
// be offered as a "stretch" suggestion.
describe("stretchFamiliarityCeiling", () => {
  it("returns 3 for no audience or exposureLevel undefined/1", () => {
    const baselineAudiences: Array<AudienceProfile | undefined> = [
      undefined,
      {},
      { exposureLevel: 1 }
    ]
    for (const audience of baselineAudiences) {
      expect(stretchFamiliarityCeiling(audience)).toBe(3)
    }
  })
  it("returns 4 at exposureLevel 2", () => {
    const wideExposure: AudienceProfile = { exposureLevel: 2 }
    expect(stretchFamiliarityCeiling(wideExposure)).toBe(4)
  })
})
/**
 * A serializable description of who's reading the charts and what the
 * organization is trying to grow.
 *
 * Semiotic does not measure familiarity — it consumes measurements. Orgs
 * produce an AudienceProfile through whatever channel makes sense (surveys,
 * telemetry, manager judgment, training records) and pass it to the
 * suggestion APIs. The library applies the bias and returns rankings that
 * reflect the audience instead of a generic data-literate baseline.
 *
 * Strategy memo: docs/strategy/audience-profiles.md
 */
export interface AudienceProfile {
  /**
   * Display name. Surfaced in suggestion `reasons[]` when a target fires so
   * users can see whose policy is influencing the ranking.
   */
  name?: string
  /**
   * Per-chart familiarity override (1..5), keyed by chart component name.
   * Replaces the descriptor's `rubric.familiarity`. Charts not listed fall
   * back to the descriptor.
   *
   * @example
   * familiarity: { BarChart: 5, LineChart: 5, PieChart: 4, BoxPlot: 2 }
   */
  familiarity?: Partial<Record<string, number>>
  /**
   * Adoption targets, keyed by chart component name — which charts the org
   * is trying to grow or reduce. The engine applies a meaningful score bias
   * (±1..3 depending on weight) so growth targets win close calls and
   * decrease targets fall back unless they're the only fit.
   *
   * @example
   * targets: {
   *   PieChart: { direction: "decrease", weight: 1 },
   *   BoxPlot: { direction: "increase", weight: 2,
   *     reason: "we want the team reading distributions, not means" }
   * }
   */
  targets?: Partial<Record<string, AudienceTarget>>
  /**
   * Controls visibility of stretch picks (unfamiliar-but-relevant charts).
   * 0 — never surface stretches; familiar-only rankings
   * 1 — surface in a separate `stretchSuggestions` list (default when audience set)
   * 2 — same as 1 but lowers the familiarity threshold (≤4) for what counts as stretch,
   *     widening the menu
   */
  exposureLevel?: 0 | 1 | 2
}
/** Score change per point of audience familiarity above or below the midpoint (3). */
const FAMILIARITY_WEIGHT = 0.5
/** Score change per unit of adoption-target weight. */
const TARGET_WEIGHT = 1.0

/**
 * Apply an AudienceProfile's bias to a chart's composite score and rubric.
 * Pure function — used by both `suggestCharts` and `suggestStretchCharts`.
 *
 * Two terms compose additively:
 *   • Familiarity bias: (audienceFamiliarity − 3) × 0.5
 *     — ±1.0 across the documented 1..5 familiarity range. Applied only when
 *       the audience lists the chart.
 *   • Target bias: ±1.0 × weight, with weight clamped to 1..3
 *     — Range ±3.0 for weight=3. Strong enough to reorder rankings,
 *       not so strong that it overrides chart correctness for the data shape.
 *
 * Score is left unclamped so internal sorting reflects the magnitude of bias.
 *
 * @param baseScore Composite score before any audience adjustment.
 * @param baseRubric The chart descriptor's rubric; never mutated.
 * @param component Chart component name used to look up overrides/targets.
 * @param audience Profile to apply. `undefined` returns the inputs unchanged.
 * @returns The adjusted score, the effective rubric (a copy with the audience
 *   familiarity when one is set), and a reason string when a target fired.
 */
export function applyAudienceBias(
  baseScore: number,
  baseRubric: ChartRubric,
  component: string,
  audience: AudienceProfile | undefined,
): AudienceBiasResult {
  if (!audience) return { score: baseScore, rubric: baseRubric }

  const audienceFamiliarity = audience.familiarity?.[component]
  const familiarity = audienceFamiliarity ?? baseRubric.familiarity
  const target = audience.targets?.[component]

  let delta = 0
  // `!= null` mirrors the `??` fallback above: a `null` coming from a
  // deserialized profile counts as "not listed" for both the rubric and the
  // score delta, instead of being coerced to 0 in the arithmetic.
  if (audienceFamiliarity != null) {
    delta += (audienceFamiliarity - 3) * FAMILIARITY_WEIGHT
  }

  let appliedReason: string | undefined
  if (target) {
    // Math.max/Math.min propagate NaN, and a NaN score would poison any sort
    // that consumes it — fall back to the documented default weight of 1.
    const clamped = Math.max(1, Math.min(3, target.weight ?? 1))
    const weight = Number.isNaN(clamped) ? 1 : clamped
    const sign = target.direction === "increase" ? 1 : -1
    delta += sign * TARGET_WEIGHT * weight
    if (target.reason) {
      appliedReason = `${audience.name ? `${audience.name}: ` : ""}${target.reason}`
    } else {
      appliedReason = `${audience.name ? `${audience.name} ` : ""}target: ${target.direction} ${component}`
    }
  }

  return {
    score: baseScore + delta,
    rubric: { ...baseRubric, familiarity },
    appliedReason,
  }
}
+ */ +export const executivePersona: AudienceProfile = { + name: "Executive", + familiarity: { + // Boardroom-comfortable + BarChart: 5, + LineChart: 5, + PieChart: 5, + DonutChart: 4, + GaugeChart: 5, + AreaChart: 4, + FunnelChart: 4, + ChoroplethMap: 4, + + // Recognizable but less common + Histogram: 3, + Heatmap: 3, + StackedBarChart: 3, + StackedAreaChart: 3, + Scatterplot: 3, + BubbleChart: 3, + GroupedBarChart: 3, + DotPlot: 3, + + // Specialist + BoxPlot: 2, + ViolinPlot: 1, + SwarmPlot: 1, + RidgelinePlot: 1, + MultiAxisLineChart: 2, + CandlestickChart: 2, + DifferenceChart: 2, + QuadrantChart: 3, + LikertChart: 3, + SwimlaneChart: 2, + MinimapChart: 2, + ConnectedScatterplot: 1, + + // Network/hierarchy + SankeyDiagram: 2, + TreeDiagram: 3, + Treemap: 3, + CirclePack: 2, + OrbitDiagram: 1, + ChordDiagram: 1, + ProcessSankey: 2, + ForceDirectedGraph: 1, + + // Geo specialist + ProportionalSymbolMap: 3, + FlowMap: 2, + DistanceCartogram: 1, + }, + targets: { + PieChart: { + direction: "decrease", + weight: 1, + reason: "shifting from share-by-angle toward share-by-length for accuracy", + }, + BarChart: { + direction: "increase", + weight: 1, + }, + }, + exposureLevel: 1, +} + +/** + * Analyst audience — broader chart vocabulary, comfortable with + * distribution-shape and matrix-shape charts. Building dashboards for + * others; can read most things on first encounter. 
export const analystPersona: AudienceProfile = {
  // Shown as a prefix in suggestion reasons when one of the targets below fires.
  name: "Analyst",
  // Per-chart familiarity overrides on the 1..5 scale; these replace the
  // descriptor's `rubric.familiarity` for the listed charts.
  familiarity: {
    BarChart: 5,
    LineChart: 5,
    PieChart: 4,
    DonutChart: 4,
    AreaChart: 5,
    StackedAreaChart: 4,
    StackedBarChart: 5,
    GroupedBarChart: 5,
    Histogram: 5,
    Heatmap: 5,
    Scatterplot: 5,
    BubbleChart: 4,
    BoxPlot: 4,
    DotPlot: 4,
    GaugeChart: 3,
    FunnelChart: 4,
    LikertChart: 4,
    QuadrantChart: 4,
    SwimlaneChart: 4,
    MinimapChart: 4,
    DifferenceChart: 3,
    MultiAxisLineChart: 4,
    CandlestickChart: 3,
    ConnectedScatterplot: 3,

    // Less common in analyst workflows
    ViolinPlot: 3,
    SwarmPlot: 3,
    RidgelinePlot: 2,

    // Network/hierarchy
    TreeDiagram: 4,
    Treemap: 4,
    CirclePack: 3,
    SankeyDiagram: 4,
    ProcessSankey: 3,
    ChordDiagram: 3,
    OrbitDiagram: 2,
    ForceDirectedGraph: 3,

    // Geo
    ChoroplethMap: 4,
    ProportionalSymbolMap: 4,
    FlowMap: 3,
    DistanceCartogram: 2,
  },
  // Adoption targets. Weight 1 is the smallest bias `applyAudienceBias`
  // applies (±1.0 to the composite score).
  targets: {
    // No `reason` given, so the generated "target: decrease PieChart" text is used.
    PieChart: { direction: "decrease", weight: 1 },
    BoxPlot: {
      direction: "increase",
      weight: 1,
      reason: "team is shifting from averages to distribution-aware comparisons",
    },
  },
  // 1 = stretch picks surface in a separate list (see AudienceProfile.exposureLevel).
  exposureLevel: 1,
}
+ */ +export const dataScientistPersona: AudienceProfile = { + name: "Data scientist", + familiarity: { + BarChart: 5, + LineChart: 5, + PieChart: 3, + DonutChart: 3, + AreaChart: 5, + StackedAreaChart: 5, + StackedBarChart: 5, + GroupedBarChart: 5, + Histogram: 5, + Heatmap: 5, + Scatterplot: 5, + BubbleChart: 5, + BoxPlot: 5, + ViolinPlot: 5, + SwarmPlot: 4, + RidgelinePlot: 4, + DotPlot: 4, + QuadrantChart: 4, + LikertChart: 4, + DifferenceChart: 4, + MultiAxisLineChart: 4, + ConnectedScatterplot: 4, + GaugeChart: 2, + FunnelChart: 3, + SwimlaneChart: 3, + MinimapChart: 4, + CandlestickChart: 3, + + // Network/hierarchy + TreeDiagram: 4, + Treemap: 4, + CirclePack: 4, + SankeyDiagram: 4, + ProcessSankey: 3, + ChordDiagram: 3, + OrbitDiagram: 2, + ForceDirectedGraph: 4, + + // Geo + ChoroplethMap: 4, + ProportionalSymbolMap: 4, + FlowMap: 3, + DistanceCartogram: 3, + }, + targets: { + PieChart: { + direction: "decrease", + weight: 2, + reason: "preferring length-encoded comparisons for precision", + }, + BarChart: { + direction: "decrease", + weight: 1, + reason: "promoting distribution-aware charts over single-value bars when raw observations are available", + }, + BoxPlot: { direction: "increase", weight: 1 }, + ViolinPlot: { direction: "increase", weight: 1 }, + }, + exposureLevel: 2, +} + +/** + * Convenience map for consumers loading audience by name (e.g. from a config string). 
+ */ +export const BUILT_IN_AUDIENCES: Record = { + executive: executivePersona, + analyst: analystPersona, + "data-scientist": dataScientistPersona, +} diff --git a/src/components/ai/chartCapabilities.ts b/src/components/ai/chartCapabilities.ts new file mode 100644 index 00000000..30427a58 --- /dev/null +++ b/src/components/ai/chartCapabilities.ts @@ -0,0 +1,193 @@ +import type { ChartCapability } from "./chartCapabilityTypes" + +// XY family +import { LineChartCapability } from "../charts/xy/LineChart.capability" +import { AreaChartCapability } from "../charts/xy/AreaChart.capability" +import { StackedAreaChartCapability } from "../charts/xy/StackedAreaChart.capability" +import { ScatterplotCapability } from "../charts/xy/Scatterplot.capability" +import { ConnectedScatterplotCapability } from "../charts/xy/ConnectedScatterplot.capability" +import { BubbleChartCapability } from "../charts/xy/BubbleChart.capability" +import { QuadrantChartCapability } from "../charts/xy/QuadrantChart.capability" +import { MultiAxisLineChartCapability } from "../charts/xy/MultiAxisLineChart.capability" +import { MinimapChartCapability } from "../charts/xy/MinimapChart.capability" +import { DifferenceChartCapability } from "../charts/xy/DifferenceChart.capability" +import { CandlestickChartCapability } from "../charts/xy/CandlestickChart.capability" +import { HeatmapCapability } from "../charts/xy/Heatmap.capability" + +// Ordinal family +import { BarChartCapability } from "../charts/ordinal/BarChart.capability" +import { GroupedBarChartCapability } from "../charts/ordinal/GroupedBarChart.capability" +import { StackedBarChartCapability } from "../charts/ordinal/StackedBarChart.capability" +import { DotPlotCapability } from "../charts/ordinal/DotPlot.capability" +import { PieChartCapability } from "../charts/ordinal/PieChart.capability" +import { DonutChartCapability } from "../charts/ordinal/DonutChart.capability" +import { FunnelChartCapability } from 
"../charts/ordinal/FunnelChart.capability" +import { GaugeChartCapability } from "../charts/ordinal/GaugeChart.capability" +import { LikertChartCapability } from "../charts/ordinal/LikertChart.capability" +import { SwimlaneChartCapability } from "../charts/ordinal/SwimlaneChart.capability" +import { HistogramCapability } from "../charts/ordinal/Histogram.capability" +import { BoxPlotCapability } from "../charts/ordinal/BoxPlot.capability" +import { SwarmPlotCapability } from "../charts/ordinal/SwarmPlot.capability" +import { ViolinPlotCapability } from "../charts/ordinal/ViolinPlot.capability" +import { RidgelinePlotCapability } from "../charts/ordinal/RidgelinePlot.capability" + +// Network family +import { ForceDirectedGraphCapability } from "../charts/network/ForceDirectedGraph.capability" +import { SankeyDiagramCapability } from "../charts/network/SankeyDiagram.capability" +import { ChordDiagramCapability } from "../charts/network/ChordDiagram.capability" +import { ProcessSankeyCapability } from "../charts/network/ProcessSankey.capability" +import { TreeDiagramCapability } from "../charts/network/TreeDiagram.capability" +import { TreemapCapability } from "../charts/network/Treemap.capability" +import { CirclePackCapability } from "../charts/network/CirclePack.capability" +import { OrbitDiagramCapability } from "../charts/network/OrbitDiagram.capability" + +// Geo family +import { ChoroplethMapCapability } from "../charts/geo/ChoroplethMap.capability" +import { ProportionalSymbolMapCapability } from "../charts/geo/ProportionalSymbolMap.capability" +import { FlowMapCapability } from "../charts/geo/FlowMap.capability" +import { DistanceCartogramCapability } from "../charts/geo/DistanceCartogram.capability" + +/** + * Built-in capability descriptors. Each chart owns its own descriptor in + * `Foo.capability.ts` next to `Foo.tsx`. To add a new chart, write the descriptor + * and append it here. 
+ * + * Charts intentionally NOT in this registry: + * • Realtime variants (RealtimeLineChart, RealtimeHistogram, ...) — they're for + * streaming data, while `suggestCharts` operates on static datasets. + * • Custom-layout charts (XYCustomChart, OrdinalCustomChart, NetworkCustomChart) — + * they require a layout function and are escape-hatches by design. + * • LinkedCharts and ScatterplotMatrix — multi-chart compositions whose data + * shape is a tuple, not a single dataset. + * + * Consumers can still register these (or any custom chart) via `registerChartCapability`. + */ +const BUILT_IN_CAPABILITIES: ReadonlyArray = [ + // XY + LineChartCapability, + AreaChartCapability, + StackedAreaChartCapability, + ScatterplotCapability, + ConnectedScatterplotCapability, + BubbleChartCapability, + QuadrantChartCapability, + MultiAxisLineChartCapability, + MinimapChartCapability, + DifferenceChartCapability, + CandlestickChartCapability, + HeatmapCapability, + // Ordinal + BarChartCapability, + GroupedBarChartCapability, + StackedBarChartCapability, + DotPlotCapability, + PieChartCapability, + DonutChartCapability, + FunnelChartCapability, + GaugeChartCapability, + LikertChartCapability, + SwimlaneChartCapability, + // Distribution + HistogramCapability, + BoxPlotCapability, + SwarmPlotCapability, + ViolinPlotCapability, + RidgelinePlotCapability, + // Network + ForceDirectedGraphCapability, + SankeyDiagramCapability, + ChordDiagramCapability, + ProcessSankeyCapability, + // Hierarchy + TreeDiagramCapability, + TreemapCapability, + CirclePackCapability, + OrbitDiagramCapability, + // Geo + ChoroplethMapCapability, + ProportionalSymbolMapCapability, + FlowMapCapability, + DistanceCartogramCapability, +] + +const userCapabilities = new Map() + +/** + * Register a capability for a chart (built-in or third-party). Re-registering by + * component name replaces the previous descriptor — useful for overriding defaults. 
/**
 * Register a capability descriptor for a chart (built-in or third-party).
 *
 * The descriptor is stored under `capability.component`. Registering the same
 * component name again replaces the earlier descriptor, which is also how a
 * built-in default is overridden.
 *
 * @param capability Descriptor to add to the user registry.
 */
export function registerChartCapability(capability: ChartCapability): void {
  userCapabilities.set(capability.component, capability)
}

/**
 * Remove a user-registered capability by component name.
 *
 * Built-in descriptors are never removed: deleting an override simply makes
 * the built-in descriptor of the same name visible again.
 *
 * @param component Component name the capability was registered under.
 */
export function unregisterChartCapability(component: string): void {
  userCapabilities.delete(component)
}

/**
 * Current capability list: built-ins merged with user-registered descriptors,
 * where a user-registered descriptor wins over a built-in of the same name.
 *
 * @returns The built-in array itself when nothing is registered, otherwise a
 *   freshly built array.
 */
export function getCapabilities(): ReadonlyArray<ChartCapability> {
  // Fast path: no user registrations, so hand back the built-in list uncopied.
  if (userCapabilities.size === 0) return BUILT_IN_CAPABILITIES

  // A Map keeps first-insertion order, so an override stays at its built-in's
  // position while brand-new components are appended in registration order.
  const merged = new Map<string, ChartCapability>()
  for (const builtIn of BUILT_IN_CAPABILITIES) merged.set(builtIn.component, builtIn)
  for (const [name, registered] of userCapabilities) merged.set(name, registered)
  return Array.from(merged.values())
}

/**
 * Look up a single capability by component name.
 *
 * User registrations take precedence, so they are checked first with a direct
 * map lookup; only a miss falls through to a scan of the built-in list. This
 * avoids rebuilding the merged registry on every lookup.
 *
 * @param component Component name to resolve.
 * @returns The matching descriptor, or `undefined` when none is known.
 */
export function getCapability(component: string): ChartCapability | undefined {
  return (
    userCapabilities.get(component) ??
    BUILT_IN_CAPABILITIES.find((capability) => capability.component === component)
  )
}

// Re-export every built-in descriptor so consumers can import them individually
// without pulling in the registry.
+export { + // XY + LineChartCapability, + AreaChartCapability, + StackedAreaChartCapability, + ScatterplotCapability, + ConnectedScatterplotCapability, + BubbleChartCapability, + QuadrantChartCapability, + MultiAxisLineChartCapability, + MinimapChartCapability, + DifferenceChartCapability, + CandlestickChartCapability, + HeatmapCapability, + // Ordinal + BarChartCapability, + GroupedBarChartCapability, + StackedBarChartCapability, + DotPlotCapability, + PieChartCapability, + DonutChartCapability, + FunnelChartCapability, + GaugeChartCapability, + LikertChartCapability, + SwimlaneChartCapability, + // Distribution + HistogramCapability, + BoxPlotCapability, + SwarmPlotCapability, + ViolinPlotCapability, + RidgelinePlotCapability, + // Network + ForceDirectedGraphCapability, + SankeyDiagramCapability, + ChordDiagramCapability, + ProcessSankeyCapability, + // Hierarchy + TreeDiagramCapability, + TreemapCapability, + CirclePackCapability, + OrbitDiagramCapability, + // Geo + ChoroplethMapCapability, + ProportionalSymbolMapCapability, + FlowMapCapability, + DistanceCartogramCapability, +} diff --git a/src/components/ai/chartCapabilityTypes.ts b/src/components/ai/chartCapabilityTypes.ts new file mode 100644 index 00000000..4f5cc421 --- /dev/null +++ b/src/components/ai/chartCapabilityTypes.ts @@ -0,0 +1,219 @@ +import type { Datum } from "../charts/shared/datumTypes" +import type { DataSummary } from "../data/DataSummarizer" +import type { IntentId } from "./intents" + +/** + * Chart family — high-level taxonomy used for filtering and intent matching. + */ +export type ChartFamily = + | "time-series" + | "categorical" + | "distribution" + | "relationship" + | "flow" + | "network" + | "hierarchy" + | "geo" + | "realtime" + | "custom" + +/** + * Where a chart is imported from. Used by generators to emit correct import paths. 
+ */ +export type ChartImportPath = + | "semiotic/xy" + | "semiotic/ordinal" + | "semiotic/network" + | "semiotic/geo" + | "semiotic/realtime" + | "semiotic/ai" + | "semiotic" + +/** + * Familiarity/accuracy/precision rubric (1-5 each). + * Familiarity = how well-known the chart is to a general audience. + * Accuracy = how faithfully it represents the underlying data. + * Precision = how readable individual values are. + */ +export interface ChartRubric { + familiarity: number + accuracy: number + precision: number +} + +/** + * The kind of value a field holds, used for axis fitness. + */ +export type FieldKind = "numeric" | "categorical" | "date" | "boolean" | "unknown" + +/** + * A candidate field for a given role (x, y, series, etc.), with a quality score. + */ +export interface FieldCandidate { + field: string + kind: FieldKind + /** 0..1 — how good this field is for the role being considered. */ + quality: number + /** Field-level stats for downstream scorers. */ + distinctCount?: number + /** True if the field's values are strictly increasing in row order. */ + monotonic?: boolean +} + +/** + * Profile of a dataset for chart-fitness scoring. Extends DataSummary with + * shape inference (axis candidates, structure detection, primary roles). + */ +export interface ChartDataProfile extends DataSummary { + /** Original rows (read-only); used by capabilities to compute their own stats. */ + data: ReadonlyArray + /** Candidate fields per role, sorted best-first. */ + candidates: { + x: FieldCandidate[] + y: FieldCandidate[] + size: FieldCandidate[] + category: FieldCandidate[] + series: FieldCandidate[] + time: FieldCandidate[] + } + /** Best-guess primary assignment per role (the top candidate, if any). */ + primary: { + x?: string + y?: string + size?: string + category?: string + series?: string + time?: string + } + /** Distinct count of the primary category field, if any. */ + categoryCount?: number + /** Distinct count of the primary series field, if any. 
*/ + seriesCount?: number + /** Distinct count of the primary x field, if any. */ + uniqueXCount?: number + /** True when some x value appears in more than one row (suggests aggregation). */ + hasRepeatedX: boolean + /** True when the primary x candidate is monotonic. */ + monotonicX: boolean + /** True when there is at least one date-typed candidate. */ + hasTimeAxis: boolean + /** + * How the primary x role was inferred. Capabilities can use this to detect + * the "scatter fallback" case (x picked only because there were 2+ numerics, + * not because the field is genuinely an x-axis) and decline to recommend + * themselves for trend-shaped intents. + * + * • "time" — explicit date/time field + * • "named" — numeric whose name matches an x-pattern (month, year, index, …) + * • "scatter"— filled in via the two-numeric scatter fallback; weak signal + * • "none" — no x role inferred + */ + xProvenance: "time" | "named" | "scatter" | "none" + /** Source dataset looks like a hierarchy (had a `children` array at root). */ + hasHierarchy: boolean + /** Source dataset looks like a node/edge graph. */ + hasNetwork: boolean + /** Source dataset looks like GeoJSON (FeatureCollection). */ + hasGeo: boolean + /** Extracted network payload when hasNetwork is true. */ + network?: { nodes: ReadonlyArray; edges: ReadonlyArray } + /** Extracted hierarchy root when hasHierarchy is true. */ + hierarchy?: Datum + /** Extracted GeoJSON FeatureCollection when hasGeo is true. */ + geo?: { features: ReadonlyArray; points?: ReadonlyArray; flows?: ReadonlyArray } +} + +/** + * An intent scorer is either a static 0..5 score or a function evaluated against the profile. + */ +export type IntentScorer = + | number + | ((profile: ChartDataProfile) => number) + +/** + * Variant — a configuration of the chart that meaningfully changes what it's good for. + * + * Variants compose into suggestions. 
The `intentDeltas` are additive against the + * base capability's intent scores (clamped to 0..5 by the engine). + */ +export interface ChartVariant { + key: string + label: string + description?: string + /** Props to merge into the base chart props. */ + props: Record + /** Style/role tags (used by consumers like vizmart for filtering). */ + tags?: ReadonlyArray + /** Per-intent additive score deltas (e.g. {"trend": +1, "outlier-detection": -2}). */ + intentDeltas?: Partial> + /** Rubric deltas — usually small, e.g. smoothing trades precision for familiarity. */ + rubricDeltas?: Partial + /** Caveats specific to this variant — surfaced in suggestion.caveats. */ + caveats?: ReadonlyArray +} + +/** + * Result of a capability's `fits()` gate. `null` means the chart fits. A string + * is the human-readable reason it doesn't, used for diagnostics and reasoning. + */ +export type FitResult = null | string + +/** + * The capability descriptor each chart ships alongside itself. + * + * Charts that declare a capability participate in `suggestCharts`, `useChartSuggestions`, + * and the `interrogateChart` MCP tool's recommendation surface. + */ +export interface ChartCapability { + component: string + family: ChartFamily + importPath: ChartImportPath + /** Base rubric, before variant/profile adjustments. */ + rubric: ChartRubric + /** + * Hard requirements gate. Return null if the chart can render this profile, + * or a human-readable string explaining why not (e.g. "no numeric y candidate"). + */ + fits: (profile: ChartDataProfile) => FitResult + /** + * Per-intent suitability score (0..5). Missing intents default to 0. + * Values may be functions for profile-aware scoring. + */ + intentScores: Partial> + /** + * Variants — different settings that change what the chart is useful for. + * Suggestion engine emits one suggestion per (capability × variant) pair. + * If empty, the engine still emits a base suggestion. 
+ */ + variants?: ReadonlyArray + /** Caveats independent of variants (e.g. "log scale skipped for negative values"). */ + caveats?: (profile: ChartDataProfile) => ReadonlyArray + /** + * Build the props you'd pass to this chart for this dataset. Should produce + * a runnable config (accessor names, etc.) so consumers can ``. + */ + buildProps: (profile: ChartDataProfile, variant?: ChartVariant) => Record +} + +/** + * One suggestion produced by `suggestCharts`. Consumers render this as a card, + * pass it to an LLM for re-ranking, or hand the props straight to the chart. + */ +export interface Suggestion { + component: string + family: ChartFamily + importPath: ChartImportPath + variant?: ChartVariant + /** Composite score for the ranking intent(s), 0..5. */ + score: number + /** Per-intent scores after variant deltas. */ + intentScores: Partial> + /** Rubric after variant/profile adjustments. */ + rubric: ChartRubric + /** Narrative reasons this chart fits — suitable for tooltips or LLM context. */ + reasons: ReadonlyArray + /** Gotchas / things to be careful about. */ + caveats: ReadonlyArray + /** Ready-to-spread props. 
*/ + props: Record +} diff --git a/src/components/ai/diffProfile.test.ts b/src/components/ai/diffProfile.test.ts new file mode 100644 index 00000000..4b01fec0 --- /dev/null +++ b/src/components/ai/diffProfile.test.ts @@ -0,0 +1,68 @@ +import { describe, it, expect } from "vitest" +import { profileData } from "./profileData" +import { diffProfile } from "./diffProfile" + +describe("diffProfile", () => { + it("reports unchanged when profiles are equivalent", () => { + const data = [{ a: 1, b: "x" }, { a: 2, b: "y" }] + const diff = diffProfile(profileData(data), profileData(data)) + expect(diff.unchanged).toBe(true) + expect(diff.added).toEqual([]) + expect(diff.removed).toEqual([]) + }) + + it("reports row count change", () => { + const a = profileData([{ x: 1 }, { x: 2 }]) + const b = profileData([{ x: 1 }, { x: 2 }, { x: 3 }]) + const diff = diffProfile(a, b) + expect(diff.rowCountChange).toBe(1) + }) + + it("reports added and removed fields", () => { + const a = profileData([{ a: 1, b: 2 }]) + const b = profileData([{ b: 2, c: 3 }]) + const diff = diffProfile(a, b) + expect(diff.added).toEqual(["c"]) + expect(diff.removed).toEqual(["a"]) + }) + + it("reports field type changes", () => { + const a = profileData([{ x: 1, score: 10 }, { x: 2, score: 20 }]) + const b = profileData([{ x: 1, score: "high" }, { x: 2, score: "low" }]) + const diff = diffProfile(a, b) + expect(diff.typeChanges.some((c) => c.field === "score" && c.from === "numeric" && c.to === "categorical")).toBe(true) + }) + + it("reports primary role re-assignments", () => { + const a = profileData([{ value: 10, region: "EU" }, { value: 20, region: "NA" }]) + // Adding a time field should move x's primary from numeric to time + const b = profileData([ + { value: 10, region: "EU", date: "2025-01-01" }, + { value: 20, region: "NA", date: "2025-02-01" }, + ]) + const diff = diffProfile(a, b) + const xChange = diff.primaryChanges.find((c) => c.role === "x") + const timeChange = diff.primaryChanges.find((c) 
=> c.role === "time") + expect(xChange || timeChange).toBeDefined() + if (timeChange) { + expect(timeChange.from).toBeUndefined() + expect(timeChange.to).toBe("date") + } + }) + + it("reports charts that become fit/unfit", () => { + // Single row → 50 rows: histogram should become fit + const a = profileData([{ value: 10 }]) + const b = profileData(Array.from({ length: 50 }, (_, i) => ({ value: i + Math.random() * 5 }))) + const diff = diffProfile(a, b) + expect(diff.becameFit).toContain("Histogram") + }) + + it("becameUnfit and becameFit are disjoint", () => { + const a = profileData([{ x: 1, y: 2 }, { x: 3, y: 4 }, { x: 5, y: 6 }]) + const b = profileData([{ category: "A", value: 10 }, { category: "B", value: 20 }]) + const diff = diffProfile(a, b) + const overlap = diff.becameFit.filter((c) => diff.becameUnfit.includes(c)) + expect(overlap).toEqual([]) + }) +}) diff --git a/src/components/ai/diffProfile.ts b/src/components/ai/diffProfile.ts new file mode 100644 index 00000000..451fa447 --- /dev/null +++ b/src/components/ai/diffProfile.ts @@ -0,0 +1,131 @@ +import type { ChartDataProfile, FieldKind } from "./chartCapabilityTypes" +import { getCapabilities } from "./chartCapabilities" + +export type PrimaryRole = "x" | "y" | "size" | "category" | "series" | "time" + +export interface FieldTypeChange { + field: string + from: FieldKind | "unknown" + to: FieldKind | "unknown" +} + +export interface PrimaryRoleChange { + role: PrimaryRole + from: string | undefined + to: string | undefined +} + +export interface ProfileDiff { + /** Row count change (b.rowCount - a.rowCount). */ + rowCountChange: number + /** Fields present in b but not in a. */ + added: ReadonlyArray + /** Fields present in a but not in b. */ + removed: ReadonlyArray + /** Fields whose inferred type changed. */ + typeChanges: ReadonlyArray + /** Primary role re-assignments (e.g. x switched from "month" to "date"). */ + primaryChanges: ReadonlyArray + /** Suggestion components that fit a but not b. 
*/ + becameUnfit: ReadonlyArray + /** Suggestion components that fit b but not a. */ + becameFit: ReadonlyArray + /** True when no observable change was detected. */ + unchanged: boolean +} + +const PRIMARY_ROLES: ReadonlyArray = ["x", "y", "size", "category", "series", "time"] + +function fieldKind(profile: ChartDataProfile, field: string): FieldKind | "unknown" { + const summary = profile.fields[field] + if (!summary) return "unknown" + if (summary.type === "numeric") return "numeric" + if (summary.type === "categorical") return "categorical" + if (summary.type === "date") return "date" + return "unknown" +} + +function fittingComponents(profile: ChartDataProfile): Set { + const set = new Set() + for (const capability of getCapabilities()) { + if (capability.fits(profile) === null) set.add(capability.component) + } + return set +} + +/** + * Compare two profiles and report what changed plus how the change affects + * chart suitability. Useful for: + * + * • "Why does my dashboard look different after the data refreshed?" + * • Editor warnings when a CSV upload would change the visible charts. + * • CI checks that flag when a fixture migration affects descriptor coverage. + * + * Doesn't compute *which suggestions ranked first* (that requires intent + + * full suggestCharts). Reports only structural deltas — added/removed fields, + * type changes, primary role re-assignments, fit set changes. 
/**
 * Compare two dataset profiles and describe the structural differences,
 * including which registered charts gain or lose fitness.
 *
 * Reported deltas: row count, added/removed fields, inferred-type changes for
 * fields present in both profiles, primary-role re-assignments, and changes to
 * the set of components whose `fits()` gate passes. Ranking is not evaluated.
 *
 * @param a Baseline profile ("before").
 * @param b Profile to compare against the baseline ("after").
 * @returns The diff; `unchanged` is true only when every delta is empty and
 *   the row counts are equal.
 *
 * @example
 * const a = profileData(yesterdaysData)
 * const b = profileData(todaysData)
 * const diff = diffProfile(a, b)
 * if (diff.becameUnfit.length) {
 *   console.warn(`These charts no longer fit: ${diff.becameUnfit.join(", ")}`)
 * }
 */
export function diffProfile(a: ChartDataProfile, b: ChartDataProfile): ProfileDiff {
  const namesBefore = Object.keys(a.fields)
  const namesAfter = Object.keys(b.fields)
  const knownBefore = new Set(namesBefore)
  const knownAfter = new Set(namesAfter)

  // Field membership deltas, each in default (code-unit) sort order.
  const added = namesAfter.filter((name) => !knownBefore.has(name)).sort()
  const removed = namesBefore.filter((name) => !knownAfter.has(name)).sort()

  // Only fields present on both sides can change type.
  const typeChanges = namesAfter
    .filter((name) => knownBefore.has(name))
    .map((name): FieldTypeChange => ({ field: name, from: fieldKind(a, name), to: fieldKind(b, name) }))
    .filter((change) => change.from !== change.to)
    .sort((left, right) => left.field.localeCompare(right.field))

  // Roles are reported in PRIMARY_ROLES order, not alphabetically.
  const primaryChanges = PRIMARY_ROLES
    .map((role): PrimaryRoleChange => ({ role, from: a.primary[role], to: b.primary[role] }))
    .filter((change) => change.from !== change.to)

  const fitBefore = fittingComponents(a)
  const fitAfter = fittingComponents(b)
  const becameUnfit = Array.from(fitBefore).filter((name) => !fitAfter.has(name)).sort()
  const becameFit = Array.from(fitAfter).filter((name) => !fitBefore.has(name)).sort()

  const deltas = [added, removed, typeChanges, primaryChanges, becameUnfit, becameFit]
  const unchanged = deltas.every((list) => list.length === 0) && a.rowCount === b.rowCount

  return {
    rowCountChange: b.rowCount - a.rowCount,
    added,
    removed,
    typeChanges,
    primaryChanges,
    becameUnfit,
    becameFit,
    unchanged,
  }
}
00000000..8faa6e85 --- /dev/null +++ b/src/components/ai/inferIntent.test.ts @@ -0,0 +1,51 @@ +import { describe, it, expect } from "vitest" +import { inferIntent } from "./inferIntent" + +describe("inferIntent", () => { + const cases: Array<[string, string]> = [ + ["when did revenue peak?", "outlier-detection"], + ["show me the trend over time", "trend"], + ["which products are the top sellers?", "rank"], + ["what's the breakdown of revenue by region?", "part-to-whole"], + ["how is the distribution of test scores?", "distribution"], + ["is there a relationship between hours and grade?", "correlation"], + ["show conversion funnel from signup to purchase", "flow"], + ["display the org hierarchy", "hierarchy"], + ["what does this look like across countries?", "geo"], + ["how did the cohort composition change over time?", "composition-over-time"], + ["where did revenue suddenly shift?", "change-detection"], + ["compare regions side by side", "compare-series"], + ] + + it.each(cases)("maps %j → %s", (query, expected) => { + const result = inferIntent(query) + expect(result?.intent).toBe(expected) + }) + + it("returns null for empty or non-matching queries", () => { + expect(inferIntent("")).toBeNull() + expect(inferIntent(" ")).toBeNull() + expect(inferIntent("hello there")).toBeNull() + expect(inferIntent("what is this?")).toBeNull() + }) + + it("composition-over-time outranks plain trend when both apply", () => { + const result = inferIntent("show me the composition over time of revenue") + expect(result?.intent).toBe("composition-over-time") + }) + + it("returns alternates when multiple intents apply", () => { + const result = inferIntent("trend by category over time") + expect(result).not.toBeNull() + if (result) { + expect(result.confidence).toBeGreaterThan(0) + // alternates may be empty or populated depending on patterns matched + expect(Array.isArray(result.alternates)).toBe(true) + } + }) + + it("geo wins over other intents when geography is mentioned", () => 
import type { IntentId, BuiltInIntentId } from "./intents"

/**
 * Pure-heuristic mapping from a natural-language query to a canonical intent.
 *
 * Designed for chat-style interrogation surfaces (vizmart's Shopkeeper, any
 * "ask the chart" UI) where the user types in their own words and the
 * suggestion engine needs an intent to rank by. Built on regex patterns —
 * fast, zero-dependency, offline. Returns the single best-matching intent
 * or `null` if nothing clearly applies.
 *
 * Consumers who want a richer mapping (handling negation, multi-intent
 * queries, domain jargon) should layer their own LLM call on top of this
 * heuristic — it's a good cheap default, not a replacement.
 */

/** One row of the intent table: the regexes that signal an intent and its score. */
interface IntentPattern {
  intent: BuiltInIntentId
  /** Patterns tested against the query (case-insensitive). Any single hit counts. */
  patterns: RegExp[]
  /** Confidence assigned when the intent matches — higher outranks lower. */
  weight: number
}

// Authoring note: every alternative sits inside `\b( … )\b`, so a bare stem
// such as `categor` only matches when it is a complete word. Stems meant to
// cover inflected forms therefore carry an explicit `\w*` suffix.
const PATTERNS: IntentPattern[] = [
  {
    intent: "outlier-detection",
    weight: 4,
    patterns: [
      /\b(outlier|outliers|anomal\w*|extreme|extremes|unusual|stands? out|sticks? out|odd one)\b/i,
      /\b(peak|peaks|highest|lowest|biggest spike|spike|min|max|maximum|minimum)\b/i,
    ],
  },
  {
    intent: "trend",
    weight: 4,
    patterns: [
      /\b(trend|trends|trending|trajectory|over time|across time|growth|decline|rising|falling|increasing|decreasing)\b/i,
      /\b(history|historical|evolved|evolution|change over)\b/i,
    ],
  },
  {
    intent: "change-detection",
    weight: 3,
    patterns: [
      /\b(when did|what changed|shift|shifted|breakpoint|inflection|turning point|sudden(?:ly)?|abrupt(?:ly)?)\b/i,
    ],
  },
  {
    intent: "rank",
    weight: 4,
    patterns: [
      /\b(rank|ranking|ranked|biggest|smallest|largest|order by|sorted|best|worst|leaderboard)\b/i,
      /\btop\s+(\d+|sellers?|performers?|picks?|results?|categories|items?)\b/i,
      /\bbottom\s+(\d+|results?|items?)\b/i,
      /\b(who has the most|which.*most|which.*highest|which.*lowest)\b/i,
    ],
  },
  {
    intent: "part-to-whole",
    weight: 4,
    patterns: [
      /\b(share|shares|composition|portion|portions|fraction|percentage of|percent of|breakdown|make up|made up of|slice|slices)\b/i,
      /\b(part of|part to whole|piece of the pie|how much of)\b/i,
    ],
  },
  {
    intent: "composition-over-time",
    weight: 5, // outranks plain "trend" + "part-to-whole" when both appear
    patterns: [
      /\b(composition.*time|share.*over time|share.*across|how.*mix.*changed|stacked.*time)\b/i,
      /\b(over time.*share|over time.*composition|over time.*breakdown)\b/i,
    ],
  },
  {
    intent: "distribution",
    weight: 4,
    patterns: [
      /\b(distribution|distributions|spread|variance|variation|histogram|skew|skewed|range of|how.*spread|shape of|bell curve)\b/i,
      /\b(typical value|typical range|where do most|mode|median)\b/i,
    ],
  },
  {
    intent: "correlation",
    weight: 4,
    patterns: [
      /\b(correl\w*|relationship|related to|connected to|associated|connection between|relate to)\b/i,
      /\b(\w+ vs\.? \w+|\w+ versus \w+|\w+ against \w+|scatter)\b/i,
    ],
  },
  {
    intent: "compare-series",
    weight: 3,
    patterns: [
      /\b(compare.*series|compare.*groups|compare.*cohorts|side by side|group.*vs|series.*vs)\b/i,
      /\b(how do.*compare|each group|each series|each cohort)\b/i,
    ],
  },
  {
    intent: "compare-categories",
    weight: 3,
    patterns: [
      /\b(compare.*categor\w*|category.*compar\w*|which is bigger|how does.*compare|differences? between)\b/i,
    ],
  },
  {
    intent: "flow",
    weight: 4,
    patterns: [
      /\b(flow|flows|transition|transitions|movement|moved from|funnel|conversion|drop[- ]off|sankey|chord)\b/i,
      /\b(from.*to|source.*target|path|journey|pipeline)\b/i,
    ],
  },
  {
    intent: "hierarchy",
    weight: 4,
    patterns: [
      /\b(hierarchy|hierarchical|tree|nested|parent.*child|sub-?categor\w*|drill down|drilldown|breakdown by level)\b/i,
    ],
  },
  {
    intent: "geo",
    weight: 5, // geographic mentions are almost always intent-defining
    patterns: [
      // Strong: explicitly geographic vocabulary that's unambiguous
      /\b(geographic|geography|geospatial|map|maps|country|countries|cities|latitude|longitude|spatial|cartogr\w*|choropleth)\b/i,
      // Medium: "city" alone, "state" only when clearly a place
      /\b(city|us state|each state|the states)\b/i,
      // "across" + place noun is a strong geo signal (regions get caught here)
      /\bacross\s+(countries|states|regions|cities|the world|the country)\b/i,
    ],
  },
]

export interface InferIntentResult {
  intent: IntentId
  /** 1..5 score for ranking ties. Higher = stronger match. */
  confidence: number
  /** Other plausible intents, sorted by confidence. */
  alternates: ReadonlyArray<{ intent: IntentId; confidence: number }>
}

/**
 * Map a natural-language query to a built-in intent.
 *
 * Every table row whose patterns hit the query contributes its intent at the
 * row's `weight`. The highest-weighted intent is returned and the rest become
 * `alternates`, in descending confidence. Equal confidences keep table order,
 * because the sort is stable.
 *
 * @param query Free-text question from the user.
 * @returns The best match with its alternates, or `null` when the query is
 *   empty, whitespace-only, not a string, or matches no pattern.
 *
 * @example
 * inferIntent("when did revenue peak?")
 * // → { intent: "outlier-detection", confidence: 4,
 * //     alternates: [{ intent: "change-detection", confidence: 3 }] }
 * inferIntent("show me the trend over time")
 * // → { intent: "trend", confidence: 4, alternates: [] }
 * inferIntent("hello")
 * // → null
 */
export function inferIntent(query: string): InferIntentResult | null {
  if (typeof query !== "string" || query.trim().length === 0) return null

  const matches = new Map<IntentId, number>()
  for (const { intent, patterns, weight } of PATTERNS) {
    // An intent scores once per row, however many of the row's patterns hit.
    if (!patterns.some((re) => re.test(query))) continue
    // The +0.5 bump applies only if the table lists the same intent in more
    // than one row; either way the score is capped at 5.
    const previous = matches.get(intent)
    matches.set(intent, Math.min(5, previous === undefined ? weight : previous + 0.5))
  }

  const sorted = Array.from(matches, ([intent, confidence]) => ({ intent, confidence }))
    .sort((a, b) => b.confidence - a.confidence)

  const [top, ...alternates] = sorted
  if (top === undefined) return null
  return { intent: top.intent, confidence: top.confidence, alternates }
}
/** Identifiers of the intents that ship with the library. */
export type BuiltInIntentId =
  | "trend"
  | "compare-series"
  | "compare-categories"
  | "rank"
  | "part-to-whole"
  | "distribution"
  | "correlation"
  | "flow"
  | "hierarchy"
  | "geo"
  | "outlier-detection"
  | "composition-over-time"
  | "change-detection"

/**
 * Any intent id: a built-in one or a custom one registered at runtime.
 * `string & {}` keeps editor autocomplete for the built-in literals while
 * still accepting arbitrary strings.
 */
export type IntentId = BuiltInIntentId | (string & {})

/** Human-readable description of one intent. */
export interface IntentDescriptor {
  id: IntentId
  label: string
  description: string
  /** Soft hint of which chart family typically serves this intent. */
  familyHint?: "time-series" | "categorical" | "distribution" | "relationship" | "flow" | "network" | "hierarchy" | "geo"
}

type FamilyHint = NonNullable<IntentDescriptor["familyHint"]>

/** Build one built-in descriptor; keeps the table below to one line per intent. */
function builtIn(
  id: BuiltInIntentId,
  label: string,
  description: string,
  familyHint: FamilyHint
): IntentDescriptor {
  return { id, label, description, familyHint }
}

/** The fixed, ordered list of intents the library knows out of the box. */
const BUILT_IN_INTENTS: IntentDescriptor[] = [
  builtIn("trend", "Trend over time", "How a single metric changes over an ordered sequence (typically time).", "time-series"),
  builtIn("compare-series", "Compare series", "Compare multiple measured series across a shared x domain.", "time-series"),
  builtIn("compare-categories", "Compare categories", "Compare a single measure across discrete categories.", "categorical"),
  builtIn("rank", "Rank", "Show category ordering by a measure (largest to smallest).", "categorical"),
  builtIn("part-to-whole", "Part to whole", "Show how individual categories share a total.", "categorical"),
  builtIn("distribution", "Distribution", "Show the shape, spread, and central tendency of a numeric variable.", "distribution"),
  builtIn("correlation", "Correlation", "Show the relationship between two (or more) numeric variables.", "relationship"),
  builtIn("flow", "Flow", "Show movement, transitions, or transfers between states.", "flow"),
  builtIn("hierarchy", "Hierarchy", "Show parent/child structure or nested totals.", "hierarchy"),
  builtIn("geo", "Geography", "Show values bound to geographic locations or regions.", "geo"),
  builtIn("outlier-detection", "Outlier detection", "Surface individual data points that diverge from the rest.", "distribution"),
  builtIn("composition-over-time", "Composition over time", "Show how the share of categories changes across an ordered sequence.", "time-series"),
  builtIn("change-detection", "Change detection", "Surface where or when a metric shifted meaningfully.", "time-series"),
]

// Module-level registry, seeded with the built-ins in table order.
const intentRegistry = new Map<IntentId, IntentDescriptor>()
for (const descriptor of BUILT_IN_INTENTS) {
  intentRegistry.set(descriptor.id, descriptor)
}

/** Look up a descriptor by id; `undefined` when the id is not registered. */
export function getIntent(id: IntentId): IntentDescriptor | undefined {
  return intentRegistry.get(id)
}

/** Snapshot of every registered intent (built-in and custom), in registration order. */
export function listIntents(): IntentDescriptor[] {
  return [...intentRegistry.values()]
}

/**
 * Add a custom intent at runtime. Registering an id that already exists
 * replaces the stored descriptor rather than adding a second entry.
 */
export function registerIntent(intent: IntentDescriptor): void {
  intentRegistry.set(intent.id, intent)
}
*/ +export const BUILT_IN_INTENT_IDS: ReadonlySet = new Set( + BUILT_IN_INTENTS.map((intent) => intent.id) +) as ReadonlySet diff --git a/src/components/ai/profileData.test.ts b/src/components/ai/profileData.test.ts new file mode 100644 index 00000000..0c24d634 --- /dev/null +++ b/src/components/ai/profileData.test.ts @@ -0,0 +1,58 @@ +import { describe, it, expect } from "vitest" +import { profileData } from "./profileData" + +describe("profileData", () => { + it("identifies time/x/y/series candidates from a temporal dataset", () => { + const data = [ + { date: "2024-01-01", revenue: 1200, region: "EU" }, + { date: "2024-02-01", revenue: 1400, region: "EU" }, + { date: "2024-03-01", revenue: 1100, region: "EU" }, + { date: "2024-01-01", revenue: 900, region: "NA" }, + { date: "2024-02-01", revenue: 1100, region: "NA" }, + { date: "2024-03-01", revenue: 1500, region: "NA" }, + ] + const profile = profileData(data) + expect(profile.hasTimeAxis).toBe(true) + expect(profile.primary.time).toBe("date") + expect(profile.primary.x).toBe("date") + expect(profile.primary.y).toBe("revenue") + expect(profile.primary.series).toBe("region") + expect(profile.seriesCount).toBe(2) + expect(profile.hasRepeatedX).toBe(true) + }) + + it("handles a categorical dataset (bar-chart-shaped)", () => { + const data = [ + { product: "Widget", units: 30 }, + { product: "Gadget", units: 50 }, + { product: "Sprocket", units: 20 }, + ] + const profile = profileData(data) + expect(profile.primary.category).toBe("product") + expect(profile.primary.y).toBe("units") + expect(profile.categoryCount).toBe(3) + expect(profile.hasTimeAxis).toBe(false) + }) + + it("detects monotonic x", () => { + const data = Array.from({ length: 10 }, (_, i) => ({ x: i, y: Math.random() })) + const profile = profileData(data) + expect(profile.monotonicX).toBe(true) + }) + + it("detects hierarchy structure via rawInput", () => { + const profile = profileData([], { rawInput: { name: "root", children: [{ name: "a", value: 1 
}] } }) + expect(profile.hasHierarchy).toBe(true) + expect(profile.hasNetwork).toBe(false) + }) + + it("detects network structure via rawInput", () => { + const profile = profileData([], { rawInput: { nodes: [{}], edges: [{}] } }) + expect(profile.hasNetwork).toBe(true) + }) + + it("detects geo structure via rawInput", () => { + const profile = profileData([], { rawInput: { type: "FeatureCollection", features: [] } }) + expect(profile.hasGeo).toBe(true) + }) +}) diff --git a/src/components/ai/profileData.ts b/src/components/ai/profileData.ts new file mode 100644 index 00000000..92516f62 --- /dev/null +++ b/src/components/ai/profileData.ts @@ -0,0 +1,365 @@ +import type { Datum } from "../charts/shared/datumTypes" +import { summarizeData, type DataSummary, type FieldSummary } from "../data/DataSummarizer" +import type { ChartDataProfile, FieldCandidate, FieldKind } from "./chartCapabilityTypes" + +const X_FIELD_HINT = /^(x|index|rank|order|step|sequence|year|quarter|qtr|fiscal|month|week|day|date|time|timestamp)$/i +const Y_FIELD_HINT = /^(y|value|amount|total|count|revenue|sales|price|score|rate|population|measure)$/i +const SIZE_FIELD_HINT = /(size|magnitude|volume|weight|count|amount)/i +const CATEGORY_FIELD_HINT = /^(category|label|name|type|group|region|segment|kind|class)$/i +const SERIES_FIELD_HINT = /^(series|group|type|category|segment|cohort|product)$/i + +const NUMERIC_LIKE_FOR_SIZE = new Set(["numeric"]) +const NUMERIC_OR_TIME_FOR_X = new Set(["numeric", "date"]) +const NUMERIC_FOR_Y = new Set(["numeric"]) +const DATE_FOR_TIME = new Set(["date"]) +const CATEGORICAL_LIKE = new Set(["categorical", "boolean"]) + +function fieldKindFromSummary(summary: FieldSummary): FieldKind { + if (summary.type === "numeric") return "numeric" + if (summary.type === "date") return "date" + if (summary.type === "categorical") return "categorical" + return "unknown" +} + +function nameBonus(field: string, hint: RegExp): number { + return hint.test(field) ? 
// ISO-8601-looking prefix (YYYY-MM…). Only strings of this shape are handed to
// Date.parse, so arbitrary text is never leniently interpreted as a date.
const ISO_DATE_PREFIX = /^\d{4}-\d{2}/

/**
 * Convert a cell value to a comparable number: epoch millis for `Date`
 * objects and ISO date strings, `Number(value)` otherwise. Returns `NaN`
 * when the value has no usable ordering.
 */
function toOrderable(value: unknown): number {
  if (value instanceof Date) return value.getTime()
  const asNumber = Number(value)
  if (Number.isFinite(asNumber)) return asNumber
  if (typeof value === "string" && ISO_DATE_PREFIX.test(value)) return Date.parse(value)
  return NaN
}

/**
 * Whether `field` is non-decreasing in row order.
 *
 * Null/undefined cells are skipped. Any cell that cannot be ordered makes the
 * result `false`, as does a field with no usable cells at all. ISO date
 * strings are compared chronologically (previously they were coerced with
 * `Number()` and always failed).
 */
function monotonic(data: ReadonlyArray<Datum>, field: string): boolean {
  let prev: number | null = null
  for (let i = 0; i < data.length; i++) {
    const v = data[i]?.[field]
    if (v == null) continue
    const n = toOrderable(v)
    if (!Number.isFinite(n)) return false
    if (prev !== null && n < prev) return false
    prev = n
  }
  return prev !== null
}

/**
 * Score every summarized field as a candidate for one role (x, y, size, …).
 *
 * @param fields - Per-field summaries keyed by field name.
 * @param data - The rows; only read when `computeMonotonic` is set.
 * @param allowed - Field kinds eligible for this role; other kinds are dropped.
 * @param hint - Field-name pattern that earns a +0.2 bonus (via `nameBonus`).
 * @param options - `computeMonotonic` additionally tags numeric/date
 *   candidates with `monotonic` and rewards monotonic ones with +0.2.
 * @returns Candidates sorted by descending quality, each clamped to [0, 1].
 */
function rankCandidates(
  fields: Record<string, FieldSummary>,
  data: ReadonlyArray<Datum>,
  allowed: ReadonlySet<FieldKind>,
  hint: RegExp,
  options: { computeMonotonic?: boolean } = {}
): FieldCandidate[] {
  const out: FieldCandidate[] = []
  for (const [field, summary] of Object.entries(fields)) {
    const kind = fieldKindFromSummary(summary)
    if (!allowed.has(kind)) continue
    let quality = 0.5
    quality += nameBonus(field, hint)

    let distinctCount: number | undefined
    if (summary.type === "categorical") {
      distinctCount = summary.distinctCount
      // Categories with too few or too many values are less useful
      if (distinctCount && distinctCount >= 2 && distinctCount <= 12) quality += 0.2
      if (distinctCount && distinctCount > 50) quality -= 0.2
    }
    if (summary.type === "numeric") {
      // Stable numerics with a real range score better
      if (Number.isFinite(summary.min) && Number.isFinite(summary.max) && summary.max > summary.min) quality += 0.1
    }

    const candidate: FieldCandidate = {
      field,
      kind,
      quality: Math.max(0, Math.min(1, quality)),
      distinctCount,
    }
    if (options.computeMonotonic && (kind === "numeric" || kind === "date")) {
      candidate.monotonic = monotonic(data, field)
      if (candidate.monotonic) candidate.quality = Math.min(1, candidate.quality + 0.2)
    }
    out.push(candidate)
  }
  out.sort((a, b) => b.quality - a.quality)
  return out
}

/** Count distinct non-null values of `field`, compared by their string form. */
function distinct(data: ReadonlyArray<Datum>, field: string): number {
  const seen = new Set<string>()
  for (let i = 0; i < data.length; i++) {
    const v = data[i]?.[field]
    if (v == null) continue
    seen.add(String(v))
  }
  return seen.size
}

/** True as soon as any non-null value of `field` (by string form) occurs twice. */
function hasRepeatedField(data: ReadonlyArray<Datum>, field: string): boolean {
  const seen = new Set<string>()
  for (let i = 0; i < data.length; i++) {
    const v = data[i]?.[field]
    if (v == null) continue
    const key = String(v)
    if (seen.has(key)) return true
    seen.add(key)
  }
  return false
}

/**
 * Result of structure detection on a raw (non-row) payload.
 * NOTE(review): the element types were not visible in the reviewed text and
 * are restored here as `Datum` — confirm against `ChartDataProfile`.
 */
interface InferStructure {
  hasHierarchy: boolean
  hasNetwork: boolean
  hasGeo: boolean
  network?: { nodes: ReadonlyArray<Datum>; edges: ReadonlyArray<Datum> }
  hierarchy?: Datum
  geo?: { features: ReadonlyArray<Datum>; points?: ReadonlyArray<Datum>; flows?: ReadonlyArray<Datum> }
}

/**
 * Classify a raw payload as GeoJSON, hierarchy, or network.
 *
 * Checks run in that order and the first match wins:
 *  - `{ type: "FeatureCollection", features: [...] }` → geo
 *  - `{ children: [...] }` → hierarchy
 *  - `{ nodes: [...], edges | links: [...] }` → network
 * Anything else (including arrays and non-objects) reports no structure.
 */
function inferStructure(rawInput: unknown): InferStructure {
  if (rawInput && typeof rawInput === "object" && !Array.isArray(rawInput)) {
    const obj = rawInput as Record<string, unknown>
    if (obj.type === "FeatureCollection" && Array.isArray(obj.features)) {
      return {
        hasHierarchy: false,
        hasNetwork: false,
        hasGeo: true,
        geo: {
          features: obj.features as ReadonlyArray<Datum>,
          points: Array.isArray(obj.points) ? (obj.points as ReadonlyArray<Datum>) : undefined,
          flows: Array.isArray(obj.flows) ? (obj.flows as ReadonlyArray<Datum>) : undefined,
        },
      }
    }
    if (Array.isArray(obj.children)) {
      return { hasHierarchy: true, hasNetwork: false, hasGeo: false, hierarchy: obj as Datum }
    }
    if (Array.isArray(obj.nodes) && (Array.isArray(obj.edges) || Array.isArray(obj.links))) {
      // Pick whichever of edges/links is actually an array. `edges ?? links`
      // would return a non-array `edges` (e.g. a string) even when `links`
      // is the valid list.
      const edges = (Array.isArray(obj.edges) ? obj.edges : obj.links) as ReadonlyArray<Datum>
      return {
        hasHierarchy: false,
        hasNetwork: true,
        hasGeo: false,
        network: { nodes: obj.nodes as ReadonlyArray<Datum>, edges },
      }
    }
  }
  return { hasHierarchy: false, hasNetwork: false, hasGeo: false }
}

// Field-name patterns for transition-event detection. A row like
//   { stage: "Qualified", nextStage: "Discovery", startTime: "...", value: 14 }
// is conceptually an edge in a network — even though the rows themselves are
// a flat array, not a {nodes, edges} object. Recognising this pattern lets
// SankeyDiagram, ProcessSankey, ChordDiagram, and ForceDirectedGraph fit.
const SOURCE_FIELD_PATTERNS = /^(source|from|origin|stage|currentstage|sourcestage|fromstage)$/i
const TARGET_FIELD_PATTERNS =
  /^(target|to|destination|nextstage|next|targetstage|tostage|destinationstage|status)$/i
const TRANSITION_START_PATTERNS = /^(starttime|startedat|enteredat|startdate|start|timestamp|date|time)$/i
const TRANSITION_END_PATTERNS = /^(endtime|endedat|exitedat|completedat|finishtime|enddate|end)$/i
const TRANSITION_VALUE_PATTERNS = /^(value|weight|amount|count|magnitude|volume)$/i

/** First field name matching `pattern`, or `undefined` when none does. */
function findField(fieldNames: ReadonlyArray<string>, pattern: RegExp): string | undefined {
  return fieldNames.find((f) => pattern.test(f))
}

/**
 * Detect transition-event data — a flat array of rows where each row encodes
 * an edge ({source, target, value?, startTime?}). When detected, derive an
 * aggregated {nodes, edges} network so the network/flow chart family becomes
 * viable.
 *
 * Field roles are resolved from the keys of the first row only.
 *
 * @returns The derived network, or `null` when the rows don't look like
 *   transitions: fewer than 3 rows, no distinct source/target fields, or
 *   fewer than 3 rows whose source and target are both non-empty and differ.
 */
function detectTransitionNetwork(
  rows: ReadonlyArray<Datum>,
): { nodes: ReadonlyArray<Datum>; edges: ReadonlyArray<Datum> } | null {
  if (rows.length < 3) return null
  const firstRow = rows[0]
  if (!firstRow || typeof firstRow !== "object") return null
  const fieldNames = Object.keys(firstRow)

  const sourceField = findField(fieldNames, SOURCE_FIELD_PATTERNS)
  const targetField = findField(fieldNames, TARGET_FIELD_PATTERNS)
  if (!sourceField || !targetField || sourceField === targetField) return null

  const startTimeField = findField(fieldNames, TRANSITION_START_PATTERNS)
  const endTimeField = findField(fieldNames, TRANSITION_END_PATTERNS)
  const valueField = findField(fieldNames, TRANSITION_VALUE_PATTERNS)

  // Validate: at least 3 rows must have both source and target with different,
  // non-empty values. Guards against false positives on data where one of the
  // matched fields happens to be present but isn't a transition signal.
  const validRows: Datum[] = []
  for (const row of rows) {
    if (!row) continue
    const source = row[sourceField]
    const target = row[targetField]
    if (source == null || target == null) continue
    const sourceStr = String(source).trim()
    const targetStr = String(target).trim()
    if (!sourceStr || !targetStr || sourceStr === targetStr) continue
    validRows.push(row)
  }
  if (validRows.length < 3) return null

  // Build nodes (one per distinct source/target label) and edges (one per
  // distinct source→target pair, summing value across duplicate rows).
  const nodes = new Map<string, Datum>()
  const edgeWeights = new Map<string, number>()
  const edgeMeta = new Map<string, Datum>()

  for (const row of validRows) {
    const sourceLabel = String(row[sourceField]).trim()
    const targetLabel = String(row[targetField]).trim()
    if (!nodes.has(sourceLabel)) nodes.set(sourceLabel, { id: sourceLabel, label: sourceLabel })
    if (!nodes.has(targetLabel)) nodes.set(targetLabel, { id: targetLabel, label: targetLabel })

    // JSON-encode the pair so the key is unambiguous. A plain "a->b" join
    // collides when a label itself contains "->" ("a->b"→"c" vs "a"→"b->c").
    const edgeKey = JSON.stringify([sourceLabel, targetLabel])
    const weight = valueField ? Number(row[valueField]) : 1
    const w = Number.isFinite(weight) ? weight : 1
    edgeWeights.set(edgeKey, (edgeWeights.get(edgeKey) ?? 0) + w)

    // Preserve the *first* row's timestamps for the edge — ProcessSankey reads
    // startTime/endTime off each edge for its temporal layout. Aggregating
    // weights across duplicates is correct; aggregating timestamps isn't.
    if (!edgeMeta.has(edgeKey)) {
      edgeMeta.set(edgeKey, {
        source: sourceLabel,
        target: targetLabel,
        ...(startTimeField ? { startTime: row[startTimeField] } : {}),
        ...(endTimeField ? { endTime: row[endTimeField] } : {}),
      })
    }
  }

  const edges: Datum[] = []
  for (const [key, meta] of edgeMeta) {
    edges.push({ ...meta, value: edgeWeights.get(key) ?? 1 })
  }

  return { nodes: Array.from(nodes.values()), edges }
}

export interface ProfileDataOptions {
  /** If you have access to the raw input (which might be {nodes, edges} or GeoJSON), pass it for structure detection. */
  rawInput?: unknown
  /** Override the field used as the primary series, useful when the heuristic guesses wrong. */
  seriesField?: string
}

/**
 * Build a ChartDataProfile from row data. Extends DataSummary with shape inference —
 * candidate fields per role, distinct counts, monotonicity, and structure detection.
 *
 * Designed to be called once per dataset; the result is what `suggestCharts` and
 * capability evaluators consume.
 *
 * @param data - Row objects. `null`, `undefined`, and non-arrays are profiled as no rows.
 * @param options - Optional raw payload for structure detection and a series-field override.
 * @returns The summary spread together with role candidates, primary field
 *   picks, counts, and structure flags/payloads.
 */
export function profileData(
  data: ReadonlyArray<Datum> | null | undefined,
  options: ProfileDataOptions = {}
): ChartDataProfile {
  const summary = summarizeData(data ?? [])
  const rows: ReadonlyArray<Datum> = Array.isArray(data) ? data : []
  const structure = inferStructure(options.rawInput)

  // Transition-event detection: a flat array of rows with source/target fields
  // is conceptually a network even though there's no {nodes, edges} payload.
  // Derive one so flow charts (SankeyDiagram, ProcessSankey, ChordDiagram,
  // ForceDirectedGraph) become viable on this data shape. Skipped whenever
  // rawInput already yielded any structure (network, hierarchy, or geo) —
  // an explicit payload takes precedence.
  if (!structure.hasNetwork && !structure.hasHierarchy && !structure.hasGeo) {
    const transitionNet = detectTransitionNetwork(rows)
    if (transitionNet) {
      structure.hasNetwork = true
      structure.network = transitionNet
    }
  }

  const xCandidates = rankCandidates(summary.fields, rows, NUMERIC_OR_TIME_FOR_X, X_FIELD_HINT, { computeMonotonic: true })
  const yCandidates = rankCandidates(summary.fields, rows, NUMERIC_FOR_Y, Y_FIELD_HINT)
  const sizeCandidates = rankCandidates(summary.fields, rows, NUMERIC_LIKE_FOR_SIZE, SIZE_FIELD_HINT)
  const categoryCandidates = rankCandidates(summary.fields, rows, CATEGORICAL_LIKE, CATEGORY_FIELD_HINT)
  const seriesCandidates = rankCandidates(summary.fields, rows, CATEGORICAL_LIKE, SERIES_FIELD_HINT)
  const timeCandidates = rankCandidates(summary.fields, rows, DATE_FOR_TIME, /(date|time|timestamp)/i, { computeMonotonic: true })

  // x assignment proceeds in three tiers, each tagged so downstream logic
  // can tell *how confident* we are that x is meaningful:
  //   • "time"   — there's a date/time field; almost certainly the x axis
  //   • "named"  — a numeric named like "month", "rank", "year"; high confidence
  //   • "scatter"— two+ numerics with no x-name signal; we pick one as a fallback
  // The category/series disambiguation later uses this — when x is a scatter
  // fallback, the lone categorical is more useful as `category` than `series`.
  const time = timeCandidates[0]?.field
  let x: string | undefined = time
  let xProvenance: "time" | "named" | "scatter" | "none" = time ? "time" : "none"
  if (!x) {
    const xNamed = xCandidates.find((c) => X_FIELD_HINT.test(c.field) && c.kind === "numeric")
    if (xNamed) {
      x = xNamed.field
      xProvenance = "named"
    }
  }

  // y: best numeric that isn't already x
  let y: string | undefined = yCandidates.find((c) => c.field !== x)?.field

  // Scatter pattern: two+ numerics, no time-or-named x.
  if (!x && y) {
    const numericFields = Object.entries(summary.fields)
      .filter(([_, s]) => s.type === "numeric")
      .map(([k]) => k)
    if (numericFields.length >= 2) {
      x = numericFields.find((f) => f !== y)
      if (x) xProvenance = "scatter"
    }
  }

  const size = sizeCandidates.find((c) => c.field !== x && c.field !== y)?.field

  // Category vs. series disambiguation.
  //   • Strong x (time/named): the lone categorical is the series (lineBy / stackBy).
  //   • Scatter-fallback x or no x: the lone categorical is the category — that's
  //     what enables BoxPlot/ViolinPlot/SwarmPlot on data like {id, value, cohort}.
  const strongX = xProvenance === "time" || xProvenance === "named"
  const categoricalList = categoryCandidates.map((c) => c.field)
  let category: string | undefined
  let series: string | undefined
  if (strongX) {
    series = options.seriesField ?? categoricalList[0]
    category = categoricalList.find((f) => f !== series)
  } else {
    category = categoricalList[0]
    series = options.seriesField ?? categoricalList.find((f) => f !== category)
  }

  const categoryCount = category ? distinct(rows, category) : undefined
  const seriesCount = series ? distinct(rows, series) : undefined
  const uniqueXCount = x ? distinct(rows, x) : undefined
  const hasRepeatedX = x ? hasRepeatedField(rows, x) : false
  const monotonicX = xCandidates.find((c) => c.field === x)?.monotonic ?? false
  const hasTimeAxis = timeCandidates.length > 0

  return {
    ...summary,
    data: rows,
    candidates: {
      x: xCandidates,
      y: yCandidates,
      size: sizeCandidates,
      category: categoryCandidates,
      series: seriesCandidates,
      time: timeCandidates,
    },
    primary: { x, y, size, category, series, time },
    categoryCount,
    seriesCount,
    uniqueXCount,
    hasRepeatedX,
    monotonicX,
    hasTimeAxis,
    hasHierarchy: structure.hasHierarchy,
    hasNetwork: structure.hasNetwork,
    hasGeo: structure.hasGeo,
    xProvenance,
    network: structure.network,
    hierarchy: structure.hierarchy,
    geo: structure.geo,
  }
}
+ */ + +const monthlyRevenueMultiSeries = (() => { + const months = Array.from({ length: 12 }, (_, i) => i + 1) + const regions = ["EU", "NA", "APAC"] + return regions.flatMap((region, regionIdx) => + months.map((month) => ({ + month, + revenue: 800 + month * (200 + regionIdx * 40) + Math.sin(month) * 150, + region + })) + ) +})() + +const monthlyRevenueOneSeries = Array.from({ length: 12 }, (_, i) => ({ + month: i + 1, + revenue: 1000 + i * 150 + Math.sin(i / 2) * 100 +})) + +const productSales = [ + { product: "Widget", units: 480 }, + { product: "Gadget", units: 620 }, + { product: "Sprocket", units: 290 }, + { product: "Whatsit", units: 740 }, + { product: "Doohickey", units: 410 } +] + +const surveySatisfaction = Array.from({ length: 150 }, (_, i) => ({ + respondent_id: i + 1, + satisfaction: Math.max( + 1, + Math.min(10, 6 + Math.sin(i / 7) * 2 + Math.random() * 3 - 1) + ), + cohort: ["Beta", "GA", "Enterprise"][i % 3] +})) + +const studyHoursVsGrade = Array.from({ length: 80 }, (_, i) => { + const hours = Math.max(0, Math.random() * 40) + return { + student_id: `s${i + 1}`, + hours, + grade: Math.min(100, hours * 1.8 + 30 + (Math.random() - 0.5) * 20) + } +}) + +const conversionFunnel = [ + { stage: "Visit", users: 10000 }, + { stage: "Signup", users: 2400 }, + { stage: "Trial", users: 1100 }, + { stage: "Paid", users: 380 } +] + +const orgHierarchy = { + name: "Acme", + children: [ + { + name: "Engineering", + children: [ + { name: "Platform", value: 18 }, + { name: "Product", value: 22 } + ] + }, + { + name: "Sales", + children: [ + { name: "EMEA", value: 12 }, + { name: "AMER", value: 26 } + ] + }, + { name: "Ops", value: 9 } + ] +} + +const transitionNetwork = { + nodes: [ + { id: "draft" }, + { id: "review" }, + { id: "approved" }, + { id: "shipped" }, + { id: "rejected" } + ], + edges: [ + { source: "draft", target: "review", value: 100 }, + { source: "review", target: "approved", value: 60 }, + { source: "review", target: "rejected", value: 40 }, + { 
source: "approved", target: "shipped", value: 58 } + ] +} + +const usGeoFeatures = { + type: "FeatureCollection", + features: [ + { + type: "Feature", + id: "CA", + properties: { name: "California", value: 39 }, + geometry: { + type: "Polygon", + coordinates: [ + [ + [-124, 32], + [-114, 32], + [-114, 42], + [-124, 42], + [-124, 32] + ] + ] + } + }, + { + type: "Feature", + id: "TX", + properties: { name: "Texas", value: 29 }, + geometry: { + type: "Polygon", + coordinates: [ + [ + [-106, 26], + [-93, 26], + [-93, 36], + [-106, 36], + [-106, 26] + ] + ] + } + }, + { + type: "Feature", + id: "NY", + properties: { name: "New York", value: 19 }, + geometry: { + type: "Polygon", + coordinates: [ + [ + [-79, 40], + [-72, 40], + [-72, 45], + [-79, 45], + [-79, 40] + ] + ] + } + } + ] +} + +const flatSingleColumn = Array.from({ length: 50 }, (_, i) => ({ + observation: 50 + Math.sin(i / 4) * 12 + Math.random() * 6 +})) + +// Three-numeric scatter — fixture for BubbleChart +const economiesByCountry = [ + { + country: "USA", + gdp_per_capita: 70, + hours_worked: 1700, + population_size: 330 + }, + { + country: "UK", + gdp_per_capita: 48, + hours_worked: 1500, + population_size: 67 + }, + { + country: "Germany", + gdp_per_capita: 53, + hours_worked: 1330, + population_size: 84 + }, + { + country: "Japan", + gdp_per_capita: 40, + hours_worked: 1600, + population_size: 125 + }, + { + country: "France", + gdp_per_capita: 45, + hours_worked: 1480, + population_size: 67 + }, + { + country: "Italy", + gdp_per_capita: 38, + hours_worked: 1700, + population_size: 60 + }, + { + country: "Spain", + gdp_per_capita: 32, + hours_worked: 1640, + population_size: 47 + }, + { + country: "Canada", + gdp_per_capita: 52, + hours_worked: 1690, + population_size: 38 + }, + { + country: "Australia", + gdp_per_capita: 56, + hours_worked: 1700, + population_size: 26 + }, + { + country: "South Korea", + gdp_per_capita: 35, + hours_worked: 1900, + population_size: 52 + } +] + +// Multi-measure time 
series for MultiAxisLineChart +const websiteMetrics = Array.from({ length: 24 }, (_, i) => ({ + month: i + 1, + page_views: Math.round(50000 + i * 1200 + Math.sin(i / 3) * 8000), + conversion_rate: 2.5 + Math.sin(i / 4) * 0.8 + i * 0.05, + avg_session_seconds: Math.round(120 + i * 2 + Math.cos(i / 5) * 15) +})) + +// Categorical × series × value for GroupedBarChart / StackedBarChart +const salesByRegionAndProduct = [ + { product: "Widget", region: "EU", units: 480 }, + { product: "Widget", region: "NA", units: 620 }, + { product: "Widget", region: "APAC", units: 290 }, + { product: "Gadget", region: "EU", units: 320 }, + { product: "Gadget", region: "NA", units: 740 }, + { product: "Gadget", region: "APAC", units: 410 }, + { product: "Sprocket", region: "EU", units: 200 }, + { product: "Sprocket", region: "NA", units: 380 }, + { product: "Sprocket", region: "APAC", units: 150 }, + { product: "Whatsit", region: "EU", units: 290 }, + { product: "Whatsit", region: "NA", units: 550 }, + { product: "Whatsit", region: "APAC", units: 180 } +] + +// Coerce to exactly-two-series shape by partitioning evenly +const revenueVsExpensesTwoSeries = [ + ...Array.from({ length: 24 }, (_, i) => ({ + month: i + 1, + amount: 100 + i * 8 + Math.sin(i / 3) * 25, + series: "revenue" + })), + ...Array.from({ length: 24 }, (_, i) => ({ + month: i + 1, + amount: 80 + i * 6 + Math.cos(i / 4) * 15, + series: "expenses" + })) +] + +// OHLC time series for CandlestickChart +const stockPrices = Array.from({ length: 30 }, (_, i) => { + const base = 100 + i * 1.2 + Math.sin(i / 4) * 8 + const open = base + (Math.random() - 0.5) * 4 + const close = base + (Math.random() - 0.5) * 4 + const high = Math.max(open, close) + Math.random() * 3 + const low = Math.min(open, close) - Math.random() * 3 + return { day: i + 1, open, high, low, close } +}) + +// Ordered-sequence scatter for ConnectedScatterplot +const usaUnemploymentVsInflation = Array.from({ length: 20 }, (_, i) => ({ + year: 2005 + i, + 
unemployment: 5 + Math.sin(i / 2) * 2 + (i > 4 && i < 10 ? 3 : 0), + inflation: 2 + Math.cos(i / 3) * 1.5 +})) + +const sparseThreeRow = [ + { name: "A", value: 12 }, + { name: "B", value: 34 }, + { name: "C", value: 8 } +] + +// Flat array of transition events. The canonical input shape for SankeyDiagram / +// ProcessSankey / ChordDiagram / ForceDirectedGraph — should fit even though +// the data is rows, not a {nodes, edges} object. Exercises the +// detectTransitionNetwork path in profileData. +const transitionEvents = [ + { + case: "deal-001", + stage: "Inbound Lead", + nextStage: "Qualified", + startTime: "2024-04-01T09:00:00", + value: 18 + }, + { + case: "deal-001", + stage: "Qualified", + nextStage: "Discovery", + startTime: "2024-04-01T13:00:00", + value: 16 + }, + { + case: "deal-001", + stage: "Discovery", + nextStage: "Proposal", + startTime: "2024-04-02T11:00:00", + value: 14 + }, + { + case: "deal-001", + stage: "Proposal", + nextStage: "Closed Won", + startTime: "2024-04-04T09:00:00", + value: 12 + }, + { + case: "deal-002", + stage: "Inbound Lead", + nextStage: "Qualified", + startTime: "2024-04-01T10:00:00", + value: 10 + }, + { + case: "deal-002", + stage: "Qualified", + nextStage: "Discovery", + startTime: "2024-04-02T09:00:00", + value: 9 + }, + { + case: "deal-002", + stage: "Discovery", + nextStage: "Proposal", + startTime: "2024-04-03T09:00:00", + value: 7 + }, + { + case: "deal-002", + stage: "Proposal", + nextStage: "Closed Lost", + startTime: "2024-04-04T11:00:00", + value: 5 + }, + { + case: "deal-003", + stage: "Signup", + nextStage: "Activated", + startTime: "2024-04-01T08:30:00", + value: 28 + }, + { + case: "deal-003", + stage: "Activated", + nextStage: "Trial", + startTime: "2024-04-01T10:00:00", + value: 24 + }, + { + case: "deal-003", + stage: "Trial", + nextStage: "Subscribed", + startTime: "2024-04-02T10:00:00", + value: 18 + } +] + +export const CANONICAL_FIXTURES: ReadonlyArray = [ + // Time-series family + { + name: "monthly 
revenue with regions, intent=trend", + shape: "12 months × 3 regions, numeric month, numeric revenue", + data: monthlyRevenueMultiSeries, + intent: "trend", + expected: ["LineChart", "AreaChart", "MinimapChart"] + }, + { + name: "monthly revenue with regions, intent=compare-series", + shape: "12 months × 3 regions", + data: monthlyRevenueMultiSeries, + intent: "compare-series", + expected: ["LineChart", "GroupedBarChart"] + }, + { + name: "monthly revenue with regions, intent=composition-over-time", + shape: "12 months × 3 regions, additive", + data: monthlyRevenueMultiSeries, + intent: "composition-over-time", + expected: ["StackedAreaChart", "StackedBarChart"] + }, + { + name: "monthly revenue single series, intent=trend", + shape: "12 months, no series", + data: monthlyRevenueOneSeries, + intent: "trend", + expected: ["LineChart", "AreaChart"] + }, + // Categorical family + { + name: "product sales, intent=rank", + shape: "5 products, single numeric measure", + data: productSales, + intent: "rank", + expected: ["BarChart", "DotPlot"] + }, + { + name: "product sales, intent=part-to-whole", + shape: "5 products, single numeric measure", + data: productSales, + intent: "part-to-whole", + expected: ["PieChart", "DonutChart", "BarChart"] + }, + // Distribution family + { + name: "satisfaction scores, intent=distribution", + shape: "150 numeric observations across 3 cohorts", + data: surveySatisfaction, + intent: "distribution", + expected: ["Histogram", "BoxPlot", "ViolinPlot"] + }, + { + name: "satisfaction scores, intent=compare-categories", + shape: "150 obs × 3 cohorts", + data: surveySatisfaction, + intent: "compare-categories", + expected: ["BoxPlot", "ViolinPlot", "SwarmPlot"] + }, + // Relationship family + { + name: "hours vs grade, intent=correlation", + shape: "80 students, hours + grade", + data: studyHoursVsGrade, + intent: "correlation", + expected: ["Scatterplot"] + }, + { + name: "hours vs grade, intent=outlier-detection", + shape: "80 students", + 
data: studyHoursVsGrade, + intent: "outlier-detection", + expected: ["Scatterplot"] + }, + // Flow family + { + name: "conversion funnel, intent=flow", + shape: "4 stages, descending values", + data: conversionFunnel, + intent: "flow", + expected: ["FunnelChart"] + }, + // Hierarchy family (rawInput payload) + { + name: "org chart, intent=hierarchy", + shape: "3-deep org tree", + data: [], + rawInput: orgHierarchy, + intent: "hierarchy", + expected: ["TreeDiagram", "Treemap", "CirclePack"] + }, + // Network family (rawInput payload) + { + name: "approval workflow transitions, intent=flow", + shape: "5 nodes / 4 weighted edges", + data: [], + rawInput: transitionNetwork, + intent: "flow", + expected: ["SankeyDiagram", "ChordDiagram"] + }, + // Geo family (rawInput payload) + { + name: "US states with values, intent=geo", + shape: "3 polygon features with numeric values", + data: [], + rawInput: usGeoFeatures, + intent: "geo", + expected: ["ChoroplethMap", "ProportionalSymbolMap"] + }, + + // Three-numeric scatter — exercises BubbleChart + { + name: "country economies, intent=correlation", + shape: "10 countries × 3 numeric measures (gdp, hours, population)", + data: economiesByCountry, + intent: "correlation", + expected: ["Scatterplot", "BubbleChart"] + }, + // Multi-measure time-series — exercises MultiAxisLineChart + { + name: "website metrics with 3 measures, intent=compare-series", + shape: "24 months × 3 numeric measures with different ranges", + data: websiteMetrics, + intent: "compare-series", + expected: ["MultiAxisLineChart", "LineChart"] + }, + // Category × series × value — exercises GroupedBarChart / StackedBarChart + { + name: "sales by region and product, intent=compare-series", + shape: "12 rows = 4 products × 3 regions", + data: salesByRegionAndProduct, + intent: "compare-series", + expected: ["GroupedBarChart", "StackedBarChart"] + }, + { + name: "sales by region and product, intent=part-to-whole", + shape: "12 rows = 4 products × 3 regions", + 
data: salesByRegionAndProduct, + intent: "part-to-whole", + expected: ["StackedBarChart", "PieChart"] + }, + // Exactly-two-series temporal — exercises DifferenceChart + { + name: "revenue vs expenses, intent=compare-series", + shape: "48 rows = 24 months × 2 series", + data: revenueVsExpensesTwoSeries, + intent: "compare-series", + expected: ["DifferenceChart", "LineChart", "GroupedBarChart"] + }, + // OHLC — exercises CandlestickChart + { + name: "stock OHLC prices, intent=change-detection", + shape: "30 days × open/high/low/close", + data: stockPrices, + intent: "change-detection", + expected: ["CandlestickChart", "LineChart"] + }, + // Ordered-sequence scatter — exercises ConnectedScatterplot + { + name: "unemployment vs inflation by year, intent=correlation", + shape: "20 years × 2 measures, ordered by year", + data: usaUnemploymentVsInflation, + intent: "correlation", + expected: ["ConnectedScatterplot", "Scatterplot"] + }, + + // Transition events — flat array of edges with stage/nextStage/startTime/value. + // Should be auto-derived into a network so flow charts fit. + { + name: "transition events, intent=flow", + shape: "11 stage transitions across 3 deals with startTime + value", + data: transitionEvents, + intent: "flow", + expected: ["SankeyDiagram", "ProcessSankey", "ChordDiagram"] + }, + + // Stress fixtures — degenerate shapes; the engine should still surface a sensible pick. + { + name: "flat single column", + shape: "50 rows, one numeric column", + data: flatSingleColumn, + // intentionally no intent — exercises the intent-less (mean-of-all) ranking path.
+ expected: ["Histogram"] // a histogram is genuinely the best (only) fit here + }, + { + name: "sparse 3-row data, intent=rank", + shape: "3 rows total", + data: sparseThreeRow, + intent: "rank", + expected: ["BarChart", "DotPlot"] + } +] diff --git a/src/components/ai/qualityScorecard.test.ts b/src/components/ai/qualityScorecard.test.ts new file mode 100644 index 00000000..d90e3345 --- /dev/null +++ b/src/components/ai/qualityScorecard.test.ts @@ -0,0 +1,47 @@ +import { describe, it, expect } from "vitest" +import { runQualityScorecard } from "./qualityScorecard" +import { CANONICAL_FIXTURES } from "./qualityFixtures" + +describe("runQualityScorecard", () => { + it("returns a report covering every fixture", () => { + const report = runQualityScorecard(CANONICAL_FIXTURES) + expect(report.summary.fixtureCount).toBe(CANONICAL_FIXTURES.length) + expect(report.perFixture.length).toBe(CANONICAL_FIXTURES.length) + expect(report.perCapability.length).toBeGreaterThan(0) + }) + + it("expert agreement rate stays above 90% across the canonical set", () => { + // Phase 2.1 tuning landed expert agreement at 100% on 23 fixtures. + // Below 90% means a descriptor regressed; below 80% means urgent work. + // This gate is intentionally tight — the canonical set is curated and + // the engine should win on all of it. 
+ const report = runQualityScorecard(CANONICAL_FIXTURES) + expect(report.summary.expertAgreementRate).toBeGreaterThanOrEqual(0.9) + }) + + it("emits per-capability tallies for every registered chart", () => { + const report = runQualityScorecard(CANONICAL_FIXTURES) + const names = new Set(report.perCapability.map((c) => c.component)) + expect(names.has("LineChart")).toBe(true) + expect(names.has("BarChart")).toBe(true) + expect(names.has("Histogram")).toBe(true) + }) + + it("ranks capabilities with zero expert agreement first", () => { + const report = runQualityScorecard(CANONICAL_FIXTURES) + // perCapability is sorted by expertAgreementCount ascending + const counts = report.perCapability.map((c) => c.expertAgreementCount) + for (let i = 1; i < counts.length; i++) { + expect(counts[i]).toBeGreaterThanOrEqual(counts[i - 1]) + } + }) + + it("doesn't crash on the sparse-data fixture", () => { + const sparse = CANONICAL_FIXTURES.find((f) => f.name.includes("sparse")) + expect(sparse).toBeDefined() + if (sparse) { + const report = runQualityScorecard([sparse]) + expect(report.perFixture[0]).toBeDefined() + } + }) +}) diff --git a/src/components/ai/qualityScorecard.ts b/src/components/ai/qualityScorecard.ts new file mode 100644 index 00000000..3bac79e8 --- /dev/null +++ b/src/components/ai/qualityScorecard.ts @@ -0,0 +1,228 @@ +import type { Datum } from "../charts/shared/datumTypes" +import { getCapabilities } from "./chartCapabilities" +import { profileData } from "./profileData" +import { explainCapabilityFit } from "./suggestCharts" +import type { ChartCapability, ChartDataProfile } from "./chartCapabilityTypes" +import type { IntentId } from "./intents" + +/** + * One canonical fixture in a scorecard run. Pair canonical data with the + * intents/components a human expert would expect to win on it. Set + * `expectsNoFit` (and omit `expected`) when the fixture is a stress-test that should produce no + * fitting chart at all (e.g. flat single-column data, broken GeoJSON).
+ */ +export interface ScorecardFixture { + name: string + /** Free-text shape description, used in scorecard output for context. */ + shape?: string + data: ReadonlyArray + /** Optional non-tabular payload (network/hierarchy/GeoJSON). */ + rawInput?: unknown + /** Intent to rank by. If omitted, scored without intent (mean-of-all). */ + intent?: IntentId + /** Components the human expert would pick. Empty = "anything fits". */ + expected?: ReadonlyArray + /** True if the fixture should produce zero fitting suggestions. Mutually exclusive with `expected`. */ + expectsNoFit?: boolean +} + +export interface PerCapabilityScore { + component: string + family: ChartCapability["family"] + /** Number of fixtures where this capability fit. */ + fitsOn: number + /** Number of fixtures where this capability was rejected. */ + rejectedOn: number + /** Number of fixtures where this capability appeared in the top-3 ranked suggestions. */ + inTopThreeOn: number + /** Fixtures where the human expert picked this chart AND it was in top-3 ranking. */ + expertAgreementCount: number + /** Mean composite score across fixtures where it fit. */ + averageScore: number + /** Fraction of suggestions that included at least one caveat. */ + caveatCoverage: number + /** Fraction of suggestions that picked a non-base variant. */ + variantUtilization: number +} + +export interface PerFixtureScore { + fixture: string + shape?: string + intent?: IntentId + expected?: ReadonlyArray + topPick?: { component: string; variantKey?: string; score: number } + topThree: ReadonlyArray<{ component: string; variantKey?: string; score: number }> + fittingCount: number + rejectedCount: number + /** True if the top-3 ranking contained at least one expected component (when expected is provided). */ + expertAgreement: boolean | null + /** Did the engine honor `expectsNoFit`? 
*/ + noFitHonored: boolean | null +} + +export interface ScorecardReport { + perCapability: PerCapabilityScore[] + perFixture: PerFixtureScore[] + summary: { + fixtureCount: number + capabilityCount: number + /** Fraction of expectation-bearing fixtures where the engine agreed with the expert. */ + expertAgreementRate: number + /** Average caveat coverage across all suggestions. */ + overallCaveatCoverage: number + /** Average variant utilization across all suggestions. */ + overallVariantUtilization: number + } +} + +/** + * Run the scorecard. Pure — does no I/O — so it can be called from CI scripts, + * vizmart UIs, or test suites. + */ +export function runQualityScorecard( + fixtures: ReadonlyArray, + capabilities: ReadonlyArray = getCapabilities(), +): ScorecardReport { + const perCapability = new Map() + for (const c of capabilities) { + perCapability.set(c.component, { + component: c.component, + family: c.family, + fitsOn: 0, + rejectedOn: 0, + inTopThreeOn: 0, + expertAgreementCount: 0, + averageScore: 0, + caveatCoverage: 0, + variantUtilization: 0, + }) + } + + // Running tallies for averaging + const scoreSums = new Map() + const suggestionCount = new Map() + const caveatCount = new Map() + const variantCount = new Map() + + const perFixture: PerFixtureScore[] = [] + + for (const fixture of fixtures) { + let profile: ChartDataProfile + let result: ReturnType + try { + profile = profileData(fixture.data, { rawInput: fixture.rawInput }) + result = explainCapabilityFit(fixture.data, { + profile, + intent: fixture.intent, + capabilities, + maxResults: 40, + }) + } catch (err) { + // A descriptor crashed on this fixture — flag it. 
+ perFixture.push({ + fixture: fixture.name, + shape: fixture.shape, + intent: fixture.intent, + expected: fixture.expected, + topPick: undefined, + topThree: [], + fittingCount: 0, + rejectedCount: 0, + expertAgreement: false, + noFitHonored: null, + }) + continue + } + + const topThree = result.fitting.slice(0, 3).map((s) => ({ + component: s.component, + variantKey: s.variant?.key, + score: s.score, + })) + + const expertAgreement = fixture.expected && fixture.expected.length > 0 + ? topThree.some((t) => fixture.expected!.includes(t.component)) + : null + + const noFitHonored = fixture.expectsNoFit === true + ? result.fitting.length === 0 + : null + + perFixture.push({ + fixture: fixture.name, + shape: fixture.shape, + intent: fixture.intent, + expected: fixture.expected, + topPick: topThree[0], + topThree, + fittingCount: result.fitting.length, + rejectedCount: result.rejected.length, + expertAgreement, + noFitHonored, + }) + + // Tally per-capability stats + for (const s of result.fitting) { + const row = perCapability.get(s.component) + if (!row) continue + row.fitsOn += 1 + scoreSums.set(s.component, (scoreSums.get(s.component) ?? 0) + s.score) + suggestionCount.set(s.component, (suggestionCount.get(s.component) ?? 0) + 1) + if (s.caveats.length > 0) caveatCount.set(s.component, (caveatCount.get(s.component) ?? 0) + 1) + if (s.variant) variantCount.set(s.component, (variantCount.get(s.component) ?? 
0) + 1) + } + for (const r of result.rejected) { + const row = perCapability.get(r.component) + if (row) row.rejectedOn += 1 + } + for (const t of topThree) { + const row = perCapability.get(t.component) + if (row) row.inTopThreeOn += 1 + } + if (fixture.expected && expertAgreement) { + for (const t of topThree) { + if (fixture.expected.includes(t.component)) { + const row = perCapability.get(t.component) + if (row) row.expertAgreementCount += 1 + } + } + } + } + + // Finalize averages + for (const row of perCapability.values()) { + const count = suggestionCount.get(row.component) ?? 0 + row.averageScore = count === 0 ? 0 : (scoreSums.get(row.component) ?? 0) / count + row.caveatCoverage = count === 0 ? 0 : (caveatCount.get(row.component) ?? 0) / count + row.variantUtilization = count === 0 ? 0 : (variantCount.get(row.component) ?? 0) / count + } + + // Sort: lowest expertAgreementCount first so weak descriptors surface first. + // Ties broken by fitsOn (higher = more chances to demonstrate value). + const perCapabilitySorted = Array.from(perCapability.values()).sort((a, b) => { + const expertDelta = a.expertAgreementCount - b.expertAgreementCount + if (expertDelta !== 0) return expertDelta + return b.fitsOn - a.fitsOn + }) + + const fixturesWithExpectations = perFixture.filter((f) => f.expertAgreement !== null) + const expertAgreementRate = fixturesWithExpectations.length === 0 + ? 
0 + : fixturesWithExpectations.filter((f) => f.expertAgreement === true).length / fixturesWithExpectations.length + + const allSuggestionCount = Array.from(suggestionCount.values()).reduce((a, b) => a + b, 0) + const allCaveatCount = Array.from(caveatCount.values()).reduce((a, b) => a + b, 0) + const allVariantCount = Array.from(variantCount.values()).reduce((a, b) => a + b, 0) + + return { + perCapability: perCapabilitySorted, + perFixture, + summary: { + fixtureCount: fixtures.length, + capabilityCount: capabilities.length, + expertAgreementRate, + overallCaveatCoverage: allSuggestionCount === 0 ? 0 : allCaveatCount / allSuggestionCount, + overallVariantUtilization: allSuggestionCount === 0 ? 0 : allVariantCount / allSuggestionCount, + }, + } +} diff --git a/src/components/ai/repairChartConfig.test.ts b/src/components/ai/repairChartConfig.test.ts new file mode 100644 index 00000000..c8e9caaa --- /dev/null +++ b/src/components/ai/repairChartConfig.test.ts @@ -0,0 +1,80 @@ +import { describe, it, expect } from "vitest" +import { repairChartConfig } from "./repairChartConfig" + +const productSales = [ + { product: "Widget", units: 480 }, + { product: "Gadget", units: 620 }, + { product: "Sprocket", units: 290 }, + { product: "Whatsit", units: 740 }, + { product: "Doohickey", units: 410 }, + { product: "Gizmo", units: 200 }, + { product: "Thingamajig", units: 320 }, + { product: "Item-8", units: 110 }, + { product: "Item-9", units: 90 }, + { product: "Item-10", units: 75 }, +] + +const temporal = Array.from({ length: 12 }, (_, i) => ({ + month: i + 1, + revenue: 1000 + i * 120 + Math.sin(i) * 80, +})) + +describe("repairChartConfig", () => { + it("returns ok when the chart fits", () => { + const result = repairChartConfig("BarChart", productSales.slice(0, 5)) + expect(result.status).toBe("ok") + if (result.status === "ok") { + expect(result.component).toBe("BarChart") + } + }) + + it("proposes alternatives when the chart doesn't fit", () => { + // PieChart can't 
handle 10 categories + const result = repairChartConfig("PieChart", productSales, { intent: "rank" }) + expect(result.status).toBe("alternative") + if (result.status === "alternative") { + expect(result.reason).toMatch(/slices/) + expect(result.alternatives.length).toBeGreaterThan(0) + // BarChart or DotPlot should be the strongest replacement for rank + expect(["BarChart", "DotPlot"]).toContain(result.alternatives[0].component) + } + }) + + it("excludes the requested component from alternatives", () => { + const result = repairChartConfig("StackedBarChart", productSales) + expect(result.status).toBe("alternative") + if (result.status === "alternative") { + for (const alt of result.alternatives) { + expect(alt.component).not.toBe("StackedBarChart") + } + } + }) + + it("returns unknown for components without a registered capability", () => { + const result = repairChartConfig("NotARealChart", temporal, { intent: "trend" }) + expect(result.status).toBe("unknown") + if (result.status === "unknown") { + expect(result.alternatives.length).toBeGreaterThan(0) + // Top alt for trend on single-series temporal is AreaChart (gradient + // fill outranks LineChart's plain line for trend); LineChart is still + // in the alternatives list, just not first. 
+ expect(result.alternatives[0].component).toBe("AreaChart") + expect(result.alternatives.map((a) => a.component)).toContain("LineChart") + } + }) + + it("includes profile in every result for caller inspection", () => { + const result = repairChartConfig("PieChart", productSales) + expect(result.profile).toBeDefined() + expect(result.profile.rowCount).toBe(productSales.length) + }) + + it("alternatives carry runnable props", () => { + const result = repairChartConfig("PieChart", productSales, { intent: "rank" }) + if (result.status === "alternative") { + const top = result.alternatives[0] + expect(top.props).toBeDefined() + expect(top.props.data).toBeDefined() + } + }) +}) diff --git a/src/components/ai/repairChartConfig.ts b/src/components/ai/repairChartConfig.ts new file mode 100644 index 00000000..9948bb39 --- /dev/null +++ b/src/components/ai/repairChartConfig.ts @@ -0,0 +1,122 @@ +import type { Datum } from "../charts/shared/datumTypes" +import { getCapability } from "./chartCapabilities" +import { profileData } from "./profileData" +import { suggestCharts } from "./suggestCharts" +import type { ChartDataProfile, Suggestion } from "./chartCapabilityTypes" +import type { IntentId } from "./intents" + +/** + * Repair result when the chosen chart fits the data — nothing to fix. + */ +export interface RepairOkResult { + status: "ok" + component: string + /** The same data profile that was evaluated. */ + profile: ChartDataProfile +} + +/** + * Repair result when the chosen chart doesn't fit. Carries the diagnostic + * reason from the capability's `fits()` plus ranked alternatives that *do* + * fit, with their reasons surfaced for caller narration. + */ +export interface RepairAlternativeResult { + status: "alternative" + /** The component the caller asked about. */ + component: string + /** Why it doesn't fit. */ + reason: string + /** Charts that do fit the data, ranked by the caller's intent when one is given.
*/ + alternatives: Suggestion[] + profile: ChartDataProfile +} + +/** + * Repair result when no capability is registered for the asked component. + */ +export interface RepairUnknownResult { + status: "unknown" + component: string + /** Closest matches by family/intent — best effort. */ + alternatives: Suggestion[] + profile: ChartDataProfile +} + +export type RepairResult = RepairOkResult | RepairAlternativeResult | RepairUnknownResult + +export interface RepairOptions { + /** Caller's intent — informs ranking of alternatives when the chart doesn't fit. */ + intent?: IntentId | IntentId[] + /** Non-tabular payload (network/hierarchy/GeoJSON). Forwarded to profileData. */ + rawInput?: unknown + /** Limit number of alternatives returned (default 3). */ + maxAlternatives?: number + /** Pre-computed profile, avoids recomputation. */ + profile?: ChartDataProfile +} + +/** + * Validate that a chart component is a sensible choice for a dataset, and + * if not, propose alternatives that *do* fit — ranked by the caller's + * intent if provided. + * + * This is the "auto-fix" surface for `--doctor` and agent retry loops. + * Given a chart + data, returns either: + * + * - { status: "ok", component } — the chart fits, ship it + * - { status: "alternative", reason, alternatives } — the chart doesn't + * fit; here are charts that do, ranked by intent if specified + * - { status: "unknown", alternatives } — we don't have a + * capability for that component name; here are sensible defaults + * + * The contract: a caller can always render `alternatives[0]` and get + * something useful. The `reason` field is suitable for verbatim display + * to the user. + * + * @example + * repairChartConfig("PieChart", productData, { intent: "rank" }) + * // → { status: "alternative", + * // reason: "9 slices is too many for a pie chart", + * // alternatives: [BarChart, DotPlot, ...] 
} + */ +export function repairChartConfig( + component: string, + data: ReadonlyArray | null | undefined, + options: RepairOptions = {}, +): RepairResult { + const profile = options.profile ?? profileData(data ?? [], { rawInput: options.rawInput }) + const capability = getCapability(component) + const maxAlternatives = options.maxAlternatives ?? 3 + + if (!capability) { + // Unknown component — return top suggestions as best-effort fallbacks + const alternatives = suggestCharts(data, { + profile, + intent: options.intent, + maxResults: maxAlternatives, + includeVariants: false, + }) + return { status: "unknown", component, alternatives, profile } + } + + const fitReason = capability.fits(profile) + if (fitReason === null) { + return { status: "ok", component, profile } + } + + const alternatives = suggestCharts(data, { + profile, + intent: options.intent, + maxResults: maxAlternatives, + deny: [component], // don't recommend the one that already failed + includeVariants: false, + }) + + return { + status: "alternative", + component, + reason: fitReason, + alternatives, + profile, + } +} diff --git a/src/components/ai/streamingTypes.ts b/src/components/ai/streamingTypes.ts new file mode 100644 index 00000000..ce2e957f --- /dev/null +++ b/src/components/ai/streamingTypes.ts @@ -0,0 +1,73 @@ +import type { ChartRubric } from "./chartCapabilityTypes" +import type { IntentId } from "./intents" + +/** + * Streaming chart selection has a different shape than static. We don't have + * rows yet — we have a *schema*: which fields will arrive, what types, plus + * environment hints (throughput, retention). + * + * Rather than overloading `profileData` (which is row-statistics-centric) we + * model streams as a parallel API. The two share the intent vocabulary — + * "trend" still means trend — but the suitability logic is its own thing. 
+ */ + +export type StreamFieldKind = "numeric" | "categorical" | "date" | "boolean" + +export interface StreamFieldSchema { + name: string + kind: StreamFieldKind + /** Optional role hint — overrides the engine's inference. */ + role?: "x" | "y" | "value" | "category" | "series" | "size" +} + +/** + * Schema describing what a stream emits. No data, just shape + environment hints. + */ +export interface StreamSchema { + fields: ReadonlyArray + /** + * Hint about expected event rate. Affects chart selection — heatmaps and + * waterfalls amortize high-throughput streams better than line charts do. + * • "low" — < 1 event/sec, line/area charts read well + * • "medium" — ~1-100 events/sec + * • "high" — > 100 events/sec, prefer aggregating visualizations + */ + throughput?: "low" | "medium" | "high" + /** + * Hint about how long events are kept in view. + * • "windowed" — only recent events visible (default) + * • "cumulative" — all events accumulate + */ + retention?: "windowed" | "cumulative" +} + +/** + * Stream capability descriptor — parallel to ChartCapability but operates on + * a schema. No `fits(profile)`; instead `fits(schema)` returns null/reason. + */ +export interface StreamChartCapability { + component: string + importPath: "semiotic/realtime" + rubric: ChartRubric + fits: (schema: StreamSchema) => null | string + intentScores: Partial> + caveats?: (schema: StreamSchema) => ReadonlyArray + buildProps: (schema: StreamSchema) => Record +} + +export type StreamIntentScorer = + | number + | ((schema: StreamSchema) => number) + +export interface StreamSuggestion { + component: string + family: "realtime" + importPath: "semiotic/realtime" + score: number + intentScores: Partial> + rubric: ChartRubric + reasons: ReadonlyArray + caveats: ReadonlyArray + /** Props ready to spread into the matching realtime chart. 
*/ + props: Record +} diff --git a/src/components/ai/suggestCharts.test.ts b/src/components/ai/suggestCharts.test.ts new file mode 100644 index 00000000..cde017ca --- /dev/null +++ b/src/components/ai/suggestCharts.test.ts @@ -0,0 +1,205 @@ +import { describe, it, expect } from "vitest" +import { suggestCharts, scoreChart, explainCapabilityFit } from "./suggestCharts" +import { registerChartCapability, unregisterChartCapability } from "./chartCapabilities" +import type { ChartCapability } from "./chartCapabilityTypes" + +const temporalMultiSeries = [ + { month: 1, revenue: 1200, region: "EU" }, + { month: 2, revenue: 1400, region: "EU" }, + { month: 3, revenue: 1100, region: "EU" }, + { month: 4, revenue: 1700, region: "EU" }, + { month: 5, revenue: 1900, region: "EU" }, + { month: 1, revenue: 900, region: "NA" }, + { month: 2, revenue: 1100, region: "NA" }, + { month: 3, revenue: 1500, region: "NA" }, + { month: 4, revenue: 1300, region: "NA" }, + { month: 5, revenue: 1700, region: "NA" }, +] + +const categorical = [ + { product: "Widget", units: 30 }, + { product: "Gadget", units: 50 }, + { product: "Sprocket", units: 20 }, + { product: "Whatsit", units: 45 }, +] + +const distributionData = Array.from({ length: 100 }, (_, i) => ({ + observation: 50 + Math.sin(i / 7) * 20 + (i % 3 === 0 ? 
30 : 0), +})) + +describe("suggestCharts", () => { + it("ranks LineChart highly for temporal multi-series with intent=trend", () => { + const suggestions = suggestCharts(temporalMultiSeries, { intent: "trend", includeVariants: false }) + expect(suggestions.length).toBeGreaterThan(0) + expect(suggestions[0].component).toBe("LineChart") + expect(suggestions[0].score).toBeGreaterThan(3) + }) + + it("ranks BarChart highly for categorical with intent=rank", () => { + const suggestions = suggestCharts(categorical, { intent: "rank", includeVariants: false }) + expect(suggestions[0].component).toBe("BarChart") + expect(suggestions[0].props.categoryAccessor).toBe("product") + expect(suggestions[0].props.valueAccessor).toBe("units") + }) + + it("ranks Histogram highly for distribution intent", () => { + const suggestions = suggestCharts(distributionData, { intent: "distribution", includeVariants: false }) + expect(suggestions[0].component).toBe("Histogram") + }) + + it("filters by allow list", () => { + const suggestions = suggestCharts(temporalMultiSeries, { allow: ["AreaChart"], includeVariants: false }) + expect(suggestions.every((s) => s.component === "AreaChart")).toBe(true) + }) + + it("emits variants by default", () => { + const suggestions = suggestCharts(temporalMultiSeries, { intent: "trend" }) + const lineVariants = suggestions.filter((s) => s.component === "LineChart" && s.variant) + expect(lineVariants.length).toBeGreaterThan(0) + }) + + it("smooth variant boosts trend score relative to base for LineChart", () => { + const suggestions = suggestCharts(temporalMultiSeries, { intent: "trend", allow: ["LineChart"] }) + const base = suggestions.find((s) => s.variant?.key === "linear") + const smooth = suggestions.find((s) => s.variant?.key === "smooth") + expect(base).toBeDefined() + expect(smooth).toBeDefined() + expect((smooth!.score)).toBeGreaterThanOrEqual(base!.score) + }) + + it("excludes PieChart when there are too many categories", () => { + const 
tooManyCategories = Array.from({ length: 15 }, (_, i) => ({ name: `Cat${i}`, count: i + 1 })) + const suggestions = suggestCharts(tooManyCategories) + expect(suggestions.find((s) => s.component === "PieChart")).toBeUndefined() + }) + + it("excludes StackedBarChart when there is no series field", () => { + const suggestions = suggestCharts(categorical) + expect(suggestions.find((s) => s.component === "StackedBarChart")).toBeUndefined() + }) + + it("buildProps returns runnable accessor configuration", () => { + const suggestions = suggestCharts(temporalMultiSeries, { intent: "trend", allow: ["LineChart"], includeVariants: false }) + const top = suggestions[0] + expect(top.props.xAccessor).toBe("month") + expect(top.props.yAccessor).toBe("revenue") + expect(top.props.lineBy).toBe("region") + expect(top.props.colorBy).toBe("region") + }) + + it("respects user-registered capabilities", () => { + const fake: ChartCapability = { + component: "MyCustomChart", + family: "custom", + importPath: "semiotic", + rubric: { familiarity: 1, accuracy: 5, precision: 5 }, + fits: () => null, + intentScores: { "trend": 5 }, + buildProps: () => ({ custom: true }), + } + registerChartCapability(fake) + try { + const suggestions = suggestCharts(temporalMultiSeries, { allow: ["MyCustomChart"] }) + expect(suggestions[0].component).toBe("MyCustomChart") + } finally { + unregisterChartCapability("MyCustomChart") + } + }) +}) + +describe("suggestCharts — structural shapes", () => { + it("recommends ForceDirectedGraph for {nodes, edges}", () => { + const network = { + nodes: [{ id: "a" }, { id: "b" }, { id: "c" }], + edges: [ + { source: "a", target: "b" }, + { source: "b", target: "c" }, + ], + } + const suggestions = suggestCharts([], { rawInput: network, allow: ["ForceDirectedGraph", "SankeyDiagram", "ChordDiagram"] }) + expect(suggestions.length).toBeGreaterThan(0) + expect(["network", "flow"]).toContain(suggestions[0].family) + expect((suggestions[0].props.nodes as 
unknown[]).length).toBe(3) + }) + + it("recommends Treemap/TreeDiagram for hierarchies", () => { + const hierarchy = { + name: "root", + children: [ + { name: "a", value: 10 }, + { name: "b", value: 20, children: [{ name: "b1", value: 5 }] }, + ], + } + const suggestions = suggestCharts([], { rawInput: hierarchy, intent: "hierarchy" }) + expect(suggestions.some((s) => s.family === "hierarchy")).toBe(true) + }) + + it("recommends ChoroplethMap for GeoJSON", () => { + const geo = { + type: "FeatureCollection", + features: [ + { type: "Feature", geometry: { type: "Polygon", coordinates: [] }, properties: { value: 5 } }, + { type: "Feature", geometry: { type: "Polygon", coordinates: [] }, properties: { value: 10 } }, + ], + } + const suggestions = suggestCharts([], { rawInput: geo, intent: "geo" }) + expect(suggestions.some((s) => s.component === "ChoroplethMap")).toBe(true) + }) +}) + +describe("explainCapabilityFit", () => { + it("returns both fitting and rejected capabilities", () => { + const { fitting, rejected, profile } = explainCapabilityFit(categorical) + expect(fitting.length).toBeGreaterThan(0) + expect(rejected.length).toBeGreaterThan(0) + // BarChart should fit categorical data; StackedBarChart should be rejected + expect(fitting.some((s) => s.component === "BarChart")).toBe(true) + expect(rejected.some((r) => r.component === "StackedBarChart")).toBe(true) + expect(profile.rowCount).toBe(categorical.length) + }) + + it("rejection reasons are human-readable strings", () => { + const { rejected } = explainCapabilityFit(categorical) + for (const r of rejected) { + expect(typeof r.reason).toBe("string") + expect(r.reason.length).toBeGreaterThan(0) + } + }) + + it("respects allow/deny lists", () => { + const { fitting, rejected } = explainCapabilityFit(categorical, { + allow: ["BarChart", "Histogram", "DotPlot"], + }) + for (const s of fitting) expect(["BarChart", "Histogram", "DotPlot"]).toContain(s.component) + for (const r of rejected) expect(["BarChart", 
"Histogram", "DotPlot"]).toContain(r.component) + }) + + it("rejection set + fitting set is disjoint", () => { + const { fitting, rejected } = explainCapabilityFit(temporalMultiSeries) + const fittingNames = new Set(fitting.map((s) => s.component)) + for (const r of rejected) { + expect(fittingNames.has(r.component)).toBe(false) + } + }) +}) + +describe("scoreChart", () => { + it("returns a suggestion for a fitting chart", () => { + const result = scoreChart("LineChart", temporalMultiSeries, { intent: "trend" }) + expect("score" in result).toBe(true) + if ("score" in result) { + expect(result.score).toBeGreaterThan(3) + expect(result.props.xAccessor).toBe("month") + } + }) + + it("returns a reason when the chart doesn't fit", () => { + const result = scoreChart("StackedBarChart", categorical) + expect("reason" in result).toBe(true) + }) + + it("returns a reason for unknown components", () => { + const result = scoreChart("DoesNotExist", categorical) + expect("reason" in result).toBe(true) + }) +}) diff --git a/src/components/ai/suggestCharts.ts b/src/components/ai/suggestCharts.ts new file mode 100644 index 00000000..e8dedd21 --- /dev/null +++ b/src/components/ai/suggestCharts.ts @@ -0,0 +1,312 @@ +import type { Datum } from "../charts/shared/datumTypes" +import { profileData, type ProfileDataOptions } from "./profileData" +import type { + ChartCapability, + ChartDataProfile, + ChartRubric, + ChartVariant, + IntentScorer, + Suggestion, +} from "./chartCapabilityTypes" +import type { IntentId } from "./intents" +import { getCapabilities } from "./chartCapabilities" +import { applyAudienceBias, type AudienceProfile } from "./audienceProfile" + +function score(scorer: IntentScorer | undefined, profile: ChartDataProfile): number { + if (scorer === undefined) return 0 + const raw = typeof scorer === "function" ? 
/**
 * Resolve an intent scorer to a number in [0, 5].
 *
 * A scorer is either a constant or a function of the data profile. A missing
 * scorer or a non-finite result counts as 0.
 */
function score(scorer: IntentScorer | undefined, profile: ChartDataProfile): number {
  if (scorer === undefined) return 0
  const raw = typeof scorer === "function" ? scorer(profile) : scorer
  if (!Number.isFinite(raw)) return 0
  return Math.max(0, Math.min(5, raw))
}

/** Round every rubric axis to an integer and clamp it to [1, 5]. */
function clampRubric(r: ChartRubric): ChartRubric {
  const clamp = (n: number) => Math.max(1, Math.min(5, Math.round(n)))
  return { familiarity: clamp(r.familiarity), accuracy: clamp(r.accuracy), precision: clamp(r.precision) }
}

/**
 * Apply a variant's per-intent deltas on top of the base intent scores.
 *
 * Returns `baseScores` itself (same reference) when the variant carries no
 * deltas; otherwise returns a shallow copy with each adjusted score clamped
 * to [0, 5]. An intent absent from the base scores starts from 0.
 */
function applyVariantToScores(
  baseScores: Partial<Record<IntentId, number>>,
  variant: ChartVariant | undefined
): Partial<Record<IntentId, number>> {
  if (!variant?.intentDeltas) return baseScores
  const out: Partial<Record<IntentId, number>> = { ...baseScores }
  for (const [intent, delta] of Object.entries(variant.intentDeltas) as Array<[IntentId, number]>) {
    // A missing or NaN delta would turn the score into NaN and then poison the
    // composite score and the sort order; treat it as "no adjustment",
    // mirroring the non-finite guard in score().
    if (typeof delta !== "number" || Number.isNaN(delta)) continue
    const current = out[intent] ?? 0
    out[intent] = Math.max(0, Math.min(5, current + delta))
  }
  return out
}

/**
 * Apply a variant's rubric deltas to the capability rubric.
 *
 * Returns the rubric untouched when the variant has no `rubricDeltas`;
 * otherwise the adjusted rubric is rounded and clamped via `clampRubric`.
 */
function applyVariantToRubric(rubric: ChartRubric, variant: ChartVariant | undefined): ChartRubric {
  if (!variant?.rubricDeltas) return rubric
  return clampRubric({
    familiarity: rubric.familiarity + (variant.rubricDeltas.familiarity ?? 0),
    accuracy: rubric.accuracy + (variant.rubricDeltas.accuracy ?? 0),
    precision: rubric.precision + (variant.rubricDeltas.precision ?? 0),
  })
}

/**
 * Build the human-readable "why this chart" strings for a suggestion.
 *
 * Emits, in order: up to two requested intents scoring >= 3 (best first), the
 * primary x/y field pairing when both exist, and the series count when more
 * than one series was detected.
 *
 * @param capability Currently unused; kept so the call signature stays stable.
 */
function buildReasons(
  capability: ChartCapability,
  profile: ChartDataProfile,
  intentScores: Partial<Record<IntentId, number>>,
  rankingIntents: IntentId[]
): string[] {
  const reasons: string[] = []
  const top = rankingIntents
    .map((intent) => ({ intent, value: intentScores[intent] ?? 0 }))
    .filter((entry) => entry.value >= 3)
    .sort((a, b) => b.value - a.value)
    .slice(0, 2)
  // `value` rather than `score`, so the module-level score() is not shadowed.
  for (const { intent, value } of top) {
    reasons.push(`Strong fit for ${intent} (${value}/5)`)
  }
  if (profile.primary.x && profile.primary.y) {
    reasons.push(`x = ${profile.primary.x}, y = ${profile.primary.y}`)
  }
  if (profile.seriesCount && profile.seriesCount > 1) {
    reasons.push(`${profile.seriesCount} series detected on field "${profile.primary.series ?? "series"}"`)
  }
  return reasons
}

/**
 * Collapse per-intent scores into the single number used for ranking.
 *
 * With no ranking intents: the mean of all strictly positive scores (0 when
 * there are none). With ranking intents: the mean over exactly those intents,
 * counting an unscored intent as 0.
 */
function compositeScore(
  intentScores: Partial<Record<IntentId, number>>,
  rankingIntents: IntentId[]
): number {
  if (rankingIntents.length === 0) {
    // No intent specified — use mean of non-zero scores across all intents
    const nonZero = Object.values(intentScores).filter((n): n is number => typeof n === "number" && n > 0)
    if (nonZero.length === 0) return 0
    return nonZero.reduce((a, b) => a + b, 0) / nonZero.length
  }
  // Average the requested intents
  let sum = 0
  for (const intent of rankingIntents) sum += intentScores[intent] ?? 0
  return sum / rankingIntents.length
}
export interface SuggestChartsOptions extends ProfileDataOptions {
  /** Ranking intent(s). When omitted, suggestions are ranked by mean intent score. */
  intent?: IntentId | IntentId[]
  /** Restrict to these component names. */
  allow?: ReadonlyArray<string>
  /** Exclude these component names. */
  deny?: ReadonlyArray<string>
  /** Maximum suggestions to return (default 10). Negative values are treated as 0. */
  maxResults?: number
  /** Include variant-level suggestions (default true). */
  includeVariants?: boolean
  /** Filter out suggestions with a composite score below this (default 0 — keep all). */
  minScore?: number
  /** Provide a pre-built profile instead of re-deriving from data. */
  profile?: ChartDataProfile
  /** Override the registry. Defaults to the global capability registry. */
  capabilities?: ReadonlyArray<ChartCapability>
  /**
   * Audience profile — overrides chart familiarity and applies adoption-target
   * bias to the ranking. See `audienceProfile.ts`.
   */
  audience?: AudienceProfile
}

/**
 * Suggest charts for a dataset, ranked by intent suitability.
 *
 * Heuristic-only — does not call an LLM. Designed to be cheap enough to run on every
 * keystroke in a UI, and to feed structured context to an LLM when one is available.
 *
 * Pipeline per capability: allow/deny filter → `fits()` gate → per-intent
 * scoring → one candidate per variant (or a single base candidate) → audience
 * bias → `minScore` filter. Candidates are then sorted by score, accuracy and
 * familiarity (all descending) and truncated to `maxResults`.
 *
 * @param data Rows to profile. Ignored when `options.profile` is supplied.
 * @param options Ranking, filtering and registry overrides.
 * @returns Ranked suggestions, best first.
 */
export function suggestCharts(
  data: ReadonlyArray<Datum> | null | undefined,
  options: SuggestChartsOptions = {}
): Suggestion[] {
  const profile = options.profile ?? profileData(data ?? [], { rawInput: options.rawInput, seriesField: options.seriesField })
  const capabilities = options.capabilities ?? getCapabilities()
  const rankingIntents: IntentId[] = options.intent
    ? Array.isArray(options.intent) ? options.intent : [options.intent]
    : []
  const includeVariants = options.includeVariants !== false
  const minScore = options.minScore ?? 0
  // A negative limit would make slice(0, -k) drop the k lowest-ranked results
  // instead of returning none; floor it at 0.
  const maxResults = Math.max(0, options.maxResults ?? 10)

  const allow = options.allow ? new Set(options.allow) : null
  const deny = options.deny ? new Set(options.deny) : null

  const out: Suggestion[] = []

  for (const capability of capabilities) {
    if (allow && !allow.has(capability.component)) continue
    if (deny && deny.has(capability.component)) continue

    // fits() returns null when the chart can render this profile, otherwise a reason.
    const fitReason = capability.fits(profile)
    if (fitReason !== null) continue

    // Base intent scores from the capability
    const baseScores: Partial<Record<IntentId, number>> = {}
    for (const [intent, scorer] of Object.entries(capability.intentScores) as Array<[IntentId, IntentScorer]>) {
      baseScores[intent] = score(scorer, profile)
    }

    const baseCaveats = capability.caveats ? Array.from(capability.caveats(profile)) : []
    // `[undefined]` stands for "the base chart, no variant" so the loop below
    // runs exactly once when variants are disabled or absent.
    const variants: ReadonlyArray<ChartVariant | undefined> =
      includeVariants && capability.variants && capability.variants.length > 0
        ? capability.variants
        : [undefined]

    for (const variant of variants) {
      const intentScores = applyVariantToScores(baseScores, variant)
      const baseComposite = compositeScore(intentScores, rankingIntents)
      const variantRubric = applyVariantToRubric(capability.rubric, variant)

      // Audience bias: overrides familiarity and shifts composite score
      // by ±familiarity + ±target. Strong enough to reorder rankings, not
      // strong enough to override fits-driven correctness.
      const biased = applyAudienceBias(
        baseComposite,
        variantRubric,
        capability.component,
        options.audience,
      )
      if (biased.score < minScore) continue

      const reasons = buildReasons(capability, profile, intentScores, rankingIntents)
      if (biased.appliedReason) reasons.push(biased.appliedReason)
      const caveats = [...baseCaveats, ...(variant?.caveats ?? [])]
      const props = capability.buildProps(profile, variant)

      out.push({
        component: capability.component,
        family: capability.family,
        importPath: capability.importPath,
        variant,
        score: biased.score,
        intentScores,
        rubric: biased.rubric,
        reasons,
        caveats,
        props,
      })
    }
  }

  // Sort: higher composite score first, then higher accuracy, then higher familiarity.
  out.sort((a, b) => {
    if (b.score !== a.score) return b.score - a.score
    if (b.rubric.accuracy !== a.rubric.accuracy) return b.rubric.accuracy - a.rubric.accuracy
    return b.rubric.familiarity - a.rubric.familiarity
  })

  return out.slice(0, maxResults)
}
/**
 * One rejected capability: a chart whose `fits()` returned a reason.
 * Surfaced by `explainCapabilityFit` for diagnostic panels and `--doctor` auto-fix.
 */
export interface RejectedCapability {
  component: string
  family: ChartCapability["family"]
  importPath: ChartCapability["importPath"]
  /** Human-readable reason this chart can't render this profile. */
  reason: string
}

export interface ExplainCapabilityFitResult {
  /** Capabilities that fit the profile — full ranked suggestion list. */
  fitting: Suggestion[]
  /** Capabilities that did not fit, with their rejection reasons. */
  rejected: RejectedCapability[]
  /** The profile that was evaluated against (provided or computed). */
  profile: ChartDataProfile
}

/**
 * Like `suggestCharts`, but also returns the capabilities that *didn't* fit
 * along with their rejection reasons. The single best primitive for:
 *   • "Why isn't there a pie chart option?" UI surfaces (vizmart V.4)
 *   • `--doctor` auto-fix loops that need to enumerate alternatives
 *   • Descriptor authoring — quickly see whose `fits()` is too strict
 *
 * Mirrors `suggestCharts` for the fitting side. Rejection enumeration walks
 * every registered capability whether it fits or not.
 *
 * Note: `fitting` is produced by `suggestCharts` with the same options, so it
 * is still subject to `minScore` and `maxResults`; `rejected` is only subject
 * to `allow` / `deny`.
 *
 * @param data Rows to profile. Ignored when `options.profile` is supplied.
 * @param options Same options as `suggestCharts`.
 */
export function explainCapabilityFit(
  data: ReadonlyArray<Datum> | null | undefined,
  options: SuggestChartsOptions = {}
): ExplainCapabilityFitResult {
  const profile = options.profile ?? profileData(data ?? [], { rawInput: options.rawInput, seriesField: options.seriesField })
  const capabilities = options.capabilities ?? getCapabilities()

  const allow = options.allow ? new Set<string>(options.allow) : null
  const deny = options.deny ? new Set<string>(options.deny) : null

  const rejected: RejectedCapability[] = []
  for (const capability of capabilities) {
    if (allow && !allow.has(capability.component)) continue
    if (deny && deny.has(capability.component)) continue
    const fitReason = capability.fits(profile)
    if (fitReason !== null) {
      rejected.push({
        component: capability.component,
        family: capability.family,
        importPath: capability.importPath,
        reason: fitReason,
      })
    }
  }

  // Pass the resolved profile along so suggestCharts does not re-profile the data.
  const fitting = suggestCharts(data, { ...options, profile })

  return { fitting, rejected, profile }
}
/**
 * Score a specific (component, variant) pair against a dataset and (optionally) an intent.
 * Useful for evaluating a chart a user already chose: "is this a good fit for what they want?"
 *
 * Looks the component up in the global capability registry. Unlike
 * `suggestCharts`, no audience bias is applied: `score` is the plain composite
 * of the (variant-adjusted) intent scores.
 *
 * @param component Registered component name, e.g. "LineChart".
 * @param data Rows to profile. Ignored when `options.profile` is supplied.
 * @param options.intent Intent(s) to average over; omitted → mean of non-zero scores.
 * @param options.variantKey Variant to evaluate. A key that matches no variant
 *   falls back to the base chart (the returned `variant` is then `undefined`).
 * @returns A `Suggestion` when the chart fits, otherwise `{ reason }` explaining
 *   why not (unknown component, or the capability's own `fits()` message).
 */
export function scoreChart(
  component: string,
  data: ReadonlyArray<Datum> | null | undefined,
  options: { intent?: IntentId | IntentId[]; variantKey?: string; profile?: ChartDataProfile } = {}
): Suggestion | { reason: string } {
  const capabilities = getCapabilities()
  const capability = capabilities.find((c) => c.component === component)
  if (!capability) return { reason: `No capability registered for "${component}"` }
  const profile = options.profile ?? profileData(data ?? [])
  const fit = capability.fits(profile)
  if (fit !== null) return { reason: fit }

  const variant = options.variantKey
    ? capability.variants?.find((v) => v.key === options.variantKey)
    : undefined

  const intents: IntentId[] = options.intent
    ? Array.isArray(options.intent) ? options.intent : [options.intent]
    : []

  const baseScores: Partial<Record<IntentId, number>> = {}
  for (const [intent, scorer] of Object.entries(capability.intentScores) as Array<[IntentId, IntentScorer]>) {
    baseScores[intent] = score(scorer, profile)
  }
  const intentScores = applyVariantToScores(baseScores, variant)
  const composite = compositeScore(intentScores, intents)
  const rubric = applyVariantToRubric(capability.rubric, variant)
  const reasons = buildReasons(capability, profile, intentScores, intents)
  const caveats = [
    ...(capability.caveats ? capability.caveats(profile) : []),
    ...(variant?.caveats ?? []),
  ]

  return {
    component: capability.component,
    family: capability.family,
    importPath: capability.importPath,
    variant,
    score: composite,
    intentScores,
    rubric,
    reasons,
    caveats,
    props: capability.buildProps(profile, variant),
  }
}
// Fixture: 24 rows = 8 months × 3 regions, revenue rising with the row index.
const temporalMultiSeries = Array.from({ length: 24 }, (_, i) => {
  const region = ["EU", "NA", "APAC"][i % 3]
  return { month: Math.floor(i / 3) + 1, revenue: 1000 + i * 80, region }
})

// Fixture: five catalogue rows with categorical and numeric fields but no time field.
const productCatalog = [
  { product: "Widget", category: "tools", units: 480, region: "EU", price: 12 },
  { product: "Gadget", category: "tools", units: 620, region: "NA", price: 25 },
  { product: "Sprocket", category: "parts", units: 290, region: "EU", price: 8 },
  { product: "Whatsit", category: "parts", units: 740, region: "APAC", price: 15 },
  { product: "Gizmo", category: "tools", units: 410, region: "NA", price: 18 },
]

// Behavioural contract of suggestDashboard: distinct intents per panel, family
// diversification, panel cap, explicit intent lists, covered/missing
// reporting, and no repeated (component, variant) pair.
describe("suggestDashboard", () => {
  it("returns multiple panels covering distinct intents", () => {
    const dashboard = suggestDashboard(temporalMultiSeries)
    expect(dashboard.panels.length).toBeGreaterThan(1)
    // No two panels share the same intent
    const intents = dashboard.panels.map((p) => p.intent)
    expect(new Set(intents).size).toBe(intents.length)
  })

  it("diversifies by chart family by default", () => {
    const dashboard = suggestDashboard(temporalMultiSeries)
    const families = dashboard.panels.map((p) => p.suggestion.family)
    // Ideally every family appears at most once; allow occasional repeat if
    // diversification's fallback path kicked in.
    const uniqueFamilies = new Set(families)
    expect(uniqueFamilies.size).toBeGreaterThanOrEqual(Math.min(2, families.length))
  })

  it("emits a dashboard sized to maxPanels", () => {
    const dashboard = suggestDashboard(temporalMultiSeries, { maxPanels: 3 })
    expect(dashboard.panels.length).toBeLessThanOrEqual(3)
  })

  it("respects an explicit intent list when provided", () => {
    const dashboard = suggestDashboard(temporalMultiSeries, {
      intents: ["trend", "compare-series", "compare-categories"],
    })
    // Panels come back in the order the intents were requested.
    expect(dashboard.panels.map((p) => p.intent)).toEqual([
      "trend",
      "compare-series",
      "compare-categories",
    ])
  })

  it("reports intents the data couldn't cover", () => {
    // Categorical product data can't cover trend/hierarchy/geo
    const dashboard = suggestDashboard(productCatalog, {
      intents: ["rank", "trend", "hierarchy", "geo"],
    })
    expect(dashboard.intentsMissing).toContain("trend")
    expect(dashboard.intentsMissing).toContain("hierarchy")
    expect(dashboard.intentsMissing).toContain("geo")
    expect(dashboard.intentsCovered).toContain("rank")
  })

  it("includes runnable props on every panel", () => {
    const dashboard = suggestDashboard(temporalMultiSeries)
    for (const panel of dashboard.panels) {
      expect(panel.suggestion.props).toBeDefined()
      expect(panel.suggestion.props.data).toBeDefined()
    }
  })

  it("does not repeat the same chart twice", () => {
    const dashboard = suggestDashboard(temporalMultiSeries)
    // Same key shape suggestDashboard uses internally: component + variant key ("base" when none).
    const keys = dashboard.panels.map(
      (p) => `${p.suggestion.component}/${p.suggestion.variant?.key ?? "base"}`,
    )
    expect(new Set(keys).size).toBe(keys.length)
  })

  it("returns empty panels gracefully for empty data", () => {
    const dashboard = suggestDashboard([])
    expect(dashboard.panels).toEqual([])
    expect(dashboard.intentsCovered).toEqual([])
  })

  it("default intents skip families the data doesn't support", () => {
    // productCatalog has no time axis and no hierarchy; default intents shouldn't include trend/hierarchy
    const dashboard = suggestDashboard(productCatalog)
    // covered + missing together are every intent the engine attempted.
    const intents = [...dashboard.intentsCovered, ...dashboard.intentsMissing]
    expect(intents).not.toContain("trend")
    expect(intents).not.toContain("hierarchy")
    expect(intents).not.toContain("geo")
  })
})
/**
 * One panel in a generated dashboard. Pairs a chart suggestion with the
 * intent that motivated it — consumers render the suggestion and label it
 * with the intent so readers know *why* that panel exists.
 */
export interface DashboardPanel {
  /** The intent this panel covers. */
  intent: IntentId
  /** The chart picked for that intent. */
  suggestion: Suggestion
}

export interface DashboardSuggestion {
  /** Ordered panels, each covering a distinct intent. */
  panels: DashboardPanel[]
  /** Intents the engine actually filled. */
  intentsCovered: IntentId[]
  /** Intents the engine couldn't fill from this data. */
  intentsMissing: IntentId[]
  /**
   * Stretch panels — unfamiliar-but-fitting charts the audience could grow
   * into. Empty when no `audience` is provided or `exposureLevel` is 0.
   * Render alongside the main panels in a distinct surface so users see
   * them as opt-in literacy growth, not silent defaults.
   */
  stretchPanels: StretchSuggestion[]
  /** The shape profile (computed once, reused for every panel). */
  profile: ChartDataProfile
}

export interface SuggestDashboardOptions {
  /**
   * Intents to attempt. When omitted, the engine picks a sensible default set
   * based on the data shape (e.g. if `hasTimeAxis`, include "trend"; if
   * `categoryCount`, include "rank" and "part-to-whole").
   */
  intents?: ReadonlyArray<IntentId>
  /** Maximum number of panels. Default 6. */
  maxPanels?: number
  /**
   * When true (default), prefer not to repeat the same chart family across
   * panels — produces a more varied dashboard. Set false to allow duplicates.
   */
  diversifyByFamily?: boolean
  /** Allow only these component names. */
  allow?: ReadonlyArray<string>
  /** Exclude these component names. */
  deny?: ReadonlyArray<string>
  /** Optional pre-built profile (avoids recomputation). */
  profile?: ChartDataProfile
  /** Non-tabular payload — forwarded to profileData. */
  rawInput?: unknown
  /**
   * Audience profile — applies familiarity overrides and adoption-target bias
   * to every panel's ranking. When set with `exposureLevel >= 1`, the dashboard
   * additionally returns `stretchPanels` showing unfamiliar-but-fitting charts.
   */
  audience?: AudienceProfile
  /** Max stretch panels (default min(maxPanels, 3)). */
  maxStretchPanels?: number
}
/**
 * Derive the default intent list from the shape of the data.
 *
 * Each lens is only offered when the profile shows the data can support it.
 * Order is significant (callers fill panels in this order): temporal intents,
 * categorical intents, distribution, numeric-pair intents, then the
 * structural ones (hierarchy, flow, geo).
 */
function defaultIntents(profile: ChartDataProfile): IntentId[] {
  const ordered: IntentId[] = []
  const addWhen = (enabled: unknown, ...ids: IntentId[]): void => {
    if (enabled) ordered.push(...ids)
  }

  // Temporal lenses; series comparisons sit between trend and change-detection.
  const multiSeries = (profile.seriesCount ?? 0) >= 2
  addWhen(profile.hasTimeAxis, "trend")
  addWhen(profile.hasTimeAxis && multiSeries, "compare-series", "composition-over-time")
  addWhen(profile.hasTimeAxis, "change-detection")

  // Categorical lenses.
  addWhen(profile.categoryCount, "rank", "compare-categories", "part-to-whole")

  // Distribution needs a primary numeric y and at least 10 rows.
  addWhen(profile.primary.y && profile.rowCount >= 10, "distribution")

  // Correlation and outlier detection need two or more numeric fields.
  let numericFields = 0
  for (const field of Object.values(profile.fields)) {
    if (field.type === "numeric") numericFields += 1
  }
  addWhen(numericFields >= 2, "correlation", "outlier-detection")

  // Structural lenses.
  addWhen(profile.hasHierarchy, "hierarchy")
  addWhen(profile.hasNetwork, "flow")
  addWhen(profile.hasGeo, "geo")

  // Drop duplicates, keeping first occurrences in order.
  return [...new Set(ordered)]
}
/**
 * Generate a dashboard: a set of complementary chart panels, each
 * answering a distinct analytical intent on the same dataset.
 *
 * The contract: every panel has a stated `intent` and a suggestion that
 * fits that intent. The engine diversifies by chart family by default to
 * avoid "every panel is a bar chart" outcomes. Intents that can't be
 * filled from the data (e.g. "geo" on row data with no lat/lon) are
 * reported in `intentsMissing` so consumers can show "no fit for geo
 * here" rather than silently dropping them. Intents left over once
 * `maxPanels` is reached are reported in `intentsMissing` as well.
 *
 * Heuristic only — no LLM call. The result is suitable for direct
 * rendering (each panel's `suggestion.props` is spreadable into the
 * matching chart) or for piping to an LLM as composition context.
 *
 * @param data Rows to profile. Ignored for profiling when `options.profile` is supplied.
 * @param options Intent list, panel limits, allow/deny filters and audience.
 *
 * @example
 * // Illustrative only: `chartComponents` is your own name → component map.
 * const { panels } = suggestDashboard(data)
 * return (
 *   <div className="dashboard">
 *     {panels.map(({ intent, suggestion }) => {
 *       const Chart = chartComponents[suggestion.component]
 *       return (
 *         <section key={intent}>
 *           <h3>{intent}</h3>
 *           <Chart {...suggestion.props} />
 *         </section>
 *       )
 *     })}
 *   </div>
 * )
 */
export function suggestDashboard(
  data: ReadonlyArray<Datum> | null | undefined,
  options: SuggestDashboardOptions = {},
): DashboardSuggestion {
  const profile = options.profile ?? profileData(data ?? [], { rawInput: options.rawInput })
  const maxPanels = options.maxPanels ?? 6
  const diversify = options.diversifyByFamily !== false
  const intents = options.intents ?? defaultIntents(profile)

  const panels: DashboardPanel[] = []
  const intentsCovered: IntentId[] = []
  const intentsMissing: IntentId[] = []
  const usedFamilies = new Set<Suggestion["family"]>()
  // Track (component, variantKey) so the same chart never appears twice
  const usedKeys = new Set<string>()
  // Component names already on the dashboard, tracked directly so the
  // stretch-panel deny list never has to be parsed back out of the keys.
  const usedComponents = new Set<string>()

  const keyOf = (s: Suggestion): string => `${s.component}/${s.variant?.key ?? "base"}`

  for (const intent of intents) {
    if (panels.length >= maxPanels) {
      intentsMissing.push(intent)
      continue
    }

    // Get a fresh ranked list for this intent. We re-rank rather than
    // cherry-picking from a single suggestion set because intent-specific
    // ranking is the whole point. The minScore floor ensures we don't
    // recommend "the technically least-bad fit" when *nothing* actually
    // serves the intent (e.g. "geo" on row data with no lat/lon).
    const candidates = suggestCharts(data, {
      profile,
      intent,
      allow: options.allow,
      deny: options.deny,
      maxResults: 20,
      includeVariants: true,
      minScore: 1.5,
      audience: options.audience,
    })

    // Find the highest-ranked candidate not already used (component+variant),
    // and (when diversifying) whose family isn't already in the dashboard.
    let pick: Suggestion | undefined
    for (const candidate of candidates) {
      if (usedKeys.has(keyOf(candidate))) continue
      if (diversify && usedFamilies.has(candidate.family)) continue
      pick = candidate
      break
    }

    // Fallback: if diversification eliminated all candidates, accept a
    // family repeat rather than skipping the intent.
    if (!pick && diversify) {
      for (const candidate of candidates) {
        if (usedKeys.has(keyOf(candidate))) continue
        pick = candidate
        break
      }
    }

    if (pick) {
      panels.push({ intent, suggestion: pick })
      intentsCovered.push(intent)
      usedFamilies.add(pick.family)
      usedKeys.add(keyOf(pick))
      usedComponents.add(pick.component)
    } else {
      intentsMissing.push(intent)
    }
  }

  // Stretch panels are populated when an audience is provided and exposure is enabled.
  // Excludes anything already in the main dashboard so the stretch rail genuinely
  // shows growth opportunities, not duplicates of the familiar picks. The caller's
  // own deny list is merged in so an explicitly excluded chart cannot come back
  // as a stretch pick.
  const stretchPanels: StretchSuggestion[] =
    options.audience && (options.audience.exposureLevel ?? 1) > 0
      ? suggestStretchCharts(data, {
          profile,
          audience: options.audience,
          deny: [...(options.deny ?? []), ...usedComponents],
          maxResults: options.maxStretchPanels ?? Math.min(3, maxPanels),
        })
      : []

  return { panels, intentsCovered, intentsMissing, stretchPanels, profile }
}
// Fixture: medium-throughput, windowed stream with time, one numeric and one categorical field.
const latencyStream: StreamSchema = {
  fields: [
    { name: "ts", kind: "date" },
    { name: "latency_ms", kind: "numeric" },
    { name: "endpoint", kind: "categorical" },
  ],
  throughput: "medium",
  retention: "windowed",
}

// Fixture: high-throughput stream with only time + value.
const highVolumeStream: StreamSchema = {
  fields: [
    { name: "ts", kind: "date" },
    { name: "value", kind: "numeric" },
  ],
  throughput: "high",
  retention: "windowed",
}

// Fixture: no throughput/retention hints; adds a categorical "cohort" field.
const pureValueStream: StreamSchema = {
  fields: [
    { name: "ts", kind: "date" },
    { name: "value", kind: "numeric" },
    { name: "cohort", kind: "categorical" },
  ],
}

// Ranking expectations for the built-in realtime capabilities, plus the
// user-registration path.
describe("suggestStreamCharts", () => {
  it("recommends RealtimeLineChart for medium-throughput trend", () => {
    const suggestions = suggestStreamCharts(latencyStream, { intent: "trend" })
    expect(suggestions[0].component).toBe("RealtimeLineChart")
  })

  it("recommends RealtimeHeatmap / Waterfall for high throughput trend", () => {
    const suggestions = suggestStreamCharts(highVolumeStream, { intent: "trend" })
    expect(suggestions[0].component).not.toBe("RealtimeLineChart")
    expect(["RealtimeHeatmap", "RealtimeWaterfallChart"]).toContain(suggestions[0].component)
  })

  it("rejects RealtimeLineChart at high throughput", () => {
    const suggestions = suggestStreamCharts(highVolumeStream)
    expect(suggestions.find((s) => s.component === "RealtimeLineChart")).toBeUndefined()
  })

  it("recommends RealtimeHistogram for distribution", () => {
    const suggestions = suggestStreamCharts(latencyStream, { intent: "distribution" })
    expect(suggestions[0].component).toBe("RealtimeHistogram")
  })

  it("recommends RealtimeSwarmChart for outlier detection with categories", () => {
    const suggestions = suggestStreamCharts(pureValueStream, { intent: "outlier-detection" })
    expect(suggestions[0].component).toBe("RealtimeSwarmChart")
  })

  it("includes ready-to-use props", () => {
    // Realtime charts use timeAccessor / valueAccessor (not xAccessor / yAccessor).
    // The recommender's output must be spreadable directly into the chart.
    const suggestions = suggestStreamCharts(latencyStream, { intent: "trend" })
    expect(suggestions[0].props.timeAccessor).toBe("ts")
    expect(suggestions[0].props.valueAccessor).toBe("latency_ms")
  })

  it("surfaces cumulative-retention caveat for line chart", () => {
    const cumulativeStream: StreamSchema = {
      fields: [
        { name: "ts", kind: "date" },
        { name: "value", kind: "numeric" },
      ],
      throughput: "low",
      retention: "cumulative",
    }
    const suggestions = suggestStreamCharts(cumulativeStream, { intent: "trend" })
    const line = suggestions.find((s) => s.component === "RealtimeLineChart")
    // The caveat wording is not pinned; it only has to mention buffering or windowSize.
    expect(line?.caveats.some((c) => c.includes("buffer") || c.includes("windowSize"))).toBe(true)
  })

  it("respects user-registered capabilities", () => {
    const custom: StreamChartCapability = {
      component: "MyStreamChart",
      importPath: "semiotic/realtime",
      rubric: { familiarity: 1, accuracy: 5, precision: 5 },
      fits: () => null,
      intentScores: { "trend": 5 },
      buildProps: () => ({}),
    }
    registerStreamChartCapability(custom)
    // Always unregister so the module-level registry does not leak into other tests.
    try {
      const suggestions = suggestStreamCharts(latencyStream, { allow: ["MyStreamChart"] })
      expect(suggestions[0].component).toBe("MyStreamChart")
    } finally {
      unregisterStreamChartCapability("MyStreamChart")
    }
  })
})
/** Capabilities shipped with the library, in registration order. */
const BUILT_IN_STREAM_CAPABILITIES: ReadonlyArray<StreamChartCapability> = [
  RealtimeLineChartCapability,
  RealtimeHistogramCapability,
  RealtimeSwarmChartCapability,
  RealtimeWaterfallChartCapability,
  RealtimeHeatmapCapability,
  TemporalHistogramCapability,
]

/** User-registered capabilities, keyed by component name (module-level state). */
const userStreamCapabilities = new Map<string, StreamChartCapability>()

/**
 * Register a custom realtime chart capability.
 * Registering a name that already exists replaces the earlier entry,
 * built-in entries included (see `getStreamCapabilities`).
 */
export function registerStreamChartCapability(capability: StreamChartCapability): void {
  userStreamCapabilities.set(capability.component, capability)
}

/**
 * Remove a user-registered capability by component name.
 * Built-in capabilities are not affected; unknown names are a no-op.
 */
export function unregisterStreamChartCapability(component: string): void {
  userStreamCapabilities.delete(component)
}

/**
 * Return the effective capability list: built-ins first, then user entries.
 * A user entry with the same component name as a built-in overrides it in
 * place (it keeps the built-in's position).
 */
export function getStreamCapabilities(): ReadonlyArray<StreamChartCapability> {
  // Fast path: hand back the shared built-in array when nothing is registered.
  if (userStreamCapabilities.size === 0) return BUILT_IN_STREAM_CAPABILITIES
  const merged = new Map<string, StreamChartCapability>()
  for (const c of BUILT_IN_STREAM_CAPABILITIES) merged.set(c.component, c)
  for (const [name, c] of userStreamCapabilities) merged.set(name, c)
  return Array.from(merged.values())
}
/**
 * Resolve a stream intent scorer (constant or function of the schema) to a
 * number in [0, 5]. A missing scorer or a non-finite result counts as 0.
 */
function scoreValue(scorer: StreamIntentScorer | undefined, schema: StreamSchema): number {
  if (scorer === undefined) return 0
  const raw = typeof scorer === "function" ? (scorer as (s: StreamSchema) => number)(schema) : scorer
  if (!Number.isFinite(raw)) return 0
  return Math.max(0, Math.min(5, raw))
}

/**
 * Collapse per-intent scores into the single ranking number.
 *
 * With no ranking intents: mean of the strictly positive scores (0 if none).
 * Otherwise: mean over the requested intents, unscored intents counting as 0.
 */
function compositeScore(
  intentScores: Partial<Record<IntentId, number>>,
  rankingIntents: IntentId[],
): number {
  if (rankingIntents.length === 0) {
    const nonZero = Object.values(intentScores).filter((n): n is number => typeof n === "number" && n > 0)
    if (nonZero.length === 0) return 0
    return nonZero.reduce((a, b) => a + b, 0) / nonZero.length
  }
  let sum = 0
  for (const intent of rankingIntents) sum += intentScores[intent] ?? 0
  return sum / rankingIntents.length
}

/**
 * Build the human-readable reasons for a stream suggestion: up to two
 * requested intents scoring >= 3 (best first), then a throughput note when
 * the schema declares one.
 */
function buildReasons(
  schema: StreamSchema,
  intentScores: Partial<Record<IntentId, number>>,
  rankingIntents: IntentId[],
): string[] {
  const reasons: string[] = []
  const top = rankingIntents
    .map((intent) => ({ intent, value: intentScores[intent] ?? 0 }))
    .filter((entry) => entry.value >= 3)
    .sort((a, b) => b.value - a.value)
    .slice(0, 2)
  for (const { intent, value } of top) {
    reasons.push(`Strong fit for ${intent} (${value}/5)`)
  }
  if (schema.throughput) reasons.push(`tuned for ${schema.throughput} throughput`)
  return reasons
}

export interface SuggestStreamChartsOptions {
  /** Ranking intent(s). When omitted, suggestions are ranked by mean non-zero intent score. */
  intent?: IntentId | IntentId[]
  /** Restrict to these component names. */
  allow?: ReadonlyArray<string>
  /** Exclude these component names. */
  deny?: ReadonlyArray<string>
  /** Maximum suggestions to return (default 10). Negative values are treated as 0. */
  maxResults?: number
  /** Drop suggestions whose composite score is below this (default 0 — keep all). */
  minScore?: number
  /** Override the registry. Defaults to `getStreamCapabilities()`. */
  capabilities?: ReadonlyArray<StreamChartCapability>
}

/**
 * Suggest realtime charts for a schema, ranked by intent.
 *
 * Parallel to `suggestCharts` but operates on a `StreamSchema` (fields +
 * throughput/retention hints) rather than row data. Use for live dashboards,
 * monitoring views, anywhere events arrive over time rather than as a bounded
 * table.
 *
 * Results are sorted by composite score, then rubric accuracy, then rubric
 * familiarity (all descending). Every suggestion has `family: "realtime"`.
 *
 * @example
 * const suggestions = suggestStreamCharts({
 *   fields: [
 *     { name: "ts", kind: "date" },
 *     { name: "latency_ms", kind: "numeric" },
 *     { name: "endpoint", kind: "categorical" },
 *   ],
 *   throughput: "high",
 *   retention: "windowed",
 * }, { intent: "trend" })
 * // → [{ component: "RealtimeHeatmap", ... }, { component: "RealtimeWaterfallChart", ... }]
 */
export function suggestStreamCharts(
  schema: StreamSchema,
  options: SuggestStreamChartsOptions = {},
): StreamSuggestion[] {
  const capabilities = options.capabilities ?? getStreamCapabilities()
  const rankingIntents: IntentId[] = options.intent
    ? Array.isArray(options.intent) ? options.intent : [options.intent]
    : []
  const minScore = options.minScore ?? 0
  // A negative limit would make slice(0, -k) drop the k lowest-ranked results
  // instead of returning none; floor it at 0.
  const maxResults = Math.max(0, options.maxResults ?? 10)

  const allow = options.allow ? new Set<string>(options.allow) : null
  const deny = options.deny ? new Set<string>(options.deny) : null

  const out: StreamSuggestion[] = []

  for (const capability of capabilities) {
    if (allow && !allow.has(capability.component)) continue
    if (deny && deny.has(capability.component)) continue

    // fits() returns null when the chart suits the schema, otherwise a reason.
    const fitReason = capability.fits(schema)
    if (fitReason !== null) continue

    const intentScores: Partial<Record<IntentId, number>> = {}
    for (const [intent, scorer] of Object.entries(capability.intentScores) as Array<[IntentId, StreamIntentScorer]>) {
      intentScores[intent] = scoreValue(scorer, schema)
    }

    const composite = compositeScore(intentScores, rankingIntents)
    if (composite < minScore) continue

    // Copy so consumers mutating the suggestion can't touch the registry entry.
    const rubric: ChartRubric = { ...capability.rubric }
    const caveats = capability.caveats ? Array.from(capability.caveats(schema)) : []
    const reasons = buildReasons(schema, intentScores, rankingIntents)
    const props = capability.buildProps(schema)

    out.push({
      component: capability.component,
      family: "realtime",
      importPath: capability.importPath,
      score: composite,
      intentScores,
      rubric,
      reasons,
      caveats,
      props,
    })
  }

  out.sort((a, b) => {
    if (b.score !== a.score) return b.score - a.score
    if (b.rubric.accuracy !== a.rubric.accuracy) return b.rubric.accuracy - a.rubric.accuracy
    return b.rubric.familiarity - a.rubric.familiarity
  })

  return out.slice(0, maxResults)
}
// Fixture: 150 respondents across three cohorts with a satisfaction score clamped to [1, 10].
// NOTE(review): the score includes Math.random(), so the fixture differs per run;
// the assertions below only rely on the shape of the data — confirm that holds.
const satisfactionByCohort = Array.from({ length: 150 }, (_, i) => ({
  respondent: i + 1,
  satisfaction: Math.max(1, Math.min(10, 6 + Math.sin(i / 7) * 2 + Math.random() * 3 - 1)),
  cohort: ["Beta", "GA", "Enterprise"][i % 3],
}))

// Fixture: four-row categorical table.
const productSales = [
  { product: "A", units: 30 },
  { product: "B", units: 50 },
  { product: "C", units: 20 },
  { product: "D", units: 45 },
]

// Fixture: audience fluent in bar/line/pie, weak on distribution charts, with
// an explicit "increase" target on BoxPlot.
const executiveAudience: AudienceProfile = {
  name: "Exec",
  familiarity: { BarChart: 5, LineChart: 5, PieChart: 5, BoxPlot: 2, ViolinPlot: 1, SwarmPlot: 1 },
  targets: {
    BoxPlot: { direction: "increase", weight: 2, reason: "growing distribution literacy" },
  },
  exposureLevel: 1,
}

describe("suggestStretchCharts", () => {
  it("returns empty array when no audience is supplied", () => {
    const result = suggestStretchCharts(satisfactionByCohort)
    expect(result).toEqual([])
  })

  it("surfaces audience-targeted increase charts as stretches", () => {
    const result = suggestStretchCharts(satisfactionByCohort, {
      audience: executiveAudience,
      intent: "compare-categories",
    })
    expect(result.some((s) => s.suggestion.component === "BoxPlot")).toBe(true)
  })

  it("each stretch carries a non-empty rationale", () => {
    const result = suggestStretchCharts(satisfactionByCohort, {
      audience: executiveAudience,
      intent: "compare-categories",
    })
    for (const s of result) {
      expect(s.rationale.length).toBeGreaterThan(0)
    }
  })

  it("uses target reason verbatim when one is provided", () => {
    const result = suggestStretchCharts(satisfactionByCohort, {
      audience: executiveAudience,
      intent: "compare-categories",
    })
    const boxStretch = result.find((s) => s.suggestion.component === "BoxPlot")
    expect(boxStretch?.rationale).toContain("growing distribution literacy")
  })

  it("respects the familiarity ceiling — never recommends a chart the audience already knows", () => {
    const result = suggestStretchCharts(productSales, {
      audience: executiveAudience,
      intent: "rank",
    })
    // BarChart is familiarity 5; should never appear as a stretch
    expect(result.some((s) => s.suggestion.component === "BarChart")).toBe(false)
  })

  it("does not return charts that fail the fits gate", () => {
    // 4-row product data can't fit ViolinPlot/RidgelinePlot
    const result = suggestStretchCharts(productSales, {
      audience: executiveAudience,
      intent: "rank",
    })
    expect(result.some((s) => s.suggestion.component === "RidgelinePlot")).toBe(false)
  })

  it("widens the ceiling at exposureLevel 2", () => {
    // bump exposure level — Scatterplot is familiarity 3 (executive default)
    const audience: AudienceProfile = {
      ...executiveAudience,
      familiarity: { ...executiveAudience.familiarity, Scatterplot: 3 },
      exposureLevel: 2,
    }
    const dataWith2Numerics = Array.from({ length: 30 }, () => ({
      x: Math.random() * 100,
      y: Math.random() * 100,
    }))
    const result = suggestStretchCharts(dataWith2Numerics, {
      audience,
      intent: "correlation",
    })
    expect(result.some((s) => s.suggestion.component === "Scatterplot")).toBe(true)
  })
})

// Integration: how suggestDashboard fills `stretchPanels` from the audience profile.
describe("suggestDashboard × stretchPanels", () => {
  it("includes stretchPanels when audience has exposureLevel >= 1", () => {
    const dashboard = suggestDashboard(satisfactionByCohort, {
      audience: executiveAudience,
    })
    expect(dashboard.stretchPanels.length).toBeGreaterThan(0)
  })

  it("returns no stretchPanels when exposureLevel is 0", () => {
    const audience = { ...executiveAudience, exposureLevel: 0 as const }
    const dashboard = suggestDashboard(satisfactionByCohort, { audience })
    expect(dashboard.stretchPanels).toEqual([])
  })

  it("returns no stretchPanels when no audience is supplied", () => {
    const dashboard = suggestDashboard(satisfactionByCohort)
    expect(dashboard.stretchPanels).toEqual([])
  })

  it("stretchPanels do not duplicate main panels", () => {
    const dashboard = suggestDashboard(satisfactionByCohort, {
      audience: executiveAudience,
    })
    // Compared by component name: a stretch pick must not reuse any main-panel component.
    const panelComponents = new Set(dashboard.panels.map((p) => p.suggestion.component))
    for (const stretch of dashboard.stretchPanels) {
      expect(panelComponents.has(stretch.suggestion.component)).toBe(false)
    }
  })
})
unfamiliar-but-fitting chart paired with the + * familiar chart it could substitute for. Pairing makes the literacy + * suggestion concrete: "instead of BarChart, try BoxPlot here, because…" + */ +export interface StretchSuggestion { + /** The unfamiliar chart we're suggesting as growth. */ + suggestion: Suggestion + /** + * The familiar chart this stretch could replace for the same intent. + * Undefined when the stretch is recommended on its own merits (e.g. a + * direct "increase" target with no obvious familiar counterpart). + */ + replacing?: string + /** Human-readable rationale, suitable for verbatim display. */ + rationale: string + /** Audience familiarity for this chart — the number that made it qualify as a stretch. */ + familiarity: number +} + +export interface SuggestStretchChartsOptions { + /** Intent(s) to rank by. When omitted, charts are picked by data fit alone. */ + intent?: IntentId | IntentId[] + /** Required — without an audience profile, the concept of "stretch" doesn't apply. */ + audience?: AudienceProfile + /** Restrict to these component names. */ + allow?: ReadonlyArray + /** Exclude these component names. */ + deny?: ReadonlyArray + /** Max stretch picks to return (default 5). */ + maxResults?: number + /** Pre-built profile. */ + profile?: ChartDataProfile + /** Non-tabular payload — forwarded to profileData. */ + rawInput?: unknown + /** + * Only return stretches within this score distance of the top familiar pick + * (default 1.5). Tighter values keep the suggestions plausible; wider values + * expose more variety. + */ + scoreTolerance?: number +} + +interface PairCandidate { + stretch: Suggestion + familiar?: Suggestion +} + +/** + * Find pairs (familiar, stretch) where the stretch chart fits the data, + * has audience familiarity at or below the stretch ceiling, and either: + * • is an `increase` target for this audience, OR + * • scores within `scoreTolerance` of a familiar alternative for the + * same intent. 
+ * + * Each pair is returned as a StretchSuggestion with `replacing` (the + * familiar chart it could substitute for) and a rationale string. + * + * Heuristic only. Use `audience` with care — without target signals, every + * audience-unfamiliar chart becomes a candidate, which can drown the + * surface in dubious recommendations. + */ +export function suggestStretchCharts( + data: ReadonlyArray | null | undefined, + options: SuggestStretchChartsOptions = {}, +): StretchSuggestion[] { + const audience = options.audience + if (!audience) return [] + + const profile = options.profile ?? profileData(data ?? [], { rawInput: options.rawInput }) + const ceiling = stretchFamiliarityCeiling(audience) + const scoreTolerance = options.scoreTolerance ?? 1.5 + const maxResults = options.maxResults ?? 5 + + // Build a map of effective familiarity per registered component + const capabilities = getCapabilities() + const familiarityByComponent = new Map() + for (const c of capabilities) { + familiarityByComponent.set(c.component, effectiveFamiliarity(c.component, c.rubric.familiarity, audience)) + } + + // Run a familiar-only pass (no audience bias) so we have a baseline ranking + // to compare stretches against — otherwise we'd compare biased scores to + // biased scores and the comparison is degenerate. + const baseline = suggestCharts(data, { + profile, + intent: options.intent, + maxResults: 30, + includeVariants: true, + minScore: 1.0, + allow: options.allow, + deny: options.deny, + }) + + // Top-scoring familiar pick — used as the "you'd already pick this" anchor + // each stretch is paired against. Multi-intent / no-intent cases just take + // the global top; per-intent buckets aren't needed because `suggestCharts` + // has already ranked by the requested intent (or by overall fit). + const familiarPicks = baseline.filter( + (s) => (familiarityByComponent.get(s.component) ?? 
s.rubric.familiarity) >= 4, + ) + const topFamiliar = familiarPicks[0] + + // Identify stretches: charts that fit, with audience familiarity ≤ ceiling. + const stretchCandidates: PairCandidate[] = [] + for (const candidate of baseline) { + const familiarity = familiarityByComponent.get(candidate.component) ?? candidate.rubric.familiarity + if (familiarity > ceiling) continue + + const isIncreaseTarget = audience.targets?.[candidate.component]?.direction === "increase" + const withinTolerance = topFamiliar + ? topFamiliar.score - candidate.score <= scoreTolerance + : true + + if (!isIncreaseTarget && !withinTolerance) continue + + stretchCandidates.push({ stretch: candidate, familiar: topFamiliar }) + } + + // Dedupe by component+variant + const seen = new Set() + const out: StretchSuggestion[] = [] + for (const { stretch, familiar } of stretchCandidates) { + const key = `${stretch.component}/${stretch.variant?.key ?? "base"}` + if (seen.has(key)) continue + seen.add(key) + + const familiarity = familiarityByComponent.get(stretch.component) ?? stretch.rubric.familiarity + const target = audience.targets?.[stretch.component] + const rationale = + target?.reason ?? + (target?.direction === "increase" + ? `${audience.name ?? "your audience"} is growing adoption of ${stretch.component}` + : familiar + ? 
`${stretch.component} is on the data, and within reach of ${familiar.component} which you're already familiar with` + : `${stretch.component} fits this data and would expand your team's vocabulary`) + + out.push({ + suggestion: stretch, + replacing: familiar?.component, + rationale, + familiarity, + }) + if (out.length >= maxResults) break + } + + return out +} diff --git a/src/components/ai/useChartSuggestions.ts b/src/components/ai/useChartSuggestions.ts new file mode 100644 index 00000000..dad234d4 --- /dev/null +++ b/src/components/ai/useChartSuggestions.ts @@ -0,0 +1,58 @@ +"use client" +import { useMemo } from "react" +import type { Datum } from "../charts/shared/datumTypes" +import { profileData, type ProfileDataOptions } from "./profileData" +import { suggestCharts, type SuggestChartsOptions } from "./suggestCharts" +import type { ChartDataProfile, Suggestion } from "./chartCapabilityTypes" + +export interface UseChartSuggestionsOptions extends SuggestChartsOptions, ProfileDataOptions {} + +export interface UseChartSuggestionsResult { + suggestions: ReadonlyArray + profile: ChartDataProfile +} + +/** + * Memoized chart suggestion hook. + * + * Heuristic-only: this hook never calls an LLM. Pair with `useChartInterrogation` + * to let an LLM re-rank or narrate the heuristic suggestions. + * + * @example + * const { suggestions } = useChartSuggestions(data, { intent: "trend" }) + * const top = suggestions[0] + * return + */ +export function useChartSuggestions( + data: ReadonlyArray | null | undefined, + options: UseChartSuggestionsOptions = {} +): UseChartSuggestionsResult { + const { + intent, allow, deny, maxResults, includeVariants, minScore, + rawInput, seriesField, capabilities, audience, + profile: providedProfile, + } = options + + const profile = useMemo( + () => providedProfile ?? profileData(data ?? 
[], { rawInput, seriesField }), + [providedProfile, data, rawInput, seriesField] + ) + + const suggestions = useMemo( + () => + suggestCharts(data, { + intent, + allow, + deny, + maxResults, + includeVariants, + minScore, + capabilities, + audience, + profile, + }), + [data, intent, allow, deny, maxResults, includeVariants, minScore, capabilities, audience, profile] + ) + + return { suggestions, profile } +} diff --git a/src/components/charts/geo/ChoroplethMap.capability.ts b/src/components/charts/geo/ChoroplethMap.capability.ts new file mode 100644 index 00000000..61a52ea9 --- /dev/null +++ b/src/components/charts/geo/ChoroplethMap.capability.ts @@ -0,0 +1,23 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const ChoroplethMapCapability: ChartCapability = { + component: "ChoroplethMap", + family: "geo", + importPath: "semiotic/geo", + rubric: { familiarity: 4, accuracy: 3, precision: 2 }, + + fits: (profile) => { + if (!profile.hasGeo || !profile.geo) return "needs a GeoJSON FeatureCollection via rawInput" + if (profile.geo.features.length < 1) return "needs at least 1 area feature" + return null + }, + + intentScores: { "geo": 5, "compare-categories": 3 }, + + caveats: () => ["large areas dominate visual weight regardless of measurement"], + + buildProps: (profile) => ({ + areas: profile.geo?.features ?? [], + valueAccessor: profile.primary.y ?? 
"value", + }), +} diff --git a/src/components/charts/geo/DistanceCartogram.capability.ts b/src/components/charts/geo/DistanceCartogram.capability.ts new file mode 100644 index 00000000..f7d8a521 --- /dev/null +++ b/src/components/charts/geo/DistanceCartogram.capability.ts @@ -0,0 +1,23 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const DistanceCartogramCapability: ChartCapability = { + component: "DistanceCartogram", + family: "geo", + importPath: "semiotic/geo", + rubric: { familiarity: 1, accuracy: 3, precision: 3 }, + + fits: (profile) => { + if (!profile.hasGeo || !profile.geo) return "needs a geo dataset" + if (!(profile.geo.points?.length)) return "needs point nodes with lat/lon and a cost field" + return null + }, + + intentScores: { "geo": 3, "rank": 3, "compare-categories": 2 }, + + caveats: () => ["non-standard projection — requires explanation for most readers"], + + buildProps: (profile) => ({ + points: profile.geo?.points ?? [], + costAccessor: "cost", + }), +} diff --git a/src/components/charts/geo/FlowMap.capability.ts b/src/components/charts/geo/FlowMap.capability.ts new file mode 100644 index 00000000..750b8070 --- /dev/null +++ b/src/components/charts/geo/FlowMap.capability.ts @@ -0,0 +1,23 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const FlowMapCapability: ChartCapability = { + component: "FlowMap", + family: "geo", + importPath: "semiotic/geo", + rubric: { familiarity: 2, accuracy: 3, precision: 2 }, + + fits: (profile) => { + if (!profile.hasGeo || !profile.geo) return "needs a geo dataset" + if (!(profile.geo.flows?.length)) return "needs flow records (source/target/value)" + if (!(profile.geo.points?.length)) return "needs point nodes with lat/lon" + return null + }, + + intentScores: { "geo": 4, "flow": 5 }, + + buildProps: (profile) => ({ + flows: profile.geo?.flows ?? [], + nodes: profile.geo?.points ?? 
[], + valueAccessor: "value", + }), +} diff --git a/src/components/charts/geo/ProportionalSymbolMap.capability.ts b/src/components/charts/geo/ProportionalSymbolMap.capability.ts new file mode 100644 index 00000000..ccc58c69 --- /dev/null +++ b/src/components/charts/geo/ProportionalSymbolMap.capability.ts @@ -0,0 +1,25 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const ProportionalSymbolMapCapability: ChartCapability = { + component: "ProportionalSymbolMap", + family: "geo", + importPath: "semiotic/geo", + rubric: { familiarity: 3, accuracy: 3, precision: 3 }, + + fits: (profile) => { + if (!profile.hasGeo || !profile.geo) return "needs a GeoJSON FeatureCollection (with points or area centroids)" + const havePoints = (profile.geo.points?.length ?? 0) > 0 + if (!havePoints && (profile.geo.features.length ?? 0) === 0) return "needs points or area features" + return null + }, + + intentScores: { "geo": 4, "rank": 3, "compare-categories": 3 }, + + buildProps: (profile) => ({ + points: profile.geo?.points ?? [], + areas: profile.geo?.features ?? undefined, + xAccessor: "lon", + yAccessor: "lat", + sizeBy: profile.primary.size ?? 
"value", + }), +} diff --git a/src/components/charts/network/ChordDiagram.capability.ts b/src/components/charts/network/ChordDiagram.capability.ts new file mode 100644 index 00000000..63a148b7 --- /dev/null +++ b/src/components/charts/network/ChordDiagram.capability.ts @@ -0,0 +1,27 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const ChordDiagramCapability: ChartCapability = { + component: "ChordDiagram", + family: "flow", + importPath: "semiotic/network", + rubric: { familiarity: 2, accuracy: 3, precision: 2 }, + + fits: (profile) => { + if (!profile.hasNetwork || !profile.network) return "needs a {nodes, edges} network" + if (profile.network.nodes.length < 3) return "needs 3+ nodes" + if (profile.network.edges.length < 3) return "needs 3+ edges" + return null + }, + + intentScores: { + "flow": 4, + }, + + caveats: () => ["chord diagrams trade accuracy for symmetry; use Sankey if direction matters"], + + buildProps: (profile) => ({ + nodes: profile.network?.nodes ?? [], + edges: profile.network?.edges ?? [], + valueAccessor: "value", + }), +} diff --git a/src/components/charts/network/CirclePack.capability.ts b/src/components/charts/network/CirclePack.capability.ts new file mode 100644 index 00000000..33ecd93d --- /dev/null +++ b/src/components/charts/network/CirclePack.capability.ts @@ -0,0 +1,25 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const CirclePackCapability: ChartCapability = { + component: "CirclePack", + family: "hierarchy", + importPath: "semiotic/network", + rubric: { familiarity: 3, accuracy: 3, precision: 2 }, + + fits: (profile) => { + if (!profile.hasHierarchy || !profile.hierarchy) return "needs a hierarchical root with values" + return null + }, + + intentScores: { + "hierarchy": 4, + "part-to-whole": 3, + }, + + caveats: () => ["circle area is harder to compare than rectangle area"], + + buildProps: (profile) => ({ + data: profile.hierarchy ?? 
{ name: "root", children: [] }, + valueAccessor: "value", + }), +} diff --git a/src/components/charts/network/ForceDirectedGraph.capability.ts b/src/components/charts/network/ForceDirectedGraph.capability.ts new file mode 100644 index 00000000..3f2487c6 --- /dev/null +++ b/src/components/charts/network/ForceDirectedGraph.capability.ts @@ -0,0 +1,36 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const ForceDirectedGraphCapability: ChartCapability = { + component: "ForceDirectedGraph", + family: "network", + importPath: "semiotic/network", + rubric: { familiarity: 3, accuracy: 3, precision: 3 }, + + fits: (profile) => { + if (!profile.hasNetwork || !profile.network) return "needs a {nodes, edges} network passed via rawInput" + if (profile.network.nodes.length < 2) return "needs at least 2 nodes" + if (profile.network.edges.length < 1) return "needs at least 1 edge" + return null + }, + + intentScores: { + "flow": 3, + "correlation": 2, + }, + + caveats: (p) => { + const n = p.network?.nodes.length ?? 0 + return n > 500 ? ["large graphs become hairballs — consider filtering or aggregating"] : [] + }, + + buildProps: (profile) => ({ + nodes: profile.network?.nodes ?? [], + edges: profile.network?.edges ?? [], + // Canonical camelCase form. The chart still accepts `nodeIDAccessor` as a + // deprecated alias, but the recommender should emit the supported name so + // generated props don't carry a deprecation footgun forward. 
+ nodeIdAccessor: "id", + sourceAccessor: "source", + targetAccessor: "target", + }), +} diff --git a/src/components/charts/network/OrbitDiagram.capability.ts b/src/components/charts/network/OrbitDiagram.capability.ts new file mode 100644 index 00000000..7e96c3a1 --- /dev/null +++ b/src/components/charts/network/OrbitDiagram.capability.ts @@ -0,0 +1,22 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const OrbitDiagramCapability: ChartCapability = { + component: "OrbitDiagram", + family: "hierarchy", + importPath: "semiotic/network", + rubric: { familiarity: 1, accuracy: 2, precision: 2 }, + + fits: (profile) => { + if (!profile.hasHierarchy || !profile.hierarchy) return "needs a hierarchical root" + return null + }, + + intentScores: { "hierarchy": 3 }, + + caveats: () => ["decorative — readers without context will not infer hierarchy easily"], + + buildProps: (profile) => ({ + data: profile.hierarchy ?? { name: "root", children: [] }, + orbitMode: "solar", + }), +} diff --git a/src/components/charts/network/ProcessSankey.capability.ts b/src/components/charts/network/ProcessSankey.capability.ts new file mode 100644 index 00000000..af571e96 --- /dev/null +++ b/src/components/charts/network/ProcessSankey.capability.ts @@ -0,0 +1,51 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const ProcessSankeyCapability: ChartCapability = { + component: "ProcessSankey", + family: "flow", + importPath: "semiotic/network", + rubric: { familiarity: 2, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (!profile.hasNetwork || !profile.network) return "needs a {nodes, edges} network" + // Edges need BOTH startTime and endTime (or start/end) — a process sankey + // lays each edge along a time axis that runs from one to the other. 
+ const first = profile.network.edges[0] + if (!first) return "needs at least one edge with start/end times" + const hasStart = first.startTime !== undefined || first.start !== undefined + const hasEnd = first.endTime !== undefined || first.end !== undefined + if (!hasStart || !hasEnd) { + return "edges need both startTime and endTime (or start/end) for a temporal sankey" + } + return null + }, + + intentScores: { + "flow": 5, + "composition-over-time": 4, + "change-detection": 3, + }, + + buildProps: (profile) => { + const props: Record = { + nodes: profile.network?.nodes ?? [], + edges: profile.network?.edges ?? [], + pairing: "temporal", + laneOrder: "crossing-min", + } + // ProcessSankey defaults to `startTime` / `endTime` field names. If the + // input data uses `start` / `end` instead (the alternative form fits() + // accepts), emit the matching accessor props so the suggestion is + // runnable without further patching. + const first = profile.network?.edges[0] + if (first) { + if (first.startTime === undefined && first.start !== undefined) { + props.startTimeAccessor = "start" + } + if (first.endTime === undefined && first.end !== undefined) { + props.endTimeAccessor = "end" + } + } + return props + }, +} diff --git a/src/components/charts/network/SankeyDiagram.capability.ts b/src/components/charts/network/SankeyDiagram.capability.ts new file mode 100644 index 00000000..2048936c --- /dev/null +++ b/src/components/charts/network/SankeyDiagram.capability.ts @@ -0,0 +1,28 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const SankeyDiagramCapability: ChartCapability = { + component: "SankeyDiagram", + family: "flow", + importPath: "semiotic/network", + rubric: { familiarity: 3, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (!profile.hasNetwork || !profile.network) return "needs a {nodes, edges} network with edge weights" + if (profile.network.edges.length < 2) return "needs 2+ weighted edges" + return null + }, + + 
intentScores: { + "flow": 5, + "part-to-whole": 3, + }, + + buildProps: (profile) => ({ + nodes: profile.network?.nodes ?? [], + edges: profile.network?.edges ?? [], + sourceAccessor: "source", + targetAccessor: "target", + valueAccessor: "value", + nodeIdAccessor: "id", + }), +} diff --git a/src/components/charts/network/TreeDiagram.capability.ts b/src/components/charts/network/TreeDiagram.capability.ts new file mode 100644 index 00000000..370ea7d0 --- /dev/null +++ b/src/components/charts/network/TreeDiagram.capability.ts @@ -0,0 +1,25 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const TreeDiagramCapability: ChartCapability = { + component: "TreeDiagram", + family: "hierarchy", + importPath: "semiotic/network", + rubric: { familiarity: 4, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (!profile.hasHierarchy || !profile.hierarchy) return "needs a hierarchical root (object with children) via rawInput" + return null + }, + + intentScores: { "hierarchy": 5 }, + + variants: [ + { key: "vertical-tree", label: "Vertical tree", props: { layout: "tree", orientation: "vertical" }, tags: ["vertical"] }, + { key: "horizontal-cluster", label: "Horizontal cluster", props: { layout: "cluster", orientation: "horizontal" }, tags: ["horizontal"] }, + ], + + buildProps: (profile, variant) => ({ + data: profile.hierarchy ?? { name: "root", children: [] }, + ...(variant?.props ?? 
{}), + }), +} diff --git a/src/components/charts/network/Treemap.capability.ts b/src/components/charts/network/Treemap.capability.ts new file mode 100644 index 00000000..94ec0b93 --- /dev/null +++ b/src/components/charts/network/Treemap.capability.ts @@ -0,0 +1,26 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const TreemapCapability: ChartCapability = { + component: "Treemap", + family: "hierarchy", + importPath: "semiotic/network", + rubric: { familiarity: 4, accuracy: 3, precision: 3 }, + + fits: (profile) => { + if (!profile.hasHierarchy || !profile.hierarchy) return "needs a hierarchical root with values" + return null + }, + + intentScores: { + "hierarchy": 4, + "part-to-whole": 4, + "compare-categories": 3, + }, + + caveats: () => ["rectangle area comparisons are less precise than length — prefer a bar chart for ranking"], + + buildProps: (profile) => ({ + data: profile.hierarchy ?? { name: "root", children: [] }, + valueAccessor: "value", + }), +} diff --git a/src/components/charts/ordinal/BarChart.capability.ts b/src/components/charts/ordinal/BarChart.capability.ts new file mode 100644 index 00000000..38d4fd86 --- /dev/null +++ b/src/components/charts/ordinal/BarChart.capability.ts @@ -0,0 +1,63 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const BarChartCapability: ChartCapability = { + component: "BarChart", + family: "categorical", + importPath: "semiotic/ordinal", + rubric: { familiarity: 5, accuracy: 5, precision: 4 }, + + fits: (profile) => { + if (!profile.primary.category) return "needs a category field" + if (!profile.primary.y) return "needs a numeric value field" + if ((profile.categoryCount ?? 0) < 1) return "needs at least 1 category" + if ((profile.categoryCount ?? 0) > 50) return "too many categories — consider aggregating or use a different chart" + return null + }, + + intentScores: { + // BarChart compares pre-aggregated category totals. 
When each category has + // many raw observations, a BoxPlot / ViolinPlot / SwarmPlot is more honest — + // BarChart's implicit aggregation hides the within-category distribution. + "compare-categories": (p) => { + if (!p.categoryCount) return 0 + const obsPerCategory = p.rowCount / p.categoryCount + if (obsPerCategory >= 10) return 3 // distribution-shaped — yield to distribution charts + return 5 + }, + "rank": 5, + "part-to-whole": (p) => ((p.categoryCount ?? 0) <= 8 ? 3 : 2), + "distribution": 1, + }, + + variants: [ + { + key: "sorted-desc", + label: "Ranked", + props: { sort: "desc" }, + tags: ["sorted", "ranked"], + intentDeltas: { "rank": +0, "compare-categories": +0 }, + }, + { + key: "source-order", + label: "Source order", + props: { sort: false }, + tags: ["source-order"], + intentDeltas: { "rank": -2 }, + }, + { + key: "horizontal", + label: "Horizontal bars", + props: { orientation: "horizontal", sort: "desc" }, + tags: ["horizontal", "ranked"], + intentDeltas: { "rank": +1 }, + rubricDeltas: { precision: +1 }, + }, + ], + + buildProps: (profile, variant) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + ...(variant?.props ?? 
{}), + }), +} diff --git a/src/components/charts/ordinal/BoxPlot.capability.ts b/src/components/charts/ordinal/BoxPlot.capability.ts new file mode 100644 index 00000000..8a20a692 --- /dev/null +++ b/src/components/charts/ordinal/BoxPlot.capability.ts @@ -0,0 +1,30 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const BoxPlotCapability: ChartCapability = { + component: "BoxPlot", + family: "distribution", + importPath: "semiotic/ordinal", + rubric: { familiarity: 4, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (!profile.primary.y) return "needs a numeric field" + if (!profile.primary.category) return "needs a category to split distributions" + // We need repeated rows per category — otherwise there's no distribution per box. + if (profile.rowCount / Math.max(profile.categoryCount ?? 1, 1) < 3) { + return "needs 3+ observations per category" + } + return null + }, + + intentScores: { + "distribution": 5, + "compare-categories": 4, + "outlier-detection": 4, + }, + + buildProps: (profile) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + }), +} diff --git a/src/components/charts/ordinal/DonutChart.capability.ts b/src/components/charts/ordinal/DonutChart.capability.ts new file mode 100644 index 00000000..90d3ddd0 --- /dev/null +++ b/src/components/charts/ordinal/DonutChart.capability.ts @@ -0,0 +1,29 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const DonutChartCapability: ChartCapability = { + component: "DonutChart", + family: "categorical", + importPath: "semiotic/ordinal", + rubric: { familiarity: 4, accuracy: 3, precision: 2 }, + + fits: (profile) => { + if (!profile.primary.category) return "needs a category field" + if (!profile.primary.y) return "needs a numeric value field" + const count = profile.categoryCount ?? 
0 + if (count < 2) return "needs 2+ categories" + if (count > 8) return `${count} slices is too many for a donut` + return null + }, + + intentScores: { + "part-to-whole": 4, + "compare-categories": 2 + }, + + buildProps: (profile) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + innerRadius: 40 + }) +} diff --git a/src/components/charts/ordinal/DotPlot.capability.ts b/src/components/charts/ordinal/DotPlot.capability.ts new file mode 100644 index 00000000..2629db7f --- /dev/null +++ b/src/components/charts/ordinal/DotPlot.capability.ts @@ -0,0 +1,34 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const DotPlotCapability: ChartCapability = { + component: "DotPlot", + family: "categorical", + importPath: "semiotic/ordinal", + rubric: { familiarity: 3, accuracy: 5, precision: 5 }, + + fits: (profile) => { + if (!profile.primary.category) return "needs a category field" + if (!profile.primary.y) return "needs a numeric value field" + if ((profile.categoryCount ?? 0) > 30) return "too many categories for a dot plot" + return null + }, + + intentScores: { + // Like BarChart, DotPlot implicitly aggregates — yield to distribution + // charts when each category has many observations. 
+ "compare-categories": (p) => { + if (!p.categoryCount) return 0 + const obsPerCategory = p.rowCount / p.categoryCount + if (obsPerCategory >= 10) return 3 + return 5 + }, + "rank": 5, + "outlier-detection": 3, + }, + + buildProps: (profile) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + }), +} diff --git a/src/components/charts/ordinal/FunnelChart.capability.ts b/src/components/charts/ordinal/FunnelChart.capability.ts new file mode 100644 index 00000000..259ef6a0 --- /dev/null +++ b/src/components/charts/ordinal/FunnelChart.capability.ts @@ -0,0 +1,35 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +const STAGE_HINT = /(stage|step|funnel|status|outcome|phase)/i + +export const FunnelChartCapability: ChartCapability = { + component: "FunnelChart", + family: "flow", + importPath: "semiotic/ordinal", + rubric: { familiarity: 4, accuracy: 3, precision: 3 }, + + fits: (profile) => { + if (!profile.primary.y) return "needs a numeric value field" + const stepField = Object.keys(profile.fields).find((f) => STAGE_HINT.test(f)) + if (!stepField) return "needs a stage/step/funnel-named field" + return null + }, + + intentScores: { + "flow": 4, + "rank": 3, + "part-to-whole": 2, + }, + + caveats: () => ["readers infer conversion drop-off — make sure rows actually represent sequential stages"], + + buildProps: (profile) => { + const stepField = Object.keys(profile.fields).find((f) => STAGE_HINT.test(f)) + return { + data: profile.data, + stepAccessor: stepField, + valueAccessor: profile.primary.y, + ...(profile.primary.category && profile.primary.category !== stepField ? 
{ categoryAccessor: profile.primary.category } : {}), + } + }, +} diff --git a/src/components/charts/ordinal/GaugeChart.capability.ts b/src/components/charts/ordinal/GaugeChart.capability.ts new file mode 100644 index 00000000..35437b25 --- /dev/null +++ b/src/components/charts/ordinal/GaugeChart.capability.ts @@ -0,0 +1,34 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const GaugeChartCapability: ChartCapability = { + component: "GaugeChart", + family: "categorical", + importPath: "semiotic/ordinal", + rubric: { familiarity: 4, accuracy: 2, precision: 2 }, + + fits: (profile) => { + if (profile.rowCount > 1) return "GaugeChart shows a single value — provide a 1-row dataset or use BarChart" + if (!profile.primary.y) return "needs a numeric value" + return null + }, + + intentScores: { + "compare-categories": 1, + "rank": 1, + }, + + caveats: () => ["gauges only show a single value; consider a stat card or bar instead for comparison"], + + buildProps: (profile) => { + const yField = profile.primary.y! + const firstRow = profile.data[0] + const value = firstRow ? Number(firstRow[yField]) : 0 + const summary = profile.fields[yField] + const max = summary?.type === "numeric" ? summary.max : 100 + return { + value: Number.isFinite(value) ? 
value : 0, + min: 0, + max, + } + }, +} diff --git a/src/components/charts/ordinal/GroupedBarChart.capability.ts b/src/components/charts/ordinal/GroupedBarChart.capability.ts new file mode 100644 index 00000000..037917b7 --- /dev/null +++ b/src/components/charts/ordinal/GroupedBarChart.capability.ts @@ -0,0 +1,32 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const GroupedBarChartCapability: ChartCapability = { + component: "GroupedBarChart", + family: "categorical", + importPath: "semiotic/ordinal", + rubric: { familiarity: 4, accuracy: 5, precision: 4 }, + + fits: (profile) => { + if (!profile.primary.category) return "needs a category field" + if (!profile.primary.y) return "needs a numeric value field" + if (!profile.primary.series) return "needs a series field to group by" + if ((profile.seriesCount ?? 0) < 2) return "needs 2+ groups" + if ((profile.seriesCount ?? 0) > 6) return `${profile.seriesCount} groups is too many for grouped bars` + if ((profile.categoryCount ?? 
0) > 25) return "too many categories for grouped bars" + return null + }, + + intentScores: { + "compare-categories": 5, + "compare-series": 4, + "rank": 3, + }, + + buildProps: (profile) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + groupBy: profile.primary.series, + colorBy: profile.primary.series, + }), +} diff --git a/src/components/charts/ordinal/Histogram.capability.ts b/src/components/charts/ordinal/Histogram.capability.ts new file mode 100644 index 00000000..2a3e3051 --- /dev/null +++ b/src/components/charts/ordinal/Histogram.capability.ts @@ -0,0 +1,48 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const HistogramCapability: ChartCapability = { + component: "Histogram", + family: "distribution", + importPath: "semiotic/ordinal", + rubric: { familiarity: 4, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (profile.rowCount < 10) return "histograms need at least ~10 observations" + if (!profile.primary.y) return "needs a numeric field to bin" + // Distinct values must be > a handful — otherwise a bar chart of counts is better + const yField = profile.primary.y + const yCandidate = profile.candidates.y.find((c) => c.field === yField) + if (yCandidate?.distinctCount !== undefined && yCandidate.distinctCount < 6) { + return "too few distinct numeric values; a bar chart of counts is a better fit" + } + return null + }, + + intentScores: { + "distribution": 5, + "outlier-detection": 3, + "compare-categories": 1, + }, + + variants: [ + { + key: "count-bins", + label: "Count bins", + props: { bins: 10, relative: false }, + tags: ["count"], + }, + { + key: "share-bins", + label: "Share bins (relative)", + props: { bins: 10, relative: true }, + tags: ["share"], + intentDeltas: { "distribution": +0 }, + }, + ], + + buildProps: (profile, variant) => ({ + data: profile.data, + valueAccessor: profile.primary.y, + ...(variant?.props ?? 
{}), + }), +} diff --git a/src/components/charts/ordinal/Histogram.test.tsx b/src/components/charts/ordinal/Histogram.test.tsx index 8c577f87..9c55a948 100644 --- a/src/components/charts/ordinal/Histogram.test.tsx +++ b/src/components/charts/ordinal/Histogram.test.tsx @@ -44,6 +44,23 @@ describe("Histogram", () => { expect(lastOrdinalFrameProps.data).toBe(sampleData) }) + it("renders raw-observation data with no category field (single bucket)", () => { + // Regression: prior default categoryAccessor="category" failed validation + // on rows like { value: 12 } because "category" wasn't in the data. + // The default now synthesizes an "All" bucket for these cases so + // suggestCharts can route raw-observation data to Histogram cleanly. + const observations = Array.from({ length: 30 }, (_, i) => ({ value: i * 2 + Math.random() * 5 })) + const { container } = render( + + + + ) + const frame = container.querySelector(".stream-ordinal-frame") + expect(frame).toBeTruthy() + // No ChartError rendered — the validator path passed. + expect(container.querySelector(".semiotic-chart-error")).toBeNull() + }) + it("handles empty data gracefully (no frame rendered)", () => { const { container } = render( diff --git a/src/components/charts/ordinal/Histogram.tsx b/src/components/charts/ordinal/Histogram.tsx index 7d548a18..0b7b7028 100644 --- a/src/components/charts/ordinal/Histogram.tsx +++ b/src/components/charts/ordinal/Histogram.tsx @@ -19,6 +19,22 @@ import { useChartSetup } from "../shared/useChartSetup" import { useFrameImperativeHandle } from "../shared/useFrameImperativeHandle" import { useOrdinalBrush } from "../shared/useOrdinalBrush" +/** + * Default categoryAccessor — hoisted to module scope so it stays + * referentially stable across renders. 
A new function on every render + * would invalidate any downstream memo keyed on accessor identity (frame + * binning, layout, validation), and would silently re-bin the data even + * when nothing about the accessor's behavior actually changed. + * + * Reads `d.category` when present, falls back to a single "All" bucket + * for raw-observation data like `[{ value: 12 }, { value: 18 }]`. + * Coerces non-string values so the `string`-return contract always holds. + */ +const defaultCategoryAccessor = ((d: Datum) => { + const c = d?.category + return c == null ? "All" : String(c) +}) as ChartAccessor + /** * Histogram component props */ @@ -37,8 +53,10 @@ export interface HistogramProps extends BaseChartP data?: TDatum[] /** * Field name or function returning the bin label (used when data is - * already binned). Ignored when binning raw values. - * @default "category" + * already binned). For raw-observation data with no category dimension, + * the default treats all rows as a single "All" bucket — no need to set + * this explicitly. + * @default (d) => d.category ?? 
"All" */ categoryAccessor?: ChartAccessor /** @@ -169,7 +187,12 @@ export const Histogram = forwardRef(function Histogram, + valueAccessor = "value", bins = 25, relative = false, valueFormat, colorBy, colorScheme, categoryPadding = 20, diff --git a/src/components/charts/ordinal/LikertChart.capability.ts b/src/components/charts/ordinal/LikertChart.capability.ts new file mode 100644 index 00000000..df5aeb77 --- /dev/null +++ b/src/components/charts/ordinal/LikertChart.capability.ts @@ -0,0 +1,34 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +const RATING_HINT = /(rating|score|likert|satisfaction|nps|agree|sentiment|level)/i + +export const LikertChartCapability: ChartCapability = { + component: "LikertChart", + family: "categorical", + importPath: "semiotic/ordinal", + rubric: { familiarity: 3, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (!profile.primary.category) return "needs a category (question) field" + if (!profile.primary.y) return "needs a numeric rating/count field" + const ratingField = Object.keys(profile.fields).find((f) => RATING_HINT.test(f)) + if (!ratingField) return "needs an ordinal rating/level field (rating, score, level...)" + return null + }, + + intentScores: { + "compare-categories": 4, + "distribution": 3, + "part-to-whole": 3, + }, + + buildProps: (profile) => { + const ratingField = Object.keys(profile.fields).find((f) => RATING_HINT.test(f))! 
+ return { + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + levelAccessor: ratingField, + } + }, +} diff --git a/src/components/charts/ordinal/PieChart.capability.ts b/src/components/charts/ordinal/PieChart.capability.ts new file mode 100644 index 00000000..194dfdd6 --- /dev/null +++ b/src/components/charts/ordinal/PieChart.capability.ts @@ -0,0 +1,50 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const PieChartCapability: ChartCapability = { + component: "PieChart", + family: "categorical", + importPath: "semiotic/ordinal", + rubric: { familiarity: 5, accuracy: 3, precision: 2 }, + + fits: (profile) => { + if (!profile.primary.category) return "needs a category field" + if (!profile.primary.y) return "needs a numeric value field" + const count = profile.categoryCount ?? 0 + if (count < 2) return "needs 2+ categories" + if (count > 8) return `${count} slices is too many for a pie chart` + return null + }, + + intentScores: { + "part-to-whole": 4, + "compare-categories": 2, + "rank": 1, + }, + + caveats: () => [ + "angle comparisons are less accurate than length — prefer a bar chart unless part-to-whole is the explicit message", + ], + + variants: [ + { + key: "pie", + label: "Pie", + props: {}, + tags: ["pie"], + }, + { + key: "donut", + label: "Donut", + description: "Hollow center — easier to fit a label or KPI inside.", + props: { innerRadius: 60 }, + tags: ["donut"], + }, + ], + + buildProps: (profile, variant) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + ...(variant?.props ?? 
{}), + }), +} diff --git a/src/components/charts/ordinal/RidgelinePlot.capability.ts b/src/components/charts/ordinal/RidgelinePlot.capability.ts new file mode 100644 index 00000000..b683e547 --- /dev/null +++ b/src/components/charts/ordinal/RidgelinePlot.capability.ts @@ -0,0 +1,30 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const RidgelinePlotCapability: ChartCapability = { + component: "RidgelinePlot", + family: "distribution", + importPath: "semiotic/ordinal", + rubric: { familiarity: 2, accuracy: 3, precision: 3 }, + + fits: (profile) => { + if (!profile.primary.y) return "needs a numeric field" + if (!profile.primary.category) return "needs a category dimension to stack distributions" + if ((profile.categoryCount ?? 0) < 3) return "needs 3+ categories to make a ridgeline meaningful" + if (profile.rowCount / Math.max(profile.categoryCount ?? 1, 1) < 6) return "needs 6+ observations per category" + return null + }, + + intentScores: { + "distribution": 4, + "compare-categories": 3, + "composition-over-time": 2, + }, + + caveats: () => ["readers can confuse overlapping ridges — limit categories or use small multiples"], + + buildProps: (profile) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + }), +} diff --git a/src/components/charts/ordinal/StackedBarChart.capability.ts b/src/components/charts/ordinal/StackedBarChart.capability.ts new file mode 100644 index 00000000..a2bb51cb --- /dev/null +++ b/src/components/charts/ordinal/StackedBarChart.capability.ts @@ -0,0 +1,53 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const StackedBarChartCapability: ChartCapability = { + component: "StackedBarChart", + family: "categorical", + importPath: "semiotic/ordinal", + rubric: { familiarity: 4, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (!profile.primary.category) return "needs a category field" + if (!profile.primary.y) 
return "needs a numeric value field" + if (!profile.primary.series) return "needs a series field to stack by" + if ((profile.seriesCount ?? 0) < 2) return "needs 2+ stack groups" + if ((profile.seriesCount ?? 0) > 8) return `${profile.seriesCount} stacked groups is too many` + return null + }, + + intentScores: { + "part-to-whole": 4, + "compare-categories": 4, + "composition-over-time": (p) => (p.hasTimeAxis ? 3 : 1), + "compare-series": 2, + }, + + caveats: () => ["only the bottom segment shares a baseline; others are harder to compare across categories"], + + variants: [ + { + key: "absolute", + label: "Absolute stacks", + props: { normalize: false }, + tags: ["absolute"], + }, + { + key: "normalized", + label: "100% stacked", + description: "Each bar normalized to 1 — emphasizes composition, hides totals.", + props: { normalize: true }, + tags: ["normalized", "part-to-whole"], + intentDeltas: { "part-to-whole": +1, "compare-categories": -1 }, + caveats: ["absolute magnitudes are no longer comparable across bars"], + }, + ], + + buildProps: (profile, variant) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + stackBy: profile.primary.series, + colorBy: profile.primary.series, + ...(variant?.props ?? 
{}), + }), +} diff --git a/src/components/charts/ordinal/SwarmPlot.capability.ts b/src/components/charts/ordinal/SwarmPlot.capability.ts new file mode 100644 index 00000000..fc8d0ee1 --- /dev/null +++ b/src/components/charts/ordinal/SwarmPlot.capability.ts @@ -0,0 +1,29 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const SwarmPlotCapability: ChartCapability = { + component: "SwarmPlot", + family: "distribution", + importPath: "semiotic/ordinal", + rubric: { familiarity: 3, accuracy: 4, precision: 4 }, + + fits: (profile) => { + if (!profile.primary.y) return "needs a numeric field" + if (!profile.primary.category) return "needs a category" + if (profile.rowCount / Math.max(profile.categoryCount ?? 1, 1) < 4) return "needs 4+ observations per category" + if (profile.rowCount > 2000) return "too many points for a swarm — consider a violin or box" + return null + }, + + intentScores: { + "distribution": 4, + "outlier-detection": 5, + "compare-categories": 3, + }, + + buildProps: (profile) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + ...(profile.primary.series && profile.primary.series !== profile.primary.category ? 
{ colorBy: profile.primary.series } : {}), + }), +} diff --git a/src/components/charts/ordinal/SwimlaneChart.capability.ts b/src/components/charts/ordinal/SwimlaneChart.capability.ts new file mode 100644 index 00000000..37ec89e4 --- /dev/null +++ b/src/components/charts/ordinal/SwimlaneChart.capability.ts @@ -0,0 +1,30 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const SwimlaneChartCapability: ChartCapability = { + component: "SwimlaneChart", + family: "categorical", + importPath: "semiotic/ordinal", + rubric: { familiarity: 3, accuracy: 4, precision: 4 }, + + fits: (profile) => { + if (!profile.primary.category) return "needs a category field" + if (!profile.primary.series) return "needs a sub-category (lane) field" + if (!profile.primary.y) return "needs a numeric value field" + if ((profile.categoryCount ?? 0) < 2) return "needs 2+ categories" + return null + }, + + intentScores: { + "compare-categories": 4, + "composition-over-time": (p) => (p.hasTimeAxis ? 
3 : 1), + "compare-series": 3, + }, + + buildProps: (profile) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + subcategoryAccessor: profile.primary.series, + valueAccessor: profile.primary.y, + colorBy: profile.primary.series, + }), +} diff --git a/src/components/charts/ordinal/ViolinPlot.capability.ts b/src/components/charts/ordinal/ViolinPlot.capability.ts new file mode 100644 index 00000000..6e23c8e0 --- /dev/null +++ b/src/components/charts/ordinal/ViolinPlot.capability.ts @@ -0,0 +1,39 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const ViolinPlotCapability: ChartCapability = { + component: "ViolinPlot", + family: "distribution", + importPath: "semiotic/ordinal", + rubric: { familiarity: 3, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (!profile.primary.y) return "needs a numeric field" + if (!profile.primary.category) return "needs a category to split distributions" + if (profile.rowCount / Math.max(profile.categoryCount ?? 1, 1) < 6) return "needs 6+ observations per category" + return null + }, + + intentScores: { + "distribution": 5, + "compare-categories": 4, + }, + + variants: [ + { key: "density", label: "Density only", props: { showIQR: false }, tags: ["density"] }, + { + key: "density-iqr", + label: "Density with IQR", + props: { showIQR: true }, + tags: ["density", "iqr"], + intentDeltas: { "distribution": +0 }, + rubricDeltas: { precision: +1 }, + }, + ], + + buildProps: (profile, variant) => ({ + data: profile.data, + categoryAccessor: profile.primary.category, + valueAccessor: profile.primary.y, + ...(variant?.props ?? 
{}), + }), +} diff --git a/src/components/charts/realtime/RealtimeHeatmap.capability.ts b/src/components/charts/realtime/RealtimeHeatmap.capability.ts new file mode 100644 index 00000000..58f9ecc8 --- /dev/null +++ b/src/components/charts/realtime/RealtimeHeatmap.capability.ts @@ -0,0 +1,43 @@ +import type { StreamChartCapability } from "../../ai/streamingTypes" + +export const RealtimeHeatmapCapability: StreamChartCapability = { + component: "RealtimeHeatmap", + importPath: "semiotic/realtime", + rubric: { familiarity: 2, accuracy: 3, precision: 2 }, + + fits: (schema) => { + if (!schema.fields.some((f) => f.kind === "date" || f.role === "x")) { + return "needs a time field for the x axis" + } + if (!schema.fields.some((f) => f.role === "y" || f.role === "value" || (f.kind === "numeric" && f.role !== "x"))) { + return "needs a numeric value field" + } + // Heatmaps shine at higher throughputs where line charts get cluttered + return null + }, + + intentScores: { + // Particularly strong for high-throughput streams where lines would saturate + "trend": (schema) => (schema.throughput === "high" ? 4 : 2), + "distribution": 3, + "change-detection": 3, + "compare-series": (schema) => { + const seriesField = schema.fields.find((f) => f.role === "series" || (f.kind === "categorical" && f.role !== "category")) + return seriesField ? 4 : 1 + }, + }, + + buildProps: (schema) => { + const timeField = schema.fields.find((f) => f.role === "x" || f.kind === "date")?.name + // Same rule as fits(): a numeric field tagged role "x" is the time axis, so it is never picked as the value. + const valueField = schema.fields.find((f) => f.role === "y" || f.role === "value" || (f.kind === "numeric" && f.role !== "x"))?.name + const categoryField = schema.fields.find( + (f) => f.role === "category" || (f.kind === "categorical" && f.role !== "series"), + )?.name + return { + timeAccessor: timeField, + valueAccessor: valueField, + ...(categoryField ? 
{ categoryAccessor: categoryField } : {}), + } + }, +} diff --git a/src/components/charts/realtime/RealtimeHistogram.capability.ts b/src/components/charts/realtime/RealtimeHistogram.capability.ts new file mode 100644 index 00000000..695166a8 --- /dev/null +++ b/src/components/charts/realtime/RealtimeHistogram.capability.ts @@ -0,0 +1,33 @@ +import type { StreamChartCapability } from "../../ai/streamingTypes" + +export const RealtimeHistogramCapability: StreamChartCapability = { + component: "RealtimeHistogram", + importPath: "semiotic/realtime", + rubric: { familiarity: 3, accuracy: 4, precision: 3 }, + + fits: (schema) => { + if (!schema.fields.some((f) => f.kind === "date" || f.role === "x")) { + return "needs a time field" + } + if (!schema.fields.some((f) => f.role === "value" || (f.kind === "numeric" && f.role !== "x"))) { + return "needs a numeric field to bin" + } + return null + }, + + intentScores: { + "distribution": 5, + "outlier-detection": 4, + "change-detection": 2, + }, + + buildProps: (schema) => { + const timeField = schema.fields.find((f) => f.role === "x" || f.kind === "date")?.name + // Same rule as fits(): a numeric field tagged role "x" is the time axis, so it is never picked as the value. + const valueField = schema.fields.find((f) => f.role === "value" || (f.kind === "numeric" && f.role !== "x"))?.name + return { + timeAccessor: timeField, + valueAccessor: valueField, + } + }, +} diff --git a/src/components/charts/realtime/RealtimeLineChart.capability.ts b/src/components/charts/realtime/RealtimeLineChart.capability.ts new file mode 100644 index 00000000..45422351 --- /dev/null +++ b/src/components/charts/realtime/RealtimeLineChart.capability.ts @@ -0,0 +1,46 @@ +import type { StreamChartCapability } from "../../ai/streamingTypes" + +export const RealtimeLineChartCapability: StreamChartCapability = { + component: "RealtimeLineChart", + importPath: "semiotic/realtime", + rubric: { familiarity: 4, accuracy: 4, precision: 3 }, + + fits: (schema) => { + if (!schema.fields.some((f) => f.kind === "date" || f.role === "x")) { + return "needs a date/time field for the x axis" + } + if 
(!schema.fields.some((f) => f.role === "y" || f.role === "value" || (f.kind === "numeric" && f.role !== "x"))) { + return "needs a numeric value field" + } + if (schema.throughput === "high") { + return "for high-throughput streams, prefer RealtimeHeatmap or RealtimeWaterfallChart" + } + return null + }, + + intentScores: { + "trend": 5, + "change-detection": 4, + // RealtimeLineChart doesn't split into multiple series — one + // (time, value) line per chart instance — so compare-series is a poor fit. + "outlier-detection": 2, + }, + + caveats: (schema) => { + const out: string[] = [] + if (schema.retention === "cumulative") { + out.push("cumulative retention will eventually exhaust the buffer — set a windowSize or downsample") + } + return out + }, + + buildProps: (schema) => { + const timeField = schema.fields.find((f) => f.role === "x" || f.kind === "date")?.name + // Same rule as fits(): a numeric field tagged role "x" is the time axis, so it is never picked as the value. + const valueField = schema.fields.find((f) => f.role === "y" || f.role === "value" || (f.kind === "numeric" && f.role !== "x"))?.name + return { + timeAccessor: timeField, + valueAccessor: valueField, + } + }, +} diff --git a/src/components/charts/realtime/RealtimeSwarmChart.capability.ts b/src/components/charts/realtime/RealtimeSwarmChart.capability.ts new file mode 100644 index 00000000..ca283da8 --- /dev/null +++ b/src/components/charts/realtime/RealtimeSwarmChart.capability.ts @@ -0,0 +1,40 @@ +import type { StreamChartCapability } from "../../ai/streamingTypes" + +export const RealtimeSwarmChartCapability: StreamChartCapability = { + component: "RealtimeSwarmChart", + importPath: "semiotic/realtime", + rubric: { familiarity: 2, accuracy: 4, precision: 4 }, + + fits: (schema) => { + if (!schema.fields.some((f) => f.kind === "date" || f.role === "x")) { + return "needs a time field (points are placed at (time, value))" + } + if (!schema.fields.some((f) => f.role === "value" || (f.kind === "numeric" && f.role !== "x"))) { + return "needs a numeric field" + } + if (!schema.fields.some((f) => f.kind === "categorical" || f.role === "category")) { + return "needs 
a category to swarm by" + } + return null + }, + + intentScores: { + "outlier-detection": 5, + "distribution": 4, + "compare-categories": 3, + }, + + caveats: (schema) => (schema.throughput === "high" ? ["high-throughput swarms get crowded — consider RealtimeHistogram"] : []), + + buildProps: (schema) => { + const timeField = schema.fields.find((f) => f.role === "x" || f.kind === "date")?.name + // Same rule as fits(): a numeric field tagged role "x" is the time axis, so it is never picked as the value. + const valueField = schema.fields.find((f) => f.role === "value" || (f.kind === "numeric" && f.role !== "x"))?.name + const categoryField = schema.fields.find((f) => f.role === "category" || f.kind === "categorical")?.name + return { + timeAccessor: timeField, + valueAccessor: valueField, + categoryAccessor: categoryField, + } + }, +} diff --git a/src/components/charts/realtime/RealtimeWaterfallChart.capability.ts b/src/components/charts/realtime/RealtimeWaterfallChart.capability.ts new file mode 100644 index 00000000..2c49dbd9 --- /dev/null +++ b/src/components/charts/realtime/RealtimeWaterfallChart.capability.ts @@ -0,0 +1,35 @@ +import type { StreamChartCapability } from "../../ai/streamingTypes" + +export const RealtimeWaterfallChartCapability: StreamChartCapability = { + component: "RealtimeWaterfallChart", + importPath: "semiotic/realtime", + rubric: { familiarity: 2, accuracy: 4, precision: 3 }, + + fits: (schema) => { + if (!schema.fields.some((f) => f.kind === "date" || f.role === "x")) { + return "needs a time field" + } + if (!schema.fields.some((f) => f.role === "value" || (f.kind === "numeric" && f.role !== "x"))) { + return "needs a numeric value field" + } + return null + }, + + intentScores: { + "change-detection": 5, + "trend": 3, + "outlier-detection": 4, + // Waterfalls work especially well at high throughput + "distribution": (schema) => (schema.throughput === "high" ? 
4 : 2), + }, + + buildProps: (schema) => { + const timeField = schema.fields.find((f) => f.role === "x" || f.kind === "date")?.name + // Same rule as fits(): a numeric field tagged role "x" is the time axis, so it is never picked as the value. + const valueField = schema.fields.find((f) => f.role === "value" || (f.kind === "numeric" && f.role !== "x"))?.name + return { + timeAccessor: timeField, + valueAccessor: valueField, + } + }, +} diff --git a/src/components/charts/realtime/TemporalHistogram.capability.ts b/src/components/charts/realtime/TemporalHistogram.capability.ts new file mode 100644 index 00000000..38e0d53e --- /dev/null +++ b/src/components/charts/realtime/TemporalHistogram.capability.ts @@ -0,0 +1,42 @@ +import type { StreamChartCapability } from "../../ai/streamingTypes" + +/** + * TemporalHistogram is the bounded sibling of RealtimeHistogram — same chart + * but for static data with a fixed window. For stream selection it competes + * with RealtimeHistogram; the choice depends on retention. + */ +export const TemporalHistogramCapability: StreamChartCapability = { + component: "TemporalHistogram", + importPath: "semiotic/realtime", + rubric: { familiarity: 3, accuracy: 4, precision: 3 }, + + fits: (schema) => { + if (!schema.fields.some((f) => f.kind === "date" || f.role === "x")) { + return "needs a time field" + } + if (!schema.fields.some((f) => f.role === "value" || (f.kind === "numeric" && f.role !== "x"))) { + return "needs a numeric value field" + } + if (schema.retention === "windowed") { + return "windowed retention is RealtimeHistogram's job; TemporalHistogram serves bounded/cumulative data" + } + return null + }, + + intentScores: { + "distribution": 5, + "change-detection": 3, + "trend": 2, + }, + + buildProps: (schema) => { + // Same rule as fits(): a numeric field tagged role "x" is the time axis, so it is never picked as the value. + const valueField = schema.fields.find((f) => f.role === "value" || (f.kind === "numeric" && f.role !== "x"))?.name + const timeField = schema.fields.find((f) => f.role === "x" || f.kind === "date")?.name + // Wraps RealtimeHistogram — same accessor surface (timeAccessor + valueAccessor). 
+ return { + timeAccessor: timeField, + valueAccessor: valueField, + } + }, +} diff --git a/src/components/charts/xy/AreaChart.capability.ts b/src/components/charts/xy/AreaChart.capability.ts new file mode 100644 index 00000000..74a286a6 --- /dev/null +++ b/src/components/charts/xy/AreaChart.capability.ts @@ -0,0 +1,122 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +/** + * AreaChart is treated as a strictly single-series chart. Multi-series areas + * are an occlusion nightmare — when the data has 2+ series we subselect the + * leading series (largest cumulative y) and surface a caveat so the reader + * knows they're looking at one slice, not the whole dataset. For full multi- + * series comparison the engine routes callers to LineChart; for two-series + * comparison, to DifferenceChart. + */ +export const AreaChartCapability: ChartCapability = { + component: "AreaChart", + family: "time-series", + importPath: "semiotic/xy", + rubric: { familiarity: 4, accuracy: 3, precision: 3 }, + + fits: (profile) => { + if (profile.rowCount < 3) return "needs at least 3 rows" + if (!profile.primary.x) return "needs a numeric or time x field" + if (!profile.primary.y) return "needs a numeric y field" + if (profile.xProvenance === "scatter" && !profile.monotonicX) { + return "needs an ordered/temporal x — given x looks like a scatter pattern, not a sequence" + } + return null + }, + + intentScores: { + // Single-series trend is AreaChart's sweet spot — the gradient fill is + // more visually arresting than a thin line. Yield to LineChart when + // the data is genuinely multi-series; the subselected single series we + // emit is a partial picture, so it should not outrank a full multi-line. + "trend": (p) => { + if (p.xProvenance === "scatter" && !p.monotonicX) return 1 + const singleSeries = !p.seriesCount || p.seriesCount < 2 + if (!singleSeries) return 3 + return p.uniqueXCount && p.uniqueXCount >= 4 ? 
5 : 3 + }, + "change-detection": (p) => (p.xProvenance === "scatter" && !p.monotonicX ? 1 : 3), + }, + + caveats: (p) => { + const out: string[] = [] + if (p.seriesCount && p.seriesCount >= 2 && p.primary.series) { + out.push( + `showing only the leading "${p.primary.series}" series — for multi-series comparison use LineChart or DifferenceChart`, + ) + } + return out + }, + + variants: [ + { + key: "smooth", + label: "Smooth gradient", + props: { curve: "monotoneX" }, + tags: ["smooth", "gradient", "narrative"], + }, + { + key: "linear", + label: "Linear", + props: { curve: "linear", gradientFill: false, areaOpacity: 0.5 }, + tags: ["linear"], + }, + { + key: "stepped", + label: "Stepped", + props: { curve: "stepAfter" }, + tags: ["step"], + intentDeltas: { "change-detection": +1 }, + }, + ], + + buildProps: (profile, variant) => { + let data = profile.data + + // Multi-series subselection: pull out the series with the largest summed y + // and show just that one. Same "narrow the dataset to make the chart + // honest" pattern DifferenceChart uses when the input has more series than + // its native two. + if (profile.seriesCount && profile.seriesCount >= 2 && profile.primary.series) { + const seriesKey = profile.primary.series + const yKey = profile.primary.y as string + const totals = new Map() + for (const row of profile.data) { + const k = row[seriesKey] + // Skip nullish/empty-string series values — `profileData` ignores them + // when counting categories, and bucketing them as their own group + // would let "undefined" or "" become the leading series. + if (k == null || k === "") continue + const v = Number(row[yKey]) + totals.set(k, (totals.get(k) ?? 0) + (Number.isFinite(v) ? v : 0)) + } + let leading: unknown + let max = -Infinity + for (const [k, v] of totals) { + if (v > max) { + max = v + leading = k + } + } + // Only subselect when we actually found a valid leading series; otherwise + // leave the data alone and let the single-series renderer handle it. 
+ if (leading != null && totals.size > 0) { + data = profile.data.filter((row) => row[seriesKey] === leading) + } + } + + const base: Record = { + data, + xAccessor: profile.primary.x, + yAccessor: profile.primary.y, + // Gradient is the default — single-series areas read better with a + // top-to-baseline opacity ramp than a flat fill. + gradientFill: true, + areaOpacity: 0.55, + } + if (profile.hasTimeAxis && profile.primary.x === profile.primary.time) { + base.xScaleType = "time" + } + return { ...base, ...(variant?.props ?? {}) } + }, +} diff --git a/src/components/charts/xy/BubbleChart.capability.ts b/src/components/charts/xy/BubbleChart.capability.ts new file mode 100644 index 00000000..1d2fc325 --- /dev/null +++ b/src/components/charts/xy/BubbleChart.capability.ts @@ -0,0 +1,32 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const BubbleChartCapability: ChartCapability = { + component: "BubbleChart", + family: "relationship", + importPath: "semiotic/xy", + rubric: { familiarity: 3, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (profile.rowCount < 4) return "needs at least 4 points" + if (!profile.primary.x) return "needs a numeric x field" + if (!profile.primary.y) return "needs a numeric y field" + if (!profile.primary.size) return "needs a third numeric measure for bubble size" + return null + }, + + intentScores: { + "correlation": 4, + "compare-categories": 3, + "outlier-detection": 4, + }, + + caveats: () => ["bubble area is harder to compare than length — large dynamic ranges distort"], + + buildProps: (profile) => ({ + data: profile.data, + xAccessor: profile.primary.x, + yAccessor: profile.primary.y, + sizeBy: profile.primary.size, + ...(profile.primary.series && (profile.seriesCount ?? 0) <= 6 ? 
{ colorBy: profile.primary.series } : {}), + }), +} diff --git a/src/components/charts/xy/CandlestickChart.capability.ts b/src/components/charts/xy/CandlestickChart.capability.ts new file mode 100644 index 00000000..466ca43d --- /dev/null +++ b/src/components/charts/xy/CandlestickChart.capability.ts @@ -0,0 +1,37 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const CandlestickChartCapability: ChartCapability = { + component: "CandlestickChart", + family: "time-series", + importPath: "semiotic/xy", + rubric: { familiarity: 3, accuracy: 4, precision: 4 }, + + fits: (profile) => { + if (profile.rowCount < 4) return "needs at least 4 rows" + if (!profile.primary.x) return "needs an x field (typically date)" + const fieldNames = new Set(Object.keys(profile.fields).map((f) => f.toLowerCase())) + const haveHigh = fieldNames.has("high") + const haveLow = fieldNames.has("low") + if (!haveHigh || !haveLow) return "needs at minimum high/low fields (open/close optional)" + return null + }, + + intentScores: { + "change-detection": 4, + "trend": 3, + "outlier-detection": 3, + }, + + buildProps: (profile) => { + const fields = Object.keys(profile.fields) + const find = (target: string) => fields.find((f) => f.toLowerCase() === target) + return { + data: profile.data, + xAccessor: profile.primary.x, + highAccessor: find("high"), + lowAccessor: find("low"), + openAccessor: find("open"), + closeAccessor: find("close"), + } + }, +} diff --git a/src/components/charts/xy/ConnectedScatterplot.capability.ts b/src/components/charts/xy/ConnectedScatterplot.capability.ts new file mode 100644 index 00000000..41ec9d5c --- /dev/null +++ b/src/components/charts/xy/ConnectedScatterplot.capability.ts @@ -0,0 +1,63 @@ +import type { ChartCapability } from "../../ai/chartCapabilityTypes" + +export const ConnectedScatterplotCapability: ChartCapability = { + component: "ConnectedScatterplot", + family: "relationship", + importPath: "semiotic/xy", + rubric: { 
familiarity: 3, accuracy: 4, precision: 4 }, + + fits: (profile) => { + if (profile.rowCount < 4) return "needs at least 4 ordered points" + if (!profile.primary.x) return "needs an x field" + if (!profile.primary.y) return "needs a y field" + if (!profile.monotonicX && !profile.hasTimeAxis) return "needs an ordered x sequence" + return null + }, + + intentScores: { + "trend": 3, + // "Correlation over time" is the canonical job: two numerics plotted + // against each other while the path traces a temporal sequence. Bump to 5 + // when the canonical form is available (sequence + 2+ other numerics); + // otherwise it's just an ordered scatter, which is weaker correlation + // evidence than a plain Scatterplot. + "correlation": (p) => { + const seq = p.xProvenance === "time" || p.xProvenance === "named" ? p.primary.x : p.primary.time + const others = seq ? p.candidates.y.filter((c) => c.field !== seq).map((c) => c.field) : [] + return seq && others.length >= 2 ? 5 : 4 + }, + "change-detection": 3, + }, + + caveats: () => ["readers can confuse path direction without explicit start/end markers"], + + buildProps: (profile) => { + const base: Record = { data: profile.data } + + // Canonical form — sequence-as-order, two numerics for x/y. Hans Rosling's + // "income vs life expectancy over years" shape. When the data shape doesn't + // support it (only one numeric besides the sequence), fall back to plotting + // the sequence on x with primary.y on y. + const seq = profile.xProvenance === "time" || profile.xProvenance === "named" + ? profile.primary.x + : profile.primary.time + const otherNumerics = seq + ? 
/**
 * DifferenceChart's native shape is two series. When the input has 2+ series
 * we subselect the top two by total y and pivot them into the wide form the
 * chart expects — same "narrow the dataset to make the chart honest" pattern
 * AreaChart uses for its single-series fallback.
 */
export const DifferenceChartCapability: ChartCapability = {
  component: "DifferenceChart",
  family: "time-series",
  importPath: "semiotic/xy",
  rubric: { familiarity: 3, accuracy: 4, precision: 4 },

  fits: (profile) => {
    if (profile.rowCount < 4) return "needs at least 4 rows"
    if (!profile.primary.x) return "needs an x field (numeric or time)"
    if (!profile.primary.series) return "needs a series field with at least two groups"
    if (!profile.seriesCount || profile.seriesCount < 2) return `needs 2+ series (got ${profile.seriesCount ?? 0})`
    if (!profile.primary.y) return "needs a numeric y field"
    // Same ordered-x guard LineChart/AreaChart use — a difference between two
    // series only reads as "change over a sequence" if the x is actually a
    // sequence. Scatter-fallback x with no monotonicity is meaningless here.
    if (profile.xProvenance === "scatter" && !profile.monotonicX) {
      return "needs an ordered/temporal x — given x looks like a scatter pattern, not a sequence"
    }
    return null
  },

  intentScores: {
    "compare-series": 5,
    "change-detection": 4,
    "trend": 3,
  },

  caveats: (p) => {
    const out: string[] = []
    if (p.seriesCount && p.seriesCount > 2) {
      out.push(`showing the top 2 of ${p.seriesCount} series — for full multi-series comparison use LineChart`)
    }
    return out
  },

  /**
   * Pivots long-form `{x, series, y}` rows into the wide `{x, a, b}` rows the
   * chart consumes. With more than two series, the two with the largest
   * cumulative y are kept so the comparison surfaces the most significant
   * pair rather than insertion-order accidents. Only x positions where BOTH
   * series have a non-null value are emitted.
   */
  buildProps: (profile) => {
    const xKey = profile.primary.x as string
    const yKey = profile.primary.y as string
    const seriesKey = profile.primary.series as string

    // Nullish / empty-string series values are "not a real series" — this
    // matches profileData's distinct-count semantics, which ignore them.
    // Without the guard `String(undefined)` would seed a literal "undefined"
    // bucket that could plausibly land in the top two.
    const seriesNameOf = (raw: unknown): string | undefined => {
      if (raw == null) return undefined
      const name = String(raw)
      return name.length > 0 ? name : undefined
    }

    // Pass 1: cumulative y per series (non-finite y contributes 0).
    const totals = new Map<string, number>()
    for (const row of profile.data) {
      const name = seriesNameOf(row[seriesKey])
      if (name === undefined) continue
      const value = Number(row[yKey])
      totals.set(name, (totals.get(name) ?? 0) + (Number.isFinite(value) ? value : 0))
    }
    const ranked = [...totals.entries()].sort((left, right) => right[1] - left[1])
    const aName = ranked[0]?.[0]
    const bName = ranked[1]?.[0]

    // Normalize the pivot key so non-primitive x values (notably `Date`
    // instances — Map uses reference equality on objects) collapse to the
    // same bucket. Two rows with the same timestamp but different Date
    // object identities would otherwise miss each other and the pivot
    // would silently drop points that should pair up.
    const keyOf = (value: unknown): unknown => (value instanceof Date ? value.getTime() : value)

    // Pass 2: one wide row per distinct x, filled from the two chosen series.
    const byX = new Map<unknown, Record<string, unknown>>()
    for (const row of profile.data) {
      const name = seriesNameOf(row[seriesKey])
      if (name === undefined) continue
      if (name !== aName && name !== bName) continue
      const x = row[xKey]
      const bucket = keyOf(x)
      let entry = byX.get(bucket)
      if (!entry) {
        entry = { [xKey]: x }
        byX.set(bucket, entry)
      }
      if (name === aName) entry.a = row[yKey]
      else entry.b = row[yKey]
    }
    const wide = Array.from(byX.values()).filter((entry) => entry.a != null && entry.b != null)

    return {
      data: wide,
      xAccessor: xKey,
      seriesAAccessor: "a",
      seriesBAccessor: "b",
      seriesALabel: aName,
      seriesBLabel: bName,
    }
  },
}
+ */ +export const HeatmapCapability: ChartCapability = { + component: "Heatmap", + family: "relationship", + importPath: "semiotic/xy", + rubric: { familiarity: 3, accuracy: 4, precision: 3 }, + + fits: (profile) => { + if (profile.rowCount < 4) return "needs at least 4 cells" + if (!profile.primary.y) return "needs a numeric value to encode in cell color" + // Heatmap needs two discrete axes. Acceptable shapes: + // • 2+ distinct categorical fields (category × category) + // • 1 categorical + 1 time field (category × time) + // • 1 categorical + low-cardinality numeric (≤ 30 distinct values) + const categoricalCount = profile.candidates.category.length + const hasTime = profile.hasTimeAxis + if (categoricalCount < 2 && !(categoricalCount >= 1 && hasTime)) { + return "needs two categorical-or-time dimensions for the axes" + } + const xUnique = profile.uniqueXCount ?? 0 + if (xUnique > 50) return "too many x cells for a legible heatmap" + return null + }, + + intentScores: { + "correlation": 3, + "distribution": 2, + // compare-categories only works when we have a *matrix*, not a 1D categorical comparison + "compare-categories": (p) => { + const catCount = p.candidates.category.length + return catCount >= 2 ? 4 : 1 + }, + "composition-over-time": (p) => (p.hasTimeAxis && p.candidates.category.length >= 1 ? 4 : 1), + }, + + caveats: (p) => { + const out: string[] = [] + if ((p.uniqueXCount ?? 0) > 30) out.push("many x values — cells will be narrow") + return out + }, + + variants: [ + { + key: "default", + label: "Sequential color", + props: {}, + tags: ["sequential"], + }, + { + key: "show-values", + label: "With cell labels", + props: { showValues: true }, + tags: ["labeled"], + intentDeltas: { "compare-categories": +1 }, + rubricDeltas: { precision: +1 }, + caveats: ["cell labels crowd dense matrices"], + }, + ], + + buildProps: (profile, variant) => { + // Prefer category × category if available, else category × time. 
/**
 * True when x is the scatter-fallback numeric and is not monotonic — i.e. it
 * is not a sequence, so connecting points along it would be misleading.
 */
const hasUnorderedX = (p: { xProvenance?: unknown; monotonicX?: unknown }): boolean =>
  p.xProvenance === "scatter" && !p.monotonicX

/**
 * LineChart capability — declares what data shapes LineChart serves well,
 * what intents it answers, and what variants change those answers.
 *
 * Read alongside `LineChart.tsx`; this file is what makes the chart
 * "self-aware" for suggestion and interrogation flows.
 */
export const LineChartCapability: ChartCapability = {
  component: "LineChart",
  family: "time-series",
  importPath: "semiotic/xy",
  rubric: { familiarity: 5, accuracy: 4, precision: 4 },

  fits: (profile) => {
    if (profile.rowCount < 2) return "needs at least 2 rows"
    if (!profile.primary.x) return "needs a numeric or time x field"
    if (!profile.primary.y) return "needs a numeric y field"
    const xKind = profile.candidates.x.find((c) => c.field === profile.primary.x)?.kind
    if (xKind && xKind !== "numeric" && xKind !== "date") {
      return `x field "${profile.primary.x}" is ${xKind}, LineChart needs numeric or time`
    }
    // A line chart needs an *ordered* x — connecting points across an arbitrary
    // numeric (scatter-fallback x with no monotonicity) is misleading.
    if (hasUnorderedX(profile)) {
      return "needs an ordered/temporal x — given x looks like a scatter pattern, not a sequence"
    }
    return null
  },

  intentScores: {
    "trend": (p) => {
      // A trend needs an *ordered* x — time field, monotonic numeric, or
      // an x-named numeric. Scatter-fallback x (just "the other numeric"
      // when there are two) doesn't qualify as a trend axis.
      if (hasUnorderedX(p)) return 1
      if (!p.uniqueXCount || p.uniqueXCount < 4) return 3
      // Yield to AreaChart on clean single-series trend — its gradient fill
      // is more visually arresting. LineChart still wins on multi-series
      // because AreaChart subselects to one series in that case.
      const singleSeries = !p.seriesCount || p.seriesCount < 2
      return singleSeries ? 4 : 5
    },
    "compare-series": (p) => {
      if (hasUnorderedX(p)) return 1
      if (!p.seriesCount || p.seriesCount < 2) return 1
      if (p.seriesCount > 8) return 2
      return 4
    },
    "change-detection": (p) => (hasUnorderedX(p) ? 1 : 4),
    "outlier-detection": 2,
    "correlation": 2,
  },

  caveats: (p) => {
    const out: string[] = []
    if (p.hasRepeatedX && (!p.seriesCount || p.seriesCount < 2)) {
      out.push("x values repeat — consider aggregating or adding a series field")
    }
    if (p.seriesCount && p.seriesCount > 8) {
      out.push(`${p.seriesCount} series may produce a spaghetti chart`)
    }
    return out
  },

  variants: [
    {
      key: "linear",
      label: "Linear trend",
      props: { curve: "linear", showPoints: false },
      tags: ["linear"],
    },
    {
      key: "smooth",
      label: "Smooth trend",
      description: "Monotone smoothing — emphasizes the shape over individual points.",
      props: { curve: "monotoneX", showPoints: false },
      tags: ["smooth", "narrative"],
      intentDeltas: { "trend": +1, "outlier-detection": -2 },
      rubricDeltas: { precision: -1 },
      caveats: ["smoothing hides individual outliers"],
    },
    {
      key: "stepped-with-points",
      label: "Discrete steps",
      description: "Step curve plus visible points — for state changes or discrete events.",
      props: { curve: "step", showPoints: true, pointRadius: 3 },
      tags: ["step", "discrete"],
      intentDeltas: { "change-detection": +1, "trend": -1 },
      rubricDeltas: { precision: +1 },
    },
  ],

  /**
   * Base props are data + x/y accessors. Series splitting (`lineBy`/`colorBy`)
   * is added only with 2+ series, and a time scale only when the chosen x is
   * the profile's time field. Variant props are spread last so they win.
   */
  buildProps: (profile, variant) => {
    const base: Record<string, unknown> = {
      data: profile.data,
      xAccessor: profile.primary.x,
      yAccessor: profile.primary.y,
    }
    if (profile.seriesCount && profile.seriesCount >= 2 && profile.primary.series) {
      base.lineBy = profile.primary.series
      base.colorBy = profile.primary.series
    }
    if (profile.hasTimeAxis && profile.primary.x === profile.primary.time) {
      base.xScaleType = "time"
    }
    return { ...base, ...(variant?.props ?? {}) }
  },
}
/**
 * Names of the numeric columns other than the shared x field, in field order.
 * Shared by `fits` and `buildProps` so both agree on what counts as a measure.
 */
const multiAxisMeasureFields = (
  fields: Readonly<Record<string, { type: string }>>,
  xField: string | null | undefined,
): string[] =>
  Object.entries(fields)
    .filter(([name, summary]) => summary.type === "numeric" && name !== xField)
    .map(([name]) => name)

/**
 * MultiAxisLineChart capability — plots two numeric measures against one
 * ordered x, each on its own axis.
 */
export const MultiAxisLineChartCapability: ChartCapability = {
  component: "MultiAxisLineChart",
  family: "time-series",
  importPath: "semiotic/xy",
  rubric: { familiarity: 3, accuracy: 3, precision: 3 },

  fits: (profile) => {
    if (profile.rowCount < 4) return "needs at least 4 rows"
    if (!profile.primary.x) return "needs an x field"
    // Needs 2+ numeric measures besides x. Only the count is checked here —
    // whether their ranges actually differ enough to justify separate axes
    // is not evaluated.
    if (multiAxisMeasureFields(profile.fields, profile.primary.x).length < 2) {
      return "needs at least 2 numeric measures"
    }
    if (profile.xProvenance === "scatter" && !profile.monotonicX) {
      return "needs an ordered/temporal x — multi-axis lines need a shared sequence"
    }
    return null
  },

  intentScores: {
    "compare-series": 4,
    "trend": 3,
    "correlation": 3,
  },

  caveats: () => ["dual axes can mislead — only use when measures share interpretation"],

  buildProps: (profile) => ({
    data: profile.data,
    xAccessor: profile.primary.x,
    // The first two numeric measures (field order) become the two axes.
    series: multiAxisMeasureFields(profile.fields, profile.primary.x)
      .slice(0, 2)
      .map((name) => ({ yAccessor: name, label: name })),
  }),
}
+export const QuadrantChartCapability: ChartCapability = { + component: "QuadrantChart", + family: "relationship", + importPath: "semiotic/xy", + rubric: { familiarity: 3, accuracy: 4, precision: 4 }, + + fits: (profile) => { + if (profile.rowCount < 4) return "needs at least 4 points" + if (!profile.primary.x) return "needs a numeric x field" + if (!profile.primary.y) return "needs a numeric y field" + return null + }, + + intentScores: { + // QuadrantChart partitions a 2D plane by thresholds — useful for + // strategy-matrix views (BCG, Eisenhower), not for raw category comparison. + // The two axes should both be meaningful continuous measures. + "compare-categories": 2, + "correlation": 3, + "outlier-detection": 3, + }, + + buildProps: (profile) => { + // Use the median x and y as default split points. + const xField = profile.primary.x! + const yField = profile.primary.y! + const xSummary = profile.fields[xField] + const ySummary = profile.fields[yField] + const xCenter = xSummary?.type === "numeric" ? xSummary.median : undefined + const yCenter = ySummary?.type === "numeric" ? ySummary.median : undefined + return { + data: profile.data, + xAccessor: xField, + yAccessor: yField, + ...(xCenter !== undefined ? { xCenter } : {}), + ...(yCenter !== undefined ? { yCenter } : {}), + ...(profile.primary.series && (profile.seriesCount ?? 0) <= 6 ? 
/**
 * Scatterplot capability — two numeric measures plotted against each other.
 * Scores highest for correlation and outlier detection; yields the
 * correlation tiebreak to ConnectedScatterplot when a sequence axis exists.
 */
export const ScatterplotCapability: ChartCapability = {
  component: "Scatterplot",
  family: "relationship",
  importPath: "semiotic/xy",
  rubric: { familiarity: 4, accuracy: 5, precision: 5 },

  fits: (profile) => {
    if (profile.rowCount < 3) return "needs at least 3 rows"
    if (!profile.primary.x) return "needs a numeric x field"
    if (!profile.primary.y) return "needs a numeric y field"
    const xKind = profile.candidates.x.find((c) => c.field === profile.primary.x)?.kind
    // Numeric x is the native case. A date x is also accepted: a time-axis
    // scatter is technically valid, though usually a worse choice than a
    // line/area. Any other known kind is rejected.
    if (xKind && xKind !== "numeric" && xKind !== "date") {
      return `x field "${profile.primary.x}" is ${xKind}, Scatterplot needs numeric`
    }
    return null
  },

  intentScores: {
    // When a sequence axis is available and 2+ other numerics exist,
    // ConnectedScatterplot is the strictly more informative correlation chart
    // (same x/y plus temporal progression). Step back so it wins the tiebreak.
    "correlation": (p) => {
      const seq = p.xProvenance === "time" || p.xProvenance === "named" ? p.primary.x : p.primary.time
      const others = seq ? p.candidates.y.filter((c) => c.field !== seq).map((c) => c.field) : []
      return seq && others.length >= 2 ? 4 : 5
    },
    "outlier-detection": 5,
    "distribution": 3,
    "compare-series": (p) => (p.seriesCount && p.seriesCount >= 2 && p.seriesCount <= 6 ? 3 : 1),
    "rank": 1,
  },

  variants: [
    {
      key: "points",
      label: "Points only",
      props: {},
      tags: ["points"],
    },
    {
      key: "with-trend",
      label: "Points with regression line",
      props: { regression: "linear" },
      tags: ["regression", "trend"],
      // A regression line illuminates the correlation but doesn't make
      // Scatterplot a "trend over time" chart — keep delta modest.
      intentDeltas: { "correlation": 0, "trend": +1 },
    },
  ],

  buildProps: (profile, variant) => {
    const base: Record<string, unknown> = { data: profile.data }

    // Canonical "X vs Y" form: when there's a strong sequence axis (time or
    // named — month, quarter, year...) AND 2+ other numerics, prefer plotting
    // the two numerics against each other. Otherwise the scatterplot just
    // recapitulates a line chart on the sequence axis.
    const seq =
      profile.xProvenance === "time" || profile.xProvenance === "named"
        ? profile.primary.x
        : undefined
    const otherNumerics = seq
      ? profile.candidates.y.filter((c) => c.field !== seq).map((c) => c.field)
      : []

    if (seq && otherNumerics.length >= 2) {
      base.xAccessor = otherNumerics[0]
      base.yAccessor = otherNumerics[1]
    } else {
      base.xAccessor = profile.primary.x
      base.yAccessor = profile.primary.y
      // Size encoding is only offered in the plain x/y form.
      if (profile.primary.size) base.sizeBy = profile.primary.size
    }
    // Colour by series only when the series count is known and small (≤ 6).
    if (profile.primary.series && profile.seriesCount && profile.seriesCount <= 6) {
      base.colorBy = profile.primary.series
    }
    // Variant props are spread last so they override the base.
    return { ...base, ...(variant?.props ?? {}) }
  },
}
4 : 3), + "trend": 3, + "compare-series": 2, + }, + + caveats: () => ["readability of individual layers degrades below the baseline"], + + variants: [ + { + key: "baseline-zero", + label: "Zero baseline", + props: { baseline: "zero", stackOrder: "key" }, + tags: ["zero-baseline"], + }, + { + key: "streamgraph", + label: "Streamgraph", + description: "Wiggle baseline + inside-out ordering — emphasizes momentum over precise totals.", + props: { baseline: "wiggle", stackOrder: "insideOut", showLine: false }, + tags: ["streamgraph", "narrative"], + intentDeltas: { "composition-over-time": +0, "trend": +1, "part-to-whole": -2 }, + rubricDeltas: { accuracy: -1, precision: -1 }, + caveats: ["streamgraph hides absolute totals; precise reads not possible"], + }, + { + key: "centered", + label: "Centered baseline", + props: { baseline: "silhouette", stackOrder: "insideOut" }, + tags: ["silhouette"], + intentDeltas: { "part-to-whole": -1 }, + }, + ], + + buildProps: (profile, variant) => { + const base: Record = { + data: profile.data, + xAccessor: profile.primary.x, + yAccessor: profile.primary.y, + areaBy: profile.primary.series, + colorBy: profile.primary.series, + } + if (profile.hasTimeAxis && profile.primary.x === profile.primary.time) { + base.xScaleType = "time" + } + return { ...base, ...(variant?.props ?? 
{}) } + }, +} diff --git a/src/components/data/DataSummarizer.test.ts b/src/components/data/DataSummarizer.test.ts new file mode 100644 index 00000000..138c83e1 --- /dev/null +++ b/src/components/data/DataSummarizer.test.ts @@ -0,0 +1,97 @@ +import { describe, it, expect } from "vitest" +import { summarizeData } from "./DataSummarizer" + +describe("summarizeData", () => { + it("summarizes numeric fields with min/max/mean/median", () => { + const data = [ + { x: 1, y: 10 }, + { x: 2, y: 20 }, + { x: 3, y: 30 }, + { x: 4, y: 40 }, + ] + const summary = summarizeData(data) + expect(summary.rowCount).toBe(4) + const x = summary.fields.x + expect(x.type).toBe("numeric") + if (x.type === "numeric") { + expect(x.min).toBe(1) + expect(x.max).toBe(4) + expect(x.mean).toBe(2.5) + expect(x.median).toBe(2.5) + } + }) + + it("summarizes categorical fields with top values and distinct count", () => { + const data = [ + { category: "A" }, + { category: "A" }, + { category: "B" }, + { category: "C" }, + ] + const summary = summarizeData(data) + const c = summary.fields.category + expect(c.type).toBe("categorical") + if (c.type === "categorical") { + expect(c.distinctCount).toBe(3) + expect(c.topValues[0]).toEqual({ value: "A", count: 2 }) + expect(c.distinctValues).toEqual(["A", "B", "C"]) + } + }) + + it("detects ISO-like date strings", () => { + const data = [{ d: "2024-01-15" }, { d: "2024-06-30" }] + const summary = summarizeData(data) + const d = summary.fields.d + expect(d.type).toBe("date") + if (d.type === "date") { + expect(d.min.startsWith("2024-01-15")).toBe(true) + expect(d.max.startsWith("2024-06-30")).toBe(true) + } + }) + + it("handles Date instances", () => { + const data = [{ d: new Date("2024-01-01") }, { d: new Date("2024-12-31") }] + const summary = summarizeData(data) + expect(summary.fields.d.type).toBe("date") + }) + + it("handles empty data gracefully", () => { + const summary = summarizeData([]) + expect(summary.rowCount).toBe(0) + 
expect(summary.fields).toEqual({}) + expect(summary.sample).toEqual([]) + }) + + it("handles null/undefined input", () => { + expect(summarizeData(null).rowCount).toBe(0) + expect(summarizeData(undefined).rowCount).toBe(0) + }) + + it("discovers fields across ragged rows", () => { + const data = [{ a: 1 }, { b: 2 }, { a: 3, b: 4 }] + const summary = summarizeData(data) + expect(Object.keys(summary.fields).sort()).toEqual(["a", "b"]) + }) + + it("scales to large numeric arrays without stack overflow", () => { + const data = Array.from({ length: 200_000 }, (_, i) => ({ v: i })) + const summary = summarizeData(data) + const v = summary.fields.v + expect(v.type).toBe("numeric") + if (v.type === "numeric") { + expect(v.min).toBe(0) + expect(v.max).toBe(199_999) + } + }) + + it("limits sample to sampleSize", () => { + const data = Array.from({ length: 50 }, (_, i) => ({ i })) + const summary = summarizeData(data, { sampleSize: 3 }) + expect(summary.sample.length).toBe(3) + }) + + it("returns 'unknown' for fields with only null values", () => { + const data = [{ x: null }, { x: null }] + expect(summarizeData(data).fields.x.type).toBe("unknown") + }) +}) diff --git a/src/components/data/DataSummarizer.ts b/src/components/data/DataSummarizer.ts new file mode 100644 index 00000000..76fe4d74 --- /dev/null +++ b/src/components/data/DataSummarizer.ts @@ -0,0 +1,189 @@ +import type { Datum } from "../charts/shared/datumTypes" + +export type FieldType = "numeric" | "categorical" | "date" | "unknown" + +export interface NumericFieldSummary { + type: "numeric" + min: number + max: number + mean: number + median: number +} + +export interface DateFieldSummary { + type: "date" + min: string + max: string +} + +export interface CategoricalFieldSummary { + type: "categorical" + distinctCount: number + topValues: ReadonlyArray<{ value: string; count: number }> + distinctValues?: ReadonlyArray +} + +export interface UnknownFieldSummary { + type: "unknown" +} + +export type FieldSummary = 
/** Strings starting with `YYYY-MM` or `YYYY/MM` are candidates for date parsing. */
const DATE_LIKE = /^\d{4}[-/]\d{2}/
/** Plain decimal numerals with optional sign, fraction and exponent ("42", "-3.14e6"). */
const NUMERIC_STRING = /^-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/

/**
 * Classify a single value.
 *
 * - finite numbers and numeric-looking strings → "numeric"
 * - `Date` instances and parseable date-like strings → "date"
 * - other strings and booleans → "categorical"
 * - everything else (non-finite numbers, objects, …) → "unknown"
 */
function inferType(val: unknown): FieldType {
  if (typeof val === "number") return Number.isFinite(val) ? "numeric" : "unknown"
  if (val instanceof Date) return "date"
  if (typeof val === "boolean") return "categorical"
  if (typeof val !== "string") return "unknown"
  if (DATE_LIKE.test(val) && !Number.isNaN(Date.parse(val))) return "date"
  // CSV/JSON often carries numerics as strings ("42", "3.14e6"). Classify
  // those as numeric up-front rather than dropping them into categorical and
  // losing min/max/mean.
  if (NUMERIC_STRING.test(val) && Number.isFinite(Number(val))) return "numeric"
  return "categorical"
}

/** Single-pass min/max. Returns `{ min: Infinity, max: -Infinity }` for an empty input. */
function minMax(values: ReadonlyArray<number>): { min: number; max: number } {
  // Avoid Math.min(...values) — spread overflows the call stack around ~100k items.
  let min = Infinity
  let max = -Infinity
  for (const v of values) {
    if (v < min) min = v
    if (v > max) max = v
  }
  return { min, max }
}

/** Median of an ascending-sorted array; `NaN` when the array is empty. */
function median(sorted: ReadonlyArray<number>): number {
  const n = sorted.length
  if (n === 0) return NaN
  const mid = n >> 1
  return n % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid]
}

/**
 * Convert a cell to a finite number, or `undefined` when it is not one.
 * Only numbers and non-blank strings are considered: a bare `Number(v)` would
 * turn `""`, `true` and `[]` into 0/1 and silently skew the statistics.
 */
function toFiniteNumber(val: unknown): number | undefined {
  if (typeof val === "number") return Number.isFinite(val) ? val : undefined
  if (typeof val !== "string" || val.trim() === "") return undefined
  const n = Number(val)
  return Number.isFinite(n) ? n : undefined
}

/** Epoch milliseconds for a `Date` or a parseable date string; `undefined` otherwise. */
function toTimestamp(val: unknown): number | undefined {
  const t = val instanceof Date ? val.getTime() : typeof val === "string" ? Date.parse(val) : NaN
  return Number.isFinite(t) ? t : undefined
}

/** min / max / mean / median over the usable numeric cells; "unknown" when none are usable. */
function summarizeNumeric(raw: ReadonlyArray<unknown>): FieldSummary {
  const nums: number[] = []
  let sum = 0
  for (const v of raw) {
    const n = toFiniteNumber(v)
    if (n !== undefined) {
      nums.push(n)
      sum += n
    }
  }
  if (nums.length === 0) return { type: "unknown" }
  const { min, max } = minMax(nums)
  const mean = sum / nums.length
  nums.sort((a, b) => a - b) // `nums` is local, so sorting in place is safe
  return { type: "numeric", min, max, mean, median: median(nums) }
}

/** ISO-8601 min/max over the parseable date cells; "unknown" when none parse. */
function summarizeDate(raw: ReadonlyArray<unknown>): FieldSummary {
  const times: number[] = []
  for (const v of raw) {
    const t = toTimestamp(v)
    if (t !== undefined) times.push(t)
  }
  if (times.length === 0) return { type: "unknown" }
  const { min, max } = minMax(times)
  return { type: "date", min: new Date(min).toISOString(), max: new Date(max).toISOString() }
}

/**
 * Distinct count plus the `maxDistinct` most frequent values (ties keep
 * first-seen order). `distinctValues` is listed only when the whole domain
 * fits within `maxDistinct`.
 */
function summarizeCategorical(raw: ReadonlyArray<unknown>, maxDistinct: number): FieldSummary {
  const counts = new Map<string, number>()
  for (const v of raw) {
    const label = String(v)
    counts.set(label, (counts.get(label) ?? 0) + 1)
  }
  const topValues = [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, maxDistinct)
    .map(([value, count]) => ({ value, count }))
  return {
    type: "categorical",
    distinctCount: counts.size,
    topValues,
    distinctValues: counts.size <= maxDistinct ? topValues.map((v) => v.value) : undefined,
  }
}

/** Summarize one column from its non-null cells. */
function summarizeField(raw: ReadonlyArray<unknown>, maxDistinct: number): FieldSummary {
  // Classify by the first cell with a recognisable type, so a leading NaN or
  // stray object does not condemn an otherwise well-typed column to "unknown".
  let type: FieldType = "unknown"
  for (const v of raw) {
    type = inferType(v)
    if (type !== "unknown") break
  }
  switch (type) {
    case "numeric":
      return summarizeNumeric(raw)
    case "date":
      return summarizeDate(raw)
    case "categorical":
      return summarizeCategorical(raw, maxDistinct)
    default:
      return { type: "unknown" }
  }
}

/**
 * Summarize a dataset for an LLM. Returns row count, per-field statistics, and a small sample.
 *
 * Designed so a model can answer questions about ranges, peaks, distributions, and categories
 * without seeing the full dataset.
 *
 * @param data - Rows to summarize. `null`, `undefined`, non-arrays and empty
 *   arrays all yield an empty summary.
 * @param options - `maxDistinct` caps the categorical top-values list (default 10),
 *   `sampleSize` is the number of leading rows copied into `sample` (default 5),
 *   and `keyScanRows` is how many leading rows are scanned to discover field
 *   names (default 100).
 * @returns Row count, one `FieldSummary` per discovered key, and the sample rows.
 */
export function summarizeData(
  data: ReadonlyArray<Datum> | null | undefined,
  options: SummarizeOptions = {}
): DataSummary {
  const { maxDistinct = 10, sampleSize = 5, keyScanRows = 100 } = options

  if (!Array.isArray(data) || data.length === 0) {
    return { rowCount: 0, fields: {}, sample: [] }
  }

  // Discover keys across the first N rows so ragged data doesn't drop fields.
  const keys = new Set<string>()
  const scanLimit = Math.min(data.length, keyScanRows)
  for (let i = 0; i < scanLimit; i++) {
    const row = data[i]
    if (row && typeof row === "object") {
      for (const k of Object.keys(row)) keys.add(k)
    }
  }

  const fields: Record<string, FieldSummary> = {}
  for (const key of keys) {
    // Null/undefined cells carry no information and are excluded up-front.
    const raw: unknown[] = []
    for (const row of data) {
      const v = row?.[key]
      if (v != null) raw.push(v)
    }
    fields[key] = summarizeField(raw, maxDistinct)
  }

  return { rowCount: data.length, fields, sample: data.slice(0, sampleSize) }
}
CategoricalFieldSummary, + UnknownFieldSummary, + SummarizeOptions, +} from "./data/DataSummarizer" + +// Chart capability layer — heuristic recommendations + intent taxonomy +export { profileData } from "./ai/profileData" +export type { ProfileDataOptions } from "./ai/profileData" +export { suggestCharts, scoreChart, explainCapabilityFit } from "./ai/suggestCharts" +export type { + SuggestChartsOptions, + RejectedCapability, + ExplainCapabilityFitResult, +} from "./ai/suggestCharts" +export { inferIntent } from "./ai/inferIntent" +export type { InferIntentResult } from "./ai/inferIntent" +export { suggestDashboard } from "./ai/suggestDashboard" +export type { + DashboardPanel, + DashboardSuggestion, + SuggestDashboardOptions, +} from "./ai/suggestDashboard" + +// Audience-aware suggestion + literacy-growth surface +export { + applyAudienceBias, + effectiveFamiliarity, + stretchFamiliarityCeiling, +} from "./ai/audienceProfile" +export type { + AudienceProfile, + AudienceTarget, + AudienceBiasResult, +} from "./ai/audienceProfile" +export { + executivePersona, + analystPersona, + dataScientistPersona, + BUILT_IN_AUDIENCES, +} from "./ai/audiences" +export { suggestStretchCharts } from "./ai/suggestStretchCharts" +export type { + StretchSuggestion, + SuggestStretchChartsOptions, +} from "./ai/suggestStretchCharts" + +// Streaming intent — parallel API for live charts (schema-based, not row-based) +export { + suggestStreamCharts, + registerStreamChartCapability, + unregisterStreamChartCapability, + getStreamCapabilities, +} from "./ai/suggestStreamCharts" +export type { SuggestStreamChartsOptions } from "./ai/suggestStreamCharts" +export type { + StreamSchema, + StreamFieldSchema, + StreamFieldKind, + StreamChartCapability, + StreamIntentScorer, + StreamSuggestion, +} from "./ai/streamingTypes" +export { diffProfile } from "./ai/diffProfile" +export type { ProfileDiff, FieldTypeChange, PrimaryRoleChange, PrimaryRole } from "./ai/diffProfile" +export { 
repairChartConfig } from "./ai/repairChartConfig" +export type { + RepairResult, + RepairOkResult, + RepairAlternativeResult, + RepairUnknownResult, + RepairOptions, +} from "./ai/repairChartConfig" +export { runQualityScorecard } from "./ai/qualityScorecard" +export type { + ScorecardFixture, + ScorecardReport, + PerCapabilityScore, + PerFixtureScore, +} from "./ai/qualityScorecard" +export { CANONICAL_FIXTURES } from "./ai/qualityFixtures" +export { useChartSuggestions } from "./ai/useChartSuggestions" +export type { UseChartSuggestionsOptions, UseChartSuggestionsResult } from "./ai/useChartSuggestions" +export { + getCapabilities, + getCapability, + registerChartCapability, + unregisterChartCapability, + // XY + LineChartCapability, + AreaChartCapability, + StackedAreaChartCapability, + ScatterplotCapability, + ConnectedScatterplotCapability, + BubbleChartCapability, + QuadrantChartCapability, + MultiAxisLineChartCapability, + MinimapChartCapability, + DifferenceChartCapability, + CandlestickChartCapability, + HeatmapCapability, + // Ordinal + BarChartCapability, + GroupedBarChartCapability, + StackedBarChartCapability, + DotPlotCapability, + PieChartCapability, + DonutChartCapability, + FunnelChartCapability, + GaugeChartCapability, + LikertChartCapability, + SwimlaneChartCapability, + // Distribution + HistogramCapability, + BoxPlotCapability, + SwarmPlotCapability, + ViolinPlotCapability, + RidgelinePlotCapability, + // Network + ForceDirectedGraphCapability, + SankeyDiagramCapability, + ChordDiagramCapability, + ProcessSankeyCapability, + // Hierarchy + TreeDiagramCapability, + TreemapCapability, + CirclePackCapability, + OrbitDiagramCapability, + // Geo + ChoroplethMapCapability, + ProportionalSymbolMapCapability, + FlowMapCapability, + DistanceCartogramCapability, +} from "./ai/chartCapabilities" +export type { + ChartCapability, + ChartDataProfile, + ChartFamily, + ChartImportPath, + ChartRubric, + ChartVariant, + FieldCandidate, + FieldKind, + FitResult, 
+ IntentScorer, + Suggestion, +} from "./ai/chartCapabilityTypes" +export { listIntents, getIntent, registerIntent, BUILT_IN_INTENT_IDS } from "./ai/intents" +export type { BuiltInIntentId, IntentId, IntentDescriptor } from "./ai/intents" + // AI Observation hooks export { useChartObserver } from "./store/useObservation" export type { UseChartObserverOptions, UseChartObserverResult } from "./store/useObservation" diff --git a/src/components/store/useChartFocus.test.tsx b/src/components/store/useChartFocus.test.tsx new file mode 100644 index 00000000..4694a53a --- /dev/null +++ b/src/components/store/useChartFocus.test.tsx @@ -0,0 +1,103 @@ +import React from "react" +import { renderHook, act } from "@testing-library/react" +import { describe, it, expect } from "vitest" +import { useChartFocus } from "./useChartFocus" +import { ObservationProvider, useObservationSelector } from "./ObservationStore" +import type { ChartObservation, ObservationStoreState } from "./ObservationStore" + +const wrapper = ({ children }: { children: React.ReactNode }) => ( + {children} +) + +function makeHover(overrides: Partial = {}): ChartObservation { + return { + type: "hover", + datum: { month: 4, revenue: 32 }, + x: 100, + y: 200, + timestamp: Date.now(), + chartType: "line", + ...overrides, + } as ChartObservation +} + +function makeHoverEnd(overrides: Partial = {}): ChartObservation { + return { + type: "hover-end", + timestamp: Date.now(), + chartType: "line", + ...overrides, + } as ChartObservation +} + +function useFocusWithPush(options?: Parameters[0]) { + const focus = useChartFocus(options) + const push = useObservationSelector((s: ObservationStoreState) => s.pushObservation) + return { focus, push } +} + +describe("useChartFocus", () => { + it("returns null with no observations", () => { + const { result } = renderHook(() => useChartFocus(), { wrapper }) + expect(result.current).toBeNull() + }) + + it("converts the latest hover into a focus object", () => { + const { result } 
= renderHook(() => useFocusWithPush(), { wrapper }) + act(() => { + result.current.push(makeHover()) + }) + expect(result.current.focus).toEqual({ + datum: { month: 4, revenue: 32 }, + x: 100, + y: 200, + source: "hover", + }) + }) + + it("clears focus on hover-end", () => { + const { result } = renderHook(() => useFocusWithPush(), { wrapper }) + act(() => { + result.current.push(makeHover({ timestamp: 1 })) + result.current.push(makeHoverEnd({ timestamp: 2 })) + }) + expect(result.current.focus).toBeNull() + }) + + it("respects type filter — click-only mode ignores hovers", () => { + const { result } = renderHook(() => useFocusWithPush({ types: ["click"] }), { wrapper }) + act(() => { + result.current.push(makeHover()) + }) + expect(result.current.focus).toBeNull() + }) + + it("filters by chartId when set", () => { + const { result } = renderHook( + () => useFocusWithPush({ chartId: "chartA" }), + { wrapper }, + ) + act(() => { + result.current.push(makeHover({ chartId: "chartB" })) + }) + expect(result.current.focus).toBeNull() + + act(() => { + result.current.push(makeHover({ chartId: "chartA", datum: { id: 1 } })) + }) + expect(result.current.focus?.datum).toEqual({ id: 1 }) + }) + + it("does not error when latest observation has no datum", () => { + const { result } = renderHook(() => useFocusWithPush(), { wrapper }) + act(() => { + result.current.push({ + type: "hover", + timestamp: Date.now(), + chartType: "line", + // no datum + } as ChartObservation) + }) + expect(result.current.focus).toBeNull() + }) +}) diff --git a/src/components/store/useChartFocus.ts b/src/components/store/useChartFocus.ts new file mode 100644 index 00000000..b504b0ce --- /dev/null +++ b/src/components/store/useChartFocus.ts @@ -0,0 +1,98 @@ +"use client" +import { useMemo } from "react" +import { useChartObserver } from "./useObservation" +import type { ChartObservation } from "./ObservationStore" +import type { InterrogationFocus } from "./useChartInterrogation" + +export interface 
export interface UseChartFocusOptions {
  /** Limit attention to a specific chart instance. Required when the page has more than one. */
  chartId?: string
  /**
   * Which observation types the hook listens to. The default covers hover,
   * click, and selection plus their `*-end` counterparts (and `brush-end`):
   * attention events set the focus, end events clear it. Set to ["click"] for
   * sticky-focus UIs where hover doesn't change the AI's reference point —
   * with no `*-end` type in the list, those events no longer clear the focus.
   */
  types?: ChartObservation["type"][]
}

/**
 * Default observation types this hook subscribes to. The "-end" variants
 * are included so a hover-out / click-elsewhere / brush-clear event can
 * *clear* an existing focus rather than leaving it stuck on the previous
 * datum — any observation type that is not hover/click/selection maps
 * back to `null` in `useChartFocus`.
 */
const DEFAULT_FOCUS_TYPES: ChartObservation["type"][] = [
  "hover",
  "hover-end",
  "click",
  "click-end",
  "selection",
  "selection-end",
  "brush-end",
]

/**
 * Convenience hook: returns the latest `InterrogationFocus` for use with
 * `useChartInterrogation`'s `focus` option. Internally subscribes to the
 * observation store through `useChartObserver` (so it needs the same
 * `ObservationProvider` ancestor that hook relies on) and converts the
 * latest matching observation into the focus shape.
 *
 * @param options - Optional `chartId` filter and the observation `types` to listen to.
 * @returns The focused datum with its pixel coordinates (when the observation
 *   carries them) and source type, or `null` when nothing qualifies: no
 *   observation yet, the latest one is an `*-end` event, or it carries no
 *   object-valued datum.
 *
 * @example
 * function ChartWithChat({ data }) {
 *   const focus = useChartFocus({ chartId: "sales" })
 *   const { ask, history, annotations } = useChartInterrogation({
 *     data,
 *     focus, // ← latest hovered/clicked datum threads in
 *     onQuery: async (q, ctx) => {
 *       // ctx.focus is the same `focus` value passed above
 *       return askLLM({ question: q, focus: ctx.focus, summary: ctx.summary })
 *     },
 *   })
 *   // Render the chart (passing `annotations`) next to your own chat UI.
 * }
 */
export function useChartFocus(options: UseChartFocusOptions = {}): InterrogationFocus | null {
  const { chartId, types = DEFAULT_FOCUS_TYPES } = options
  const { latest } = useChartObserver({ chartId, types, limit: 1 })

  return useMemo(() => {
    if (!latest) return null

    // Hover/click carry the datum directly; selection carries it under
    // selection.fields. Normalize so the focus shape is consistent.
    let datum: unknown
    switch (latest.type) {
      case "hover":
      case "click":
        datum = latest.datum
        break
      case "selection":
        // Optional chaining: a selection observation without its payload is
        // treated as "no focus", the same way a datum-less hover is, instead
        // of throwing while rendering.
        datum = latest.selection?.fields
        break
      default:
        // *-end observations signal "user moved away" — clear focus. Any other
        // type does not describe a focused datum either.
        return null
    }

    if (!datum || typeof datum !== "object") return null

    return {
      datum: datum as InterrogationFocus["datum"],
      // Not every observation type carries pixel coordinates; missing ones stay undefined.
      x: (latest as { x?: number }).x,
      y: (latest as { y?: number }).y,
      source: latest.type as InterrogationFocus["source"],
    }
  }, [latest])
}
}, + ]) + expect(result.current.loading).toBe(false) + }) + + it("forwards data, summary, componentName, and props to onQuery", async () => { + const onQuery = vi.fn().mockResolvedValue({ answer: "ok" }) + const { result } = renderHook(() => + useChartInterrogation({ + data, + onQuery, + componentName: "LineChart", + props: { xAccessor: "month", yAccessor: "revenue" }, + }) + ) + await act(async () => { + await result.current.ask("hi") + }) + const [query, ctx] = onQuery.mock.calls[0] + expect(query).toBe("hi") + expect(ctx.componentName).toBe("LineChart") + expect(ctx.props).toEqual({ xAccessor: "month", yAccessor: "revenue" }) + expect(ctx.summary.rowCount).toBe(3) + expect(ctx.data).toBe(data) + }) + + it("merges initial and AI annotations", async () => { + const onQuery: InterrogationQuery = async () => ({ + answer: "marking peak", + annotations: [{ type: "callout", month: "Feb", revenue: 200 }], + }) + const initialAnnotations = [{ type: "label", month: "Jan" }] + const { result } = renderHook(() => + useChartInterrogation({ data, onQuery, initialAnnotations }) + ) + await act(async () => { + await result.current.ask("peak?") + }) + expect(result.current.annotations).toHaveLength(2) + expect(result.current.annotations[0]).toMatchObject({ type: "label" }) + expect(result.current.annotations[1]).toMatchObject({ type: "callout" }) + }) + + it("ignores blank queries", async () => { + const onQuery = vi.fn().mockResolvedValue({ answer: "" }) + const { result } = renderHook(() => useChartInterrogation({ data, onQuery })) + await act(async () => { + await result.current.ask(" ") + }) + expect(onQuery).not.toHaveBeenCalled() + expect(result.current.history).toHaveLength(0) + }) + + it("captures errors without throwing", async () => { + const onQuery: InterrogationQuery = async () => { + throw new Error("LLM offline") + } + const { result } = renderHook(() => useChartInterrogation({ data, onQuery })) + await act(async () => { + await result.current.ask("anything") + }) 
+ expect(result.current.error?.message).toBe("LLM offline") + expect(result.current.history.at(-1)?.role).toBe("assistant") + }) + + it("flips loading during the in-flight query", async () => { + let resolve: (v: { answer: string }) => void = () => {} + const onQuery: InterrogationQuery = () => + new Promise((r) => { + resolve = r + }) + const { result } = renderHook(() => useChartInterrogation({ data, onQuery })) + act(() => { + void result.current.ask("hi") + }) + await waitFor(() => expect(result.current.loading).toBe(true)) + await act(async () => { + resolve({ answer: "done" }) + }) + await waitFor(() => expect(result.current.loading).toBe(false)) + }) + + it("reset() clears history, annotations, and error", async () => { + const onQuery: InterrogationQuery = async () => ({ + answer: "x", + annotations: [{ type: "callout" }], + }) + const { result } = renderHook(() => useChartInterrogation({ data, onQuery })) + await act(async () => { + await result.current.ask("q") + }) + expect(result.current.history.length).toBe(2) + act(() => result.current.reset()) + expect(result.current.history).toEqual([]) + expect(result.current.annotations).toEqual([]) + expect(result.current.error).toBeNull() + }) + + it("forwards focus to onQuery when set", async () => { + const onQuery = vi.fn().mockResolvedValue({ answer: "about feb" }) + const focus = { + datum: { month: "Feb", revenue: 200 }, + x: 120, + y: 80, + source: "click" as const, + } + const { result } = renderHook(() => useChartInterrogation({ data, onQuery, focus })) + await act(async () => { + await result.current.ask("why this point?") + }) + expect(onQuery.mock.calls[0][1].focus).toEqual(focus) + }) + + it("omits focus from context when not set", async () => { + const onQuery = vi.fn().mockResolvedValue({ answer: "ok" }) + const { result } = renderHook(() => useChartInterrogation({ data, onQuery })) + await act(async () => { + await result.current.ask("anything") + }) + 
expect(onQuery.mock.calls[0][1].focus).toBeUndefined() + }) + + it("passes the *latest* focus to ask(), not the focus at hook-creation time", async () => { + const onQuery = vi.fn().mockResolvedValue({ answer: "ok" }) + let focus: { datum: Record } | null = { + datum: { month: "Feb", revenue: 200 }, + } + const { result, rerender } = renderHook(() => useChartInterrogation({ data, onQuery, focus })) + // Update focus before asking + focus = { datum: { month: "Mar", revenue: 150 } } + rerender() + await act(async () => { + await result.current.ask("about this") + }) + expect(onQuery.mock.calls[0][1].focus?.datum.month).toBe("Mar") + }) + + describe("announce()", () => { + const onQuery: InterrogationQuery = async () => ({ answer: "" }) + + it("appends an assistant-only message to the transcript", () => { + const { result } = renderHook(() => useChartInterrogation({ data, onQuery })) + act(() => { + result.current.announce({ text: "Spike detected at 14:32" }) + }) + expect(result.current.history).toEqual([ + { role: "assistant", text: "Spike detected at 14:32" }, + ]) + }) + + it("does not call onQuery", async () => { + const spy = vi.fn().mockResolvedValue({ answer: "" }) + const { result } = renderHook(() => useChartInterrogation({ data, onQuery: spy })) + act(() => { + result.current.announce({ text: "Proactive note" }) + }) + expect(spy).not.toHaveBeenCalled() + }) + + it("APPENDS annotations (unlike ask which replaces them)", () => { + const { result } = renderHook(() => useChartInterrogation({ data, onQuery })) + act(() => { + result.current.announce({ + text: "First spike", + annotations: [{ type: "callout", ts: 1, label: "A" }], + }) + }) + act(() => { + result.current.announce({ + text: "Second spike", + annotations: [{ type: "callout", ts: 2, label: "B" }], + }) + }) + expect(result.current.annotations).toHaveLength(2) + expect(result.current.annotations.map((a) => a.label)).toEqual(["A", "B"]) + }) + + it("ignores empty / whitespace-only messages", () => { 
+ const { result } = renderHook(() => useChartInterrogation({ data, onQuery })) + act(() => { + result.current.announce({ text: " " }) + }) + expect(result.current.history).toEqual([]) + }) + + it("interleaves cleanly with ask()", async () => { + const spyQuery: InterrogationQuery = async () => ({ + answer: "user answer", + annotations: [{ type: "callout", label: "user-pick" }], + }) + const { result } = renderHook(() => useChartInterrogation({ data, onQuery: spyQuery })) + act(() => { + result.current.announce({ + text: "Watcher: spike", + annotations: [{ type: "callout", label: "watcher" }], + }) + }) + await act(async () => { + await result.current.ask("what was that?") + }) + // ask() REPLACES annotations; the watcher's annotation is gone after a fresh ask + expect(result.current.annotations.map((a) => a.label)).toEqual(["user-pick"]) + // History interleaves + expect(result.current.history.map((m) => m.role)).toEqual(["assistant", "user", "assistant"]) + }) + + it("reset() clears announcements", () => { + const { result } = renderHook(() => useChartInterrogation({ data, onQuery })) + act(() => { + result.current.announce({ + text: "Note", + annotations: [{ type: "callout" }], + }) + }) + act(() => result.current.reset()) + expect(result.current.history).toEqual([]) + expect(result.current.annotations).toEqual([]) + }) + }) +}) diff --git a/src/components/store/useChartInterrogation.ts b/src/components/store/useChartInterrogation.ts new file mode 100644 index 00000000..e33b84f1 --- /dev/null +++ b/src/components/store/useChartInterrogation.ts @@ -0,0 +1,270 @@ +"use client" +import { useCallback, useMemo, useRef, useState } from "react" +import type { Datum } from "../charts/shared/datumTypes" +import { summarizeData, type DataSummary } from "../data/DataSummarizer" +import { profileData } from "../ai/profileData" +import { suggestCharts } from "../ai/suggestCharts" +import type { ChartDataProfile, Suggestion } from "../ai/chartCapabilityTypes" +import type { 
/**
 * Identifies a single point of interest on the chart — typically the datum
 * the user is currently hovering, clicked, or otherwise focused on. When
 * provided, the LLM gets the explicit signal that the user is asking
 * "about *this specific point*" rather than the chart at large.
 */
export interface InterrogationFocus {
  /** The row the user is focused on. */
  datum: Datum
  /** Pixel x coordinate, when known. Useful for anchoring response annotations. */
  x?: number
  /** Pixel y coordinate, when known. */
  y?: number
  /** Optional source label — "hover" / "click" / "selection" / "manual". Forwarded to `onQuery` with the focus. */
  source?: "hover" | "click" | "selection" | "manual"
}

export interface InterrogationContext {
  /** The data extracted from the chart (or whatever caller passed in). */
  data: ReadonlyArray<Datum>
  /** Statistical summary, ready to send to an LLM. */
  summary: DataSummary
  /** Shape profile — present when `includeProfile` or `includeSuggestions` is enabled. */
  profile?: ChartDataProfile
  /** Heuristic chart suggestions — present when `includeSuggestions` is enabled. */
  suggestions?: ReadonlyArray<Suggestion>
  /** Optional caller-supplied chart component name (e.g. "LineChart"). */
  componentName?: string
  /** Optional caller-supplied chart props (accessor names, scales, etc.). */
  props?: Record<string, unknown>
  /**
   * The current focused datum — what the user is interactively pointing at.
   * Lets the LLM tailor responses to a specific point ("why is *this* one
   * higher than the rest?") and to anchor visual responses (callouts,
   * comments) back at the same coordinates.
   */
  focus?: InterrogationFocus
}

export interface InterrogationResult {
  /** Natural-language answer to display to the user. */
  answer: string
  /** Optional Semiotic annotations to overlay on the chart. */
  annotations?: ReadonlyArray<Datum>
}

export type InterrogationQuery = (
  query: string,
  context: InterrogationContext
) => Promise<InterrogationResult>

export interface InterrogationMessage {
  role: "user" | "assistant"
  text: string
}

export interface UseChartInterrogationOptions {
  /** Data backing the chart. Use whatever shape the chart consumes (rows, nodes, etc.). */
  data: ReadonlyArray<Datum> | null | undefined
  /** Async handler — typically calls your LLM with the query + summary. */
  onQuery: InterrogationQuery
  /** Annotations to seed the merged set (e.g. existing chart annotations). */
  initialAnnotations?: ReadonlyArray<Datum>
  /** Optional context passed through to onQuery for richer prompts. */
  componentName?: string
  /** Optional context passed through to onQuery. */
  props?: Record<string, unknown>
  /**
   * Include the shape `profile` in the interrogation context. Required to let an LLM
   * reason about candidate axes, distinct counts, hierarchy/network/geo detection, etc.
   */
  includeProfile?: boolean
  /**
   * Include heuristic chart `suggestions` in the interrogation context. Implies `includeProfile`.
   * Lets an LLM answer "would another chart show this better?" without re-deriving rules.
   */
  includeSuggestions?: boolean
  /** When `includeSuggestions` is true, rank by this intent. */
  suggestionsIntent?: IntentId | IntentId[]
  /** When `includeSuggestions` is true, cap the suggestion list. Default 5. */
  suggestionsMax?: number
  /**
   * The point on the chart the user is currently focused on. Forwarded to
   * onQuery so an LLM can answer "about this specific datum" rather than
   * "about the chart in general." Typically wired from a chart's
   * `onObservation` callback or the convenience `useChartFocus` hook.
   */
  focus?: InterrogationFocus | null
}

export interface UseChartInterrogationResult {
  /**
   * Ask a question. Updates history, annotations, loading, and error.
   * Blank / whitespace-only queries are ignored. Never rejects: a failing
   * `onQuery` is captured in `error` and answered with a fallback message.
   */
  ask: (query: string) => Promise<void>
  /**
   * Append an AI-initiated message to the transcript without a user query.
   *
   * Use for proactive narration — a streaming watcher that detected an
   * anomaly, a background analysis that surfaced an insight, an LLM that
   * decided to volunteer information mid-session. Synchronous; no `onQuery`
   * call. Annotations are APPENDED to the current AI annotation set, unlike
   * an `ask()` response, which replaces it.
   *
   * @example
   * announce({
   *   text: "Spike detected at 14:32 — 3.2σ above rolling mean.",
   *   annotations: [{ type: "callout", ts: now, value: 850, note: "Slow query?" }],
   * })
   */
  announce: (message: { text: string; annotations?: ReadonlyArray<Datum> }) => void
  /** Conversation history, oldest first. */
  history: ReadonlyArray<InterrogationMessage>
  /** Statistical summary of the data — memoized, safe to pass to a prompt. */
  summary: DataSummary
  /**
   * Merged annotations: `initialAnnotations` followed by the AI set (the latest
   * `ask()` response's annotations plus anything appended by `announce()` since).
   * Pass to the chart's `annotations` prop.
   */
  annotations: ReadonlyArray<Datum>
  /** True while at least one `ask()` is still awaiting `onQuery`. Cleared by `reset()`. */
  loading: boolean
  /** Last error from onQuery, if any. */
  error: Error | null
  /** Clear history, AI annotations, and error; responses still in flight are discarded. */
  reset: () => void
}

/**
 * Headless interrogation hook — a sibling to `useChartObserver`.
 *
 * Generates an LLM-friendly statistical summary of your chart's data, runs queries through
 * a caller-supplied `onQuery`, and merges any annotations the response returns so the chart
 * can highlight what the model is talking about.
 *
 * The hook owns no UI. Render whatever input/transcript surface fits your product.
 *
 * @param options - Data, the `onQuery` handler, and optional context (component name,
 *   props, profile/suggestion toggles, current focus) forwarded to `onQuery`.
 * @returns Stable `ask` / `announce` / `reset` callbacks plus the current transcript,
 *   summary, merged annotations, loading flag, and last error.
 *
 * @example
 * const { ask, history, annotations, loading } = useChartInterrogation({
 *   data,
 *   onQuery: async (q, ctx) => {
 *     const res = await fetch("/api/chat", { method: "POST", body: JSON.stringify({ q, summary: ctx.summary }) })
 *     return res.json()
 *   },
 * })
 * // Pass `annotations` to the chart and render `history` in your own chat surface.
 */
export function useChartInterrogation(
  options: UseChartInterrogationOptions
): UseChartInterrogationResult {
  const {
    data,
    onQuery,
    initialAnnotations,
    componentName,
    props,
    includeProfile,
    includeSuggestions,
    suggestionsIntent,
    suggestionsMax,
    focus,
  } = options

  const [history, setHistory] = useState<InterrogationMessage[]>([])
  const [aiAnnotations, setAiAnnotations] = useState<ReadonlyArray<Datum>>([])
  const [loading, setLoading] = useState(false)
  const [error, setError] = useState<Error | null>(null)

  const summary = useMemo(() => summarizeData(data ?? []), [data])

  const wantsProfile = includeProfile || includeSuggestions
  const profile = useMemo(
    () => (wantsProfile ? profileData(data ?? []) : undefined),
    [wantsProfile, data]
  )
  const suggestions = useMemo(
    () =>
      includeSuggestions && profile
        ? // Same null-safe fallback the summary and profile use, so a missing
          // dataset never reaches the suggestion engine as null/undefined.
          suggestCharts(data ?? [], {
            profile,
            intent: suggestionsIntent,
            maxResults: suggestionsMax ?? 5,
          })
        : undefined,
    [includeSuggestions, profile, data, suggestionsIntent, suggestionsMax]
  )

  // Latest-value refs so ask() always sees the current inputs without re-creating itself.
  const onQueryRef = useRef(onQuery)
  onQueryRef.current = onQuery
  const componentNameRef = useRef(componentName)
  componentNameRef.current = componentName
  const propsRef = useRef(props)
  propsRef.current = props
  const dataRef = useRef(data)
  dataRef.current = data
  const summaryRef = useRef(summary)
  summaryRef.current = summary
  const profileRef = useRef(profile)
  profileRef.current = profile
  const suggestionsRef = useRef(suggestions)
  suggestionsRef.current = suggestions
  const focusRef = useRef(focus)
  focusRef.current = focus

  // In-flight bookkeeping. `pendingRef` counts unsettled ask() calls so `loading`
  // only drops once the last one finishes; `epochRef` is bumped by reset() so a
  // response that arrives afterwards can be recognized as stale and discarded.
  const pendingRef = useRef(0)
  const epochRef = useRef(0)

  const ask = useCallback(async (query: string): Promise<void> => {
    const trimmed = query.trim()
    if (!trimmed) return

    const epoch = epochRef.current
    pendingRef.current += 1
    setLoading(true)
    setError(null)
    setHistory((prev) => [...prev, { role: "user", text: trimmed }])

    try {
      const result = await onQueryRef.current(trimmed, {
        data: (dataRef.current ?? []) as ReadonlyArray<Datum>,
        summary: summaryRef.current,
        profile: profileRef.current,
        suggestions: suggestionsRef.current,
        componentName: componentNameRef.current,
        props: propsRef.current,
        focus: focusRef.current ?? undefined,
      })
      // reset() ran while we were waiting — don't resurrect the cleared transcript.
      if (epochRef.current !== epoch) return
      setHistory((prev) => [...prev, { role: "assistant", text: result.answer }])
      // A response without annotations leaves the current AI annotations in place.
      if (result.annotations) setAiAnnotations(result.annotations)
    } catch (err) {
      if (epochRef.current !== epoch) return
      const e = err instanceof Error ? err : new Error(String(err))
      setError(e)
      setHistory((prev) => [
        ...prev,
        { role: "assistant", text: "Sorry, I couldn't process that query." },
      ])
    } finally {
      // Calls from a previous epoch were already zeroed out by reset().
      if (epochRef.current === epoch) {
        pendingRef.current -= 1
        if (pendingRef.current === 0) setLoading(false)
      }
    }
  }, [])

  const announce = useCallback(
    ({ text, annotations: newAnnotations }: { text: string; annotations?: ReadonlyArray<Datum> }) => {
      const trimmed = text.trim()
      if (!trimmed) return
      setHistory((prev) => [...prev, { role: "assistant", text: trimmed }])
      if (newAnnotations && newAnnotations.length > 0) {
        // Merge — proactive announcements should ADD to the existing AI annotation
        // set, not replace it the way a fresh user question does. A live watcher
        // calling announce() repeatedly should accumulate notes on the chart.
        setAiAnnotations((prev) => [...prev, ...newAnnotations])
      }
    },
    [],
  )

  const reset = useCallback(() => {
    // Invalidate every in-flight ask() so late responses are dropped.
    epochRef.current += 1
    pendingRef.current = 0
    setHistory([])
    setAiAnnotations([])
    setError(null)
    setLoading(false)
  }, [])

  const annotations = useMemo(() => {
    const initial = initialAnnotations ?? []
    // Reuse an existing array when only one side has content, to keep the reference stable.
    if (initial.length === 0) return aiAnnotations
    if (aiAnnotations.length === 0) return initial
    return [...initial, ...aiAnnotations]
  }, [initialAnnotations, aiAnnotations])

  return { ask, announce, history, summary, annotations, loading, error, reset }
}