Skip to content

Commit 0638121

Browse files
committed
Shared Backend Refactoring
1 parent 818dc2c commit 0638121

12 files changed

Lines changed: 353 additions & 246 deletions

File tree

ARCHITECTURE.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,13 @@ The IR is the skeleton of the command line; the bindings define the typed interf
7171

7272
## Styx 1 vs Styx 2
7373

74-
| | Styx 1 (Python) | Styx 2 (TypeScript) |
75-
| ------------------- | ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- |
76-
| **IR** | Dataclass hierarchy (`Param[T]` with body types) | Algebraic expr tree with `kind` discriminant |
77-
| **Optimization** | Minimal (string merging) | Pass-based pipeline (flatten, simplify, canonicalize) |
78-
| **Type resolution** | Direct mapping in frontend; each backend re-derives types via language provider protocol | Solver produces a universal `BoundType` tree; backends just translate it |
74+
| | Styx 1 (Python) | Styx 2 (TypeScript) |
75+
| ------------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ |
76+
| **IR** | Dataclass hierarchy (`Param[T]` with body types) | Algebraic expr tree with `kind` discriminant |
77+
| **Optimization** | Minimal (string merging) | Pass-based pipeline (flatten, simplify, canonicalize) |
78+
| **Type resolution** | Direct mapping in frontend; each backend re-derives types via language provider protocol | Solver produces a universal `BoundType` tree; backends just translate it |
7979
| **Backends** | Python mature, TS/R partial; each implements a complex `LanguageProvider` protocol | TypeScript + JSON Schema complete; Python/R stubs; simpler since solver does heavy lifting |
80-
| **Output files** | First-class: path templates with param refs, suffix stripping, fallbacks | Not yet modeled to the same degree |
80+
| **Output files** | First-class: path templates with param refs, suffix stripping, fallbacks | Not yet modeled to the same degree |
8181

8282
Key Styx 1 features to eventually match:
8383

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import type { BoundType } from "../bindings/index.js";
2+
import type { CodegenContext } from "../manifest/index.js";
3+
import { findDoc } from "./find-doc.js";
4+
import { findStructNode } from "./find-struct-node.js";
5+
import { resolveFieldBinding } from "./resolve-field-binding.js";
6+
7+
/**
8+
* Metadata extracted for each field of a struct type.
9+
*
10+
* `doc` is the field's description, recovered via `findDoc` from the wrapper node
* or, failing that, from the binding node.
11+
* `defaultValue` is pulled from the wrapper or binding node's metadata.
*
* Both properties are optional: `collectFieldInfo` only sets one when a value was found.
12+
*/
13+
export interface FieldInfo {
14+
doc?: string; // left unset when no description was found on either node
15+
defaultValue?: string | number | boolean; // left unset when neither node's metadata defines one
16+
}
17+
18+
/**
19+
* Collect field metadata (doc, defaultValue) for each field of a struct type.
20+
*
21+
* Walks the IR tree to find the sequence node containing the struct's fields,
22+
* then resolves each child to its field binding. Metadata is recovered from both
23+
* the wrapper node (where the parser hoists doc) and the binding node (where the
24+
* solver places the binding after sequence collapse).
*
* @param ctx - Codegen context; its root expression (`ctx.expr`) is the tree that is searched.
* @param structType - The struct whose fields are being described.
* @returns A map keyed by field binding name. The map is empty when no matching
* sequence node is found; a resolved field without any metadata still gets an
* (empty) entry.
25+
*/
26+
export function collectFieldInfo(
27+
ctx: CodegenContext,
28+
structType: Extract<BoundType, { kind: "struct" }>,
29+
): Map<string, FieldInfo> {
30+
const info = new Map<string, FieldInfo>();
31+
32+
const structNode = findStructNode(ctx.expr, ctx, structType);
33+
if (!structNode) return info; // no sequence owns this struct's fields: nothing to report
34+
35+
for (const child of structNode.attrs.nodes) {
36+
const match = resolveFieldBinding(child, ctx, structType);
37+
if (!match) continue; // child does not resolve to a field of this struct
38+
const { binding, wrapperNode } = match;
39+
const fieldInfo: FieldInfo = {};
40+
const fieldType = structType.fields[binding.name]!; // safe: resolveFieldBinding only matches names present in `fields`
41+
// Check wrapper node first (doc may be hoisted there), then binding node
42+
const doc = findDoc(wrapperNode, fieldType) ?? findDoc(binding.node, fieldType);
43+
if (doc) fieldInfo.doc = doc; // only set when a description was actually found
44+
const defaultValue = wrapperNode.meta?.defaultValue ?? binding.node.meta?.defaultValue; // wrapper wins, binding node is the fallback
45+
if (defaultValue !== undefined) fieldInfo.defaultValue = defaultValue; // explicit undefined check keeps falsy defaults (0, "", false)
46+
info.set(binding.name, fieldInfo); // recorded even if neither doc nor defaultValue was set
47+
}
48+
49+
return info;
50+
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import type { BoundType } from "../bindings/index.js";
2+
import { Scope } from "./scope.js";
3+
import { structKey, unionKey } from "./type-keys.js";
4+
5+
/** A named compound type (struct or union) discovered during type collection. */
6+
export interface NamedType {
7+
name: string; // the name assigned to the type (in collectNamedTypes, the value returned by `scope.add`)
8+
type: BoundType; // the struct or union the name refers to
9+
}
10+
11+
/**
12+
* Recursively collect all struct/union types and assign unique names.
13+
*
14+
* Walks the BoundType tree depth-first, using structural keys (`structKey`,
15+
* `unionKey`) to deduplicate types that appear multiple times in the tree.
16+
* Each unique struct/union gets a name generated by applying `nameTransform`
17+
* to a hint string (derived from the root name or field/variant names).
*
* Hint propagation: a struct passes each field's name as the hint for that field's
* type; a union passes the variant's name, falling back to the union's own hint when
* the variant has none; `optional` and `list` pass their hint through unchanged.
* A struct/union whose key is already known is skipped entirely, including its children.
18+
*
19+
* @param rootType - The root BoundType to walk.
20+
* @param rootName - The hint for the root type's name.
21+
* @param scope - Symbol scope for collision avoidance.
22+
* @param nameTransform - Converts a hint string to a type name (e.g. pascalCase, snake_case).
23+
* @returns `namedTypes` maps structural keys to assigned names, `typeDecls` lists declarations
* in the order they were first encountered (a parent before the types nested inside it).
24+
*/
25+
export function collectNamedTypes(
26+
rootType: BoundType,
27+
rootName: string,
28+
scope: Scope,
29+
nameTransform: (hint: string) => string,
30+
): { namedTypes: Map<string, string>; typeDecls: NamedType[] } {
31+
const namedTypes = new Map<string, string>();
32+
const typeDecls: NamedType[] = [];
33+
34+
function visit(type: BoundType, hint: string): void {
35+
switch (type.kind) {
36+
case "struct": {
37+
const key = structKey(type);
38+
if (!namedTypes.has(key)) { // first sighting of this structure; repeats keep the earlier name
39+
const name = scope.add(nameTransform(hint)); // the name actually used is whatever the scope hands back
40+
namedTypes.set(key, name); // registered before the fields are visited
41+
typeDecls.push({ name, type });
42+
for (const [fieldName, fieldType] of Object.entries(type.fields)) {
43+
visit(fieldType, fieldName); // nested types are named after the field that holds them
44+
}
45+
}
46+
break;
47+
}
48+
case "union": {
49+
const key = unionKey(type);
50+
if (!namedTypes.has(key)) { // same dedup rule as for structs
51+
const name = scope.add(nameTransform(hint));
52+
namedTypes.set(key, name);
53+
typeDecls.push({ name, type });
54+
for (const v of type.variants) {
55+
visit(v.type, v.name ?? hint); // unnamed variants inherit the union's hint
56+
}
57+
}
58+
break;
59+
}
60+
case "optional":
61+
visit(type.inner, hint); // wrapper gets no name of its own; look inside
62+
break;
63+
case "list":
64+
visit(type.item, hint); // wrapper gets no name of its own; look inside
65+
break;
66+
default:
67+
break; // every other kind is neither named nor traversed
68+
}
69+
}
70+
71+
visit(rootType, rootName);
72+
return { namedTypes, typeDecls };
73+
}
74+
75+
/**
76+
* Create a type name resolver from a namedTypes map.
77+
* Returns a function that maps a BoundType to its assigned name (if any).
*
* The resolver looks structs up by `structKey` and unions by `unionKey`. It yields
* `undefined` for every other kind, and for a struct/union whose key is not in the map.
*
* @param namedTypes - Map from structural key to assigned name (the shape produced by `collectNamedTypes`).
78+
*/
79+
export function resolveTypeName(
80+
namedTypes: Map<string, string>,
81+
): (type: BoundType) => string | undefined {
82+
return (type) => {
83+
if (type.kind === "struct") return namedTypes.get(structKey(type));
84+
if (type.kind === "union") return namedTypes.get(unionKey(type));
85+
return undefined; // only structs and unions are looked up
86+
};
87+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import type { BoundType } from "../bindings/index.js";
2+
import type { Expr } from "../ir/index.js";
3+
4+
/**
5+
* Find a description from an IR node, traversing through wrapper nodes.
6+
*
7+
* The parser's `wrapNode` hoists doc metadata to the outermost wrapper node,
8+
* but the solver's simplify pass can collapse sequences, burying descriptions
9+
* deeper in the tree.
10+
*
11+
* This traversal is type-aware: it only enters sequences when the corresponding
12+
* BoundType is not a struct. Struct sequences have their own field collection
13+
* call, so entering them would steal nested struct children's descriptions.
14+
*
15+
* @param node - The IR node to search for a description.
16+
* @param fieldType - The BoundType of the field, used to determine traversal boundaries.
* @returns The first non-empty description found (the node's own description takes
* precedence over anything nested), or `undefined` when none is reachable.
17+
*/
18+
export function findDoc(node: Expr, fieldType: BoundType): string | undefined {
19+
if (node.meta?.doc?.description) return node.meta.doc.description; // truthiness check: an empty description counts as missing
20+
switch (node.kind) {
21+
case "optional":
22+
return findDoc(node.attrs.node, fieldType.kind === "optional" ? fieldType.inner : fieldType); // unwrap the type in step with the node; keep it as-is if the kinds disagree
23+
case "repeat":
24+
return findDoc(node.attrs.node, fieldType.kind === "list" ? fieldType.item : fieldType); // same idea: repeat nodes pair with list types
25+
case "sequence": {
26+
// Only traverse into sequences that were collapsed (non-struct field types).
27+
// Struct sequences have their own collectFieldInfo call for their children.
28+
if (fieldType.kind === "struct") return undefined;
29+
for (const child of node.attrs.nodes) {
30+
const doc = findDoc(child, fieldType); // children are searched in order with the same field type
31+
if (doc) return doc;
32+
}
33+
return undefined;
34+
}
35+
default:
36+
return undefined; // other node kinds are not traversed
37+
}
38+
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import type { BoundType } from "../bindings/index.js";
2+
import type { Expr } from "../ir/index.js";
3+
import type { CodegenContext } from "../manifest/index.js";
4+
import { resolveFieldBinding } from "./resolve-field-binding.js";
5+
6+
/**
7+
* Find the sequence node whose child bindings match a struct type's fields.
8+
*
9+
* Traverses through optional, repeat, and alternative wrappers to find the
10+
* sequence that directly contains the struct's field bindings. This is necessary
11+
* because the solver may collapse `seq(lit("--flag"), terminal)` into the terminal,
12+
* burying bindings deeper in the tree.
13+
*
14+
* Uses a two-phase check for sequences, with a recursive descent between the phases:
15+
* 1. Direct binding check (`ctx.resolve`) - matches when bindings are on immediate children
16+
* 2. Recursive binding check (`resolveFieldBinding`) - matches when solver collapsed
17+
* a seq(lit, terminal) and the binding is buried deeper
18+
*
19+
* Order of evaluation is: phase 1, then recursion into every child, then phase 2.
* Phase 2 runs last to avoid falsely matching an outer sequence when an inner
20+
* sequence is the actual struct owner (e.g. `seq(lit("--flag"), seq(field1, field2))`).
*
* A binding only counts as a match when its name is a key of `structType.fields` and
* its type is the very same object (`===`) as that field's type.
*
* @param node - The IR node at which to start the search.
* @param ctx - Codegen context used to resolve nodes to bindings.
* @param structType - The struct whose owning sequence is wanted.
* @returns The first matching sequence node, or `undefined` if none is found.
21+
*/
22+
export function findStructNode(
23+
node: Expr,
24+
ctx: CodegenContext,
25+
structType: Extract<BoundType, { kind: "struct" }>,
26+
): Extract<Expr, { kind: "sequence" }> | undefined {
27+
switch (node.kind) {
28+
case "sequence": {
29+
// Phase 1: Check if any direct child has a binding matching a struct field
30+
for (const child of node.attrs.nodes) {
31+
const binding = ctx.resolve(child);
32+
if (
33+
binding &&
34+
binding.name in structType.fields &&
35+
binding.type === structType.fields[binding.name]
36+
) {
37+
return node; // one matching child is enough to identify this sequence as the owner
38+
}
39+
}
40+
// Recurse into child nodes first (prefer deeper matches)
41+
for (const child of node.attrs.nodes) {
42+
const result = findStructNode(child, ctx, structType);
43+
if (result) return result;
44+
}
45+
// Phase 2: Check via resolveFieldBinding for collapsed sequences
46+
// where bindings are buried inside collapsed seq(lit, terminal)
47+
for (const child of node.attrs.nodes) {
48+
if (resolveFieldBinding(child, ctx, structType)) return node;
49+
}
50+
return undefined;
51+
}
52+
case "optional":
53+
return findStructNode(node.attrs.node, ctx, structType); // transparent wrapper: search the wrapped node
54+
case "repeat":
55+
return findStructNode(node.attrs.node, ctx, structType); // transparent wrapper: search the wrapped node
56+
case "alternative": {
57+
for (const alt of node.attrs.alts) {
58+
const result = findStructNode(alt, ctx, structType);
59+
if (result) return result; // first alternative that contains a match wins
60+
}
61+
return undefined;
62+
}
63+
default:
64+
return undefined; // other node kinds are not searched
65+
}
66+
}

packages/core/src/backend/index.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11
export type { Backend, EmitError, EmitResult, EmitWarning, TypeMap } from "./backend.js";
22
export { CodeBuilder } from "./code-builder.js";
3+
export type { FieldInfo } from "./collect-field-info.js";
4+
export { collectFieldInfo } from "./collect-field-info.js";
5+
export type { NamedType } from "./collect-named-types.js";
6+
export { collectNamedTypes, resolveTypeName } from "./collect-named-types.js";
7+
export { findDoc } from "./find-doc.js";
8+
export { findStructNode } from "./find-struct-node.js";
9+
export { resolveFieldBinding } from "./resolve-field-binding.js";
310
export { Scope } from "./scope.js";
411
export type { JsonSchema } from "./schema/index.js";
512
export { generateSchema, JsonSchemaBackend } from "./schema/index.js";
613
export { camelCase, pascalCase, screamingSnakeCase, snakeCase } from "./string-case.js";
14+
export { structKey, typeKey, unionKey } from "./type-keys.js";
715
export { generateTypeScript, TypeScriptBackend } from "./typescript/index.js";
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import type { Binding, BoundType } from "../bindings/index.js";
2+
import type { Expr } from "../ir/index.js";
3+
import type { CodegenContext } from "../manifest/index.js";
4+
5+
/**
6+
* Resolve a struct child node to its field binding, handling collapsed sequences.
7+
*
8+
* When the solver's simplify pass collapses `seq(lit("--flag"), terminal)` into
9+
* just the terminal, the binding ends up on the inner node while metadata (doc,
10+
* defaultValue) may remain on the outermost wrapper. This function recursively
11+
* descends through collapsed sequences to find the binding, tracking the outermost
12+
* node for metadata recovery.
13+
*
14+
* Uses type identity (`===`) to verify the binding matches the struct's field type,
15+
* preventing cross-nesting name collisions where an inner struct has a field with
16+
* the same name as the outer struct.
*
* Only `sequence` nodes are descended into; any other node kind without a matching
* binding of its own yields `undefined`.
*
* @param node - The node to resolve.
* @param ctx - Codegen context used to resolve nodes to bindings.
* @param structType - The struct whose fields the binding must belong to.
* @param outermost - The node at which the search started; set by the recursive calls,
* and defaults to `node` when omitted.
* @returns The matching binding together with the outermost node (`wrapperNode`),
* or `undefined` when no matching binding is found.
17+
*/
18+
export function resolveFieldBinding(
19+
node: Expr,
20+
ctx: CodegenContext,
21+
structType: Extract<BoundType, { kind: "struct" }>,
22+
outermost?: Expr,
23+
): { binding: Binding; wrapperNode: Expr } | undefined {
24+
const wrapper = outermost ?? node; // on the initial call the node is its own wrapper
25+
const binding = ctx.resolve(node);
26+
if (
27+
binding &&
28+
binding.name in structType.fields &&
29+
binding.type === structType.fields[binding.name]
30+
) {
31+
return { binding, wrapperNode: wrapper }; // always report the starting node, not the node the binding was found on
32+
}
33+
// Recurse into collapsed sequences to find the binding deeper
34+
if (node.kind === "sequence") {
35+
for (const inner of node.attrs.nodes) {
36+
const result = resolveFieldBinding(inner, ctx, structType, wrapper);
37+
if (result) return result; // first matching descendant, in child order
38+
}
39+
}
40+
return undefined;
41+
}

0 commit comments

Comments
 (0)