forked from nexu-io/open-design
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsystem.ts
More file actions
774 lines (723 loc) · 43.8 KB
/
system.ts
File metadata and controls
774 lines (723 loc) · 43.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
/**
* Prompt composer. The base is the OD-adapted "expert designer" system
* prompt (see ./official-system.ts) — a full identity, workflow, and
* content-philosophy charter. Stacked on top:
*
* 1. The discovery + planning + huashu-philosophy layer (./discovery.ts)
* — interactive question-form syntax, direction-picker fork,
* brand-spec extraction, TodoWrite reinforcement, 5-dim critique,
* and the embedded `directions.ts` library.
* 2. The active design system's DESIGN.md (if any) — palette, typography,
* spacing rules treated as authoritative tokens.
* 3. The active skill's SKILL.md (if any) — workflow specific to the
* kind of artifact being built. When the skill ships a seed
* (`assets/template.html`) and references (`references/layouts.md`,
* `references/checklist.md`), we inject a hard pre-flight rule above
* the skill body so the agent reads them BEFORE writing any code.
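* 4. For decks (skillMode === 'deck' OR metadata.kind === 'deck'), the
* deck framework directive (./deck-framework.ts) is pinned after the
* project metadata block — only the media generation contract and the
* active-design-system override may follow it — so it overrides any
* softer slide-handling wording earlier in the stack —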
* this is the load-bearing nav / counter / scroll JS / print
* stylesheet contract that PDF stitching depends on. We also fire on
* the metadata path so deck-kind projects without a bound skill
* (skill_id null) still get a framework, instead of having the agent
* re-author scaling / nav / print logic from scratch each turn. When
* the active skill ships its own seed (skill body references
* `assets/template.html`), we defer to that seed and skip the generic
* skeleton — the skill's framework wins to avoid double-injection.
*
* The composed string is what the daemon sees as `systemPrompt` and what
* the Anthropic path sends as `system`.
*/
import type { ProjectMetadata, ProjectTemplate } from '../api/projects.js';
import { OFFICIAL_DESIGNER_PROMPT } from './official-system.js';
import { DISCOVERY_AND_PHILOSOPHY } from './discovery.js';
import { DECK_FRAMEWORK_DIRECTIVE } from './deck-framework.js';
import { MEDIA_GENERATION_CONTRACT } from './media-contract.js';
/**
 * Base charter of the composed prompt: an exported alias of
 * `OFFICIAL_DESIGNER_PROMPT` from ./official-system.js.
 */
export const BASE_SYSTEM_PROMPT = OFFICIAL_DESIGNER_PROMPT;
// Voice-count threshold for the audio metadata block: when more voices than
// this are supplied, the prompt adds a "showing the first N of M available
// voices" note. NOTE(review): the actual truncation presumably happens in the
// voice question-form renderer — confirm there.
const ELEVENLABS_VOICE_PROMPT_OPTION_LIMIT = 100;
/**
 * One provider voice choice handed to the composer through
 * `ComposeInput.audioVoiceOptions`.
 */
export interface AudioVoiceOption {
// Voice name. NOTE(review): presumably the human-readable label — confirm against the form renderer.
name: string;
// Provider voice identifier. NOTE(review): presumably the value passed to `--voice` — confirm.
voiceId: string;
// Optional category; may be omitted or null.
category?: string | null;
// Optional string-to-string labels; may be omitted or null.
labels?: Record<string, string> | null;
}
// Lead-in shared by every sentence that
// `formatElevenLabsVoiceOptionsErrorForPrompt` returns.
const ELEVENLABS_VOICE_OPTIONS_PROMPT_PREFIX = 'ElevenLabs voice list could not be loaded';
// Fixed reason phrases keyed by HTTP status code (as a string).
// `formatElevenLabsVoiceOptionsErrorForPrompt` looks the code up here instead
// of echoing whatever status text appeared in the raw error message; codes
// missing from this table are reported without a phrase.
const PROMPT_SAFE_HTTP_STATUS_LABELS: Record<string, string> = {
'400': 'Bad Request',
'401': 'Unauthorized',
'403': 'Forbidden',
'404': 'Not Found',
'429': 'Too Many Requests',
'500': 'Internal Server Error',
'502': 'Bad Gateway',
'503': 'Service Unavailable',
'504': 'Gateway Timeout',
};
/**
 * Builds the "UI locale override" prompt section for a non-English UI locale.
 *
 * @param locale - Locale tag chosen in the client; surrounding whitespace is ignored.
 * @returns The section as newline-joined text, or `''` when the locale is
 *   missing, blank, or `en` (compared case-insensitively). For `zh-CN` the
 *   section also carries sample Simplified Chinese copy for the quick brief.
 */
function renderUiLocalePrompt(locale: string | undefined): string {
  const tag = (locale ?? '').trim();
  if (tag === '' || tag.toLowerCase() === 'en') {
    return '';
  }

  // Only the two Chinese tags get a friendly name; any other tag is echoed as-is.
  let languageName: string;
  switch (tag) {
    case 'zh-CN':
      languageName = 'Simplified Chinese';
      break;
    case 'zh-TW':
      languageName = 'Traditional Chinese';
      break;
    default:
      languageName = tag;
  }

  const section: string[] = [
    '# UI locale override',
    '',
    `The Open Design UI locale for this run is \`${tag}\` (${languageName}). All user-visible chat prose and generated UI controls must follow this locale, especially \`<question-form>\` titles, descriptions, labels, placeholders, helper text, and option labels. Keep machine-readable ids and object option \`value\` fields exact and unlocalized.`,
    'Exception: for the default task-type form, keep the `taskType` option labels as the canonical routing choices: `Prototype`, `Live artifact`, `Slide deck`, `Image`, `Video`, `HyperFrames`, `Audio`, `Other`. Do not translate, reorder, or rewrite those option labels.',
  ];

  if (tag !== 'zh-CN') {
    return section.join('\n');
  }

  // Reference copy so the default quick brief is rendered in Simplified Chinese.
  const simplifiedChineseBrief: string[] = [
    '',
    'For the default quick brief in Simplified Chinese, use copy like:',
    '- title: `快速简报 — 30 秒`',
    '- description: `开始生成前我会先确认这些信息。不适用的可以跳过,我会补上默认值。`',
    '- output label/options: `我们要做什么?` / `幻灯片 / 路演稿`, `单页网页原型 / 落地页`, `多屏应用原型`, `数据看板 / 工具界面`, `编辑式 / 营销页面`, `其他 — 我来描述`',
    '- platform label/options: `目标平台` / `响应式网页`, `桌面网页`, `iOS 应用`, `Android 应用`, `平板应用`, `桌面应用`, `固定画布 (1920×1080)`',
    '- audience label/placeholder: `目标用户` / `例如:早期投资人、开发者工具采购者、内部高管评审`',
    '- tone label/options: `视觉调性` / `编辑 / 杂志感`, `现代极简`, `活泼 / 插画感`, `科技 / 工具型`, `奢华 / 精致`, `粗野 / 实验性`, `人性化 / 亲切`',
    '- brand label/options: `品牌背景` / `帮我选一个方向`, `我有品牌规范 — 稍后分享`, `参考网站 / 截图 — 稍后附上`',
    '- scale label/placeholder: `大概需要多少内容?` / `例如:8 页幻灯片、1 个落地页 + 3 个子页面、4 个移动端界面`',
    '- constraints label/placeholder: `还有什么需要知道的吗?` / `真实文案、必须使用的字体、需要避免的内容、截止时间…`',
  ];
  return [...section, ...simplifiedChineseBrief].join('\n');
}
/**
 * Flattens text onto a single line: every run of whitespace (line breaks
 * included) becomes one space, and leading/trailing whitespace is dropped.
 *
 * @param value - Raw text, possibly multi-line.
 * @returns The single-line, single-spaced text; `''` when `value` is blank.
 */
function normalizePromptText(value: string): string {
  // Splitting on whitespace runs leaves empty strings at the edges when the
  // input starts or ends with whitespace; dropping them is the "trim" step.
  const words = value.split(/\s+/).filter((word) => word.length > 0);
  return words.join(' ');
}
/**
 * Converts a raw voice-lookup error into a short, fixed-wording sentence for
 * the system prompt. The raw message is never echoed: only a recognised
 * condition (missing API key, or an HTTP status code) influences the output.
 *
 * @param error - Raw error text from the voice lookup; may be multi-line.
 * @returns `undefined` when there is no error text; otherwise one sentence
 *   starting with the shared ElevenLabs prefix that tells the agent what to
 *   relay to the user.
 */
export function formatElevenLabsVoiceOptionsErrorForPrompt(
  error: string | undefined,
): string | undefined {
  const trimmed = normalizePromptText(error ?? '');
  if (!trimmed) return undefined;

  if (/no ElevenLabs API key/i.test(trimmed)) {
    return `${ELEVENLABS_VOICE_OPTIONS_PROMPT_PREFIX} because the ElevenLabs API key is missing. Tell the user to configure it in Settings or paste a voice id manually.`;
  }

  // First standalone three-digit number in the HTTP status range (100–599).
  // Covers both "(401 Unauthorized)" and a bare "401 Unauthorized", because
  // "(", ")" and whitespace all form word boundaries around the digits.
  // Numbers outside that range (e.g. "900 ms", "000") are not status codes
  // and must not be reported as one. Any status text in the raw error is
  // ignored on purpose; the phrase comes from the fixed label table.
  const statusCode = trimmed.match(/\b([1-5]\d{2})\b/)?.[1];
  if (statusCode) {
    const statusText = PROMPT_SAFE_HTTP_STATUS_LABELS[statusCode] ?? '';
    const suffix = statusText ? ` ${statusText}` : '';
    return `${ELEVENLABS_VOICE_OPTIONS_PROMPT_PREFIX} (${statusCode}${suffix}). Tell the user to retry the lookup or paste a voice id manually.`;
  }

  return `${ELEVENLABS_VOICE_OPTIONS_PROMPT_PREFIX}. Tell the user to retry the lookup or paste a voice id manually.`;
}
/**
 * Prompt section that switches off the turn-1 discovery form.
 * `composeSystemPrompt` emits it ahead of the discovery layer when
 * `metadata.skipDiscoveryBrief === true`.
 */
export const SKIP_DISCOVERY_BRIEF_OVERRIDE = `# Automated project mode — skip discovery form
This project was created through the daemon API with \`skipDiscoveryBrief: true\`. Override the discovery rules below: do NOT emit \`<question-form id="discovery">\`, do NOT show "Quick brief — 30 seconds", and do NOT ask a first-turn clarification form. Do not call AskUserQuestion, do not emit any question form or choice card, and do not wait for user input. Treat the user's first message and project metadata as the brief, choose reasonable defaults for any missing details, then proceed directly to planning/building under the normal artifact workflow.`;
// Closing prompt section that makes the active design system the project's
// visual direction and forbids direction pickers. `composeSystemPrompt`
// appends it as the final part whenever a non-blank design system body is
// supplied.
const ACTIVE_DESIGN_SYSTEM_VISUAL_DIRECTION_OVERRIDE = `
---
## Active design system visual direction
Active design system exception: the active design system is the visual direction for this project. Use its DESIGN.md palette, typography, spacing, component rules, and theme tokens as the source of truth for color and mood.
- Do not ask the user to pick a separate theme color, visual direction, palette, typography mood, or direction card.
- Do not emit a direction question-form, a \`direction-cards\` picker, or any visual-direction card while an active design system is present.
- If an earlier discovery answer asks to "Pick a direction for me", treat that as already satisfied by the active design system and continue with the plan.
- When a downstream framework mentions "active direction" or "theme tokens", bind those fields from the active design system instead of the built-in direction library.
`;
/**
 * Argument object for `composeSystemPrompt`. Every field is optional; the
 * per-field comments below describe what each one contributes to the
 * composed prompt.
 */
export interface ComposeInput {
skillBody?: string | undefined;
skillName?: string | undefined;
skillMode?:
| 'prototype'
| 'deck'
| 'template'
| 'design-system'
| 'image'
| 'video'
| 'audio'
| undefined;
designSystemBody?: string | undefined;
designSystemTitle?: string | undefined;
// Personal-memory block (auto-extracted facts + the hand-edited
// MEMORY.md index). The daemon side composes this on disk and the
// BYOK side fetches it from `GET /api/memory/system-prompt`; either
// way the string is folded in right after the base charter so the
// model treats it as preferences/context rather than hard rules.
memoryBody?: string | undefined;
// Project-level metadata captured by the new-project panel. Drives the
// agent's understanding of artifact kind, fidelity, speaker-notes intent
// and animation intent. Missing fields here are exactly what the
// discovery form should re-ask the user about on turn 1.
metadata?: ProjectMetadata | undefined;
// The template the user picked in the From-template tab, when present.
// Snapshot of HTML files that the agent should treat as a starting
// reference rather than a fixed deliverable.
template?: ProjectTemplate | undefined;
// Optional `## Active plugin` / `## Plugin inputs` / `## Plugin atoms`
// block (PB1). Daemon callers feed in `renderPluginBlock(snapshot)`;
// contracts-side callers running the API fallback may still pass the
// block through. v1 spec §11.8 routes plugin runs through the daemon
// (web returns 409 when a plugin is bound), so contracts callers only
// see this on a daemon-bound run that uses the contracts composer.
pluginBlock?: string | undefined;
// Plan §3.L2 / spec §23.4 — pre-rendered `## Active stage` blocks
// produced by `renderActiveStageBlock(stageId, atomBodies)`. The
// contracts composer simply splices them in after the plugin block;
// every block is already self-contained markdown.
activeStageBlocks?: ReadonlyArray<string> | undefined;
// Provider voice choices fetched by the app before composing the
// prompt. Used for ElevenLabs speech discovery so the agent can
// render a select question-form instead of asking the user to paste
// raw ids.
audioVoiceOptions?: AudioVoiceOption[] | undefined;
// When voice discovery fails, surface the error reason so the agent
// can tell the user why the dropdown is unavailable instead of
// pretending there were simply no voices.
audioVoiceOptionsError?: string | undefined;
// When set to 'plain', suppresses tool_calls so API/BYOK-mode models
// only emit <artifact> blocks (they cannot execute tools).
streamFormat?: string | undefined;
// UI locale selected by the client. User-visible generated form copy
// must follow this locale even when the user's initial prompt is brief.
locale?: string | undefined;
// Free-form instructions the user set at the global (user-level)
// settings panel. Injected after personal memory.
userInstructions?: string | undefined;
// Free-form instructions the user set on this specific project.
// Injected after user-level instructions and before the design system.
projectInstructions?: string | undefined;
}
/**
 * Assembles the full system prompt by appending sections to a list in a
 * fixed order and joining them with no separator (each section carries its
 * own leading newlines / rules). Order of emission:
 *
 * 1. API-mode override — only when `streamFormat === 'plain'`.
 * 2. Skip-discovery override — only when `metadata.skipDiscoveryBrief === true`.
 * 3. UI locale override — only when `renderUiLocalePrompt(locale)` is non-empty.
 * 4. `DISCOVERY_AND_PHILOSOPHY`, then `BASE_SYSTEM_PROMPT` under the
 *    "Identity and workflow charter (background)" heading — always.
 * 5. Personal memory, user-level instructions, project-level instructions —
 *    each only when non-blank.
 * 6. Active design system, then active skill (with the `derivePreflight`
 *    text) — each only when its body is non-blank.
 * 7. Plugin block and every non-blank active stage block.
 * 8. The project metadata block, when `renderMetadataBlock` returns text.
 * 9. The deck framework directive — unconditionally for deck projects,
 *    behind an "if this brief is a slide deck" preface for freeform
 *    projects — skipped when the skill body references
 *    `assets/template.html`.
 * 10. The media generation contract for image / video / audio surfaces.
 * 11. The active-design-system visual direction override, when a design
 *     system body is present.
 *
 * @returns The concatenated prompt string.
 */
export function composeSystemPrompt({
skillBody,
skillName,
skillMode,
designSystemBody,
designSystemTitle,
memoryBody,
metadata,
template,
pluginBlock,
activeStageBlocks,
audioVoiceOptions,
audioVoiceOptionsError,
streamFormat,
locale,
userInstructions,
projectInstructions,
}: ComposeInput): string {
// Discovery + philosophy goes FIRST so its hard rules ("emit a form on
// turn 1", "branch on brand on turn 2", "TodoWrite on turn 3", run
// checklist + critique before <artifact>) win precedence over softer
// wording later in the official base prompt.
const parts: string[] = [];
const activeDesignSystemBody = designSystemBody?.trim();
// API/BYOK mode (streamFormat === 'plain'): no tools are wired through
// to the model, but the discovery layer + base prompt below still tell
// it to call TodoWrite/Read/Write/Edit/Bash/WebFetch. Without an
// explicit top-anchored override, the model invents pseudo-tool markup
// (`<todo-list>`, `[读取 X]`) instead of producing real progress
// events — see #313. Pin this preamble ABOVE DISCOVERY_AND_PHILOSOPHY
// so it beats the discovery layer's own "these override anything
// later" header.
if (streamFormat === 'plain') {
parts.push(API_MODE_OVERRIDE);
parts.push('\n\n---\n\n');
}
if (metadata?.skipDiscoveryBrief === true) {
parts.push(SKIP_DISCOVERY_BRIEF_OVERRIDE);
parts.push('\n\n---\n\n');
}
const localePrompt = renderUiLocalePrompt(locale);
if (localePrompt) {
parts.push(localePrompt);
parts.push('\n\n---\n\n');
}
parts.push(
DISCOVERY_AND_PHILOSOPHY,
'\n\n---\n\n# Identity and workflow charter (background)\n\n',
BASE_SYSTEM_PROMPT,
);
// Mirrors the daemon-side composer in apps/daemon/src/prompts/system.ts —
// keep both copies of this preamble in sync so a CLI chat and a BYOK
// chat with the same memory both see the same wording. The "brand
// wins on conflict / skill workflow wins on conflict / preferences
// are still authoritative for tone+terminology" framing is what
// stops the model from treating remembered preferences as harder
// than the active design system.
if (memoryBody && memoryBody.trim().length > 0) {
parts.push(
`\n\n## Personal memory (auto-extracted from past chats)\n\nThe following facts have been sedimented from this user's previous conversations and edited in the settings panel. Treat them as preferences and context, NOT hard rules: when they collide with the active design system tokens, the brand wins; when they collide with the active skill's workflow, the skill wins. They are still authoritative for tone, voice, terminology, and what the user already told you about themselves and their goals — never re-ask the user about something already captured here.\n\n${memoryBody.trim()}`,
);
}
if (userInstructions && userInstructions.trim().length > 0) {
parts.push(
`\n\n## Custom instructions (user-level)\n\nThe user has set the following persistent instructions. Apply them as defaults to every project. When a project-level instruction below contradicts a point here, the project-level version wins.\n\n${userInstructions.trim()}`,
);
}
if (projectInstructions && projectInstructions.trim().length > 0) {
parts.push(
`\n\n## Custom instructions (project-level)\n\nThe user has set the following instructions for this specific project. They take precedence over user-level custom instructions whenever both address the same topic (e.g. if user-level says "use spaces" but project-level says "use tabs", use tabs).\n\n${projectInstructions.trim()}`,
);
}
if (activeDesignSystemBody && activeDesignSystemBody.length > 0) {
parts.push(
`\n\n## Active design system${designSystemTitle ? ` — ${designSystemTitle}` : ''}\n\nTreat the following DESIGN.md as authoritative for color, typography, spacing, and component rules. Do not invent tokens outside this palette. When you copy the active skill's seed template, bind these tokens into its \`:root\` block before generating any layout.\n\n${activeDesignSystemBody}`,
);
}
if (skillBody && skillBody.trim().length > 0) {
const preflight = derivePreflight(skillBody);
parts.push(
`\n\n## Active skill${skillName ? ` — ${skillName}` : ''}\n\nFollow this skill's workflow exactly.${preflight}\n\n${skillBody.trim()}`,
);
}
if (pluginBlock && pluginBlock.trim().length > 0) {
parts.push(pluginBlock);
}
if (Array.isArray(activeStageBlocks) && activeStageBlocks.length > 0) {
for (const block of activeStageBlocks) {
if (typeof block === 'string' && block.trim().length > 0) {
parts.push(block);
}
}
}
const metaBlock = renderMetadataBlock(metadata, template, audioVoiceOptions, audioVoiceOptionsError);
if (metaBlock) parts.push(metaBlock);
// Decks have a load-bearing framework (nav, counter, scroll JS, print
// stylesheet for PDF stitching). Pin it last so it overrides any softer
// wording earlier in the stack ("write a script that handles arrows…").
//
// We fire on either (a) the active skill is a deck skill OR (b) the
// project metadata declares kind=deck. Case (b) catches projects created
// without a skill (skill_id null) — without this, a deck-kind project
// with no bound skill gets neither a skill seed nor the framework
// skeleton, and the agent writes scaling / nav / print logic from scratch
// with the same buggy `place-items: center` + transform pattern we keep
// having to fix at runtime. Skill seeds (when present) win — they
// already define their own opinionated framework (simple-deck's
// scroll-snap, guizang-ppt's magazine layout) and re-pinning the generic
// skeleton would conflict. The skill-seed path takes over via
// `derivePreflight` above, so we only fire the generic skeleton when no
// skill seed is on offer.
const isDeckProject = skillMode === 'deck' || metadata?.kind === 'deck';
const isFreeformProject = !skillMode && (!metadata || metadata.kind === 'other');
const hasSkillSeed =
!!skillBody && /assets\/template\.html/.test(skillBody);
if (isDeckProject && !hasSkillSeed) {
parts.push(`\n\n---\n\n${DECK_FRAMEWORK_DIRECTIVE}`);
} else if (isFreeformProject && !hasSkillSeed) {
// Freeform / kind=other projects skip the kind picker entirely and
// land here. If the user's brief is a deck/keynote/slides ("讲解",
// "presentation", "make a deck"), the agent used to invent its own
// scale-to-fit + slide visibility + nav script from scratch and
// shipped subtle CSS specificity bugs (per-slide layout classes
// overriding `.slide { display:none }`). Inject the same framework
// here, prefixed with a one-line conditional so the agent only
// adopts it when the brief actually is a deck — otherwise the
// directive is read as background reference and ignored.
parts.push(
`\n\n---\n\n## If this brief is a slide deck / keynote / presentation\n\nThe user did not pre-select a "Slide deck" surface, but their request may still call for one. **If — and only if — the brief reads as slides, keynote, presentation, deck, PPT, or 讲解, follow the framework below.** Otherwise ignore everything in this section and continue with the freeform output you would have written anyway.\n\n${DECK_FRAMEWORK_DIRECTIVE}`,
);
}
const isMediaSurface =
skillMode === 'image' ||
skillMode === 'video' ||
skillMode === 'audio' ||
metadata?.kind === 'image' ||
metadata?.kind === 'video' ||
metadata?.kind === 'audio';
if (isMediaSurface) {
parts.push(MEDIA_GENERATION_CONTRACT);
}
if (activeDesignSystemBody && activeDesignSystemBody.length > 0) {
parts.push(ACTIVE_DESIGN_SYSTEM_VISUAL_DIRECTION_OVERRIDE);
}
return parts.join('');
}
/**
 * Top-anchored override for API/BYOK mode (streamFormat === 'plain').
 *
 * Why it sits ABOVE DISCOVERY_AND_PHILOSOPHY: that layer starts with
 * "these override anything later in this prompt" and then mandates
 * TodoWrite / Bash / Read / WebFetch on turns 2–3. In daemon mode those
 * tools exist; in API mode they don't, so the agent narrates pseudo-tool
 * markup (`<todo-list>...`, `[读取 X]`) instead of producing structured
 * `tool_use` events the UI can render — bug #313. Pinning the override
 * at the absolute top is the cleanest way to beat the discovery layer's
 * precedence without restructuring its rules.
 *
 * The override does NOT block `<artifact>` blocks — those are how the
 * web UI receives finished HTML in API mode.
 *
 * Declaration order: this `const` is defined below `composeSystemPrompt`,
 * which reads it. That works because the function only reads the binding
 * when it is called; calling `composeSystemPrompt` with
 * `streamFormat === 'plain'` before this module has finished evaluating
 * would hit the temporal dead zone and throw.
 */
const API_MODE_OVERRIDE = `# API mode — no tools available (read first — overrides every rule below)
You are running through a plain Messages API. **No tools are wired through to you.** \`TodoWrite\`, \`Read\`, \`Write\`, \`Edit\`, \`Bash\`, and \`WebFetch\` are unavailable — calls to them will not execute and will not render in the UI.
Every later instruction in this prompt that tells you to "call TodoWrite", "run Bash", "read via Read", or otherwise invoke a tool is describing the daemon-mode workflow. In this API run those instructions are **overridden** — do not attempt them and do not pretend you did.
**Forbidden output:**
- Pseudo-tool markup such as \`<todo-list>...</todo-list>\`, \`<tool-call>\`, or invented XML wrappers around a plan.
- Fake-protocol prose such as \`[读取 template.html ...]\`, \`[读取 layouts.md ...]\`, \`[正在调用 TodoWrite ...]\`, or any \`[doing X]\` placeholder narrating a tool you cannot run.
- Statements like "I'll call TodoWrite to track this" or "let me read the skill file first" — there is no TodoWrite and no Read in this run.
**Allowed output:**
- Plain chat prose to the user (in their language). State your plan as prose — a short numbered list in markdown is fine; it just must not be wrapped in \`<todo-list>\` or claim to be a tool call.
- A final \`<artifact type="text/html">...</artifact>\` block containing a complete \`<!doctype html>\` document when the brief is ready to deliver.
- \`<question-form>\` blocks for discovery on turn 1, exactly as the rules below describe — question-form is markup the UI parses, not a tool call.
If the rules below tell you to plan with TodoWrite, write the plan as prose instead. If they tell you to read skill side files before writing, describe in one sentence which patterns/conventions you're going to apply and proceed. If they tell you to run brand-spec extraction via Bash + Read + WebFetch, ask the user the missing brand questions in the discovery form instead.`;
function renderMetadataBlock(
metadata: ProjectMetadata | undefined,
template: ProjectTemplate | undefined,
audioVoiceOptions: AudioVoiceOption[] | undefined,
audioVoiceOptionsError: string | undefined,
): string {
if (!metadata) return '';
const lines: string[] = [];
lines.push('\n\n## Project metadata');
lines.push(
'These are the structured choices the user made (or skipped) when creating this project. Treat known fields as authoritative; for any field marked "(unknown — ask)" you MUST include a matching question in your turn-1 discovery form.',
);
lines.push('');
lines.push(`- **kind**: ${metadata.kind}`);
if (metadata.platform) {
lines.push(`- **platform**: ${metadata.platform}`);
} else if (metadata.kind === 'prototype' || metadata.kind === 'template' || metadata.kind === 'other') {
lines.push('- **platform**: (unknown — ask: responsive web, desktop web, iOS app, Android app, tablet app, or desktop app?)');
}
if (metadata.platformTargets && metadata.platformTargets.length > 0) {
lines.push(`- **platformTargets**: ${metadata.platformTargets.join(', ')}`);
}
if (metadata.platform === 'responsive' || metadata.platformTargets?.includes('responsive')) {
lines.push(
'- **responsive web contract**: `responsive` means one web product experience that adapts across modern browser/device ranges, not only legacy desktop/tablet/mobile buckets. It is not an iOS app, Android app, or native tablet app target. Show responsive behavior through real product layout changes; do not render viewport labels as user-facing product content. Cover 2025–2026 breakpoints: mobile compact 360px, mobile standard 390–430px, foldable/small tablet 600–744px, tablet portrait 768–834px, tablet landscape/large tablet 1024–1180px, laptop 1280–1366px, desktop 1440–1536px, and wide 1920px. Use fluid `clamp()` scales, container queries where useful, and explicit layout changes at semantic thresholds. Verify no horizontal scroll at 360px, 390px, 430px, 768px, 820px, 1024px, 1366px, 1440px, and 1920px unless the brief explicitly asks for a pan/board canvas.',
);
}
if ((metadata.platformTargets?.length ?? 0) > 1) {
lines.push(
'- **cross-platform deliverable rule**: each selected target keeps the same product goal but MUST be delivered as its own product screen/file when more than one concrete target is selected. Use clear files such as `landing.html` (if enabled), `mobile-ios.html`, `mobile-android.html`, `tablet.html`, `desktop.html`, plus shared `css/` and `js/` when useful. `index.html` may be a launcher/overview that links to these files, but it must not be the only place where mobile/tablet/desktop designs live. Do not collapse cross-platform work into a single tabbed demo, selector UI, comparison board, platform map, or labelled documentation section inside one mock product page.',
);
}
if (metadata.kind === 'prototype' || metadata.kind === 'template' || metadata.kind === 'other') {
lines.push(
'- **screen-file-first rule**: each distinct user-facing screen or surface MUST be delivered as its own HTML file unless the user explicitly asks for a single-page scroll or single-file artifact. Do not combine landing pages, product app screens, dashboards, history, pricing, settings, mobile app, tablet app, desktop app, or OS widget surfaces into one long page. Use `index.html` as a launcher/overview that links to screen files when more than one screen exists; it may summarize the product and show screen cards, but it must not contain the full design for every screen.',
);
lines.push(
'- **product-realism rule**: final artifacts must look like real end-user product UI. Do not render project metadata, screen counts, target counts, state counts, "demo only" labels, "settings" panels for choosing platforms, "full design target" badges, viewport/device selector controls, theme/style knobs, platform output maps, behavior-spec sections, or design-process cards inside the product unless the user explicitly asks for a design spec/dashboard. Any navigation/tabs inside the artifact must be real product navigation, not designer controls for switching generated mockups.',
);
lines.push(
'- **visual-system rule**: when the user does not specify colors, layout, or visual direction, you must still make an intentional product-appropriate visual system. Infer a palette from the product category and audience with at least: neutral surface tokens, a primary action color, a secondary/domain accent, and status colors. Avoid plain monochrome/unstyled greyscale outputs. Use tasteful gradients, illustrations, iconography, device/product mockups, and colored state moments where they clarify the product, while still avoiding generic beige/peach/pink/brown AI washes.',
);
lines.push(
'- **app-specific modules rule**: include domain-specific in-app modules/components by default (cards, panels, controls, charts, lists, quick actions, status modules, mini players, checkout/cart summaries, etc. as appropriate). These are product UI modules, not OS home-screen widgets. Give each major module a clear purpose, states, and responsive behavior instead of generic card grids.',
);
lines.push(
'- **CJX-ready UX rule**: the artifact must be implementation-ready, not a static screenshot. Structure CSS tokens/components/responsive sections clearly; include real JavaScript behavior for meaningful UX such as tabs, dialogs, drawers, filters, generation/copy actions, validation, playback controls, or state transitions. If keeping a self-contained `index.html`, put the CSS/JS in clearly labelled blocks; for complex UX, generate `css/` and `js/` files when useful.',
);
lines.push(
'- **interaction-fidelity rule**: when the requested screen includes user input, generation, copying, validation, login, checkout, filtering, or any action verb, build real interactive controls for that screen. Do not substitute static text rows, prefilled-only mockups, screenshot-like device frames, or decorative state cards for editable inputs and working actions.',
);
}
if (metadata.includeLandingPage) {
lines.push(
'- **includeLandingPage**: true — create `landing.html` as a separate responsive marketing companion surface in addition to the selected product/app screens. Do not implement the landing page only as a section inside `index.html`, even for responsive-web-only projects. If there is a working product/app screen, create it as a separate file such as `app.html`, `dashboard.html`, or a domain-specific screen name. `index.html` should be a lightweight launcher/overview when multiple files exist. Include hero, value props, product screenshots/device mockups, proof/features, and an appropriate CTA such as waitlist, download, or contact sales.',
);
}
if (metadata.includeOsWidgets) {
lines.push(
'- **includeOsWidgets**: true — add platform-native OS home-screen / lock-screen / quick-access widget surfaces where relevant. These are outside-the-app widgets (for example iOS WidgetKit, Android home screen widget, Live Activity/lock screen, tablet glance panel), not in-app cards. Include realistic widget sizes and direct quick actions for the domain.',
);
}
if (metadata.intent === 'live-artifact') {
lines.push(
'- **intent**: live-artifact — the user chose New live artifact. The first output should be a live artifact/dashboard/report, not a one-off static mockup. Prefer the `live-artifact` skill workflow when available, keep source data compact, and register through the daemon live-artifact tool path once that wrapper/tooling is available.',
);
lines.push(
'- **connector-source rule**: if the user names a connector/source (for example Notion) and daemon connector tools are available, list connectors before asking where the data comes from. When the named connector is `connected`, use its read-only tools and ask follow-up questions only for missing topic/page/database details, multiple equally plausible matches, or an unconnected/missing connector.',
);
}
if (metadata.kind === 'prototype') {
lines.push(
`- **fidelity**: ${metadata.fidelity ?? '(unknown — ask: wireframe vs high-fidelity)'}`,
);
}
if (metadata.kind === 'deck') {
lines.push(
`- **slideCount**: ${metadata.slideCount ?? '(unknown — ask only if the Active plugin / Plugin inputs block does not already include slideCount)'}`,
);
lines.push(
`- **speakerNotes**: ${typeof metadata.speakerNotes === 'boolean' ? metadata.speakerNotes : '(unknown — ask: include speaker notes?)'}`,
);
}
if (metadata.kind === 'template') {
lines.push(
`- **animations**: ${typeof metadata.animations === 'boolean' ? metadata.animations : '(unknown — ask: include motion/animations?)'}`,
);
if (metadata.templateLabel) {
lines.push(`- **template**: ${metadata.templateLabel}`);
}
}
if (metadata.kind === 'image') {
lines.push(
`- **imageModel**: ${metadata.imageModel ?? '(unknown - ask: which image model to use)'}`,
);
lines.push(
`- **aspectRatio**: ${metadata.imageAspect ?? '(unknown - ask: 1:1, 16:9, 9:16, 4:3, 3:4)'}`,
);
if (metadata.imageStyle) {
lines.push(`- **styleNotes**: ${metadata.imageStyle}`);
}
if (metadata.promptTemplate && metadata.promptTemplate.prompt.trim().length > 0) {
lines.push(`- **referenceTemplate**: ${metadata.promptTemplate.title}`);
}
lines.push('');
lines.push(
'This is an **image** project. Plan the prompt carefully, then dispatch via the **media generation contract** using `"$OD_NODE_BIN" "$OD_BIN" media generate --surface image --model <imageModel>`. Do NOT emit `<artifact>` HTML for media surfaces.',
);
}
if (metadata.kind === 'video') {
lines.push(
`- **videoModel**: ${metadata.videoModel ?? '(unknown - ask: which video model to use)'}`,
);
lines.push(
`- **lengthSeconds**: ${typeof metadata.videoLength === 'number' ? metadata.videoLength : '(unknown - ask: 3s / 5s / 10s)'}`,
);
lines.push(
`- **aspectRatio**: ${metadata.videoAspect ?? '(unknown - ask: 16:9, 9:16, 1:1)'}`,
);
if (metadata.promptTemplate && metadata.promptTemplate.prompt.trim().length > 0) {
lines.push(`- **referenceTemplate**: ${metadata.promptTemplate.title}`);
}
lines.push('');
lines.push(
'This is a **video** project. Plan the shotlist and motion, then dispatch via the **media generation contract** using `"$OD_NODE_BIN" "$OD_BIN" media generate --surface video --model <videoModel> --length <seconds> --aspect <ratio>`. Do NOT emit `<artifact>` HTML.',
);
if (metadata.videoModel === 'hyperframes-html') {
lines.push(
'Special case: `hyperframes-html` is a local HTML-to-MP4 renderer, not a photoreal text-to-video model. Treat it like a motion design renderer, ask at most one clarifying question, then dispatch immediately.',
);
}
}
if (metadata.kind === 'audio') {
lines.push(
`- **audioKind**: ${metadata.audioKind ?? '(unknown - ask: music / speech / sfx)'}`,
);
lines.push(
`- **audioModel**: ${metadata.audioModel ?? '(unknown - ask: which audio model to use)'}`,
);
lines.push(
`- **durationSeconds**: ${typeof metadata.audioDuration === 'number' ? metadata.audioDuration : '(unknown - ask: target duration)'}`,
);
if (metadata.voice) {
lines.push(`- **voice**: ${metadata.voice}`);
} else if (metadata.audioKind === 'speech') {
lines.push('- **voice**: (unknown - ask: voice id / accent / pacing)');
}
const voiceOptions = shouldRenderElevenLabsVoiceOptions(metadata, audioVoiceOptions)
? audioVoiceOptions ?? []
: [];
if (voiceOptions.length > 0) {
lines.push(
'- **ElevenLabs voice options**: Ask the user to choose from a dropdown select. The visible labels are voice descriptions; the selected value must be the exact `voice_id` passed to `--voice`. Do not ask the user to type an id.',
);
if (voiceOptions.length > ELEVENLABS_VOICE_PROMPT_OPTION_LIMIT) {
lines.push(`- **ElevenLabs voice options**: showing the first ${ELEVENLABS_VOICE_PROMPT_OPTION_LIMIT} of ${voiceOptions.length} available voices.`);
}
lines.push('');
lines.push('<question-form id="elevenlabs-voice" title="Choose an ElevenLabs voice">');
lines.push(JSON.stringify(renderElevenLabsVoiceQuestionForm(voiceOptions), null, 2));
lines.push('</question-form>');
} else {
const audioVoiceOptionsPromptError = formatElevenLabsVoiceOptionsErrorForPrompt(audioVoiceOptionsError);
if (audioVoiceOptionsPromptError) {
lines.push(
`- **ElevenLabs voice options**: ${audioVoiceOptionsPromptError}`,
);
}
}
if (metadata.audioKind === 'sfx') {
lines.push(
'- **SFX discovery**: Ask about the sound source/action, materials, intensity, acoustic space, timing/tail, loop/non-loop, and "avoid" constraints. Do not ask for language or voice for SFX.',
);
}
lines.push('');
lines.push(
'This is an **audio** project. Lock the content intent first, then dispatch via the **media generation contract** using `"$OD_NODE_BIN" "$OD_BIN" media generate --surface audio --audio-kind <kind> --model <audioModel> --duration <seconds>` and add `--voice <voice-id>` for speech when you have a provider-specific voice id. Do NOT emit `<artifact>` HTML.',
);
}
if (metadata.inspirationDesignSystemIds && metadata.inspirationDesignSystemIds.length > 0) {
lines.push(
`- **inspirationDesignSystemIds**: ${metadata.inspirationDesignSystemIds.join(', ')} — the user picked these systems as *additional* inspiration alongside the primary one. Borrow palette accents, typographic personality, or component patterns from them; don't replace the primary system's tokens.`,
);
}
if (Array.isArray(metadata.contextPlugins) && metadata.contextPlugins.length > 0) {
lines.push('');
lines.push('### @ plugin context');
lines.push(
'The user selected these plugins as additive context via @ mentions. Treat them as requested references to combine with the brief; only the explicit active plugin block, if present, is the executable/pinned plugin snapshot.',
);
for (const plugin of metadata.contextPlugins) {
const id = typeof plugin.id === 'string' ? plugin.id : '';
const title = typeof plugin.title === 'string' && plugin.title.trim().length > 0
? plugin.title.trim()
: id;
if (!id && !title) continue;
const description = typeof plugin.description === 'string' && plugin.description.trim().length > 0
? ` — ${plugin.description.trim()}`
: '';
lines.push(`- ${title}${id ? ` (\`${id}\`)` : ''}${description}`);
}
}
// Curated prompt template reference for image/video projects. Inlined
// verbatim (with light truncation) so the agent can borrow structure,
// mood and phrasing without a separate fetch. The user may have edited
// the body before clicking Create — those edits land here and are now
// authoritative for the brief.
if (
(metadata.kind === 'image' || metadata.kind === 'video') &&
metadata.promptTemplate &&
metadata.promptTemplate.prompt.trim().length > 0
) {
const tpl = metadata.promptTemplate;
lines.push('');
lines.push(`### Reference prompt template — "${tpl.title}"`);
const meta: string[] = [];
if (tpl.category) meta.push(`category: ${tpl.category}`);
if (tpl.model) meta.push(`suggested model: ${tpl.model}`);
if (tpl.aspect) meta.push(`aspect: ${tpl.aspect}`);
if (tpl.tags && tpl.tags.length > 0) {
meta.push(`tags: ${tpl.tags.join(', ')}`);
}
if (meta.length > 0) lines.push(meta.join(' · '));
if (tpl.summary) {
lines.push('');
lines.push(tpl.summary);
}
lines.push('');
lines.push(
'The user picked this template as inspiration. Treat it as a structural and stylistic reference: borrow composition, palette cues, lighting language, lens/motion direction, and the level of detail. Adapt the wording to the user\'s actual subject and brief — do NOT generate the template subject verbatim. If a field above is unknown the user wants you to follow the template\'s defaults.',
);
// Escape triple-backticks so a user who pastes ``` into the editable
// template body can't break out of the markdown fence below and inject
// free-form instructions into the agent's system prompt. A zero-width
// space (U+200B) inserted between the backticks keeps the prompt
// human-readable while preventing the closing fence from matching
// prematurely.
const safe = tpl.prompt.replace(/```/g, '`\u200b`\u200b`');
const truncated =
safe.length > 4000
? `${safe.slice(0, 4000)}\n… (truncated ${safe.length - 4000} chars)`
: safe;
lines.push('');
lines.push('```text');
lines.push(truncated);
lines.push('```');
if (tpl.source) {
const author = tpl.source.author ? ` by ${tpl.source.author}` : '';
lines.push('');
lines.push(
`Source: ${tpl.source.repo}${author} — license ${tpl.source.license}. Preserve attribution if you echo the template language directly.`,
);
}
}
if (metadata.kind === 'template' && template && template.files.length > 0) {
lines.push('');
lines.push(
`### Template reference — "${template.name}"${template.description ? ` (${template.description})` : ''}`,
);
lines.push(
'These HTML snapshots are what the user wants to start FROM. Read them as a stylistic + structural reference. You may copy structure, palette, typography, and component patterns; you may adapt them to the new brief; do NOT ship them verbatim. The agent should still produce its own artifact, just one that visibly inherits this template\'s design language.',
);
for (const f of template.files) {
// Cap each file at ~12k chars so a giant template doesn't blow out
// the system prompt budget. The agent gets enough to read structure.
const truncated =
f.content.length > 12000
? `${f.content.slice(0, 12000)}\n<!-- … truncated (${f.content.length - 12000} chars omitted) -->`
: f.content;
lines.push('');
lines.push(`#### \`${f.name}\``);
lines.push('```html');
lines.push(truncated);
lines.push('```');
}
}
return lines.join('\n');
}
/**
 * Decide whether the ElevenLabs voice picker should be rendered.
 *
 * The picker is shown only when every condition holds: the project is an
 * audio project of kind `speech`, the audio model is `elevenlabs-v3`, no
 * voice is set on the metadata yet, and a non-empty array of voice options
 * was supplied.
 *
 * @param metadata - Project metadata whose audio fields are inspected.
 * @param audioVoiceOptions - Candidate voices; may be undefined.
 * @returns `true` when the picker should be rendered, otherwise `false`.
 */
function shouldRenderElevenLabsVoiceOptions(
  metadata: ProjectMetadata,
  audioVoiceOptions: AudioVoiceOption[] | undefined,
): boolean {
  // Each guard rejects one disqualifying condition, in the same order the
  // checks are documented above.
  if (metadata.kind !== 'audio') return false;
  if (metadata.audioKind !== 'speech') return false;
  if (metadata.audioModel !== 'elevenlabs-v3') return false;
  // A voice that is already set means there is nothing left to ask.
  if (metadata.voice) return false;
  if (!Array.isArray(audioVoiceOptions)) return false;
  return audioVoiceOptions.length > 0;
}
/**
 * Build the question-form payload for choosing an ElevenLabs voice.
 *
 * The form contains a single required `select` question. At most
 * `ELEVENLABS_VOICE_PROMPT_OPTION_LIMIT` voices are offered; each option's
 * label comes from `formatElevenLabsVoiceLabel` and its value is the
 * voice's `voiceId`.
 *
 * @param voiceOptions - Voices to offer, in display order.
 * @returns The form description, its single question, and the submit label.
 */
function renderElevenLabsVoiceQuestionForm(voiceOptions: AudioVoiceOption[]): {
  description: string;
  questions: Array<{
    id: string;
    label: string;
    type: 'select';
    required: boolean;
    placeholder: string;
    help: string;
    options: Array<{ label: string; value: string }>;
  }>;
  submitLabel: string;
} {
  // Only the leading slice of voices is offered.
  const offeredVoices = voiceOptions.slice(0, ELEVENLABS_VOICE_PROMPT_OPTION_LIMIT);
  const choices: Array<{ label: string; value: string }> = [];
  for (const voice of offeredVoices) {
    choices.push({
      label: formatElevenLabsVoiceLabel(voice),
      value: voice.voiceId,
    });
  }

  // Property order is kept stable because callers may serialize this object.
  const voiceQuestion = {
    id: 'voice',
    label: 'Voice',
    type: 'select' as const,
    required: true,
    placeholder: 'Choose a voice',
    help: 'Select a voice description; the answer submits the matching Voice ID.',
    options: choices,
  };

  return {
    description:
      'Pick a voice by description. The selected answer will be the exact voice_id passed to the renderer.',
    questions: [voiceQuestion],
    submitLabel: 'Use voice',
  };
}
/**
 * Produce the visible label for one voice option.
 *
 * When `option.labels` is an object, its string values are trimmed and the
 * non-empty ones are appended to the name, separated by ` · `. If no such
 * values exist, the trimmed `option.category` is appended instead when it is
 * a non-empty string. Otherwise the bare name is returned.
 *
 * @param option - The voice to describe.
 * @returns `name — details`, or just `name` when no details are available.
 */
function formatElevenLabsVoiceLabel(option: AudioVoiceOption): string {
  const descriptors: string[] = [];
  if (option.labels && typeof option.labels === 'object') {
    for (const raw of Object.values(option.labels)) {
      // Non-string values collapse to '' and are dropped with other blanks.
      const text = typeof raw === 'string' ? raw.trim() : '';
      if (text) descriptors.push(text);
    }
  }

  if (descriptors.length > 0) {
    return `${option.name} — ${descriptors.join(' · ')}`;
  }

  // Fall back to the category only when there are no usable labels.
  if (typeof option.category !== 'string') return option.name;
  const category = option.category.trim();
  if (!category) return option.name;
  return `${option.name} — ${category}`;
}
/**
 * Build the pre-flight directive for skills that ship seed/reference side
 * files (the pattern used by the upgraded web-prototype / mobile-app /
 * simple-deck / guizang-ppt skills).
 *
 * The skill body is scanned for mentions of known side files, and every
 * file that is mentioned is listed in a short up-front rule telling the
 * agent to Read it before doing anything else. The skill's own workflow
 * already says this, but skills get truncated under context pressure and
 * the agent sometimes skips Step 0, so the reminder is repeated here.
 *
 * @param skillBody - Raw skill text to scan for side-file mentions.
 * @returns The directive, or an empty string when no known side file is
 *   mentioned (legacy SKILL.md-only skills), so no noise is added.
 */
function derivePreflight(skillBody: string): string {
  // Ordered lookup table: detection pattern, then the label to list when the
  // pattern matches. Table order is the order files appear in the directive.
  const sideFiles: ReadonlyArray<readonly [RegExp, string]> = [
    [/assets\/template\.html/, '`assets/template.html`'],
    [/references\/layouts\.md/, '`references/layouts.md`'],
    [/references\/themes\.md/, '`references/themes.md`'],
    [/references\/components\.md/, '`references/components.md`'],
    [/references\/checklist\.md/, '`references/checklist.md`'],
    [/references\/artifact-schema\.md/, '`references/artifact-schema.md`'],
    [/references\/connector-policy\.md|connector-policy\.md/, '`references/connector-policy.md`'],
    [/references\/refresh-contract\.md|refresh-contract\.md/, '`references/refresh-contract.md`'],
    [/references\/html-in-canvas\.md|html-in-canvas\.md/, '`references/html-in-canvas.md`'],
  ];

  const mentioned = sideFiles
    .filter(([pattern]) => pattern.test(skillBody))
    .map(([, label]) => label);

  if (mentioned.length === 0) {
    return '';
  }

  const readList = mentioned.join(', ');
  return ` **Pre-flight (do this before any other tool):** Read ${readList} via the path written in the skill-root preamble. If the skill asks for daemon wrapper commands, use the runtime tool environment documented below; it provides the daemon URL and whether a run-scoped tool token is available without exposing token internals. The seed template defines the class system you'll paste into; the layouts file is the only acceptable source of section/screen/slide skeletons; the checklist and live-artifact references are your validation gate before emitting \`<artifact>\` or registering a live artifact. Skipping this step is the #1 reason output regresses to generic AI-slop.`;
}