Skip to content

Commit 67d31dd

Browse files
authored
feat: add proto-first guide as databricks-apps reference (#41)
## Summary Adds proto-first design guidance as reference docs inside the existing `databricks-apps` skill (per review feedback — not a standalone skill). **Files added:** - `skills/databricks-apps/references/appkit/proto-first.md` — 4-phase workflow: decompose → contracts → generate → implement - `skills/databricks-apps/references/appkit/proto-contracts.md` — concrete proto↔plugin mappings for files, lakebase, jobs **SKILL.md updated:** Added "Typed data contracts" row to the Required Reading table. ### What it covers - Module decomposition (files, lakebase, jobs, analytics, server boundaries) - Proto style rules (one file per boundary, ≤12 fields, snake_case) - Proto→Lakebase DDL type mapping - Proto→Plugin method mapping (files upload, lakebase insert, job params) - Volume layout convention from proto messages - Common traps (any at boundary, JSON.parse, god messages) ## Documentation safety checklist - [x] Examples use least-privilege permissions - [x] Sensitive values are obfuscated - [x] No insecure patterns introduced
1 parent 7303df3 commit 67d31dd

File tree

5 files changed

+532
-18
lines changed

5 files changed

+532
-18
lines changed

manifest.json

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,45 @@
11
{
22
"version": "2",
3-
"updated_at": "2026-03-31T18:36:08Z",
3+
"updated_at": "2026-03-31T11:40:34Z",
44
"skills": {
5+
"databricks": {
6+
"version": "0.1.0",
7+
"description": "Core Databricks skill for CLI, auth, and data exploration",
8+
"experimental": false,
9+
"updated_at": "2026-03-31T11:37:22Z",
10+
"files": [
11+
"SKILL.md",
12+
"data-exploration.md",
13+
"databricks-cli-auth.md",
14+
"databricks-cli-install.md",
15+
"declarative-automation-bundles.md"
16+
]
17+
},
518
"databricks-apps": {
619
"version": "0.1.1",
720
"description": "Databricks Apps development and deployment",
821
"experimental": false,
9-
"updated_at": "2026-03-31T18:35:18Z",
22+
"updated_at": "2026-03-31T11:37:22Z",
1023
"files": [
1124
"SKILL.md",
1225
"references/appkit/appkit-sdk.md",
1326
"references/appkit/frontend.md",
1427
"references/appkit/lakebase.md",
1528
"references/appkit/overview.md",
1629
"references/appkit/sql-queries.md",
30+
"references/appkit/proto-contracts.md",
31+
"references/appkit/proto-first.md",
1732
"references/appkit/trpc.md",
1833
"references/other-frameworks.md",
1934
"references/platform-guide.md",
2035
"references/testing.md"
2136
]
2237
},
23-
"databricks-core": {
24-
"version": "0.1.0",
25-
"description": "Core Databricks skill for CLI, auth, and data exploration",
26-
"experimental": false,
27-
"updated_at": "2026-03-31T18:34:21Z",
28-
"files": [
29-
"SKILL.md",
30-
"data-exploration.md",
31-
"databricks-cli-auth.md",
32-
"databricks-cli-install.md",
33-
"declarative-automation-bundles.md"
34-
]
35-
},
3638
"databricks-jobs": {
3739
"version": "0.1.0",
3840
"description": "Databricks Jobs orchestration and scheduling",
3941
"experimental": false,
40-
"updated_at": "2026-03-31T18:35:12Z",
42+
"updated_at": "2026-03-31T11:37:22Z",
4143
"files": [
4244
"SKILL.md"
4345
]
@@ -46,7 +48,7 @@
4648
"version": "0.1.0",
4749
"description": "Databricks Lakebase database development",
4850
"experimental": false,
49-
"updated_at": "2026-03-31T18:34:30Z",
51+
"updated_at": "2026-03-31T11:37:22Z",
5052
"files": [
5153
"SKILL.md"
5254
]
@@ -55,7 +57,7 @@
5557
"version": "0.1.0",
5658
"description": "Databricks Pipelines (DLT) for ETL and streaming",
5759
"experimental": false,
58-
"updated_at": "2026-03-31T18:35:15Z",
60+
"updated_at": "2026-03-31T11:37:22Z",
5961
"files": [
6062
"SKILL.md",
6163
"references/auto-cdc-python.md",

scripts/generate_manifest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ def get_skill_updated_at(skill_path: Path) -> str:
7373
"description": "Databricks Pipelines (DLT) for ETL and streaming",
7474
"experimental": False,
7575
},
76+
"databricks-proto-first": {
77+
"description": "Proto-first schema design for Databricks apps",
78+
"experimental": False,
79+
},
7680
}
7781

7882

skills/databricks-apps/SKILL.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Build apps that deploy to Databricks Apps platform.
2323
| Using `useAnalyticsQuery` | [AppKit SDK](references/appkit/appkit-sdk.md) |
2424
| Adding API endpoints | [tRPC Guide](references/appkit/trpc.md) |
2525
| Using Lakebase (OLTP database) | [Lakebase Guide](references/appkit/lakebase.md) |
26+
| Typed data contracts (proto-first design) | [Proto-First Guide](references/appkit/proto-first.md) and [Plugin Contracts](references/appkit/proto-contracts.md) |
2627
| Platform rules (permissions, deployment, limits) | [Platform Guide](references/platform-guide.md) — READ for ALL apps including AppKit |
2728
| Non-AppKit app (Streamlit, FastAPI, Flask, Gradio, Next.js, etc.) | [Other Frameworks](references/other-frameworks.md) |
2829

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
# Plugin Contract Reference
2+
3+
Concrete proto↔plugin mappings for the three core AppKit integration boundaries: files, lakebase, and jobs (the jobs boundary has no plugin manifest; it is invoked via the SDK).
4+
5+
## Files Plugin Contract
6+
7+
**Plugin manifest**: `files/manifest.json`
8+
**Resource**: UC Volume with `WRITE_VOLUME` permission
9+
**Env**: `DATABRICKS_VOLUME_FILES` for volume path
10+
11+
### Boundary: What the files plugin owns
12+
13+
The files plugin is the ONLY module that touches UC Volumes. Other modules
14+
interact with files through typed proto messages, never raw paths.
15+
16+
```
17+
┌─────────────┐ UploadRequest ┌──────────────┐
18+
│ api module │ ──────────────────→ │ files plugin │
19+
│ │ ←────────────────── │ │
20+
│ │ StoredArtifact │ UC Volumes │
21+
└─────────────┘ └──────────────┘
22+
```
23+
24+
### Proto → Plugin Method Mapping
25+
26+
| Proto Message | Plugin Method | Direction |
27+
|---------------|---------------|-----------|
28+
| `UploadRequest` | `files.upload(path, content, opts)` | IN |
29+
| `StoredArtifact` | Return type of upload/getInfo | OUT |
30+
| `VolumeLayout` | `files.config.volumePath` + conventions | CONFIG |
31+
32+
### Volume Path Convention (from VolumeLayout proto)
33+
34+
```
35+
/Volumes/{catalog}/{schema}/{volume}/
36+
├── uploads/ # User uploads (UploadRequest.destination_path)
37+
├── results/ # Computed outputs (StoredArtifact)
38+
│ └── {run_id}/
39+
│ ├── output.proto.bin # Binary proto serialization
40+
│ └── output.json # JSON for debugging
41+
└── artifacts/ # Build artifacts, archives
42+
└── {app_name}/
43+
└── {version}/
44+
```
45+
46+
### Config ↔ Proto Mapping
47+
48+
| manifest.json field | Proto field | Notes |
49+
|---------------------|-------------|-------|
50+
| `config.timeout` (30000) | Not in proto | Plugin-internal config |
51+
| `config.maxUploadSize` (5GB) | `UploadRequest.content` max size | Validation constraint |
52+
| `resources.path` env | `VolumeLayout.root` | Runtime injection |
53+
54+
---
55+
56+
## Lakebase Plugin Contract
57+
58+
**Plugin manifest**: `lakebase/manifest.json`
59+
**Resource**: Postgres with `CAN_CONNECT_AND_CREATE` permission
60+
**Env**: `PGHOST`, `PGDATABASE`, `PGPORT`, `PGSSLMODE`, `LAKEBASE_ENDPOINT`
61+
62+
### Boundary: What the lakebase plugin owns
63+
64+
Lakebase owns ALL structured data. Every table's schema is derived from a proto
65+
message in `database.proto`. No ad-hoc `CREATE TABLE` statements.
66+
67+
```
68+
┌─────────────┐ RunRecord ┌──────────────┐
69+
│ compute mod │ ──────────────────→ │ lakebase │
70+
│ │ │ plugin │
71+
│ │ MetricRecord │ │
72+
│ │ ──────────────────→ │ Postgres │
73+
└─────────────┘ └──────┬───────┘
74+
75+
┌─────────────┐ SQL query │
76+
│ analytics │ ←──────────────────────────┘
77+
│ module │ RunRecord[]
78+
└─────────────┘
79+
```
80+
81+
### Proto → Table Mapping
82+
83+
| Proto Message | Table Name | Primary Key | Notes |
84+
|---------------|-----------|-------------|-------|
85+
| `RunRecord` | `runs` | `(run_id, app_name)` | One row per run |
86+
| `MetricRecord` | `metrics` | auto-increment | FK to runs.run_id |
87+
| `ConfigRecord` | `configs` | `config_id` | Versioned configs |
88+
89+
### Proto → DDL Type Mapping
90+
91+
| Proto Type | SQL Type | Column Default |
92+
|-----------|----------|----------------|
93+
| `string` | `TEXT` | `''` |
94+
| `bool` | `BOOLEAN` | `false` |
95+
| `int32` | `INTEGER` | `0` |
96+
| `int64` | `BIGINT` | `0` |
97+
| `double` | `DOUBLE PRECISION` | `0.0` |
98+
| `bytes` | `BYTEA` | `NULL` |
99+
| `Timestamp` | `TIMESTAMPTZ` | `NOW()` |
100+
| `repeated T` | `JSONB` | `'[]'::jsonb` |
101+
| `map<K,V>` | `JSONB` | `'{}'::jsonb` |
102+
| nested message | `JSONB` | `NULL` |
103+
| `enum` | `TEXT` | First value name |
104+
105+
### Migration Convention
106+
107+
```
108+
migrations/
109+
├── 001_create_runs.sql
110+
├── 002_create_metrics.sql
111+
├── 003_create_configs.sql
112+
└── 004_add_metrics_index.sql
113+
```
114+
115+
Each migration is idempotent (`CREATE TABLE IF NOT EXISTS`, `CREATE INDEX IF NOT EXISTS`).
116+
117+
### Config ↔ Proto Mapping
118+
119+
| manifest.json field | Proto usage | Notes |
120+
|---------------------|-------------|-------|
121+
| `resources.branch` | Not in proto | Infrastructure config |
122+
| `resources.database` | Not in proto | Infrastructure config |
123+
| `resources.host` (`PGHOST`) | Connection string | Runtime injection |
124+
| `resources.databaseName` (`PGDATABASE`) | Database selection | Runtime injection |
125+
126+
---
127+
128+
## Jobs / Compute Contract
129+
130+
**No plugin manifest** — Jobs are invoked via `@databricks/sdk-experimental`
131+
**Resource**: Databricks Jobs API
132+
**Auth**: Workspace token or OAuth
133+
134+
### Boundary: What the jobs module owns
135+
136+
The jobs module owns compute execution. It receives typed task inputs, executes the corresponding tasks
137+
on Databricks clusters, and produces typed task outputs.
138+
139+
```
140+
┌─────────────┐ JobConfig ┌──────────────┐
141+
│ api module │ ──────────────────→ │ jobs module │
142+
│ │ │ │
143+
│ │ JobTaskInput │ Databricks │
144+
│ │ ──────────────────→ │ Jobs API │
145+
│ │ │ │
146+
│ │ JobTaskOutput │ Clusters │
147+
│ │ ←────────────────── │ │
148+
└─────────────┘ └──────────────┘
149+
```
150+
151+
### Proto → Jobs SDK Mapping
152+
153+
| Proto Message | SDK Method | Direction |
154+
|---------------|-----------|-----------|
155+
| `JobConfig` | `jobs.create(config)` | IN — defines the job |
156+
| `TaskConfig` | Task within a job | IN — defines task deps |
157+
| `JobTaskInput` | Task params (base64 proto) | IN — task receives |
158+
| `JobTaskOutput` | Task output (written to Volume) | OUT — task produces |
159+
160+
### Task Parameter Convention
161+
162+
Job tasks receive their typed input via:
163+
1. **Small payloads (<256KB)**: Base64-encoded proto in task params
164+
2. **Large payloads**: Proto binary written to UC Volume, path passed as param
165+
166+
```typescript
167+
// Producer (api module)
168+
const input: JobTaskInput = { taskId, taskType, runId, inputPayload };
169+
const encoded = Buffer.from(JobTaskInput.encode(input).finish()).toString('base64');
170+
// Pass as notebook parameter: { "input": encoded }
171+
172+
// Consumer (job task code)
173+
const decoded = JobTaskInput.decode(Buffer.from(params.input, 'base64'));
174+
```
175+
176+
### Task Output Convention
177+
178+
Job tasks write their typed output to:
179+
```
180+
/Volumes/{catalog}/{schema}/{volume}/results/{run_id}/{task_id}.output.bin
181+
```
182+
183+
The output is a serialized `JobTaskOutput` proto. The orchestrator reads it
184+
back with the generated decoder.
185+
186+
### Jobs API Patterns
187+
188+
```typescript
189+
// Create a multi-task job from JobConfig proto
190+
const jobConfig: JobConfig = {
191+
jobName: `${appName}-${runId}`,
192+
clusterSpec: '{"num_workers": 1}',
193+
maxRetries: 2,
194+
timeoutSeconds: 3600,
195+
tasks: [
196+
{ taskKey: 'generate', taskType: 'generate', dependsOn: [] },
197+
{ taskKey: 'evaluate', taskType: 'evaluate', dependsOn: ['generate'] },
198+
{ taskKey: 'aggregate', taskType: 'aggregate', dependsOn: ['evaluate'] },
199+
],
200+
};
201+
```

0 commit comments

Comments
 (0)