diff --git a/.gitignore b/.gitignore index 38efbb64..8723be1a 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,6 @@ context/ # Playwright .playwright-mcp +# Local development scratch dir (temp clones, build outputs, etc.) +.tmp/ + diff --git a/deploy/helm/humr/templates/onecli/app.yaml b/deploy/helm/humr/templates/onecli/app.yaml index 260556d6..cb40893c 100644 --- a/deploy/helm/humr/templates/onecli/app.yaml +++ b/deploy/helm/humr/templates/onecli/app.yaml @@ -92,6 +92,9 @@ spec: containers: - name: onecli image: {{ .Values.onecli.image }} + {{- with .Values.onecli.imagePullPolicy }} + imagePullPolicy: {{ . }} + {{- end }} ports: - containerPort: {{ .Values.onecli.gateway.port }} name: gateway diff --git a/deploy/helm/humr/templates/onedrive-transcript-template.yaml b/deploy/helm/humr/templates/onedrive-transcript-template.yaml new file mode 100644 index 00000000..1b7bcde2 --- /dev/null +++ b/deploy/helm/humr/templates/onedrive-transcript-template.yaml @@ -0,0 +1,40 @@ +{{- if .Values.onedriveTranscriptTemplate.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.onedriveTranscriptTemplate.name }} + namespace: {{ .Values.agentNamespace }} + labels: + {{- include "humr.labels" . | nindent 4 }} + humr.ai/type: agent-template +data: + spec.yaml: | + version: humr.ai/v1 + image: "{{ .Values.onedriveTranscriptTemplate.image.repository }}:{{ .Values.onedriveTranscriptTemplate.image.tag | default .Chart.AppVersion }}" + description: {{ .Values.onedriveTranscriptTemplate.description | quote }} + mounts: + - path: /home/agent + persist: true + - path: /tmp + persist: false + init: | + #!/bin/bash + # Seed home from image on first boot + if [ ! -f /home/agent/.initialized ]; then + cp -rn /app/working-dir/. /home/agent/ 2>/dev/null || true + touch /home/agent/.initialized + fi + mkdir -p /home/agent/work + env: + - name: PORT + value: "8080" + resources: + requests: + cpu: {{ .Values.onedriveTranscriptTemplate.resources.requests.cpu | quote }} + memory: {{ .Values.onedriveTranscriptTemplate.resources.requests.memory | quote }} + limits: + cpu: {{ .Values.onedriveTranscriptTemplate.resources.limits.cpu | quote }} + memory: {{ .Values.onedriveTranscriptTemplate.resources.limits.memory | quote }} + securityContext: + readOnlyRootFilesystem: false +{{- end }} diff --git a/deploy/helm/humr/values-local.yaml b/deploy/helm/humr/values-local.yaml index 85c7ef29..9929bfe9 100644 --- a/deploy/helm/humr/values-local.yaml +++ b/deploy/helm/humr/values-local.yaml @@ -56,6 +56,13 @@ codeGuardianTemplate: tag: latest pullPolicy: Never +onedriveTranscriptTemplate: + enabled: true + image: + repository: humr-onedrive-transcript + tag: latest + pullPolicy: Never + # Bootstrap a known dev/dev user for local cluster — never enabled in production. 
keycloak: testUser: diff --git a/deploy/helm/humr/values.yaml b/deploy/helm/humr/values.yaml index 7c21896a..be17de7d 100644 --- a/deploy/helm/humr/values.yaml +++ b/deploy/helm/humr/values.yaml @@ -381,3 +381,20 @@ codeGuardianTemplate: limits: cpu: "1" memory: "2Gi" + +# -- OneDrive transcript processing agent template +onedriveTranscriptTemplate: + enabled: false + name: onedrive-transcript + image: + repository: ghcr.io/kagenti/humr/onedrive-transcript + tag: "" + pullPolicy: IfNotPresent + description: "OneDrive Teams transcript processing agent" + resources: + requests: + cpu: "250m" + memory: "512Mi" + limits: + cpu: "1" + memory: "2Gi" diff --git a/deploy/tasks.toml b/deploy/tasks.toml index f994cd98..1b9b015a 100644 --- a/deploy/tasks.toml +++ b/deploy/tasks.toml @@ -60,7 +60,7 @@ dir = "{{config_root}}" run = 'docker build -f packages/ui/Dockerfile -t humr-ui:latest .' ["image:agent"] -description = "Build agent Docker images (humr-base + claude-code + google-workspace + pi-agent + code-guardian)" +description = "Build agent Docker images (humr-base + claude-code + google-workspace + pi-agent + code-guardian + onedrive-transcript)" dir = "{{config_root}}" run = ''' #!/usr/bin/env bash @@ -70,6 +70,7 @@ docker build -t humr-claude-code:latest packages/agents/claude-code docker build -t humr-google-workspace-agent:latest packages/agents/google-workspace docker build -t humr-pi-agent:latest packages/agents/pi-agent docker build -t humr-code-guardian:latest packages/agents/code-guardian +docker build -t humr-onedrive-transcript:latest packages/agents/onedrive-transcript ''' # -- Cluster lifecycle (k3s via lima) -- @@ -150,7 +151,7 @@ fi # 3. Load images into k3s (built by depends: image:*) echo "Loading images into k3s..." tar="/tmp/humr-images.tar" -docker save -o "$tar" humr-controller:latest humr-api-server:latest humr-ui:latest humr-claude-code:latest humr-google-workspace-agent:latest humr-pi-agent:latest humr-code-guardian:latest +docker save -o "$tar" humr-controller:latest humr-api-server:latest humr-ui:latest humr-claude-code:latest humr-google-workspace-agent:latest humr-pi-agent:latest humr-code-guardian:latest humr-onedrive-transcript:latest if [ -n "${IS_SANDBOX:-}" ]; then docker image prune --all --force >/dev/null 2>&1 || true sudo k3s ctr images import "$tar" @@ -267,7 +268,7 @@ set -eo pipefail echo "Loading into k3s..." 
tar="/tmp/humr-agent-images.tar" -docker save humr-claude-code:latest humr-google-workspace-agent:latest humr-pi-agent:latest humr-code-guardian:latest -o "$tar" +docker save humr-claude-code:latest humr-google-workspace-agent:latest humr-pi-agent:latest humr-code-guardian:latest humr-onedrive-transcript:latest -o "$tar" if [ -n "${IS_SANDBOX:-}" ]; then KUBECONFIG="/etc/rancher/k3s/k3s.yaml" diff --git a/packages/agents/onedrive-transcript/Dockerfile b/packages/agents/onedrive-transcript/Dockerfile new file mode 100644 index 00000000..af98e122 --- /dev/null +++ b/packages/agents/onedrive-transcript/Dockerfile @@ -0,0 +1,10 @@ +ARG BASE_IMAGE=humr-base +FROM ${BASE_IMAGE} + +# Claude Code harness + uv for running the Python VTT parser +RUN cd /app && npm install @agentclientprotocol/claude-agent-acp @anthropic-ai/claude-agent-sdk \ + && npm install -g @anthropic-ai/claude-code \ + && curl -LsSf https://astral.sh/uv/install.sh | sh \ + && ln -s /root/.local/bin/uv /usr/local/bin/uv + +COPY workspace/ /app/working-dir/ diff --git a/packages/agents/onedrive-transcript/README.md b/packages/agents/onedrive-transcript/README.md new file mode 100644 index 00000000..2ed163d3 --- /dev/null +++ b/packages/agents/onedrive-transcript/README.md @@ -0,0 +1,93 @@ +# OneDrive Transcript Processing Agent + +A Humr agent that polls Microsoft Graph for new Teams meeting transcripts, converts them into structured markdown meeting notes, and posts the results to a Slack channel. + +## How It Works + +On a cron schedule (default: every 30 minutes), the agent: + +1. Reads `state/processed.json` to skip transcripts it has already handled. +2. Lists the connected user's calendar events with Teams online meetings (`/me/events` with `isOnlineMeeting=true`). +3. For each meeting, resolves the `onlineMeeting` resource by its `joinUrl`, then lists transcripts. +4. Downloads each new transcript as VTT. +5. Parses the VTT into structured JSON (speakers, segments, duration). +6. Generates structured markdown meeting notes (subject, attendees, summary, key topics, action items, detailed notes). +7. Posts the notes to the configured Slack channel. +8. Records the processed transcript ID in `state/processed.json` (capped at 20 entries). + +Authentication to Microsoft Graph goes through OneCLI's MITM proxy — the agent uses `MICROSOFT_GRAPH_TOKEN=humr:sentinel` and the proxy swaps in a real OAuth bearer token transparently. + +### Scope and limits + +- ✅ **Scheduled Teams meetings** (those that appear on the user's calendar) are fully supported. +- ❌ **MeetNow / ad-hoc channel meetings** are not supported. They have no calendar entry, and the bulk `getAllTranscripts` API requires application permissions + a Teams Application Access Policy (heavy admin overhead). For transcripts to be processed, the meeting must be scheduled via the calendar (not started with "Meet now"). + +## Setup + +### 1. Register an Azure app + +1. Go to [Azure Portal > App registrations](https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps/ApplicationsListBlade) > **New registration**. +2. **Supported account types**: single-tenant. +3. **Redirect URI**: `http://localhost:4444/api/apps/microsoft-graph/callback` (for local dev). Production: `http:///api/apps/microsoft-graph/callback`. +4. 
Under **API permissions**, add Microsoft Graph **Delegated** permissions: + - `Calendars.Read` — list calendar events to find scheduled Teams meetings + - `OnlineMeetings.Read` — resolve a meeting ID from its Teams join URL + - `OnlineMeetingTranscript.Read.All` — list and download VTT transcripts (admin consent required by Microsoft policy, but the scope only grants per-user access) + - `User.Read` — sign in + - `offline_access` — refresh tokens +5. Click **Grant admin consent for {tenant}**. +6. Under **Certificates & secrets**, create a client secret. Copy the **Application (client) ID**, **Client Secret**, and **Tenant ID**. + +### 2. Connect Microsoft Graph in OneCLI + +1. Open OneCLI at http://localhost:4444 → **Apps** → **Microsoft Graph**. +2. Enter Client ID, Client Secret, Tenant ID. Click **Save**. +3. Click **Connect** to start the OAuth flow. Sign in as the user whose meeting transcripts you want to process. Approve the requested scopes. + +### 3. Grant the connection to the agent + +1. Open the Humr UI at http://humr.localhost:4444. +2. Add a new agent from the **onedrive-transcript** template. +3. Open **Configure** → **Connections** → check **Microsoft Graph**. Save. + +### 4. Configure Slack + +The agent posts via a Slack MCP server configured in the schedule's `mcpServers` field. You'll need a Slack app with `chat:write` permission and a bot token. Reference: [Slack MCP server](https://github.com/modelcontextprotocol/servers/tree/main/src/slack) (or any other Slack MCP server). + +### 5. Create a schedule + +In the Humr UI, create a schedule on the agent with: + +- **Cron**: `*/30 * * * *` (every 30 minutes) +- **Session mode**: `continuous` — the agent maintains context across runs +- **Task prompt**: e.g. + ``` + Check for new Teams meeting transcripts since the last run, process them + into meeting notes, and post each set of notes to the #meetings channel + in Slack. + ``` +- **MCP servers**: configure the Slack MCP server with the bot token + +## Workspace contents + +``` +/home/agent/work/ +├── CLAUDE.md # Agent operating manual +├── scripts/ +│ ├── fetch-new-transcripts.py # List events, resolve meetings, download new VTTs +│ ├── parse-vtt.py # VTT → structured JSON +│ └── mark-processed.py # Append entry to state/processed.json +└── state/ + └── processed.json # Last 20 processed transcripts (managed by scripts) +``` + +The workspace is persisted on the `/home/agent` PVC, so `state/processed.json` survives pod restarts. + +## Architecture + +The agent uses the Microsoft Graph REST API directly (no MCP server), with the OneCLI gateway handling token injection and refresh. The `microsoft-graph` provider in OneCLI is configured with tenant-aware token URL refresh — see `apps/web/src/lib/apps/microsoft-graph.ts` and `apps/gateway/src/apps.rs` in the OneCLI repo. + +## Future considerations + +- **Box upload**: post-processing to a Box folder (separate from Slack). Out of scope for this initial version. +- **Application permissions**: required for processing MeetNow / channel meetings. Would need a Teams Application Access Policy configured by the tenant admin (PowerShell). Not implemented today; delegated auth covers scheduled meetings only. 
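+
+## Verifying the Graph connection
+
+For a quick sanity check that the OneCLI token injection works from inside the agent container, a minimal request sketch (the query simply lists a few calendar events; the `$top`/`$select` parameters are illustrative, not something the agent requires):
+
+```python
+# Sketch: call Microsoft Graph through the OneCLI proxy. The sentinel value in
+# MICROSOFT_GRAPH_TOKEN is swapped for a real OAuth bearer token in transit.
+import json
+import os
+import urllib.request
+
+token = os.environ["MICROSOFT_GRAPH_TOKEN"]
+req = urllib.request.Request(
+    "https://graph.microsoft.com/v1.0/me/events?$top=3&$select=subject,isOnlineMeeting",
+    headers={"Authorization": f"Bearer {token}"},
+)
+with urllib.request.urlopen(req, timeout=30) as resp:
+    print(json.dumps(json.loads(resp.read()), indent=2))
+```
+
+A `200` response with event data confirms the connection is available to the agent; a `401`/`403` likely means the Microsoft Graph connection was not granted in the agent's **Configure** → **Connections** panel.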
diff --git a/packages/agents/onedrive-transcript/workspace/work/CLAUDE.md b/packages/agents/onedrive-transcript/workspace/work/CLAUDE.md new file mode 100644 index 00000000..70dce525 --- /dev/null +++ b/packages/agents/onedrive-transcript/workspace/work/CLAUDE.md @@ -0,0 +1,122 @@ +## OneDrive Transcript Processing Agent + +You are a meeting transcript processor. You retrieve Teams **scheduled** meeting transcripts via Microsoft Graph, convert them into structured meeting notes, and post the results to Slack. + +### Authentication + +Outbound HTTPS requests go through a credential-injection proxy that automatically replaces the sentinel token with a real OAuth bearer token. Use `$MICROSOFT_GRAPH_TOKEN` as the bearer token in all Graph API calls — the proxy swaps it transparently. + +### Scope and Limits + +You have **delegated** Microsoft Graph permissions. This means: + +- ✅ You can access transcripts for meetings the connected user organized or attended +- ✅ Scheduled Teams meetings (those that appear on the user's calendar) are fully supported +- ❌ MeetNow / ad-hoc channel meetings are **not supported** — they don't have calendar entries, and the delegated `getAllTranscripts` API is unavailable. If users want a transcript processed, they must schedule the meeting via the calendar (not click "Meet now") + +### Helper Scripts + +Three Python helpers live in `scripts/` and run via `uv run`. Use them instead of constructing curl pipelines by hand. + +#### `scripts/fetch-new-transcripts.py` + +Lists calendar events, resolves meeting IDs, lists transcripts, downloads VTTs to `/tmp`, and filters out anything already in `state/processed.json`. Prints a JSON array of new transcripts: + +```bash +uv run scripts/fetch-new-transcripts.py [--since ISO8601] [--state state/processed.json] +``` + +Output entries: `{subject, meetingId, transcriptId, vttPath, meetingStart}`. Default `--since` is 24 hours ago. + +#### `scripts/parse-vtt.py` + +Parses a VTT file into structured JSON (metadata, speakers, segments). Pass `--subject` and `--meeting-start` to embed meeting context in the metadata: + +```bash +uv run scripts/parse-vtt.py /tmp/transcript-XYZ.vtt \ + --subject "Meeting subject" --meeting-start "2026-04-27T13:40:00" +``` + +Prints JSON to stdout. Read this output directly — no temp file needed. + +#### `scripts/mark-processed.py` + +Appends an entry to `state/processed.json` (capped at 20): + +```bash +uv run scripts/mark-processed.py \ + --transcript-id ID --meeting-id ID --subject "Meeting subject" +``` + +### Workflow + +Each run follows this sequence: + +1. **Fetch new transcripts** — run `scripts/fetch-new-transcripts.py`. The script reads `state/processed.json` itself and only returns unprocessed entries. +2. **For each entry** in the JSON output: + - Run `scripts/parse-vtt.py` on the VTT, passing `--subject` and `--meeting-start`. + - Generate structured meeting notes from the parsed JSON (see format below). + - Post the notes to the configured Slack channel. + - Run `scripts/mark-processed.py` to record completion. +3. **No new transcripts** — exit quietly without posting anything. 
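+
+For reference, each entry in the fetch script's JSON output has this shape (the values below are illustrative placeholders, not real Graph identifiers):
+
+```json
+{
+  "subject": "Weekly architecture sync",
+  "meetingId": "<opaque Graph online-meeting id>",
+  "transcriptId": "<opaque Graph transcript id>",
+  "vttPath": "/tmp/transcript-<first 20 chars of transcript id>.vtt",
+  "meetingStart": "2026-04-27T13:40:00"
+}
+```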
+
+### Meeting Notes Format
+
+Generate notes in this markdown structure:
+
+```markdown
+# Meeting Notes: <Meeting Subject>
+
+**Date:** <date>
+**Duration:** <duration>
+**Attendees:** <full names of attendees>
+
+## Summary
+
+<2-4 sentence executive summary of the meeting>
+
+## Key Topics
+
+### <Topic 1>
+<key points discussed>
+
+### <Topic 2>
+<key points discussed>
+
+## Action Items
+
+- [ ] <action item> — **<owner>**
+- [ ] <action item> — **<owner>**
+
+## Detailed Notes
+
+<Detailed notes organized by topic. Use blockquotes for notable direct quotes.>
+```
+
+### Notes Guidelines
+
+- **Speaker attribution**: Use first names where possible. If the VTT uses full names ("John Smith"), use "John" in the body but list full names in Attendees.
+- **Summary**: Focus on decisions made and outcomes, not play-by-play.
+- **Action items**: Extract explicit commitments ("I'll do X", "Can you handle Y") with the responsible person.
+- **Key topics**: Group related discussion into logical topics rather than following strict chronological order.
+- **Direct quotes**: Use sparingly — only for important statements, decisions, or commitments.
+- **Filler removal**: Omit filler words, false starts, and crosstalk artifacts from the VTT.
+
+### State Tracking
+
+`state/processed.json` is managed entirely by the helper scripts — `fetch-new-transcripts.py` reads it to filter, `mark-processed.py` appends to it. Do not edit it manually. Structure:
+
+```json
+{
+  "processed": [
+    {"id": "<transcript id>", "meetingId": "<meeting id>", "subject": "<subject>", "processedAt": "<ISO8601 timestamp>"}
+  ]
+}
+```
+
+### Tips
+
+- A meeting can have multiple transcripts (transcription started/stopped multiple times). Process each independently.
+- If `fetch-new-transcripts.py` returns `[]`, there's nothing to do — exit quietly.
+- If the Slack post fails for one transcript, log the error but continue with the rest. Don't mark a transcript as processed if its notes weren't successfully delivered.
diff --git a/packages/agents/onedrive-transcript/workspace/work/scripts/fetch-new-transcripts.py b/packages/agents/onedrive-transcript/workspace/work/scripts/fetch-new-transcripts.py
new file mode 100644
index 00000000..bf05a236
--- /dev/null
+++ b/packages/agents/onedrive-transcript/workspace/work/scripts/fetch-new-transcripts.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.11"
+# ///
+"""Fetch all new Teams meeting transcripts not yet recorded in state/processed.json.
+
+Downloads each new transcript VTT to /tmp and prints a JSON array to stdout:
+    [{"subject", "meetingId", "transcriptId", "vttPath", "meetingStart"}, ...]
+
+Per-meeting and per-transcript failures are isolated: a failure on one
+transcript is logged to stderr and the script continues with the rest.
+The exit code is non-zero only if the initial calendar listing fails
+(nothing else can proceed without it).
+
+Usage:
+    uv run scripts/fetch-new-transcripts.py [--since ISO8601] [--state PATH]
+
+Defaults:
+    --since 24 hours ago
+    --state state/processed.json
+"""
+
+import argparse
+import json
+import os
+import sys
+import urllib.parse
+import urllib.request
+from datetime import datetime, timezone, timedelta
+from pathlib import Path
+
+
+GRAPH = "https://graph.microsoft.com/v1.0"
+
+# Network timeout for all Graph calls. Graph APIs typically respond in <1s
+# but can stall on backend issues; 30s is generous without hanging the agent.
+TIMEOUT = 30 + + +def graph_get(path: str, token: str) -> dict: + url = path if path.startswith("http") else f"{GRAPH}{path}" + req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"}) + with urllib.request.urlopen(req, timeout=TIMEOUT) as resp: + return json.loads(resp.read()) + + +def graph_get_bytes(path: str, token: str, accept: str) -> bytes: + url = path if path.startswith("http") else f"{GRAPH}{path}" + req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}", "Accept": accept}) + with urllib.request.urlopen(req, timeout=TIMEOUT) as resp: + return resp.read() + + +def get_all_pages(first_url: str, token: str) -> list: + items = [] + url = first_url + while url: + data = graph_get(url, token) + items.extend(data.get("value", [])) + url = data.get("@odata.nextLink") + return items + + +def load_processed_ids(state_path: Path) -> set: + if not state_path.exists(): + return set() + data = json.loads(state_path.read_text()) + return {e["id"] for e in data.get("processed", [])} + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--since", default=None, help="ISO8601 start datetime (default: 24h ago)") + parser.add_argument("--state", default="state/processed.json", help="Path to processed.json") + args = parser.parse_args() + + token = os.environ.get("MICROSOFT_GRAPH_TOKEN") + if not token: + print("Error: MICROSOFT_GRAPH_TOKEN not set", file=sys.stderr) + sys.exit(1) + + since = args.since or ( + datetime.now(timezone.utc) - timedelta(hours=24) + ).strftime("%Y-%m-%dT%H:%M:%SZ") + + processed_ids = load_processed_ids(Path(args.state)) + + since_enc = urllib.parse.quote(since) + # Failure here is fatal — without the calendar list we have nothing to work with. + events = get_all_pages( + f"{GRAPH}/me/events?$filter=start/dateTime%20ge%20'{since_enc}'" + f"&$select=id,subject,start,isOnlineMeeting,onlineMeeting&$top=50&$orderby=start/dateTime%20desc", + token, + ) + + results = [] + + for event in events: + if not event.get("isOnlineMeeting"): + continue + join_url = (event.get("onlineMeeting") or {}).get("joinUrl") + if not join_url: + continue + subject = event.get("subject", "") + meeting_start = (event.get("start") or {}).get("dateTime", "") + event_id = event.get("id", "") + + # Resolve meeting resource ID from join URL. + try: + join_url_enc = urllib.parse.quote(join_url, safe="") + meeting_resp = graph_get( + f"{GRAPH}/me/onlineMeetings?$filter=JoinWebUrl%20eq%20'{join_url_enc}'", + token, + ) + except Exception as e: + print(f"warn: failed to resolve meeting for event {event_id} ({subject}): {e}", file=sys.stderr) + continue + meetings = meeting_resp.get("value", []) + if not meetings: + continue + meeting_id = meetings[0]["id"] + + # List transcripts for the meeting. + try: + transcripts_resp = graph_get( + f"{GRAPH}/me/onlineMeetings/{meeting_id}/transcripts", token + ) + except Exception as e: + print(f"warn: failed to list transcripts for {subject} ({meeting_id}): {e}", file=sys.stderr) + continue + + for transcript in transcripts_resp.get("value", []): + transcript_id = transcript["id"] + if transcript_id in processed_ids: + continue + + # Download VTT. Per-transcript failure must NOT lose the rest. 
+ try: + vtt_path = f"/tmp/transcript-{transcript_id[:20]}.vtt" + content = graph_get_bytes( + f"{GRAPH}/me/onlineMeetings/{meeting_id}/transcripts/{transcript_id}/content?$format=text/vtt", + token, + accept="text/vtt", + ) + Path(vtt_path).write_bytes(content) + except Exception as e: + print( + f"warn: failed to download transcript {transcript_id[:20]}… for {subject}: {e}", + file=sys.stderr, + ) + continue + + results.append({ + "subject": subject, + "meetingId": meeting_id, + "transcriptId": transcript_id, + "vttPath": vtt_path, + "meetingStart": meeting_start, + }) + + print(json.dumps(results, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/packages/agents/onedrive-transcript/workspace/work/scripts/mark-processed.py b/packages/agents/onedrive-transcript/workspace/work/scripts/mark-processed.py new file mode 100644 index 00000000..0bff0d59 --- /dev/null +++ b/packages/agents/onedrive-transcript/workspace/work/scripts/mark-processed.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.11" +# /// +"""Append a transcript entry to state/processed.json, keeping the last 20. + +Usage: + uv run scripts/mark-processed.py \\ + --transcript-id ID --meeting-id ID --subject TEXT [--state PATH] +""" + +import argparse +import json +from datetime import datetime, timezone +from pathlib import Path + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--transcript-id", required=True) + parser.add_argument("--meeting-id", required=True) + parser.add_argument("--subject", required=True) + parser.add_argument("--state", default="state/processed.json") + args = parser.parse_args() + + state_path = Path(args.state) + state_path.parent.mkdir(parents=True, exist_ok=True) + + if state_path.exists(): + data = json.loads(state_path.read_text()) + else: + data = {"processed": []} + + data["processed"].append({ + "id": args.transcript_id, + "meetingId": args.meeting_id, + "subject": args.subject, + "processedAt": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + }) + data["processed"] = data["processed"][-20:] + + state_path.write_text(json.dumps(data, indent=2) + "\n") + print(f"Marked {args.transcript_id[:20]}… as processed ({len(data['processed'])} total)") + + +if __name__ == "__main__": + main() diff --git a/packages/agents/onedrive-transcript/workspace/work/scripts/parse-vtt.py b/packages/agents/onedrive-transcript/workspace/work/scripts/parse-vtt.py new file mode 100644 index 00000000..fdd41f5a --- /dev/null +++ b/packages/agents/onedrive-transcript/workspace/work/scripts/parse-vtt.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python3 +"""Parse a WebVTT transcript into structured JSON. 
+
+Usage:
+    uv run scripts/parse-vtt.py <input.vtt> [--output <output.json>] \\
+        [--subject "Meeting subject"] [--meeting-start "2026-04-27T13:40:00"]
+
+Reads a VTT file (Teams meeting transcript format) and produces JSON with:
+- metadata (filename, total duration, speaker count, optional subject + meeting_start)
+- speakers (list of unique speakers)
+- segments (merged consecutive same-speaker blocks with timestamps)
+"""
+
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+
+
+def parse_timestamp(ts: str) -> float:
+    """Convert VTT timestamp (HH:MM:SS.mmm) to seconds."""
+    parts = ts.strip().replace(",", ".").split(":")
+    if len(parts) == 3:
+        h, m, s = parts
+        return int(h) * 3600 + int(m) * 60 + float(s)
+    if len(parts) == 2:
+        m, s = parts
+        return int(m) * 60 + float(s)
+    return float(parts[0])
+
+
+def format_duration(seconds: float) -> str:
+    """Format seconds as HH:MM:SS."""
+    h = int(seconds // 3600)
+    m = int((seconds % 3600) // 60)
+    s = int(seconds % 60)
+    if h > 0:
+        return f"{h}:{m:02d}:{s:02d}"
+    return f"{m}:{s:02d}"
+
+
+def parse_vtt(content: str) -> dict:
+    """Parse VTT content into structured data."""
+    lines = content.strip().splitlines()
+
+    # Skip BOM and WEBVTT header
+    start = 0
+    for i, line in enumerate(lines):
+        cleaned = line.strip().lstrip("\ufeff")
+        if cleaned.startswith("WEBVTT"):
+            start = i + 1
+            break
+    else:
+        start = 0
+
+    # Skip any header metadata lines (NOTE, empty lines after WEBVTT)
+    while start < len(lines) and (not lines[start].strip() or lines[start].strip().startswith("NOTE")):
+        start += 1
+
+    # Parse cue blocks
+    timestamp_re = re.compile(
+        r"(\d{1,2}:\d{2}:\d{2}[.,]\d{3})\s*-->\s*(\d{1,2}:\d{2}:\d{2}[.,]\d{3})"
+    )
+    # Teams VTT speaker tag: <v Speaker Name>text</v> or <v Speaker Name>text
+    speaker_re = re.compile(r"<v\s+([^>]+)>(.*?)(?:</v>)?$")
+
+    raw_cues: list[dict] = []
+    i = start
+    while i < len(lines):
+        line = lines[i].strip()
+
+        # Look for timestamp line
+        m = timestamp_re.search(line)
+        if m:
+            start_ts = parse_timestamp(m.group(1))
+            end_ts = parse_timestamp(m.group(2))
+
+            # Collect text lines until next blank line or timestamp
+            text_lines = []
+            i += 1
+            while i < len(lines) and lines[i].strip() and not timestamp_re.search(lines[i]):
+                text_lines.append(lines[i].strip())
+                i += 1
+
+            text = " ".join(text_lines)
+
+            # Extract speaker if present
+            speaker = None
+            sm = speaker_re.match(text)
+            if sm:
+                speaker = sm.group(1).strip()
+                text = sm.group(2).strip()
+                # Handle remaining lines that may not have speaker tags
+                if not text and text_lines:
+                    text = " ".join(text_lines)
+
+            # Strip any remaining VTT tags
+            text = re.sub(r"<[^>]+>", "", text).strip()
+
+            if text:
+                raw_cues.append({
+                    "start": start_ts,
+                    "end": end_ts,
+                    "speaker": speaker,
+                    "text": text,
+                })
+        else:
+            i += 1
+
+    # Merge consecutive cues from the same speaker
+    segments: list[dict] = []
+    for cue in raw_cues:
+        if segments and segments[-1]["speaker"] == cue["speaker"]:
+            segments[-1]["end"] = cue["end"]
+            segments[-1]["text"] += " " + cue["text"]
+        else:
+            segments.append({
+                "start": cue["start"],
+                "end": cue["end"],
+                "speaker": cue["speaker"],
+                "text": cue["text"],
+            })
+
+    # Format timestamps in segments for output
+    for seg in segments:
+        seg["start_fmt"] = format_duration(seg["start"])
+        seg["end_fmt"] = format_duration(seg["end"])
+
+    speakers = sorted({s["speaker"] for s in segments if s["speaker"]})
+    total_duration = max((s["end"] for s in segments), default=0)
+
+    return {
+        "metadata": {
+            "speaker_count": len(speakers),
+            "segment_count": len(segments),
+            "duration": 
format_duration(total_duration), + "duration_seconds": round(total_duration, 1), + }, + "speakers": speakers, + "segments": [ + { + "speaker": s["speaker"], + "start": s["start_fmt"], + "end": s["end_fmt"], + "text": s["text"], + } + for s in segments + ], + } + + +def main(): + parser = argparse.ArgumentParser(description="Parse VTT transcript to structured JSON") + parser.add_argument("input", help="Path to .vtt file") + parser.add_argument("--output", "-o", help="Output JSON path (default: stdout)") + parser.add_argument("--subject", help="Meeting subject to embed in metadata") + parser.add_argument("--meeting-start", help="Meeting start datetime (ISO8601) to embed in metadata") + args = parser.parse_args() + + input_path = Path(args.input) + if not input_path.exists(): + print(f"Error: {input_path} not found", file=sys.stderr) + sys.exit(1) + + # Try common encodings for VTT files + content = None + for encoding in ("utf-8-sig", "utf-8", "utf-16", "latin-1"): + try: + content = input_path.read_text(encoding=encoding) + break + except (UnicodeDecodeError, UnicodeError): + continue + + if content is None: + print(f"Error: could not decode {input_path}", file=sys.stderr) + sys.exit(1) + + result = parse_vtt(content) + result["metadata"]["source_file"] = input_path.name + if args.subject: + result["metadata"]["subject"] = args.subject + if args.meeting_start: + result["metadata"]["meeting_start"] = args.meeting_start + + output = json.dumps(result, indent=2, ensure_ascii=False) + + if args.output: + Path(args.output).write_text(output + "\n") + print(f"Written to {args.output}", file=sys.stderr) + else: + print(output) + + +if __name__ == "__main__": + main()