Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ context/
# Playwright
.playwright-mcp

# Local development scratch dir (temp clones, build outputs, etc.)
.tmp/

3 changes: 3 additions & 0 deletions deploy/helm/humr/templates/onecli/app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ spec:
containers:
- name: onecli
image: {{ .Values.onecli.image }}
{{- with .Values.onecli.imagePullPolicy }}
imagePullPolicy: {{ . }}
{{- end }}
ports:
- containerPort: {{ .Values.onecli.gateway.port }}
name: gateway
Expand Down
40 changes: 40 additions & 0 deletions deploy/helm/humr/templates/onedrive-transcript-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{{- if .Values.onedriveTranscriptTemplate.enabled }}
# Agent template for the OneDrive Teams-transcript processing agent.
# Published as a ConfigMap so the controller can instantiate agents from it.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.onedriveTranscriptTemplate.name }}
  namespace: {{ .Values.agentNamespace }}
  labels:
    {{- include "humr.labels" . | nindent 4 }}
    humr.ai/type: agent-template
data:
  # The embedded spec.yaml is opaque to Kubernetes — it is parsed by the Humr
  # controller, so it lives inside a literal block scalar.
  spec.yaml: |
    version: humr.ai/v1
    image: "{{ .Values.onedriveTranscriptTemplate.image.repository }}:{{ .Values.onedriveTranscriptTemplate.image.tag | default .Chart.AppVersion }}"
    description: {{ .Values.onedriveTranscriptTemplate.description | quote }}
    mounts:
      # /home/agent is backed by a PVC so state/processed.json survives restarts.
      - path: /home/agent
        persist: true
      - path: /tmp
        persist: false
    init: |
      #!/bin/bash
      # Seed home from image on first boot; -n never overwrites user state.
      if [ ! -f /home/agent/.initialized ]; then
        cp -rn /app/working-dir/. /home/agent/ 2>/dev/null || true
        touch /home/agent/.initialized
      fi
      mkdir -p /home/agent/work
    env:
      - name: PORT
        value: "8080"
    resources:
      requests:
        cpu: {{ .Values.onedriveTranscriptTemplate.resources.requests.cpu | quote }}
        memory: {{ .Values.onedriveTranscriptTemplate.resources.requests.memory | quote }}
      limits:
        cpu: {{ .Values.onedriveTranscriptTemplate.resources.limits.cpu | quote }}
        memory: {{ .Values.onedriveTranscriptTemplate.resources.limits.memory | quote }}
    securityContext:
      # Writable root FS required: init script and npm/uv tooling write outside the PVC.
      readOnlyRootFilesystem: false
{{- end }}
7 changes: 7 additions & 0 deletions deploy/helm/humr/values-local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ codeGuardianTemplate:
tag: latest
pullPolicy: Never

# Local dev: image is built locally and side-loaded into k3s, so it is never
# pulled from a registry.
onedriveTranscriptTemplate:
  enabled: true
  image:
    repository: humr-onedrive-transcript
    tag: latest
    pullPolicy: Never

# Bootstrap a known dev/dev user for local cluster — never enabled in production.
keycloak:
testUser:
Expand Down
17 changes: 17 additions & 0 deletions deploy/helm/humr/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -381,3 +381,20 @@ codeGuardianTemplate:
limits:
cpu: "1"
memory: "2Gi"

# -- OneDrive transcript processing agent template (disabled by default;
# enabled per-environment, e.g. in values-local.yaml)
onedriveTranscriptTemplate:
  enabled: false
  name: onedrive-transcript
  image:
    repository: ghcr.io/kagenti/humr/onedrive-transcript
    # Empty tag falls back to .Chart.AppVersion in the template.
    tag: ""
    pullPolicy: IfNotPresent
  description: "OneDrive Teams transcript processing agent"
  resources:
    requests:
      cpu: "250m"
      memory: "512Mi"
    limits:
      cpu: "1"
      memory: "2Gi"
7 changes: 4 additions & 3 deletions deploy/tasks.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ dir = "{{config_root}}"
run = 'docker build -f packages/ui/Dockerfile -t humr-ui:latest .'

["image:agent"]
description = "Build agent Docker images (humr-base + claude-code + google-workspace + pi-agent + code-guardian)"
description = "Build agent Docker images (humr-base + claude-code + google-workspace + pi-agent + code-guardian + onedrive-transcript)"
dir = "{{config_root}}"
run = '''
#!/usr/bin/env bash
Expand All @@ -70,6 +70,7 @@ docker build -t humr-claude-code:latest packages/agents/claude-code
docker build -t humr-google-workspace-agent:latest packages/agents/google-workspace
docker build -t humr-pi-agent:latest packages/agents/pi-agent
docker build -t humr-code-guardian:latest packages/agents/code-guardian
docker build -t humr-onedrive-transcript:latest packages/agents/onedrive-transcript
'''

# -- Cluster lifecycle (k3s via lima) --
Expand Down Expand Up @@ -150,7 +151,7 @@ fi
# 3. Load images into k3s (built by depends: image:*)
echo "Loading images into k3s..."
tar="/tmp/humr-images.tar"
docker save -o "$tar" humr-controller:latest humr-api-server:latest humr-ui:latest humr-claude-code:latest humr-google-workspace-agent:latest humr-pi-agent:latest humr-code-guardian:latest
docker save -o "$tar" humr-controller:latest humr-api-server:latest humr-ui:latest humr-claude-code:latest humr-google-workspace-agent:latest humr-pi-agent:latest humr-code-guardian:latest humr-onedrive-transcript:latest
if [ -n "${IS_SANDBOX:-}" ]; then
docker image prune --all --force >/dev/null 2>&1 || true
sudo k3s ctr images import "$tar"
Expand Down Expand Up @@ -267,7 +268,7 @@ set -eo pipefail

echo "Loading into k3s..."
tar="/tmp/humr-agent-images.tar"
docker save humr-claude-code:latest humr-google-workspace-agent:latest humr-pi-agent:latest humr-code-guardian:latest -o "$tar"
docker save humr-claude-code:latest humr-google-workspace-agent:latest humr-pi-agent:latest humr-code-guardian:latest humr-onedrive-transcript:latest -o "$tar"

if [ -n "${IS_SANDBOX:-}" ]; then
KUBECONFIG="/etc/rancher/k3s/k3s.yaml"
Expand Down
10 changes: 10 additions & 0 deletions packages/agents/onedrive-transcript/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Build arg lets CI substitute a registry-qualified base; defaults to the
# locally-built humr-base image.
ARG BASE_IMAGE=humr-base
FROM ${BASE_IMAGE}

# Claude Code harness + uv for running the Python VTT parser
# uv installs to /root/.local/bin; symlink it onto the default PATH so
# non-login shells can find it.
RUN cd /app && npm install @agentclientprotocol/claude-agent-acp @anthropic-ai/claude-agent-sdk \
&& npm install -g @anthropic-ai/claude-code \
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
&& ln -s /root/.local/bin/uv /usr/local/bin/uv

# Agent workspace seed — copied into /home/agent on first boot by the
# template's init script (which uses cp -rn so user state is preserved).
COPY workspace/ /app/working-dir/
93 changes: 93 additions & 0 deletions packages/agents/onedrive-transcript/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# OneDrive Transcript Processing Agent

A Humr agent that polls Microsoft Graph for new Teams meeting transcripts, converts them into structured markdown meeting notes, and posts the results to a Slack channel.

## How It Works

On a cron schedule (default: every 30 minutes), the agent:

1. Reads `state/processed.json` to skip transcripts it has already handled.
2. Lists the connected user's calendar events with Teams online meetings (`/me/events` with `isOnlineMeeting=true`).
3. For each meeting, resolves the `onlineMeeting` resource by its `joinUrl`, then lists transcripts.
4. Downloads each new transcript as VTT.
5. Parses the VTT into structured JSON (speakers, segments, duration).
6. Generates structured markdown meeting notes (subject, attendees, summary, key topics, action items, detailed notes).
7. Posts the notes to the configured Slack channel.
8. Records the processed transcript ID in `state/processed.json` (capped at 20 entries).

Authentication to Microsoft Graph goes through OneCLI's MITM proxy — the agent uses `MICROSOFT_GRAPH_TOKEN=humr:sentinel` and the proxy swaps in a real OAuth bearer token transparently.

### Scope and limits

- ✅ **Scheduled Teams meetings** (those that appear on the user's calendar) are fully supported.
- ❌ **MeetNow / ad-hoc channel meetings** are not supported. They have no calendar entry, and the bulk `getAllTranscripts` API requires application permissions + a Teams Application Access Policy (heavy admin overhead). For transcripts to be processed, the meeting must be scheduled via the calendar (not started with "Meet now").

## Setup

### 1. Register an Azure app

1. Go to [Azure Portal > App registrations](https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps/ApplicationsListBlade) > **New registration**.
2. **Supported account types**: single-tenant.
3. **Redirect URI**: `http://localhost:4444/api/apps/microsoft-graph/callback` (for local dev; Azure allows plain HTTP only for `localhost`). Production: `https://<your-onecli-host>/api/apps/microsoft-graph/callback` — Azure rejects non-localhost HTTP redirect URIs.
4. Under **API permissions**, add Microsoft Graph **Delegated** permissions:
- `Calendars.Read` — list calendar events to find scheduled Teams meetings
- `OnlineMeetings.Read` — resolve a meeting ID from its Teams join URL
- `OnlineMeetingTranscript.Read.All` — list and download VTT transcripts (admin consent required by Microsoft policy, but the scope only grants per-user access)
- `User.Read` — sign in
- `offline_access` — refresh tokens
5. Click **Grant admin consent for {tenant}**.
6. Under **Certificates & secrets**, create a client secret. Copy the **Application (client) ID**, **Client Secret**, and **Tenant ID**.

### 2. Connect Microsoft Graph in OneCLI

1. Open OneCLI at http://localhost:4444 → **Apps** → **Microsoft Graph**.
2. Enter Client ID, Client Secret, Tenant ID. Click **Save**.
3. Click **Connect** to start the OAuth flow. Sign in as the user whose meeting transcripts you want to process. Approve the requested scopes.

### 3. Grant the connection to the agent

1. Open the Humr UI at http://humr.localhost:4444.
2. Add a new agent from the **onedrive-transcript** template.
3. Open **Configure** → **Connections** → check **Microsoft Graph**. Save.

### 4. Configure Slack

The agent posts via a Slack MCP server configured in the schedule's `mcpServers` field. You'll need a Slack app with `chat:write` permission and a bot token. Reference: [Slack MCP server](https://github.com/modelcontextprotocol/servers/tree/main/src/slack) (or any other Slack MCP server).

### 5. Create a schedule

In the Humr UI, create a schedule on the agent with:

- **Cron**: `*/30 * * * *` (every 30 minutes)
- **Session mode**: `continuous` — the agent maintains context across runs
- **Task prompt**: e.g.
```
Check for new Teams meeting transcripts since the last run, process them
into meeting notes, and post each set of notes to the #meetings channel
in Slack.
```
- **MCP servers**: configure the Slack MCP server with the bot token

## Workspace contents

```
/home/agent/work/
├── CLAUDE.md # Agent operating manual
├── scripts/
│ ├── fetch-new-transcripts.py # List events, resolve meetings, download new VTTs
│ ├── parse-vtt.py # VTT → structured JSON
│ └── mark-processed.py # Append entry to state/processed.json
└── state/
└── processed.json # Last 20 processed transcripts (managed by scripts)
```

The workspace is persisted on the `/home/agent` PVC, so `state/processed.json` survives pod restarts.

## Architecture

The agent uses the Microsoft Graph REST API directly (no MCP server), with the OneCLI gateway handling token injection and refresh. The `microsoft-graph` provider in OneCLI is configured with tenant-aware token URL refresh — see `apps/web/src/lib/apps/microsoft-graph.ts` and `apps/gateway/src/apps.rs` in the OneCLI repo.

## Future considerations

- **Box upload**: post-processing to a Box folder (separate from Slack). Out of scope for this initial version.
- **Application permissions**: required for processing MeetNow / channel meetings. Would need a Teams Application Access Policy configured by the tenant admin (PowerShell). Not implemented today; delegated auth covers scheduled meetings only.
122 changes: 122 additions & 0 deletions packages/agents/onedrive-transcript/workspace/work/CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
## OneDrive Transcript Processing Agent

You are a meeting transcript processor. You retrieve Teams **scheduled** meeting transcripts via Microsoft Graph, convert them into structured meeting notes, and post the results to Slack.

### Authentication

Outbound HTTPS requests go through a credential-injection proxy that automatically replaces the sentinel token with a real OAuth bearer token. Use `$MICROSOFT_GRAPH_TOKEN` as the bearer token in all Graph API calls — the proxy swaps it transparently.

### Scope and Limits

You have **delegated** Microsoft Graph permissions. This means:

- ✅ You can access transcripts for meetings the connected user organized or attended
- ✅ Scheduled Teams meetings (those that appear on the user's calendar) are fully supported
- ❌ MeetNow / ad-hoc channel meetings are **not supported** — they don't have calendar entries, and the delegated `getAllTranscripts` API is unavailable. If users want a transcript processed, they must schedule the meeting via the calendar (not click "Meet now")

### Helper Scripts

Three Python helpers live in `scripts/` and run via `uv run`. Use them instead of constructing curl pipelines by hand.

#### `scripts/fetch-new-transcripts.py`

Lists calendar events, resolves meeting IDs, lists transcripts, downloads VTTs to `/tmp`, and filters out anything already in `state/processed.json`. Prints a JSON array of new transcripts:

```bash
uv run scripts/fetch-new-transcripts.py [--since ISO8601] [--state state/processed.json]
```

Output entries: `{subject, meetingId, transcriptId, vttPath, meetingStart}`. Default `--since` is 24 hours ago.

#### `scripts/parse-vtt.py`

Parses a VTT file into structured JSON (metadata, speakers, segments). Pass `--subject` and `--meeting-start` to embed meeting context in the metadata:

```bash
uv run scripts/parse-vtt.py /tmp/transcript-XYZ.vtt \
--subject "Meeting subject" --meeting-start "2026-04-27T13:40:00"
```

Prints JSON to stdout. Read this output directly — no temp file needed.

#### `scripts/mark-processed.py`

Appends an entry to `state/processed.json` (capped at 20):

```bash
uv run scripts/mark-processed.py \
--transcript-id ID --meeting-id ID --subject "Meeting subject"
```

### Workflow

Each run follows this sequence:

1. **Fetch new transcripts** — run `scripts/fetch-new-transcripts.py`. The script reads `state/processed.json` itself and only returns unprocessed entries.
2. **For each entry** in the JSON output:
- Run `scripts/parse-vtt.py` on the VTT, passing `--subject` and `--meeting-start`.
- Generate structured meeting notes from the parsed JSON (see format below).
- Post the notes to the configured Slack channel.
- Run `scripts/mark-processed.py` to record completion.
3. **No new transcripts** — exit quietly without posting anything.

### Meeting Notes Format

Generate notes in this markdown structure:

```markdown
# Meeting Notes: <subject>

**Date:** <meeting_start>
**Duration:** <metadata.duration>
**Attendees:** <comma-separated speakers list>

## Summary

<2-4 sentence executive summary of the meeting>

## Key Topics

### <Topic 1>
<Summary of discussion with speaker attribution>

### <Topic 2>
<Summary of discussion with speaker attribution>

## Action Items

- [ ] <action> — **<owner>**
- [ ] <action> — **<owner>**

## Detailed Notes

<Chronological notes with speaker attribution, organized by topic shifts.
Use > blockquotes for notable direct quotes.>
```

### Notes Guidelines

- **Speaker attribution**: Use first names where possible. If the VTT uses full names ("John Smith"), use "John" in the body but list full names in Attendees.
- **Summary**: Focus on decisions made and outcomes, not play-by-play.
- **Action items**: Extract explicit commitments ("I'll do X", "Can you handle Y") with the responsible person.
- **Key topics**: Group related discussion into logical topics rather than following strict chronological order.
- **Direct quotes**: Use sparingly — only for important statements, decisions, or commitments.
- **Filler removal**: Omit filler words, false starts, and crosstalk artifacts from the VTT.

### State Tracking

`state/processed.json` is managed entirely by the helper scripts — `fetch-new-transcripts.py` reads it to filter, `mark-processed.py` appends to it. Do not edit it manually. Structure:

```json
{
"processed": [
{"id": "<transcript-id>", "meetingId": "<meeting-id>", "subject": "<subject>", "processedAt": "<ISO-8601>"}
]
}
```

### Tips

- A meeting can have multiple transcripts (transcription started/stopped multiple times). Process each independently.
- If `fetch-new-transcripts.py` returns `[]`, there's nothing to do — exit quietly.
- If the Slack post fails for one transcript, log the error but continue with the rest. Don't mark a transcript as processed if its notes weren't successfully delivered.
Loading
Loading