Skip to content

Commit 2e7f4eb

Browse files
committed
fix(onedrive-transcript): harden fetch-new-transcripts; ignore .tmp/
- Wrap each meeting/transcript Graph call in try/except so a single failure (rate limit, transient 5xx, missing meeting) only loses that one item instead of aborting the whole run.
- Add timeout=30 to all urlopen calls so a stalled Graph response can't hang the agent indefinitely.
- Add .tmp/ to .gitignore — it is a local-development scratch dir (temp clones, build outputs, etc.) that should never be committed.

Signed-off-by: Jan Pokorný <JenomPokorny@gmail.com>
1 parent cd26a7f commit 2e7f4eb

2 files changed

Lines changed: 50 additions & 20 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,6 @@ context/
4242
# Playwright
4343
.playwright-mcp
4444

45+
# Local development scratch dir (temp clones, build outputs, etc.)
46+
.tmp/
47+

packages/agents/onedrive-transcript/workspace/work/scripts/fetch-new-transcripts.py

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
Downloads each new transcript VTT to /tmp and prints a JSON array to stdout:
88
[{"subject", "meetingId", "transcriptId", "vttPath", "meetingStart"}, ...]
99
10+
Per-meeting and per-transcript failures are isolated: a failure on one
11+
transcript is logged to stderr and the script continues with the rest.
12+
The exit code is non-zero only if the initial calendar listing fails
13+
(nothing else can proceed without it).
14+
1015
Usage:
1116
uv run scripts/fetch-new-transcripts.py [--since ISO8601] [--state PATH]
1217
@@ -27,18 +32,22 @@
2732

2833
GRAPH = "https://graph.microsoft.com/v1.0"
2934

35+
# Network timeout for all Graph calls. Graph APIs typically respond in <1s
36+
# but can stall on backend issues; 30s is generous without hanging the agent.
37+
TIMEOUT = 30
38+
3039

3140
def graph_get(path: str, token: str) -> dict:
3241
url = path if path.startswith("http") else f"{GRAPH}{path}"
3342
req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
34-
with urllib.request.urlopen(req) as resp:
43+
with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
3544
return json.loads(resp.read())
3645

3746

3847
def graph_get_bytes(path: str, token: str, accept: str) -> bytes:
3948
url = path if path.startswith("http") else f"{GRAPH}{path}"
4049
req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}", "Accept": accept})
41-
with urllib.request.urlopen(req) as resp:
50+
with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
4251
return resp.read()
4352

4453

@@ -77,6 +86,7 @@ def main():
7786
processed_ids = load_processed_ids(Path(args.state))
7887

7988
since_enc = urllib.parse.quote(since)
89+
# Failure here is fatal — without the calendar list we have nothing to work with.
8090
events = get_all_pages(
8191
f"{GRAPH}/me/events?$filter=start/dateTime%20ge%20'{since_enc}'"
8292
f"&$select=id,subject,start,isOnlineMeeting,onlineMeeting&$top=50&$orderby=start/dateTime%20desc",
@@ -93,35 +103,52 @@ def main():
93103
continue
94104
subject = event.get("subject", "")
95105
meeting_start = (event.get("start") or {}).get("dateTime", "")
106+
event_id = event.get("id", "<unknown>")
96107

97-
# Resolve meeting resource ID from join URL
98-
join_url_enc = urllib.parse.quote(join_url, safe="")
99-
meeting_resp = graph_get(
100-
f"{GRAPH}/me/onlineMeetings?$filter=JoinWebUrl%20eq%20'{join_url_enc}'",
101-
token,
102-
)
108+
# Resolve meeting resource ID from join URL.
109+
try:
110+
join_url_enc = urllib.parse.quote(join_url, safe="")
111+
meeting_resp = graph_get(
112+
f"{GRAPH}/me/onlineMeetings?$filter=JoinWebUrl%20eq%20'{join_url_enc}'",
113+
token,
114+
)
115+
except Exception as e:
116+
print(f"warn: failed to resolve meeting for event {event_id} ({subject}): {e}", file=sys.stderr)
117+
continue
103118
meetings = meeting_resp.get("value", [])
104119
if not meetings:
105120
continue
106121
meeting_id = meetings[0]["id"]
107122

108-
# List transcripts
109-
transcripts_resp = graph_get(
110-
f"{GRAPH}/me/onlineMeetings/{meeting_id}/transcripts", token
111-
)
123+
# List transcripts for the meeting.
124+
try:
125+
transcripts_resp = graph_get(
126+
f"{GRAPH}/me/onlineMeetings/{meeting_id}/transcripts", token
127+
)
128+
except Exception as e:
129+
print(f"warn: failed to list transcripts for {subject} ({meeting_id}): {e}", file=sys.stderr)
130+
continue
131+
112132
for transcript in transcripts_resp.get("value", []):
113133
transcript_id = transcript["id"]
114134
if transcript_id in processed_ids:
115135
continue
116136

117-
# Download VTT
118-
vtt_path = f"/tmp/transcript-{transcript_id[:20]}.vtt"
119-
content = graph_get_bytes(
120-
f"{GRAPH}/me/onlineMeetings/{meeting_id}/transcripts/{transcript_id}/content?$format=text/vtt",
121-
token,
122-
accept="text/vtt",
123-
)
124-
Path(vtt_path).write_bytes(content)
137+
# Download VTT. Per-transcript failure must NOT lose the rest.
138+
try:
139+
vtt_path = f"/tmp/transcript-{transcript_id[:20]}.vtt"
140+
content = graph_get_bytes(
141+
f"{GRAPH}/me/onlineMeetings/{meeting_id}/transcripts/{transcript_id}/content?$format=text/vtt",
142+
token,
143+
accept="text/vtt",
144+
)
145+
Path(vtt_path).write_bytes(content)
146+
except Exception as e:
147+
print(
148+
f"warn: failed to download transcript {transcript_id[:20]}… for {subject}: {e}",
149+
file=sys.stderr,
150+
)
151+
continue
125152

126153
results.append({
127154
"subject": subject,

0 commit comments

Comments
 (0)