77Downloads each new transcript VTT to /tmp and prints a JSON array to stdout:
88 [{"subject", "meetingId", "transcriptId", "vttPath", "meetingStart"}, ...]
99
10+ Per-meeting and per-transcript failures are isolated: a failure on one
11+ transcript is logged to stderr and the script continues with the rest.
12+ The exit code is non-zero only if the initial calendar listing fails
13+ (nothing else can proceed without it).
14+
1015Usage:
1116 uv run scripts/fetch-new-transcripts.py [--since ISO8601] [--state PATH]
1217
2732
# Base URL of the Microsoft Graph v1.0 REST API; request paths are appended to it.
GRAPH = "https://graph.microsoft.com/v1.0"

# Network timeout (seconds) for all Graph calls. Graph APIs typically respond
# in <1s but can stall on backend issues; 30s is generous without hanging the
# agent.
TIMEOUT = 30
38+
3039
def graph_get(path: str, token: str) -> dict:
    """Issue an authenticated GET against Microsoft Graph and decode the JSON body.

    Args:
        path: Either a full URL (anything starting with ``http`` is used
            verbatim) or a path that is appended directly to ``GRAPH``.
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The response body as parsed by ``json.loads``.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``json.loads`` raise; nothing is
        caught here, so callers decide whether a failure is fatal.
    """
    # No separator is inserted between GRAPH and path: a stray space in either
    # the URL or the header value would produce an invalid request.
    url = path if path.startswith("http") else f"{GRAPH}{path}"
    req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
    # The timeout keeps a stalled Graph backend from hanging the script forever.
    with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
        return json.loads(resp.read())
3645
3746
def graph_get_bytes(path: str, token: str, accept: str) -> bytes:
    """Issue an authenticated GET against Microsoft Graph and return the raw body.

    Args:
        path: Either a full URL (anything starting with ``http`` is used
            verbatim) or a path that is appended directly to ``GRAPH``.
        token: Bearer token sent in the ``Authorization`` header.
        accept: Value for the ``Accept`` header, e.g. ``"text/vtt"``.

    Returns:
        The undecoded response body.

    Raises:
        Whatever ``urllib.request.urlopen`` raises; nothing is caught here, so
        callers decide whether a failure is fatal.
    """
    # No separator is inserted between GRAPH and path: a stray space in either
    # the URL or the header value would produce an invalid request.
    url = path if path.startswith("http") else f"{GRAPH}{path}"
    headers = {"Authorization": f"Bearer {token}", "Accept": accept}
    req = urllib.request.Request(url, headers=headers)
    # The timeout keeps a stalled Graph backend from hanging the script forever.
    with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
        return resp.read()
4352
4453
@@ -77,6 +86,7 @@ def main():
7786 processed_ids = load_processed_ids (Path (args .state ))
7887
7988 since_enc = urllib .parse .quote (since )
89+ # Failure here is fatal — without the calendar list we have nothing to work with.
8090 events = get_all_pages (
8191 f"{ GRAPH } /me/events?$filter=start/dateTime%20ge%20'{ since_enc } '"
8292 f"&$select=id,subject,start,isOnlineMeeting,onlineMeeting&$top=50&$orderby=start/dateTime%20desc" ,
@@ -93,35 +103,52 @@ def main():
93103 continue
94104 subject = event .get ("subject" , "" )
95105 meeting_start = (event .get ("start" ) or {}).get ("dateTime" , "" )
106+ event_id = event .get ("id" , "<unknown>" )
96107
97- # Resolve meeting resource ID from join URL
98- join_url_enc = urllib .parse .quote (join_url , safe = "" )
99- meeting_resp = graph_get (
100- f"{ GRAPH } /me/onlineMeetings?$filter=JoinWebUrl%20eq%20'{ join_url_enc } '" ,
101- token ,
102- )
108+ # Resolve meeting resource ID from join URL.
109+ try :
110+ join_url_enc = urllib .parse .quote (join_url , safe = "" )
111+ meeting_resp = graph_get (
112+ f"{ GRAPH } /me/onlineMeetings?$filter=JoinWebUrl%20eq%20'{ join_url_enc } '" ,
113+ token ,
114+ )
115+ except Exception as e :
116+ print (f"warn: failed to resolve meeting for event { event_id } ({ subject } ): { e } " , file = sys .stderr )
117+ continue
103118 meetings = meeting_resp .get ("value" , [])
104119 if not meetings :
105120 continue
106121 meeting_id = meetings [0 ]["id" ]
107122
108- # List transcripts
109- transcripts_resp = graph_get (
110- f"{ GRAPH } /me/onlineMeetings/{ meeting_id } /transcripts" , token
111- )
123+ # List transcripts for the meeting.
124+ try :
125+ transcripts_resp = graph_get (
126+ f"{ GRAPH } /me/onlineMeetings/{ meeting_id } /transcripts" , token
127+ )
128+ except Exception as e :
129+ print (f"warn: failed to list transcripts for { subject } ({ meeting_id } ): { e } " , file = sys .stderr )
130+ continue
131+
112132 for transcript in transcripts_resp .get ("value" , []):
113133 transcript_id = transcript ["id" ]
114134 if transcript_id in processed_ids :
115135 continue
116136
117- # Download VTT
118- vtt_path = f"/tmp/transcript-{ transcript_id [:20 ]} .vtt"
119- content = graph_get_bytes (
120- f"{ GRAPH } /me/onlineMeetings/{ meeting_id } /transcripts/{ transcript_id } /content?$format=text/vtt" ,
121- token ,
122- accept = "text/vtt" ,
123- )
124- Path (vtt_path ).write_bytes (content )
137+ # Download VTT. Per-transcript failure must NOT lose the rest.
138+ try :
139+ vtt_path = f"/tmp/transcript-{ transcript_id [:20 ]} .vtt"
140+ content = graph_get_bytes (
141+ f"{ GRAPH } /me/onlineMeetings/{ meeting_id } /transcripts/{ transcript_id } /content?$format=text/vtt" ,
142+ token ,
143+ accept = "text/vtt" ,
144+ )
145+ Path (vtt_path ).write_bytes (content )
146+ except Exception as e :
147+ print (
148+ f"warn: failed to download transcript { transcript_id [:20 ]} … for { subject } : { e } " ,
149+ file = sys .stderr ,
150+ )
151+ continue
125152
126153 results .append ({
127154 "subject" : subject ,
0 commit comments