lablup
diff --git a/‎Cargo.toml‎
Lines changed: 1 addition & 1 deletion b/‎Cargo.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/server/batch/scheduler.rs‎
Lines changed: 22 additions & 11 deletions b/‎src/server/batch/scheduler.rs‎
Lines changed: 22 additions & 11 deletions
diff --git a/‎src/server/chat_request.rs‎
Lines changed: 32 additions & 2 deletions b/‎src/server/chat_request.rs‎
Lines changed: 32 additions & 2 deletions
diff --git a/‎src/server/chat_request_tests.rs‎
Lines changed: 81 additions & 0 deletions b/‎src/server/chat_request_tests.rs‎
Lines changed: 81 additions & 0 deletions
@@ -100,7 +100,7 @@ tower-http = { version = "0.5", features = ["cors", "trace"] }
 tower = { version = "0.4", features = ["util"] }
 hyper = { version = "1.1", features = ["server"] }
 hyper-util = { version = "0.1", features = ["tokio", "server-auto"] }
-reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
+reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls", "stream"] }
 async-stream = "0.3"
 fancy-regex = "0.17.0"
 toml = "0.8"
 
@@ -1023,10 +1023,19 @@ impl BatchScheduler {
                 options,
                 images,
                 audio,
+                videos,
                 response_tx,
                 cancelled,
             } => {
-                self.enqueue_request(prompt, options, images, audio, response_tx, cancelled);
+                self.enqueue_request(
+                    prompt,
+                    options,
+                    images,
+                    audio,
+                    videos,
+                    response_tx,
+                    cancelled,
+                );
                 false
             }
             ModelRequest::Shutdown => {
@@ -1042,6 +1051,7 @@ impl BatchScheduler {
         options: ServerGenerateOptions,
         images: Vec<Vec<u8>>,
         audio: Vec<Vec<u8>>,
+        videos: Vec<(std::path::PathBuf, Option<f64>)>,
         response_tx: mpsc::Sender<GenerateEvent>,
         cancelled: Arc<AtomicBool>,
     ) {
@@ -1067,9 +1077,9 @@ impl BatchScheduler {
         // that refuses to pad/concatenate a cache with no tensors. VLM
         // requests may legitimately start with an empty token list (image
         // tokens are injected later by `prepare_request_vlm_embeddings`),
-        // so this guard only applies to pure-text requests without images
-        // or audio.
-        if prompt_tokens.is_empty() && images.is_empty() && audio.is_empty() {
+        // so this guard only applies to pure-text requests without images,
+        // audio, or videos.
+        if prompt_tokens.is_empty() && images.is_empty() && audio.is_empty() && videos.is_empty() {
             let _ = response_tx.send(GenerateEvent::Error(
                 "Empty prompt: request has no input tokens to process".to_string(),
             ));
@@ -1096,13 +1106,13 @@ impl BatchScheduler {
         // feature-disabled, no ctx, and race paths), fall through to the
         // cold-allocation path below.
         //
-        // VLM / audio requests opt out of the cache path entirely: their
-        // pre-injection token stream is not self-describing (image token
-        // placeholders expand later inside `prepare_request_vlm_embeddings`),
-        // so matching against it risks reusing a KV slice built for a
-        // different media payload. Support for image-aware cache keys is
-        // tracked separately in issue #425.
-        let is_multimodal = !images.is_empty() || !audio.is_empty();
+        // VLM / audio / video requests opt out of the cache path entirely:
+        // their pre-injection token stream is not self-describing (image
+        // and video frame placeholders expand later inside
+        // `prepare_request_vlm_embeddings`), so matching against it risks
+        // reusing a KV slice built for a different media payload. Support
+        // for image-aware cache keys is tracked separately in issue #425.
+        let is_multimodal = !images.is_empty() || !audio.is_empty() || !videos.is_empty();
         let ctx_ref = if is_multimodal {
             None
         } else {
@@ -1139,6 +1149,7 @@ impl BatchScheduler {
             &mut prompt_tokens,
             &images,
             &audio,
+            &videos,
             Some(self.vision_caches.as_ref()),
         ) {
             Ok(emb) => emb,
 
@@ -65,7 +65,7 @@ use super::chat_template_kwargs::{
     ChatTemplateKwargs, extract_request_kwargs, merge_server_and_request, strip_rolling_checkpoint,
     strip_think_block,
 };
-use super::media::{extract_chat_audio_data, extract_chat_image_data};
+use super::media::{extract_chat_audio_data, extract_chat_image_data, extract_chat_video_paths};
 use super::prompt_cache::key::resolve_session_key;
 use super::types::ChatCompletionRequest;
 use super::types::request::Tool;
@@ -74,6 +74,33 @@ pub(crate) struct PreparedChatRequest {
     pub(crate) prompt: String,
     pub(crate) image_data: Vec<Vec<u8>>,
     pub(crate) audio_data: Vec<Vec<u8>>,
+    /// Resolved video paths (issue #596). Each entry has been canonicalised
+    /// and validated against `MLXCEL_VIDEO_DIR_ALLOWLIST`; the paired
+    /// `Option<f64>` is the per-video sampling rate override from
+    /// [`crate::server::types::request::VideoUrl::fps`].
+    pub(crate) video_paths: Vec<(std::path::PathBuf, Option<f64>)>,
+    /// RAII drop guards for every server-owned temp file backing
+    /// `video_paths` (PR #600 review fix for the temp-file leak).
+    ///
+    /// The guards returned by [`super::media::extract_chat_video_paths`]
+    /// are stored here, alongside the paths, so the lifetime of each
+    /// guard equals the lifetime of the response handler: as soon as the
+    /// last consumer of `PreparedChatRequest` drops it, the temp files
+    /// vanish from `/tmp` regardless of which return path we took
+    /// (success, early error, panic).
+    ///
+    /// The scheduler still receives only `Vec<(PathBuf, Option<f64>)>` —
+    /// paths are forwarded by value to the worker thread, but ownership of
+    /// the temp file (i.e., the responsibility for deletion) stays inside
+    /// `PreparedChatRequest`. ffmpeg reads the file by path during prefill;
+    /// once that finishes the worker has no further reference to disk and
+    /// dropping the guards in the HTTP handler is safe.
+    ///
+    /// Kept alive solely for its `Drop` impl; `dead_code` suppression here
+    /// is intentional — every other access path would defeat the whole
+    /// point of the guard.
+    #[allow(dead_code)]
+    pub(crate) video_temp_guards: Vec<crate::multimodal::video::TempFile>,
 }
 
 /// Dedup set for the "defaulted `preserve_thinking=true`" info log.
@@ -183,15 +210,18 @@ pub(crate) async fn prepare_chat_request_with_cache(
             })
     };
 
-    let (image_data, audio_data) = tokio::join!(
+    let (image_data, audio_data, (video_paths, video_temp_guards)) = tokio::join!(
         extract_chat_image_data(request),
         extract_chat_audio_data(request),
+        extract_chat_video_paths(request),
     );
 
     PreparedChatRequest {
         prompt,
         image_data,
         audio_data,
+        video_paths,
+        video_temp_guards,
     }
 }
 
 
@@ -1215,3 +1215,84 @@ fn tool_call_message_history_round_trips_and_digests() {
         "tool reorder must change the digest"
     );
 }
+
+// ---------------------------------------------------------------------------
+// PR #600 review fix (HIGH-1): temp-file lifetime tied to PreparedChatRequest
+// ---------------------------------------------------------------------------
+
+/// A `data:video/...;base64,...` URL produces a server-owned temp file that
+/// must exist while `PreparedChatRequest` is alive and disappear once it
+/// drops. The previous wiring leaked the file because nothing held a Drop
+/// guard — every request added up to 1 GiB of `/tmp` debris.
+///
+/// This test does not require ffmpeg or a real video; it only checks the
+/// resolver-to-guard wiring. The cap-checking and ffmpeg path are exercised
+/// elsewhere.
+#[tokio::test]
+async fn chat_request_drops_temp_files_on_completion() {
+    use base64::Engine;
+
+    // Tiny payload — base64 of "hi" — is enough to trigger the temp-file
+    // write path without straining CI.
+    let payload = base64::engine::general_purpose::STANDARD.encode(b"hi");
+    let data_url = format!("data:video/mp4;base64,{payload}");
+
+    let request = ChatCompletionRequest {
+        model: "test-model".to_string(),
+        messages: vec![Message {
+            role: Role::User,
+            content: MessageContent::Parts(vec![ContentPart::VideoUrl {
+                video_url: crate::server::types::request::VideoUrl {
+                    url: data_url,
+                    fps: None,
+                },
+            }]),
+            name: None,
+            tool_call_id: None,
+            tool_calls: None,
+        }],
+        stream: false,
+        stream_options: None,
+        logprobs: None,
+        top_logprobs: None,
+        tools: None,
+        tool_choice: None,
+        parallel_tool_calls: None,
+        chat_template_kwargs: None,
+        extra_body: None,
+        prompt_cache_key: None,
+        user: None,
+        extra_body_fields: serde_json::Map::new(),
+        response_format: None,
+        params: SamplingParams::default(),
+    };
+
+    // Render with a trivial template that only concatenates message contents — only the media plumbing matters.
+    let processor = ChatTemplateProcessor::with_template(
+        "{% for m in messages %}{{ m.content }}{% endfor %}".to_string(),
+    );
+    let prepared = prepare_chat_request(&processor, &request, None).await;
+
+    // The single video part must resolve to exactly one path and one guard.
+    assert_eq!(prepared.video_paths.len(), 1, "data:video URL must resolve");
+    assert_eq!(
+        prepared.video_temp_guards.len(),
+        1,
+        "data:video URL must yield one Drop guard"
+    );
+    let temp_path = prepared.video_paths[0].0.clone();
+    assert!(
+        temp_path.exists(),
+        "temp file must exist while PreparedChatRequest is alive"
+    );
+
+    // Drop the prepared request; the guard's `Drop` impl should remove the file.
+    drop(prepared);
+
+    // Drop is synchronous and the file removal happens inside Drop, so the
+    // file must be gone by the time we reach this line.
+    assert!(
+        !temp_path.exists(),
+        "temp file must be removed once PreparedChatRequest drops; remained at {temp_path:?}"
+    );
+}