Skip to content

Commit a1928e8

Browse files
fix: honor Ollama timeout during streaming
Fixes #8437. Ollama requests used OLLAMA_TIMEOUT for the initial HTTP request, but still used a separate hardcoded 30s stall timeout while streaming. This change defaults the stream timeout to the configured request timeout, while still allowing GOOSE_STREAM_TIMEOUT to override it explicitly. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Vincenzo Palazzo <vincenzopalazzodev@gmail.com>
1 parent 1fe379a commit a1928e8

File tree

1 file changed

+59
-22
lines changed

1 file changed

+59
-22
lines changed

crates/goose/src/providers/ollama.rs

Lines changed: 59 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ pub struct OllamaProvider {
5252
supports_streaming: bool,
5353
name: String,
5454
skip_canonical_filtering: bool,
55+
stream_timeout_secs: u64,
5556
}
5657
fn resolve_ollama_num_ctx(model_config: &ModelConfig) -> Option<usize> {
5758
let config = crate::config::Config::global();
@@ -68,6 +69,19 @@ fn resolve_ollama_num_ctx(model_config: &ModelConfig) -> Option<usize> {
6869
input_limit.or(model_config.context_limit)
6970
}
7071

72+
fn resolve_ollama_stream_timeout_secs(request_timeout_secs: u64) -> u64 {
73+
let config = crate::config::Config::global();
74+
match config.get_param::<u64>("GOOSE_STREAM_TIMEOUT") {
75+
Ok(0) => request_timeout_secs,
76+
Ok(timeout_secs) => timeout_secs,
77+
Err(crate::config::ConfigError::NotFound(_)) => request_timeout_secs,
78+
Err(e) => {
79+
tracing::warn!("Invalid GOOSE_STREAM_TIMEOUT value: {}", e);
80+
request_timeout_secs
81+
}
82+
}
83+
}
84+
7185
fn apply_ollama_options(payload: &mut Value, model_config: &ModelConfig) {
7286
if let Some(obj) = payload.as_object_mut() {
7387
// Ollama does not support stream_options; remove it to prevent hangs.
@@ -102,8 +116,9 @@ impl OllamaProvider {
102116
.get_param("OLLAMA_HOST")
103117
.unwrap_or_else(|_| OLLAMA_HOST.to_string());
104118

105-
let timeout: Duration =
106-
Duration::from_secs(config.get_param("OLLAMA_TIMEOUT").unwrap_or(OLLAMA_TIMEOUT));
119+
let request_timeout_secs = config.get_param("OLLAMA_TIMEOUT").unwrap_or(OLLAMA_TIMEOUT);
120+
let timeout: Duration = Duration::from_secs(request_timeout_secs);
121+
let stream_timeout_secs = resolve_ollama_stream_timeout_secs(request_timeout_secs);
107122

108123
let base = if host.starts_with("http://") || host.starts_with("https://") {
109124
host.clone()
@@ -133,14 +148,17 @@ impl OllamaProvider {
133148
supports_streaming: true,
134149
name: OLLAMA_PROVIDER_NAME.to_string(),
135150
skip_canonical_filtering: false,
151+
stream_timeout_secs,
136152
})
137153
}
138154

139155
pub fn from_custom_config(
140156
model: ModelConfig,
141157
config: DeclarativeProviderConfig,
142158
) -> Result<Self> {
143-
let timeout = Duration::from_secs(config.timeout_seconds.unwrap_or(OLLAMA_TIMEOUT));
159+
let request_timeout_secs = config.timeout_seconds.unwrap_or(OLLAMA_TIMEOUT);
160+
let timeout = Duration::from_secs(request_timeout_secs);
161+
let stream_timeout_secs = resolve_ollama_stream_timeout_secs(request_timeout_secs);
144162

145163
let base =
146164
if config.base_url.starts_with("http://") || config.base_url.starts_with("https://") {
@@ -196,6 +214,7 @@ impl OllamaProvider {
196214
supports_streaming,
197215
name: config.name.clone(),
198216
skip_canonical_filtering: config.skip_canonical_filtering,
217+
stream_timeout_secs,
199218
})
200219
}
201220
}
@@ -287,7 +306,7 @@ impl Provider for OllamaProvider {
287306
.inspect_err(|e| {
288307
let _ = log.error(e);
289308
})?;
290-
stream_ollama(response, log)
309+
stream_ollama(response, log, self.stream_timeout_secs)
291310
}
292311

293312
async fn fetch_supported_models(&self) -> Result<Vec<String>, ProviderError> {
@@ -327,10 +346,6 @@ impl Provider for OllamaProvider {
327346
}
328347
}
329348

330-
/// Per-chunk timeout for Ollama streaming responses.
331-
/// If no new raw SSE data arrives within this duration, the connection is considered dead.
332-
const OLLAMA_CHUNK_TIMEOUT_SECS: u64 = 30;
333-
334349
/// Wraps a line stream with a per-item timeout at the raw SSE level.
335350
/// This detects dead connections without false-positive stalls during long
336351
/// tool-call generations where response_to_streaming_message_ollama buffers.
@@ -370,15 +385,19 @@ fn with_line_timeout(
370385
/// preventing duplicate content from being emitted to the UI.
371386
/// Timeout is applied at the raw SSE line level via with_line_timeout so that
372387
/// buffering inside response_to_streaming_message_ollama does not cause false stalls.
373-
fn stream_ollama(response: Response, mut log: RequestLog) -> Result<MessageStream, ProviderError> {
388+
fn stream_ollama(
389+
response: Response,
390+
mut log: RequestLog,
391+
stream_timeout_secs: u64,
392+
) -> Result<MessageStream, ProviderError> {
374393
let stream = response.bytes_stream().map_err(std::io::Error::other);
375394

376395
Ok(Box::pin(try_stream! {
377396
let stream_reader = StreamReader::new(stream);
378397
let framed = FramedRead::new(stream_reader, LinesCodec::new())
379398
.map_err(Error::from);
380399

381-
let timed_lines = with_line_timeout(framed, OLLAMA_CHUNK_TIMEOUT_SECS);
400+
let timed_lines = with_line_timeout(framed, stream_timeout_secs);
382401
let message_stream = response_to_streaming_message_ollama(timed_lines);
383402
pin!(message_stream);
384403

@@ -522,20 +541,20 @@ mod tests {
522541
)
523542
.unwrap();
524543

525-
let mut msg_stream = stream_ollama(response, log).unwrap();
544+
let stream_timeout_secs = 30;
545+
let mut msg_stream = stream_ollama(response, log, stream_timeout_secs).unwrap();
526546

527-
let result =
528-
tokio::time::timeout(Duration::from_secs(OLLAMA_CHUNK_TIMEOUT_SECS + 5), async {
529-
let mut last_err = None;
530-
while let Some(item) = msg_stream.next().await {
531-
if let Err(e) = item {
532-
last_err = Some(e);
533-
break;
534-
}
547+
let result = tokio::time::timeout(Duration::from_secs(stream_timeout_secs + 5), async {
548+
let mut last_err = None;
549+
while let Some(item) = msg_stream.next().await {
550+
if let Err(e) = item {
551+
last_err = Some(e);
552+
break;
535553
}
536-
last_err
537-
})
538-
.await;
554+
}
555+
last_err
556+
})
557+
.await;
539558

540559
match result {
541560
Ok(Some(err)) => {
@@ -553,6 +572,24 @@ mod tests {
553572
drop(tx);
554573
}
555574

575+
580+
581+
#[test]
582+
fn test_resolve_ollama_stream_timeout_uses_override_when_present() {
583+
let _guard = env_lock::lock_env([("GOOSE_STREAM_TIMEOUT", Some("45"))]);
584+
assert_eq!(resolve_ollama_stream_timeout_secs(1200), 45);
585+
}
586+
587+
#[test]
588+
fn test_resolve_ollama_stream_timeout_falls_back_for_zero() {
589+
let _guard = env_lock::lock_env([("GOOSE_STREAM_TIMEOUT", Some("0"))]);
590+
assert_eq!(resolve_ollama_stream_timeout_secs(1200), 1200);
591+
}
592+
556593
#[test]
557594
fn test_ollama_retry_config_is_transient_only() {
558595
let config = RetryConfig::new(

0 commit comments

Comments (0)