stream option

tiltwind · tiltwind · commit 01264ba70843 · 2026-04-01T15:44:23.000+08:00
diff --git a/chat.go b/chat.go
@@ -100,6 +100,11 @@ func (c *Client) openaiChatCompletionStream(ctx context.Context, req *ChatReques
 	r := req.clone()
 	r.Stream = true
 
+	// Request usage data in the final stream chunk if not already set.
+	if r.StreamOptions == nil {
+		r.StreamOptions = &StreamOptions{IncludeUsage: true}
+	}
+
 	c.applyDefaultModel(&r)
 
 	resp, err := c.doRequest(ctx, &r)
diff --git a/openai_chat_test.go b/openai_chat_test.go
@@ -291,6 +291,159 @@ func TestChatCompletionStreamAPIError(t *testing.T) {
 	}
 }
 
+// TestChatCompletionStreamAutoInjectsStreamOptions checks that a streaming
+// request sent without StreamOptions reaches the server with a non-nil
+// StreamOptions whose IncludeUsage is true.
+func TestChatCompletionStreamAutoInjectsStreamOptions(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		var req ChatRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			// Stop here: asserting on a request that failed to decode would
+			// only pile a misleading "StreamOptions = nil" failure on top.
+			t.Errorf("decode request: %v", err)
+			http.Error(w, "bad request body", http.StatusBadRequest)
+			return
+		}
+
+		switch {
+		case req.StreamOptions == nil:
+			t.Error("StreamOptions = nil, want non-nil")
+		case !req.StreamOptions.IncludeUsage:
+			t.Error("StreamOptions.IncludeUsage = false, want true")
+		}
+
+		w.Header().Set("Content-Type", "text/event-stream")
+		// Write errors are deliberately ignored in this test handler.
+		_, _ = fmt.Fprint(w, "data: [DONE]\n\n")
+		// Flush only when the writer supports it, rather than calling
+		// through a nil interface value.
+		if flusher, ok := w.(http.Flusher); ok {
+			flusher.Flush()
+		}
+	}))
+	defer srv.Close()
+
+	c, err := NewClient(WithAPIKey("sk-test"), WithBaseURL(srv.URL))
+	if err != nil {
+		t.Fatalf("NewClient: %v", err)
+	}
+
+	// Caller does NOT set StreamOptions.
+	stream, err := c.ChatCompletionStream(context.Background(), &ChatRequest{
+		Model:    ModelOpenaiGPT4o,
+		Messages: []Message{{Role: RoleUser, Content: NewTextContent("Hi")}},
+	})
+	if err != nil {
+		t.Fatalf("ChatCompletionStream: %v", err)
+	}
+	defer func() { _ = stream.Close() }()
+
+	// Drain the stream until it reports io.EOF.
+	for {
+		_, err := stream.Recv()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			t.Fatalf("Recv: %v", err)
+		}
+	}
+}
+
+// TestChatCompletionStreamPreservesExplicitStreamOptions checks that a
+// caller-supplied StreamOptions with IncludeUsage set to false reaches the
+// server unchanged instead of being overridden to true.
+func TestChatCompletionStreamPreservesExplicitStreamOptions(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		var req ChatRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			// Stop here: asserting on a request that failed to decode would
+			// only pile a misleading "StreamOptions = nil" failure on top.
+			t.Errorf("decode request: %v", err)
+			http.Error(w, "bad request body", http.StatusBadRequest)
+			return
+		}
+
+		switch {
+		case req.StreamOptions == nil:
+			t.Error("StreamOptions = nil, want non-nil")
+		case req.StreamOptions.IncludeUsage:
+			t.Error("StreamOptions.IncludeUsage = true, want false (caller explicitly set false)")
+		}
+
+		w.Header().Set("Content-Type", "text/event-stream")
+		// Write errors are deliberately ignored in this test handler.
+		_, _ = fmt.Fprint(w, "data: [DONE]\n\n")
+		// Flush only when the writer supports it, rather than calling
+		// through a nil interface value.
+		if flusher, ok := w.(http.Flusher); ok {
+			flusher.Flush()
+		}
+	}))
+	defer srv.Close()
+
+	c, err := NewClient(WithAPIKey("sk-test"), WithBaseURL(srv.URL))
+	if err != nil {
+		t.Fatalf("NewClient: %v", err)
+	}
+
+	// Caller explicitly sets IncludeUsage: false.
+	stream, err := c.ChatCompletionStream(context.Background(), &ChatRequest{
+		Model:         ModelOpenaiGPT4o,
+		Messages:      []Message{{Role: RoleUser, Content: NewTextContent("Hi")}},
+		StreamOptions: &StreamOptions{IncludeUsage: false},
+	})
+	if err != nil {
+		t.Fatalf("ChatCompletionStream: %v", err)
+	}
+	defer func() { _ = stream.Close() }()
+
+	// Drain the stream until it reports io.EOF.
+	for {
+		_, err := stream.Recv()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			t.Fatalf("Recv: %v", err)
+		}
+	}
+}
+
+// TestChatCompletionStreamUsage serves two data chunks, the second carrying a
+// usage object, followed by [DONE], and expects stream.Usage() to report
+// those token counts once the stream has been drained.
+func TestChatCompletionStreamUsage(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// The handler flushes after every event, so verify flushing is
+		// supported up front instead of calling through a nil interface.
+		flusher, ok := w.(http.Flusher)
+		if !ok {
+			t.Error("ResponseWriter does not implement http.Flusher")
+			http.Error(w, "streaming unsupported", http.StatusInternalServerError)
+			return
+		}
+
+		w.Header().Set("Content-Type", "text/event-stream")
+
+		chunks := []string{
+			`{"id":"1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}`,
+			`{"id":"1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":8,"completion_tokens":16,"total_tokens":24}}`,
+		}
+
+		// Write errors are deliberately ignored in this test handler.
+		for _, chunk := range chunks {
+			_, _ = fmt.Fprintf(w, "data: %s\n\n", chunk)
+			flusher.Flush()
+		}
+
+		_, _ = fmt.Fprint(w, "data: [DONE]\n\n")
+		flusher.Flush()
+	}))
+	defer srv.Close()
+
+	c, err := NewClient(WithAPIKey("sk-test"), WithBaseURL(srv.URL))
+	if err != nil {
+		t.Fatalf("NewClient: %v", err)
+	}
+
+	stream, err := c.ChatCompletionStream(context.Background(), &ChatRequest{
+		Model:    ModelOpenaiGPT4o,
+		Messages: []Message{{Role: RoleUser, Content: NewTextContent("Hi")}},
+	})
+	if err != nil {
+		t.Fatalf("ChatCompletionStream: %v", err)
+	}
+	defer func() { _ = stream.Close() }()
+
+	// Usage is only inspected after the stream has been read to io.EOF.
+	for {
+		_, err := stream.Recv()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			t.Fatalf("Recv: %v", err)
+		}
+	}
+
+	usage := stream.Usage()
+	if usage == nil {
+		t.Fatal("Usage() = nil, want non-nil")
+	}
+	if usage.PromptTokens != 8 {
+		t.Errorf("prompt_tokens = %d, want 8", usage.PromptTokens)
+	}
+	if usage.CompletionTokens != 16 {
+		t.Errorf("completion_tokens = %d, want 16", usage.CompletionTokens)
+	}
+	if usage.TotalTokens != 24 {
+		t.Errorf("total_tokens = %d, want 24", usage.TotalTokens)
+	}
+}
+
 func TestChatCompletionWithTools(t *testing.T) {
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		var req ChatRequest
diff --git a/openai_stream_test.go b/openai_stream_test.go
@@ -174,3 +174,169 @@ func TestStreamAPIError(t *testing.T) {
 		t.Errorf("code = %q", apiErr.Code)
 	}
 }
+
+// TestStreamUsageFromFinalChunk reads a stream whose last data chunk carries
+// a usage object and expects Usage() to return those token counts after the
+// stream has been read to io.EOF.
+func TestStreamUsageFromFinalChunk(t *testing.T) {
+	events := []string{
+		`data: {"id":"1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}`,
+		`data: {"id":"1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":10,"completion_tokens":20,"total_tokens":30}}`,
+		"data: [DONE]",
+	}
+	// Each event, including the last one, ends with a blank line.
+	payload := strings.Join(events, "\n\n") + "\n\n"
+
+	s := newStream(io.NopCloser(strings.NewReader(payload)))
+
+	// Drain the stream; anything other than io.EOF is a failure.
+	for {
+		if _, err := s.Recv(); err != nil {
+			if !errors.Is(err, io.EOF) {
+				t.Fatalf("Recv: %v", err)
+			}
+			break
+		}
+	}
+
+	got := s.Usage()
+	if got == nil {
+		t.Fatal("Usage() = nil, want non-nil")
+	}
+	if n := got.PromptTokens; n != 10 {
+		t.Errorf("prompt_tokens = %d, want 10", n)
+	}
+	if n := got.CompletionTokens; n != 20 {
+		t.Errorf("completion_tokens = %d, want 20", n)
+	}
+	if n := got.TotalTokens; n != 30 {
+		t.Errorf("total_tokens = %d, want 30", n)
+	}
+}
+
+// TestStreamUsageNilWhenNoUsageChunk reads a stream in which no chunk
+// carries a usage object and expects Usage() to return nil.
+func TestStreamUsageNilWhenNoUsageChunk(t *testing.T) {
+	events := []string{
+		`data: {"id":"1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}`,
+		`data: {"id":"1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}`,
+		"data: [DONE]",
+	}
+	// Each event, including the last one, ends with a blank line.
+	payload := strings.Join(events, "\n\n") + "\n\n"
+
+	s := newStream(io.NopCloser(strings.NewReader(payload)))
+
+	// Drain the stream; anything other than io.EOF is a failure.
+	for {
+		if _, err := s.Recv(); err != nil {
+			if !errors.Is(err, io.EOF) {
+				t.Fatalf("Recv: %v", err)
+			}
+			break
+		}
+	}
+
+	got := s.Usage()
+	if got != nil {
+		t.Errorf("Usage() = %+v, want nil", got)
+	}
+}
+
+// TestStreamCloseOnCloseCallback wraps a stream with WrapStream, drains it,
+// closes it, and expects the callback to have been invoked with the usage
+// carried by the final data chunk.
+func TestStreamCloseOnCloseCallback(t *testing.T) {
+	events := []string{
+		`data: {"id":"1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}`,
+		`data: {"id":"1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":5,"completion_tokens":10,"total_tokens":15}}`,
+		"data: [DONE]",
+	}
+	// Each event, including the last one, ends with a blank line.
+	payload := strings.Join(events, "\n\n") + "\n\n"
+
+	// seen records what the callback observed.
+	var seen struct {
+		called bool
+		usage  *Usage
+	}
+
+	s := newStream(io.NopCloser(strings.NewReader(payload)))
+	s = WrapStream(s, func(u *Usage) {
+		seen.called = true
+		seen.usage = u
+	})
+
+	// Drain the stream; anything other than io.EOF is a failure.
+	for {
+		if _, err := s.Recv(); err != nil {
+			if !errors.Is(err, io.EOF) {
+				t.Fatalf("Recv: %v", err)
+			}
+			break
+		}
+	}
+
+	if err := s.Close(); err != nil {
+		t.Fatalf("Close: %v", err)
+	}
+
+	if !seen.called {
+		t.Fatal("onClose callback was not called")
+	}
+	got := seen.usage
+	if got == nil {
+		t.Fatal("callback received nil usage, want non-nil")
+	}
+	if n := got.PromptTokens; n != 5 {
+		t.Errorf("prompt_tokens = %d, want 5", n)
+	}
+	if n := got.CompletionTokens; n != 10 {
+		t.Errorf("completion_tokens = %d, want 10", n)
+	}
+	if n := got.TotalTokens; n != 15 {
+		t.Errorf("total_tokens = %d, want 15", n)
+	}
+}
+
+// TestStreamCloseOnCloseCallbackWithoutUsage wraps a stream that never
+// carries a usage object, drains and closes it, and expects the callback to
+// have been invoked with a nil usage.
+func TestStreamCloseOnCloseCallbackWithoutUsage(t *testing.T) {
+	events := []string{
+		`data: {"id":"1","object":"chat.completion.chunk","created":1,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}`,
+		"data: [DONE]",
+	}
+	// Each event, including the last one, ends with a blank line.
+	payload := strings.Join(events, "\n\n") + "\n\n"
+
+	// seen records what the callback observed.
+	var seen struct {
+		called bool
+		usage  *Usage
+	}
+
+	s := newStream(io.NopCloser(strings.NewReader(payload)))
+	s = WrapStream(s, func(u *Usage) {
+		seen.called = true
+		seen.usage = u
+	})
+
+	// Drain the stream; anything other than io.EOF is a failure.
+	for {
+		if _, err := s.Recv(); err != nil {
+			if !errors.Is(err, io.EOF) {
+				t.Fatalf("Recv: %v", err)
+			}
+			break
+		}
+	}
+
+	if err := s.Close(); err != nil {
+		t.Fatalf("Close: %v", err)
+	}
+
+	if !seen.called {
+		t.Fatal("onClose callback was not called")
+	}
+	if got := seen.usage; got != nil {
+		t.Errorf("callback usage = %+v, want nil", got)
+	}
+}
+
+// TestWrapStreamNil expects WrapStream, given a nil stream, to return nil
+// and to have already invoked the callback with a nil usage by the time it
+// returns.
+func TestWrapStreamNil(t *testing.T) {
+	// seen records what the callback observed.
+	var seen struct {
+		called bool
+		usage  *Usage
+	}
+
+	wrapped := WrapStream(nil, func(u *Usage) {
+		seen.called = true
+		seen.usage = u
+	})
+
+	if wrapped != nil {
+		t.Errorf("WrapStream(nil, cb) = %v, want nil", wrapped)
+	}
+	if !seen.called {
+		t.Fatal("onClose callback was not called immediately for nil stream")
+	}
+	if got := seen.usage; got != nil {
+		t.Errorf("callback usage = %+v, want nil", got)
+	}
+}
+
+// TestWrapStreamNilCallbackNil expects WrapStream to return nil, without
+// panicking, when both the stream and the callback are nil.
+func TestWrapStreamNilCallbackNil(t *testing.T) {
+	if wrapped := WrapStream(nil, nil); wrapped != nil {
+		t.Errorf("WrapStream(nil, nil) = %v, want nil", wrapped)
+	}
+}
diff --git a/schema.go b/schema.go
@@ -49,25 +49,31 @@ type Thinking struct {
 	BudgetTokens int    `json:"budget_tokens,omitempty"`
 }
 
+// StreamOptions configures streaming behavior; ChatRequest carries it as its "stream_options" JSON field.
+type StreamOptions struct {
+	IncludeUsage bool `json:"include_usage"` // no omitempty, so an explicit false is still written to the JSON body
+}
+
 // ChatRequest represents a request to the chat completions API.
 type ChatRequest struct {
-	Model            string    `json:"model"`
-	Messages         []Message `json:"messages"`
-	Temperature      *float64  `json:"temperature,omitempty"`
-	MaxTokens        *int      `json:"max_tokens,omitempty"`
-	TopP             *float64  `json:"top_p,omitempty"`
-	N                *int      `json:"n,omitempty"`
-	Stop             []string  `json:"stop,omitempty"`
-	FrequencyPenalty *float64  `json:"frequency_penalty,omitempty"`
-	PresencePenalty  *float64  `json:"presence_penalty,omitempty"`
-	Seed             *int      `json:"seed,omitempty"`
-	User             string    `json:"user,omitempty"`
-	ResponseFormat   any       `json:"response_format,omitempty"`
-	Stream           bool      `json:"stream,omitempty"`
-	Tools            []Tool    `json:"tools,omitempty"`
-	ToolChoice       any       `json:"tool_choice,omitempty"`
-	Thinking         *Thinking `json:"thinking,omitempty"`
-	ReasoningEffort  string    `json:"reasoning_effort,omitempty"`
+	Model            string         `json:"model"`
+	Messages         []Message      `json:"messages"`
+	Temperature      *float64       `json:"temperature,omitempty"`
+	MaxTokens        *int           `json:"max_tokens,omitempty"`
+	TopP             *float64       `json:"top_p,omitempty"`
+	N                *int           `json:"n,omitempty"`
+	Stop             []string       `json:"stop,omitempty"`
+	FrequencyPenalty *float64       `json:"frequency_penalty,omitempty"`
+	PresencePenalty  *float64       `json:"presence_penalty,omitempty"`
+	Seed             *int           `json:"seed,omitempty"`
+	User             string         `json:"user,omitempty"`
+	ResponseFormat   any            `json:"response_format,omitempty"`
+	Stream           bool           `json:"stream,omitempty"`
+	StreamOptions    *StreamOptions `json:"stream_options,omitempty"` // sent as "stream_options"; left out of the JSON body when nil
+	Tools            []Tool         `json:"tools,omitempty"`
+	ToolChoice       any            `json:"tool_choice,omitempty"`
+	Thinking         *Thinking      `json:"thinking,omitempty"`
+	ReasoningEffort  string         `json:"reasoning_effort,omitempty"`
 }
 
 // ChatResponse represents a response from the chat completions API.
diff --git a/stream.go b/stream.go