Skip to content

Commit 63460fc

Browse files
authored
fix(enricher): clean up streaming output and add --no-stream to generate (#67)
* fix(enricher): clean up streaming output and add --no-stream to generate

  Remove per-chunk [api]/[schema] prefixes from streaming output — they were
  injected on every flush (each newline), making the LLM response unreadable.
  Stream raw chunks instead via new StreamWriter.WriteRaw().

  Redirect streaming output to stderr (alongside the progress bar) so the two
  no longer interleave unpredictably on the terminal.

  Add --no-stream and --concurrency flags to the generate command, bringing
  feature parity with the enrich command.

  Signed-off-by: spencercjh <spencercjh@gmail.com>

* test(enricher): strengthen streaming assertions and update docs

  - Assert deterministic chunk content/order in streaming tests instead of
    just checking "non-empty and no prefix" (addresses Copilot C1, C2)
  - Remove stale [api]/[schema]/[param] prefix references from CLAUDE.md
    and docs/ai-enrichment.md (addresses Qodo Q2)

  Signed-off-by: spencercjh <spencercjh@gmail.com>

---------

Signed-off-by: spencercjh <spencercjh@gmail.com>
1 parent b900064 commit 63460fc

8 files changed

Lines changed: 74 additions & 17 deletions

File tree

CLAUDE.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,8 @@ LLM_API_KEY="your-deepseek-api-key" ./build/spec-forge enrich \
263263
--no-stream
264264
```
265265

266-
> **Note:** Streaming is enabled by default, showing real-time LLM output with batch-type prefixes
267-
> (`[api]`, `[schema]`, `[param]`). With streaming on, batches are processed sequentially for readable output.
266+
> **Note:** Streaming is enabled by default, showing real-time LLM output to stderr.
267+
> With streaming on, batches are processed sequentially for readable output.
268268
> Use `--no-stream` to enable concurrent processing across batches for faster enrichment.
269269
270270
Expected output:

cmd/enrich.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ func runEnrich(cmd *cobra.Command, args []string) error {
161161
result, err := e.Enrich(ctx, spec, &enricher.EnrichOptions{
162162
Language: lang,
163163
Stream: &streamEnabled,
164+
Writer: os.Stderr,
164165
Force: forceFlag,
165166
})
166167
if err != nil {

cmd/generate.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ func runGenerate(cmd *cobra.Command, args []string) error { //nolint:gocyclo //
8181
overwriteOutput, _ := cmd.Flags().GetBool("overwrite-output")
8282
//nolint:errcheck
8383
protoImportPaths, _ := cmd.Flags().GetStringSlice("proto-import-path")
84+
//nolint:errcheck
85+
noStream, _ := cmd.Flags().GetBool("no-stream")
86+
//nolint:errcheck
87+
concurrency, _ := cmd.Flags().GetInt("concurrency")
8488

8589
// Step 1: Detect framework - try all registered extractors
8690
extractorImpl, info, err := builtin.DetectFramework(path)
@@ -187,7 +191,7 @@ func runGenerate(cmd *cobra.Command, args []string) error { //nolint:gocyclo //
187191
// Step 6: Enrich with AI (optional)
188192
cfg := config.Get()
189193
if !skipEnrich && cfg.Enrich.Enabled && cfg.Enrich.Provider != "" && cfg.Enrich.Model != "" {
190-
if enrichErr := enrichGeneratedSpec(ctx, genResult.SpecFilePath, cfg, language); enrichErr != nil {
194+
if enrichErr := enrichGeneratedSpec(ctx, genResult.SpecFilePath, cfg, language, noStream, concurrency); enrichErr != nil {
191195
// Log warning but don't fail - enrichment is optional
192196
slog.WarnContext(ctx, "Enrichment failed (non-fatal)", "error", enrichErr)
193197
}
@@ -313,6 +317,10 @@ to preserve your project's formatting. Use --keep-patched to keep the changes.`,
313317
"overwrite existing local spec file if it already exists")
314318
c.Flags().StringSlice("proto-import-path", nil,
315319
"additional import paths for protoc (-I flags), can be specified multiple times")
320+
c.Flags().Bool("no-stream", false,
321+
"disable streaming to enable concurrent LLM calls (faster, but no real-time output)")
322+
c.Flags().Int("concurrency", 3,
323+
"max concurrent LLM calls (only effective with --no-stream)")
316324

317325
registerCompletion(c, "output", []string{"yaml", "json"})
318326
registerCompletion(c, "language", []string{"en", "zh"})
@@ -335,6 +343,8 @@ var (
335343
generatePublishOverwrite bool
336344
generateOverwriteOutput bool
337345
generateProtoImportPaths []string
346+
generateNoStream bool
347+
generateConcurrency int
338348
)
339349

340350
func init() {
@@ -364,14 +374,18 @@ func init() {
364374
"overwrite existing local spec file if it already exists")
365375
generateCmd.Flags().StringSliceVar(&generateProtoImportPaths, "proto-import-path", nil,
366376
"additional import paths for protoc (-I flags), can be specified multiple times")
377+
generateCmd.Flags().BoolVar(&generateNoStream, "no-stream", false,
378+
"disable streaming to enable concurrent LLM calls (faster, but no real-time output)")
379+
generateCmd.Flags().IntVar(&generateConcurrency, "concurrency", 3,
380+
"max concurrent LLM calls (only effective with --no-stream)")
367381

368382
registerCompletion(generateCmd, "output", []string{"yaml", "json"})
369383
registerCompletion(generateCmd, "language", []string{"en", "zh"})
370384
registerCompletion(generateCmd, "publish-target", []string{"readme"})
371385
}
372386

373387
// enrichGeneratedSpec enriches the generated spec with AI-generated descriptions
374-
func enrichGeneratedSpec(ctx context.Context, specFilePath string, cfg *config.Config, language string) error {
388+
func enrichGeneratedSpec(ctx context.Context, specFilePath string, cfg *config.Config, language string, noStream bool, concurrency int) error {
375389
cli.Statusf(os.Stderr, "Enriching OpenAPI spec with AI descriptions...")
376390

377391
// Determine language
@@ -417,6 +431,7 @@ func enrichGeneratedSpec(ctx context.Context, specFilePath string, cfg *config.C
417431
Timeout: timeout,
418432
CustomBaseURL: cfg.Enrich.BaseURL,
419433
CustomPrompts: customPrompts,
434+
Concurrency: concurrency,
420435
}
421436
enricherCfg = enricherCfg.MergeWithDefaults()
422437

@@ -427,7 +442,12 @@ func enrichGeneratedSpec(ctx context.Context, specFilePath string, cfg *config.C
427442
}
428443

429444
// Enrich
430-
result, err := e.Enrich(ctx, spec, &enricher.EnrichOptions{Language: lang})
445+
streamEnabled := !noStream
446+
result, err := e.Enrich(ctx, spec, &enricher.EnrichOptions{
447+
Language: lang,
448+
Stream: &streamEnabled,
449+
Writer: os.Stderr,
450+
})
431451
if err != nil {
432452
// Check if partial enrichment
433453
if partialErr, ok := errors.AsType[*processor.PartialEnrichmentError](err); ok {

docs/ai-enrichment.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,10 @@ LLM_API_KEY="sk-xxx" spec-forge enrich ./openapi.json \
110110

111111
### Streaming (Default)
112112

113-
Shows real-time progress with batch type prefixes:
113+
Shows real-time LLM output as it is generated:
114114

115115
```
116-
[api] Processing batch 1/3...
117-
[schema] Processing batch 2/3...
118-
[param] Processing batch 3/3...
116+
{"summary": "获取用户列表", "description": "检索系统中所有可用的用户信息..."}
119117
```
120118

121119
Best for: Interactive use, seeing progress

internal/enricher/enricher_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -309,10 +309,11 @@ func TestEnricher_WithStreaming(t *testing.T) {
309309
})
310310
require.NoError(t, err)
311311

312-
// Verify streaming output was written
313-
// Note: TemplateType is lowercase, so prefix is "[api]" not "[API]"
312+
// Verify streaming output was written (raw, no prefix)
313+
expected := strings.Join(chunks, "")
314314
output := buf.String()
315-
assert.Contains(t, output, "[api]", "Expected [api] prefix in streaming output")
315+
assert.Contains(t, output, expected, "Expected streaming output to contain concatenated chunks")
316+
assert.NotContains(t, output, "[api]", "Streaming output should not contain prefix markers")
316317
}
317318

318319
// mockStreamingProvider simulates streaming behavior

internal/enricher/processor/batch.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,8 @@ func (p *BatchProcessor) ProcessBatch(ctx context.Context, batch *Batch) (*provi
6868
// Prepare options for provider
6969
var genOpts []provider.Option
7070
if p.streamWriter != nil {
71-
prefix := string(batch.Type) // e.g., "api", "schema", "param" (lowercase from TemplateType)
7271
genOpts = append(genOpts, provider.WithStreamingFunc(func(_ context.Context, chunk []byte) error {
73-
return p.streamWriter.WriteWithPrefix(prefix, chunk)
72+
return p.streamWriter.WriteRaw(chunk)
7473
}))
7574
}
7675

internal/enricher/processor/batch_test.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,10 +408,16 @@ func TestBatchProcessor_ProcessBatch_WithStreaming(t *testing.T) {
408408
t.Fatalf("Flush() error = %v", err)
409409
}
410410

411-
// Verify output contains the prefix
411+
// Verify output was streamed (raw, no prefix)
412412
output := buf.String()
413-
if !strings.Contains(output, "[api]") {
414-
t.Errorf("Expected output to contain '[api]' prefix, got: %s", output)
413+
if !strings.Contains(output, "chunk1") || !strings.Contains(output, "chunk2") {
414+
t.Errorf("Streaming output should contain 'chunk1' and 'chunk2', got: %s", output)
415+
}
416+
if idx1, idx2 := strings.Index(output, "chunk1"), strings.Index(output, "chunk2"); idx1 > idx2 {
417+
t.Errorf("Streaming chunks out of order: expected 'chunk1' before 'chunk2', got: %s", output)
418+
}
419+
if strings.Contains(output, "[api]") {
420+
t.Errorf("Streaming output should not contain prefix, got: %s", output)
415421
}
416422
}
417423

internal/enricher/processor/stream_writer.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,38 @@ func (sw *StreamWriter) flushLocked() error {
171171
return nil
172172
}
173173

174+
// WriteRaw writes a chunk directly to the underlying writer without any prefix.
175+
// It is thread-safe and can be used for streaming LLM output where per-chunk
176+
// prefixes would create visual noise.
177+
func (sw *StreamWriter) WriteRaw(chunk []byte) error {
178+
sw.mu.Lock()
179+
defer sw.mu.Unlock()
180+
181+
sw.metrics.TotalChunks++
182+
sw.metrics.TotalBytes += int64(len(chunk))
183+
184+
if sw.debug {
185+
slog.Debug("stream raw chunk received",
186+
"chunk_size", len(chunk),
187+
"total_chunks", sw.metrics.TotalChunks,
188+
"total_bytes", sw.metrics.TotalBytes,
189+
)
190+
}
191+
192+
// Write directly, handling potential short writes
193+
for len(chunk) > 0 {
194+
n, err := sw.writer.Write(chunk)
195+
if err != nil {
196+
return err
197+
}
198+
if n == 0 {
199+
return io.ErrShortWrite
200+
}
201+
chunk = chunk[n:]
202+
}
203+
return nil
204+
}
205+
174206
// GetMetrics returns current streaming metrics
175207
func (sw *StreamWriter) GetMetrics() StreamWriterMetrics {
176208
sw.mu.Lock()

0 commit comments

Comments (0)