diff --git a/.spec-forge.example.yaml b/.spec-forge.example.yaml index 39f7b80..80f0f07 100644 --- a/.spec-forge.example.yaml +++ b/.spec-forge.example.yaml @@ -16,6 +16,16 @@ enrich: language: zh # Request timeout timeout: 60s + # Custom prompt templates (optional) + # Override built-in prompt templates for any type: api, schema, param, response + # Supports Go template syntax (same as built-in templates) + # customPrompts: + # api: + # system: "You are a REST API documentation writer..." + # user: "API: {{.Method}} {{.Path}}\nDescribe this endpoint." + # schema: + # system: "You are a data model documenter..." + # user: "Schema: {{.SchemaName}}\nDescribe this data model and its fields." # Output Settings output: diff --git a/cmd/enrich.go b/cmd/enrich.go index 7b99a6e..24185b0 100644 --- a/cmd/enrich.go +++ b/cmd/enrich.go @@ -139,6 +139,9 @@ func runEnrich(cmd *cobra.Command, args []string) error { customAPIKeyEnv = cfg.Enrich.APIKeyEnv } + // Map custom prompts from config + customPrompts := enricher.CustomPromptsFromMap(cfg.Enrich.CustomPrompts) + enricherCfg := enricher.Config{ Provider: prov, Model: model, @@ -147,6 +150,7 @@ func runEnrich(cmd *cobra.Command, args []string) error { Timeout: timeoutFlag, CustomBaseURL: customBaseURL, CustomAPIKeyEnv: customAPIKeyEnv, + CustomPrompts: customPrompts, } enricherCfg = enricherCfg.MergeWithDefaults() diff --git a/cmd/generate.go b/cmd/generate.go index 8861878..25416ee 100644 --- a/cmd/generate.go +++ b/cmd/generate.go @@ -409,6 +409,9 @@ func enrichGeneratedSpec(ctx context.Context, specFilePath string, cfg *config.C } } + // Map custom prompts from config + customPrompts := enricher.CustomPromptsFromMap(cfg.Enrich.CustomPrompts) + // Create enricher config enricherCfg := enricher.Config{ Provider: cfg.Enrich.Provider, @@ -416,6 +419,7 @@ func enrichGeneratedSpec(ctx context.Context, specFilePath string, cfg *config.C Language: lang, Timeout: timeout, CustomBaseURL: cfg.Enrich.BaseURL, + CustomPrompts: 
customPrompts, } enricherCfg = enricherCfg.MergeWithDefaults() diff --git a/docs/plans/2026-03-31-p5-prompt-optimization-design.md b/docs/plans/2026-03-31-p5-prompt-optimization-design.md new file mode 100644 index 0000000..ee1441e --- /dev/null +++ b/docs/plans/2026-03-31-p5-prompt-optimization-design.md @@ -0,0 +1,157 @@ +# P5 Prompt Optimization Design + +> **Status:** Implemented +> **Date:** 2026-03-31 +> **Parent Issue:** #40 (Phase 4 - LangchainGo Features) + +## Overview + +Improve LLM enrichment output quality by enriching context passing, rewriting built-in prompt templates, and adding custom prompt file support. + +## Current State + +| Feature | Status | Detail | +|-------------------|---------------|----------------------------------------------------------| +| Context passing | Minimal | Only name, type, required passed to templates | +| System prompts | Generic | All 4 types use "You are an API documentation expert" | +| Few-shot examples | None | Templates contain no input/output examples | +| Constraints/enums | Ignored | OpenAPI format, enum, min/max, pattern not passed to LLM | +| API tags | Ignored | Operation tags not included in context | +| Custom prompts | Not supported | No way to override built-in templates | + +## Design + +### Optimization 1: Enriched Context Passing + +**Problem:** Templates receive only `Name`, `Type`, `Required` for fields/params. The OpenAPI spec contains much richer metadata that would help LLMs generate more precise descriptions. + +**Solution:** Pass additional spec metadata to templates: + +**FieldContext additions:** +- `Format` — e.g., `"email"`, `"date-time"`, `"uuid"` +- `Enum` — allowed values, e.g., `["active", "inactive"]` +- `Constraints` — human-readable string: `"min: 0, max: 100, pattern: ^[a-z]+$"` +- `ExistingDescription` — existing description from spec (useful in `--force` mode) + +**ParamFieldContext additions:** Same as FieldContext. 
+ +**TemplateContext additions (API-specific):** +- `Tags` — operation tags from the spec +- `ExistingSummary` / `ExistingDescription` — existing partial documentation + +**Example impact on Schema prompt:** + +``` +Before: +- email (string, required) + +After: +- email (string, required, format: email, maxLength: 255) +- role (string, optional, enum: [admin, user, guest]) +``` + +### Optimization 2: Improved Built-in Prompts + +**Problem:** Generic system prompts produce generic descriptions. No examples, no quality guidelines, no output constraints. + +**Solution:** Type-specific system prompts with: + +1. **Role definition** — Different expert roles per type (API writer, data modeler, parameter documenter) +2. **Quality guidelines** — Specific rules per type (e.g., "Summary starts with a verb", "Avoid repeating field name") +3. **Few-shot examples** — Input/output pairs showing expected quality +4. **Explicit output format** — JSON schema with constraints + +**API Template (before):** +``` +System: You are an API documentation expert. Generate concise, clear descriptions. +Respond in {{.Language}} language. +Output format: JSON with "summary" and "description" fields. + +User: API Endpoint: {{.Path}} +HTTP Method: {{.Method}} + +Generate the summary (one line) and description (1-3 sentences) for this API. +``` + +**API Template (after):** +``` +System: You are an expert OpenAPI documentation writer specializing in REST API descriptions. +Your task is to write clear, concise, and informative API summaries and descriptions. + +Guidelines: +- Summary: A single line (max 80 chars) starting with a verb (e.g., "List", "Create", "Delete") +- Description: 1-3 sentences explaining what the endpoint does, when to use it, and notable behavior +- Be specific: mention resource names, ID formats, and key constraints +- Avoid generic phrases like "This API is used for..." + +Respond in {{.Language}} language. 
+Output MUST be valid JSON: {"summary": "...", "description": "..."} + +Example input: + POST /users +Example output: + {"summary": "Create a new user", "description": "Registers a new user account..."} + +User: API Endpoint: {{.Method}} {{.Path}} +{{- if .Tags}} +Tags: {{join .Tags ", "}} +{{- end}} +... +``` + +### Optimization 3: Custom Prompt File Support + +**Problem:** Users cannot customize prompts for their domain without modifying source code. + +**Solution:** Add `customPrompts` section to `.spec-forge.yaml`: + +```yaml +enrich: + customPrompts: + api: + system: "You are a Chinese API documentation writer..." + user: "API: {{.Method}} {{.Path}}\n用中文描述这个接口。" + schema: + system: "You are a data model expert..." +``` + +**Implementation:** Config loads custom prompts via Viper, passes through `enricher.Config.CustomPrompts`, and applies via `TemplateManager.Set()`. + +## Architecture + +``` + ┌─────────────────────┐ + │ .spec-forge.yaml │ + │ customPrompts: │ + │ api/system/user │ + └──────────┬──────────┘ + │ + ▼ +┌──────────────┐ ┌─────────────────────┐ ┌──────────────┐ +│ OpenAPI │───▶│ Collection Layer │───▶│ Template │ +│ Spec │ │ (enricher.go) │ │ Manager │ +│ │ │ - format │ │ │ +│ - format │ │ - enum │ │ Built-in │ +│ - enum │ │ - constraints │ │ + Custom │ +│ - min/max │ │ - tags │ │ overrides │ +│ - pattern │ │ - existing desc │ │ │ +│ - tags │ └─────────────────────┘ └──────┬───────┘ +└──────────────┘ │ + ▼ + ┌──────────────────┐ + │ LLM Provider │ + │ (OpenAI/etc.) │ + └──────────────────┘ +``` + +## Key Decisions + +1. **Backward compatible** — Output format stays the same (`{"summary": "...", "description": "..."}` for API, `{"field": "desc"}` for schema/param). No response parsing changes. + +2. **Constraint helper reuse** — `enricher.go` reuses `processor.BuildConstraintsString` and `processor.BuildEnumStrings` to keep constraint/enum formatting logic centralized in the `processor` package, avoiding duplicated helpers. + +3. 
**ExistingDescription in templates** — Only visible in `--force` mode (fields with existing descriptions are skipped otherwise). When force is on, the LLM can improve or translate existing descriptions. + +4. **Template FuncMap** — Added `join` function (maps to `strings.Join`) for rendering enum/tag lists. Registered in `renderString` via `template.FuncMap`. + +5. **Config key mapping** — Custom prompt keys (`"api"`, `"schema"`, `"param"`, `"response"`) directly match `TemplateType` string constants. diff --git a/docs/plans/2026-03-31-p5-prompt-optimization-implementation.md b/docs/plans/2026-03-31-p5-prompt-optimization-implementation.md new file mode 100644 index 0000000..226b25a --- /dev/null +++ b/docs/plans/2026-03-31-p5-prompt-optimization-implementation.md @@ -0,0 +1,152 @@ +# P5 Prompt Optimization Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Improve LLM enrichment quality by enriching context passing, rewriting built-in prompts with few-shot examples, and adding custom prompt file support. + +**Architecture:** Three-layer improvement: (1) pass more OpenAPI spec metadata (format, enum, constraints, tags) to templates, (2) rewrite templates with type-specific system prompts and examples, (3) allow users to override prompts via `.spec-forge.yaml`. Output format stays the same — no response parsing changes needed. 
+ +**Tech Stack:** Go 1.26, text/template, Viper config, kin-openapi + +--- + +## File Structure + +| File | Responsibility | +|----------------------------------------------|-------------------------------------------------------------| +| `internal/enricher/prompt/templates.go` | Context types, built-in templates, FuncMap, TemplateManager | +| `internal/enricher/prompt/templates_test.go` | Template rendering tests | +| `internal/enricher/processor/schema.go` | Schema field collection with enriched metadata | +| `internal/enricher/processor/processor.go` | FieldElement/ParamFieldItem types, conversion helpers | +| `internal/enricher/enricher.go` | Parameter collection, API context, custom prompt wiring | +| `internal/enricher/config.go` | CustomPrompts field on enricher Config | +| `internal/config/config.go` | CustomPrompts on EnrichConfig | +| `cmd/enrich.go` | Wire custom prompts from config to enricher | +| `cmd/generate.go` | Wire custom prompts in generate pipeline | +| `.spec-forge.example.yaml` | Document new config option | + +--- + +### Task 1: Enrich FieldContext, ParamFieldContext, and TemplateContext types + +**Files:** +- Modify: `internal/enricher/prompt/templates.go` +- Modify: `internal/enricher/prompt/templates_test.go` + +- [x] Add enriched fields to `FieldContext` (Format, Enum, Constraints, ExistingDescription) +- [x] Add enriched fields to `ParamFieldContext` (Format, Enum, Constraints, ExistingDescription) +- [x] Add Tags, ExistingSummary, ExistingDescription to `TemplateContext` +- [x] Add `join` func to `renderString` FuncMap +- [x] Add tests for enriched field rendering +- [x] Commit: `feat(enricher): add enriched context fields to FieldContext, ParamFieldContext, and TemplateContext` + +--- + +### Task 2: Populate enriched context in schema field collection + +**Files:** +- Modify: `internal/enricher/processor/processor.go` +- Modify: `internal/enricher/processor/schema.go` +- Test: `internal/enricher/processor/schema_test.go` + +- [x] 
Add enriched fields to `FieldElement` and `ParamFieldItem` +- [x] Update `convertFieldElements` and `convertParamFieldItems` to propagate enriched fields +- [x] Add exported `BuildConstraintsString` and `BuildEnumStrings` helpers to schema.go +- [x] Update `CollectSchemaFields` to populate Format, Enum, Constraints, ExistingDescription +- [x] Add `TestCollectSchemaFields_EnrichedContext` +- [x] Commit: `feat(enricher): populate enriched context (format, enum, constraints) in schema field collection` + +--- + +### Task 3: Populate enriched context in parameter and API collection + +**Files:** +- Modify: `internal/enricher/enricher.go` +- Modify: `internal/enricher/enricher_test.go` + +- [x] Reuse shared `processor.BuildConstraintsString` and `processor.BuildEnumStrings` helpers for parameter constraint/enum extraction +- [x] Update `collectParameterGroups` to extract format, enum, constraints from param schemas +- [x] Update `collectElements` to pass Tags, ExistingSummary, ExistingDescription for API operations +- [x] Add `TestEnricher_CollectParameters_EnrichedContext` and `TestEnricher_CollectElements_APITags` +- [x] Commit: `feat(enricher): populate enriched context (tags, format, enum, constraints) in parameter and API collection` + +--- + +### Task 4: Rewrite built-in prompt templates + +**Files:** +- Modify: `internal/enricher/prompt/templates.go` +- Modify: `internal/enricher/prompt/templates_test.go` + +- [x] Replace all 4 templates with type-specific system prompts, few-shot examples, quality guidelines +- [x] API template: verb-led summaries, specificity guidelines, tags/existing desc support +- [x] Schema template: constraint-aware descriptions, enum explanation guidance +- [x] Param template: location context, enum guidance, format hints +- [x] Response template: error cause guidance, success content hints +- [x] Add `TestNewTemplateManager_RendersAllTypesWithEnrichedContext` and `TestNewTemplateManager_APITemplateUsesTags` +- [x] Commit: `feat(enricher): rewrite
built-in prompts with type-specific system prompts, few-shot examples, and enriched context` + +--- + +### Task 5: Add custom prompt config + +**Files:** +- Modify: `internal/config/config.go` +- Modify: `internal/enricher/config.go` +- Modify: `.spec-forge.example.yaml` + +- [x] Add `CustomPrompts map[string]CustomPromptCfg` to `config.EnrichConfig` +- [x] Add `CustomPromptConfig` type and `CustomPrompts` field to `enricher.Config` +- [x] Update `.spec-forge.example.yaml` with commented customPrompts section + +--- + +### Task 6: Wire custom prompts through enricher pipeline + +**Files:** +- Modify: `internal/enricher/enricher.go` +- Modify: `cmd/enrich.go` +- Modify: `cmd/generate.go` + +- [x] Apply custom prompts via `TemplateManager.Set()` in `Enrich()` method +- [x] Map custom prompts from config in `cmd/enrich.go` +- [x] Map custom prompts from config in `cmd/generate.go` +- [x] Commit: `feat(enricher): add custom prompt configuration and wire through enricher pipeline` + +--- + +### Task 7: Integration test and verification + +**Files:** +- Modify: `internal/enricher/enricher_test.go` + +- [x] Add `TestEnricher_CustomPrompts` with `trackingMockProvider` +- [x] Fix lint issues (perfsprint, gocritic rangeValCopy) +- [x] `make fmt`, `make lint` (0 issues), `make test` (all pass) +- [x] Commit: `chore: fix lint issues and add custom prompts integration test` + +--- + +## Verification + +```bash +# Build +go build -o ./build/spec-forge . + +# Test with real LLM +LLM_API_KEY="your-key" ./build/spec-forge enrich \ + ./integration-tests/maven-springboot-openapi-demo/target/openapi.json \ + --provider custom --model deepseek-chat \ + --custom-base-url https://api.deepseek.com/v1 \ + --language zh -v + +# Test custom prompts via config +# Add to .spec-forge.yaml: +# enrich: +# customPrompts: +# api: +# system: "You are a Chinese API writer..." +# user: "Endpoint: {{.Method}} {{.Path}}\nWrite summary+description in JSON." 
+``` + +Expected: Descriptions leverage format/enum/constraint context for more specific output. diff --git a/internal/config/config.go b/internal/config/config.go index c921d13..c64ca70 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -26,16 +26,23 @@ type ReadMeConfig struct { // EnrichConfig contains LLM enrichment settings. type EnrichConfig struct { - Enabled bool `mapstructure:"enabled"` - Provider string `mapstructure:"provider"` - Model string `mapstructure:"model"` - Language string `mapstructure:"language"` - APIKey string `mapstructure:"apiKey"` - Headers map[string]string `mapstructure:"headers"` - BaseURL string `mapstructure:"baseUrl"` - APIKeyEnv string `mapstructure:"apiKeyEnv"` - Timeout string `mapstructure:"timeout"` - SkipEnrich bool `mapstructure:"skipEnrich"` + Enabled bool `mapstructure:"enabled"` + Provider string `mapstructure:"provider"` + Model string `mapstructure:"model"` + Language string `mapstructure:"language"` + APIKey string `mapstructure:"apiKey"` + Headers map[string]string `mapstructure:"headers"` + BaseURL string `mapstructure:"baseUrl"` + APIKeyEnv string `mapstructure:"apiKeyEnv"` + Timeout string `mapstructure:"timeout"` + SkipEnrich bool `mapstructure:"skipEnrich"` + CustomPrompts map[string]CustomPromptCfg `mapstructure:"customPrompts"` +} + +// CustomPromptCfg holds custom system/user prompt overrides for a template type. +type CustomPromptCfg struct { + System string `mapstructure:"system"` + User string `mapstructure:"user"` } // OutputConfig contains output settings. diff --git a/internal/enricher/config.go b/internal/enricher/config.go index 891a526..54bc155 100644 --- a/internal/enricher/config.go +++ b/internal/enricher/config.go @@ -3,8 +3,32 @@ package enricher import ( "errors" "time" + + "github.com/spencercjh/spec-forge/internal/config" ) +// CustomPromptConfig holds custom prompt overrides for a template type. 
+type CustomPromptConfig struct { + System string + User string +} + +// CustomPromptsFromMap converts config-layer prompt overrides into enricher-layer +// values. Accepts config.CustomPromptCfg from Viper-loaded configuration. +func CustomPromptsFromMap(m map[string]config.CustomPromptCfg) map[string]CustomPromptConfig { + if m == nil { + return nil + } + result := make(map[string]CustomPromptConfig, len(m)) + for k, v := range m { + result[k] = CustomPromptConfig{ + System: v.System, + User: v.User, + } + } + return result +} + // Config Enricher configuration type Config struct { // Provider type: "openai", "anthropic", "ollama", "custom" @@ -29,6 +53,9 @@ type Config struct { // Advanced configuration PromptTemplateDir string + + // Custom prompt overrides keyed by template type name (e.g., "api", "schema") + CustomPrompts map[string]CustomPromptConfig } // DefaultConfig provides sensible defaults diff --git a/internal/enricher/enricher.go b/internal/enricher/enricher.go index 8a5298a..4870ed1 100644 --- a/internal/enricher/enricher.go +++ b/internal/enricher/enricher.go @@ -7,6 +7,7 @@ import ( "log/slog" "net/http" "os" + "strings" "github.com/getkin/kin-openapi/openapi3" @@ -117,7 +118,9 @@ func (e *Enricher) Enrich(ctx context.Context, spec *openapi3.T, opts *EnrichOpt slog.Info("Enriching spec", "batches", len(batches), "language", language) // Process batches + // Create template manager and apply custom prompts if configured tmplMgr := prompt.NewTemplateManager() + e.applyCustomPrompts(tmplMgr) batchProcessor := processor.NewBatchProcessor(e.provider, tmplMgr, processor.WithStreamWriter(streamWriter)) concurrentProcessor := processor.NewConcurrentProcessor(batchProcessor, e.config.Concurrency) @@ -199,10 +202,13 @@ func (e *Enricher) collectElements(spec *openapi3.T, _ *specctx.EnrichmentContex Type: prompt.TemplateTypeAPI, Path: item.method + " " + pathStr, Context: prompt.TemplateContext{ - Type: prompt.TemplateTypeAPI, - Language: language, - Method: 
item.method, - Path: pathStr, + Type: prompt.TemplateTypeAPI, + Language: language, + Method: item.method, + Path: pathStr, + Tags: op.Tags, + ExistingSummary: op.Summary, + ExistingDescription: op.Description, }, SetValue: func(desc string) { // Parse response and set summary/description @@ -270,17 +276,28 @@ func collectParameterGroups(spec *openapi3.T, collector *processor.SpecCollector } param := paramRef.Value fieldType := "" + format := "" + var enum []string + var constraints string if param.Schema != nil && param.Schema.Value != nil { - fieldType = getSchemaTypeString(param.Schema.Value) + schemaVal := param.Schema.Value + fieldType = getSchemaTypeString(schemaVal) + format = schemaVal.Format + enum = processor.BuildEnumStrings(schemaVal.Enum) + constraints = processor.BuildConstraintsString(schemaVal) } // Capture for closure p := param params = append(params, processor.ParamFieldItem{ - ParamName: param.Name, - ParamIn: param.In, - FieldType: fieldType, - Required: param.Required, + ParamName: param.Name, + ParamIn: param.In, + FieldType: fieldType, + Required: param.Required, + Format: format, + Enum: enum, + Constraints: constraints, + ExistingDescription: param.Description, SetValue: func(desc string) { p.Description = desc }, @@ -297,14 +314,46 @@ func collectParameterGroups(spec *openapi3.T, collector *processor.SpecCollector } } +// applyCustomPrompts merges user-configured custom prompts into the template manager. +// Only non-empty fields override built-in templates; empty fields keep the built-in value. 
+func (e *Enricher) applyCustomPrompts(tmplMgr *prompt.TemplateManager) { + validTypes := map[string]bool{ + string(prompt.TemplateTypeAPI): true, + string(prompt.TemplateTypeSchema): true, + string(prompt.TemplateTypeParam): true, + string(prompt.TemplateTypeResponse): true, + } + for typeKey, customPrompt := range e.config.CustomPrompts { + if !validTypes[typeKey] { + slog.Warn("ignoring custom prompt with invalid type key", "type", typeKey, "valid_keys", []string{string(prompt.TemplateTypeAPI), string(prompt.TemplateTypeSchema), string(prompt.TemplateTypeParam), string(prompt.TemplateTypeResponse)}) + continue + } + ttype := prompt.TemplateType(typeKey) + // Merge with built-in: only override non-empty fields + builtIn, _ := tmplMgr.Get(ttype) //nolint:errcheck // merge handles nil case + system := customPrompt.System + if strings.TrimSpace(system) == "" && builtIn != nil { + system = builtIn.System + } + user := customPrompt.User + if strings.TrimSpace(user) == "" && builtIn != nil { + user = builtIn.User + } + if setErr := tmplMgr.Set(ttype, &prompt.Template{ + System: system, + User: user, + }); setErr != nil { + slog.Warn("ignoring invalid custom prompt template", "type", typeKey, "error", setErr) + continue + } + slog.Debug("applied custom prompt", "type", typeKey) + } +} + // getSchemaTypeString returns a string representation of a schema type. 
func getSchemaTypeString(schema *openapi3.Schema) string { if schema.Type != nil && len(*schema.Type) > 0 { - typeStr := (*schema.Type)[0] - if schema.Format != "" { - return typeStr + "(" + schema.Format + ")" - } - return typeStr + return (*schema.Type)[0] } return "object" } diff --git a/internal/enricher/enricher_test.go b/internal/enricher/enricher_test.go index 63eba68..e373308 100644 --- a/internal/enricher/enricher_test.go +++ b/internal/enricher/enricher_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "errors" + "strings" "testing" "time" @@ -449,3 +450,134 @@ func TestEnricher_WithStreamingDisabled(t *testing.T) { // Buffer should be empty since no streaming occurred assert.Empty(t, buf.String(), "Buffer should be empty when streaming is disabled") } + +func TestEnricher_CollectParameters_EnrichedContext(t *testing.T) { + maxLen := uint64(50) + paths := openapi3.NewPaths() + paths.Set("/users", &openapi3.PathItem{ + Get: &openapi3.Operation{ + Parameters: openapi3.Parameters{ + &openapi3.ParameterRef{ + Value: &openapi3.Parameter{ + Name: "status", + In: "query", + Required: false, + Schema: &openapi3.SchemaRef{Value: &openapi3.Schema{ + Type: &openapi3.Types{"string"}, + Enum: []any{"active", "inactive"}, + MaxLength: &maxLen, + }}, + }, + }, + }, + }, + }) + spec := &openapi3.T{Paths: paths} + + collector := &processor.SpecCollector{} + collectParameterGroups(spec, collector, "en", false) + + batches := collector.GroupByType() + for _, batch := range batches { + if batch.Type == prompt.TemplateTypeParam { + for _, elem := range batch.Elements { + if len(elem.Context.ParamFields) > 0 { + pf := elem.Context.ParamFields[0] + if len(pf.Enum) != 2 { + t.Errorf("param enum count = %d, want 2", len(pf.Enum)) + } + if pf.Constraints == "" { + t.Error("param constraints should not be empty") + } + } + } + } + } +} + +func TestEnricher_CustomPrompts(t *testing.T) { + var capturedPrompt string + mp := &trackingMockProvider{ + response: `{"summary": "Custom", 
"description": "Custom desc"}`, + capture: &capturedPrompt, + } + + cfg := Config{ + Provider: "openai", + Model: "gpt-4o", + Concurrency: 1, + CustomPrompts: map[string]CustomPromptConfig{ + "api": { + System: "Custom system prompt for {{.Language}}.", + User: "Custom user prompt: {{.Method}} {{.Path}}", + }, + }, + } + cfg = cfg.MergeWithDefaults() + + e, err := NewEnricher(cfg, mp) + if err != nil { + t.Fatalf("NewEnricher() error = %v", err) + } + + paths := openapi3.NewPaths() + paths.Set("/test", &openapi3.PathItem{ + Get: &openapi3.Operation{}, + }) + spec := &openapi3.T{Paths: paths} + + _, err = e.Enrich(context.Background(), spec, nil) + if err != nil { + t.Fatalf("Enrich() error = %v", err) + } + + if !strings.Contains(capturedPrompt, "Custom system prompt") { + t.Errorf("expected custom system prompt, got: %s", capturedPrompt) + } + if !strings.Contains(capturedPrompt, "Custom user prompt: GET /test") { + t.Errorf("expected custom user prompt, got: %s", capturedPrompt) + } +} + +// trackingMockProvider captures the prompt sent to Generate. 
+type trackingMockProvider struct { + response string + capture *string +} + +func (m *trackingMockProvider) Generate(_ context.Context, p string, _ ...provider.Option) (string, *provider.TokenUsage, error) { + *m.capture = p + return m.response, nil, nil +} + +func (m *trackingMockProvider) Name() string { return "tracking-mock" } + +func TestEnricher_CollectElements_APITags(t *testing.T) { + paths := openapi3.NewPaths() + paths.Set("/users/{id}", &openapi3.PathItem{ + Get: &openapi3.Operation{ + Tags: []string{"users", "admin"}, + Summary: "", + }, + }) + spec := &openapi3.T{Paths: paths} + + cfg := Config{Provider: "openai", Model: "gpt-4o", Concurrency: 1} + cfg = cfg.MergeWithDefaults() + e, _ := NewEnricher(cfg, &mockProvider{response: `{"summary": "test"}`}) + + collector := e.collectElements(spec, &specctx.EnrichmentContext{Schemas: make(map[string]*specctx.SchemaContext)}, "en", false) + batches := collector.GroupByType() + + for _, batch := range batches { + if batch.Type == prompt.TemplateTypeAPI { + for _, elem := range batch.Elements { + if elem.Context.Path == "/users/{id}" { + if len(elem.Context.Tags) != 2 { + t.Errorf("Tags = %d, want 2", len(elem.Context.Tags)) + } + } + } + } + } +} diff --git a/internal/enricher/processor/batch.go b/internal/enricher/processor/batch.go index c9ed94d..1e57bc6 100644 --- a/internal/enricher/processor/batch.go +++ b/internal/enricher/processor/batch.go @@ -100,7 +100,7 @@ func (p *BatchProcessor) ProcessBatch(ctx context.Context, batch *Batch) (*provi } } else if batch.Type == prompt.TemplateTypeParam && len(elem.ParamGroupFields) > 0 { paramDescriptions := parseSchemaResponse(response) - for _, param := range elem.ParamGroupFields { + for _, param := range elem.ParamGroupFields { //nolint:gocritic // copying is acceptable for callback access if desc, ok := paramDescriptions[param.ParamName]; ok { param.SetValue(desc) } diff --git a/internal/enricher/processor/processor.go b/internal/enricher/processor/processor.go 
index b16db2b..912f7bb 100644 --- a/internal/enricher/processor/processor.go +++ b/internal/enricher/processor/processor.go @@ -85,10 +85,14 @@ type SchemaElement struct { // FieldElement represents a field to be enriched. type FieldElement struct { - FieldName string - FieldType string - Required bool - SetValue func(description string) + FieldName string + FieldType string + Required bool + Format string + Enum []string + Constraints string + ExistingDescription string + SetValue func(description string) } // ParamGroupElement represents a group of parameters from the same API endpoint. @@ -100,11 +104,15 @@ type ParamGroupElement struct { // ParamFieldItem represents a single parameter within a group. type ParamFieldItem struct { - ParamName string - ParamIn string - FieldType string - Required bool - SetValue func(description string) + ParamName string + ParamIn string + FieldType string + Required bool + Format string + Enum []string + Constraints string + ExistingDescription string + SetValue func(description string) } // AddSchemaElement adds a schema element to the collector. @@ -133,9 +141,13 @@ func convertFieldElements(fields []FieldElement) []prompt.FieldContext { result := make([]prompt.FieldContext, len(fields)) for i, f := range fields { result[i] = prompt.FieldContext{ - Name: f.FieldName, - Type: f.FieldType, - Required: f.Required, + Name: f.FieldName, + Type: f.FieldType, + Required: f.Required, + Format: f.Format, + Enum: f.Enum, + Constraints: f.Constraints, + ExistingDescription: f.ExistingDescription, } } return result @@ -144,12 +156,16 @@ func convertFieldElements(fields []FieldElement) []prompt.FieldContext { // convertParamFieldItems converts ParamFieldItem slice to ParamFieldContext slice. 
func convertParamFieldItems(items []ParamFieldItem) []prompt.ParamFieldContext { result := make([]prompt.ParamFieldContext, len(items)) - for i, p := range items { + for i, p := range items { //nolint:gocritic // copying is acceptable for small conversion result[i] = prompt.ParamFieldContext{ - Name: p.ParamName, - Type: p.FieldType, - ParamIn: p.ParamIn, - Required: p.Required, + Name: p.ParamName, + Type: p.FieldType, + ParamIn: p.ParamIn, + Required: p.Required, + Format: p.Format, + Enum: p.Enum, + Constraints: p.Constraints, + ExistingDescription: p.ExistingDescription, } } return result diff --git a/internal/enricher/processor/schema.go b/internal/enricher/processor/schema.go index 1f2c6bf..8df1eaa 100644 --- a/internal/enricher/processor/schema.go +++ b/internal/enricher/processor/schema.go @@ -1,8 +1,10 @@ package processor import ( + "fmt" "log/slog" "slices" + "strings" "github.com/getkin/kin-openapi/openapi3" @@ -54,9 +56,13 @@ func CollectSchemaFields( } field := FieldElement{ - FieldName: propName, - FieldType: getSchemaTypeName(prop), - Required: containsString(schema.Required, propName), + FieldName: propName, + FieldType: getSchemaTypeName(prop), + Required: containsString(schema.Required, propName), + Format: prop.Format, + Enum: BuildEnumStrings(prop.Enum), + Constraints: BuildConstraintsString(prop), + ExistingDescription: prop.Description, } // Capture prop for closure @@ -109,3 +115,42 @@ func getSchemaTypeName(schema *openapi3.Schema) string { func containsString(slice []string, s string) bool { return slices.Contains(slice, s) } + +// BuildConstraintsString builds a human-readable constraints description from a schema. 
+func BuildConstraintsString(schema *openapi3.Schema) string { + var parts []string + if schema.Min != nil { + parts = append(parts, fmt.Sprintf("min: %v", *schema.Min)) + } + if schema.Max != nil { + parts = append(parts, fmt.Sprintf("max: %v", *schema.Max)) + } + if schema.MinLength > 0 { + parts = append(parts, fmt.Sprintf("minLength: %d", schema.MinLength)) + } + if schema.MaxLength != nil { + parts = append(parts, fmt.Sprintf("maxLength: %d", *schema.MaxLength)) + } + if schema.Pattern != "" { + parts = append(parts, "pattern: "+schema.Pattern) + } + if schema.MinItems > 0 { + parts = append(parts, fmt.Sprintf("minItems: %d", schema.MinItems)) + } + if schema.MaxItems != nil { + parts = append(parts, fmt.Sprintf("maxItems: %d", *schema.MaxItems)) + } + return strings.Join(parts, ", ") +} + +// BuildEnumStrings converts []any to []string for enum values. +func BuildEnumStrings(enum []any) []string { + if len(enum) == 0 { + return nil + } + result := make([]string, len(enum)) + for i, v := range enum { + result[i] = fmt.Sprintf("%v", v) + } + return result +} diff --git a/internal/enricher/processor/schema_test.go b/internal/enricher/processor/schema_test.go index d03e2d6..ece6971 100644 --- a/internal/enricher/processor/schema_test.go +++ b/internal/enricher/processor/schema_test.go @@ -1,6 +1,7 @@ package processor_test import ( + "strings" "testing" "github.com/getkin/kin-openapi/openapi3" @@ -144,3 +145,55 @@ func TestCollectSchemaFields_SkipFieldsWithDescription(t *testing.T) { t.Errorf("expected 2 fields (id, description), got %d", len(userSchema.Fields)) } } + +func TestCollectSchemaFields_EnrichedContext(t *testing.T) { + maxLen := uint64(255) + spec := &openapi3.T{ + Components: &openapi3.Components{ + Schemas: openapi3.Schemas{ + "User": &openapi3.SchemaRef{ + Value: &openapi3.Schema{ + Type: &openapi3.Types{"object"}, + Properties: openapi3.Schemas{ + "email": &openapi3.SchemaRef{Value: &openapi3.Schema{ + Type: &openapi3.Types{"string"}, + Format: 
"email", + MaxLength: &maxLen, + }}, + "role": &openapi3.SchemaRef{Value: &openapi3.Schema{ + Type: &openapi3.Types{"string"}, + Enum: []any{"admin", "user", "guest"}, + }}, + }, + Required: []string{"email"}, + }, + }, + }, + }, + } + + collector := &processor.SpecCollector{} + processed := make(map[string]bool) + processor.CollectSchemaFields("User", spec.Components.Schemas["User"], collector, processed, "en", 0, false) + + schemas := collector.GetSchemas() + if len(schemas) != 1 { + t.Fatalf("expected 1 schema, got %d", len(schemas)) + } + + for _, f := range schemas[0].Fields { + if f.FieldName == "email" { + if f.Format != "email" { + t.Errorf("email Format = %q, want %q", f.Format, "email") + } + if !strings.Contains(f.Constraints, "maxLength: 255") { + t.Errorf("email Constraints = %q, want to contain maxLength: 255", f.Constraints) + } + } + if f.FieldName == "role" { + if len(f.Enum) != 3 { + t.Errorf("role Enum = %d, want 3", len(f.Enum)) + } + } + } +} diff --git a/internal/enricher/prompt/templates.go b/internal/enricher/prompt/templates.go index 6ef7a03..3266e6e 100644 --- a/internal/enricher/prompt/templates.go +++ b/internal/enricher/prompt/templates.go @@ -2,6 +2,8 @@ package prompt import ( "bytes" + "fmt" + "strings" "text/template" ) @@ -17,17 +19,25 @@ const ( // FieldContext provides specctx for a schema field type FieldContext struct { - Name string - Type string - Required bool + Name string + Type string + Required bool + Format string // e.g., "email", "date-time", "uuid" + Enum []string // allowed values, e.g., ["active", "inactive"] + Constraints string // human-readable: "min: 0, max: 100, pattern: ^[a-z]+$" + ExistingDescription string // existing description from the spec, if any } // ParamFieldContext provides specctx for a parameter in a group. 
type ParamFieldContext struct { - Name string - Type string - ParamIn string // path, query, header, cookie - Required bool + Name string + Type string + ParamIn string // path, query, header, cookie + Required bool + Format string // e.g., "int32", "uuid" + Enum []string // allowed values + Constraints string // human-readable validation rules + ExistingDescription string // existing description from the spec, if any } // TemplateContext provides specctx for template rendering @@ -36,8 +46,11 @@ type TemplateContext struct { Language string // API specctx - Path string - Method string + Path string + Method string + Tags []string + ExistingSummary string + ExistingDescription string // Schema specctx SchemaName string @@ -81,7 +94,9 @@ func (t *Template) Render(ctx TemplateContext) (system, user string, err error) } func renderString(tmpl string, data any) (string, error) { - t, err := template.New("prompt").Parse(tmpl) + t, err := template.New("prompt").Funcs(template.FuncMap{ + "join": strings.Join, + }).Parse(tmpl) if err != nil { return "", err } @@ -104,40 +119,107 @@ func NewTemplateManager() *TemplateManager { return &TemplateManager{ templates: map[TemplateType]*Template{ TemplateTypeAPI: { - System: `You are an API documentation expert. Generate concise, clear descriptions. + System: `You are an expert OpenAPI documentation writer specializing in REST API descriptions. +Your task is to write clear, concise, and informative API summaries and descriptions. + +Guidelines: +- Summary: A single line (max 80 chars) starting with a verb (e.g., "List", "Create", "Delete") +- Description: 1-3 sentences explaining what the endpoint does, when to use it, and notable behavior +- Be specific: mention resource names, ID formats, and key constraints +- Avoid generic phrases like "This API is used for..." + Respond in {{.Language}} language. 
-Output format: JSON with "summary" and "description" fields.`, - User: `API Endpoint: {{.Path}} -HTTP Method: {{.Method}} +Output MUST be valid JSON: {"summary": "...", "description": "..."} + +Example input: + POST /users +Example output: + {"summary": "Create a new user", "description": "Registers a new user account in the system. The request body must include a valid email address and password. Returns the created user object with a generated ID."}`, + User: `API Endpoint: {{.Method}} {{.Path}} +{{- if .Tags}} +Tags: {{join .Tags ", "}} +{{- end}} +{{- if .ExistingSummary}} +Existing summary: {{.ExistingSummary}} +{{- end}} +{{- if .ExistingDescription}} +Existing description: {{.ExistingDescription}} +{{- end}} Generate the summary (one line) and description (1-3 sentences) for this API.`, }, TemplateTypeSchema: { - System: `You are an API documentation expert. Generate concise field descriptions. + System: `You are an expert OpenAPI data model documenter. +Your task is to write concise, precise field descriptions for API data schemas. + +Guidelines: +- Each description should be 1-2 sentences explaining what the field represents +- Mention constraints (format, range, pattern) when relevant to clarify the field's purpose +- For enum fields, briefly explain what the values represent if the field name alone isn't clear +- Avoid repeating the field name in the description +- Be specific about units, formats, and expected values + Respond in {{.Language}} language. 
-Output format: JSON mapping field names to descriptions.`, +Output MUST be valid JSON mapping field names to descriptions: {"field1": "description1", "field2": "description2", ...} + +Example input: + Schema: User + Fields: + - email (string, required, format: email, maxLength: 255) + - role (string, optional, enum: [admin, user, guest]) +Example output: + {"email": "The user's primary email address used for authentication and notifications", "role": "The user's permission level determining access to system features"}`, User: `Schema: {{.SchemaName}} Fields: -{{range .Fields}}- {{.Name}} ({{.Type}}, {{if .Required}}required{{else}}optional{{end}}) +{{range .Fields}}- {{.Name}} ({{.Type}}, {{if .Required}}required{{else}}optional{{end}}{{if .Format}}, format: {{.Format}}{{end}}{{if .Constraints}}, {{.Constraints}}{{end}}{{if .Enum}}, enum: [{{join .Enum ", "}}]{{end}}{{if .ExistingDescription}}, existing: {{.ExistingDescription}}{{end}}) {{end}} - Generate a description for each field.`, }, TemplateTypeParam: { - System: `You are an API documentation expert. Generate concise parameter descriptions. + System: `You are an expert API parameter documenter. +Your task is to write concise, precise parameter descriptions for REST API endpoints. + +Guidelines: +- Each description should be 1-2 sentences explaining what the parameter controls +- Mention the parameter location context (path, query, header) when it affects behavior +- For enum parameters, briefly describe what the allowed values represent +- Include the unit or format when relevant (e.g., "page number starting from 1") +- Avoid generic descriptions like "the X parameter" + Respond in {{.Language}} language. 
-Output format: JSON mapping parameter names to descriptions.`, +Output MUST be valid JSON mapping parameter names to descriptions: {"param1": "description1", ...} + +Example input: + API: GET /users + Parameters: + - page (integer, in: query, optional) + - status (string, in: query, optional, enum: [active, inactive]) +Example output: + {"page": "Page number for pagination, starting from 1. Defaults to 1 if not specified.", "status": "Filter users by account status. Use 'active' for current users or 'inactive' for deactivated accounts."}`, User: `API: {{.Method}} {{.Path}} Parameters: -{{range .ParamFields}}- {{.Name}} ({{.Type}}, in: {{.ParamIn}}, {{if .Required}}required{{else}}optional{{end}}) +{{range .ParamFields}}- {{.Name}} ({{.Type}}, in: {{.ParamIn}}, {{if .Required}}required{{else}}optional{{end}}{{if .Format}}, format: {{.Format}}{{end}}{{if .Constraints}}, {{.Constraints}}{{end}}{{if .Enum}}, enum: [{{join .Enum ", "}}]{{end}}{{if .ExistingDescription}}, existing: {{.ExistingDescription}}{{end}}) {{end}} - Generate a description for each parameter.`, }, TemplateTypeResponse: { - System: `You are an API documentation expert. Generate concise response descriptions. + System: `You are an expert API response documenter. +Your task is to write brief, informative response descriptions for REST API endpoints. + +Guidelines: +- Describe what the response represents and when it is returned +- For error responses (4xx, 5xx), mention common causes +- For success responses (2xx), mention what data is returned +- Keep descriptions to 1-2 sentences + Respond in {{.Language}} language. -Output format: JSON with "description" field.`, +Output MUST be valid JSON: {"description": "..."} + +Example input: + API: GET /users/{id} + Response Code: 404 +Example output: + {"description": "The requested user was not found. 
Verify the user ID is correct and the user has not been deleted."}`, User: `API: {{.Method}} {{.Path}} Response Code: {{.ResponseCode}} @@ -157,6 +239,20 @@ func (m *TemplateManager) Get(ttype TemplateType) (*Template, error) { } // Set sets a custom template -func (m *TemplateManager) Set(ttype TemplateType, tmpl *Template) { +func (m *TemplateManager) Set(ttype TemplateType, tmpl *Template) error { + if tmpl == nil { + return fmt.Errorf("template for %q cannot be nil", ttype) + } + // Validate templates can be parsed + if _, err := template.New("system").Funcs(template.FuncMap{"join": strings.Join}).Parse(tmpl.System); err != nil { + return fmt.Errorf("invalid system prompt template for %q: %w", ttype, err) + } + if strings.TrimSpace(tmpl.User) == "" { + return fmt.Errorf("user prompt template for %q cannot be empty", ttype) + } + if _, err := template.New("user").Funcs(template.FuncMap{"join": strings.Join}).Parse(tmpl.User); err != nil { + return fmt.Errorf("invalid user prompt template for %q: %w", ttype, err) + } m.templates[ttype] = tmpl + return nil } diff --git a/internal/enricher/prompt/templates_test.go b/internal/enricher/prompt/templates_test.go index 60e591a..5b7f210 100644 --- a/internal/enricher/prompt/templates_test.go +++ b/internal/enricher/prompt/templates_test.go @@ -1,6 +1,7 @@ package prompt import ( + "strings" "testing" ) @@ -157,13 +158,147 @@ func TestTemplateManager_GetAllTypes(t *testing.T) { // Helper function func containsAll(s string, substrs ...string) bool { for _, substr := range substrs { - if !contains(s, substr) { + if !strings.Contains(s, substr) { return false } } return true } -func contains(s, substr string) bool { - return len(s) >= len(substr) && s[0:len(substr)] == substr || len(s) > len(substr) && contains(s[1:], substr) +func TestTemplateContext_EnrichedFields(t *testing.T) { + ctx := TemplateContext{ + Type: TemplateTypeAPI, + Language: "en", + Method: "GET", + Path: "/users/{id}", + Tags: []string{"users", "admin"}, 
+ ExistingSummary: "Get user", + ExistingDescription: "Returns a user by ID", + Fields: []FieldContext{ + { + Name: "email", + Type: "string", + Required: true, + Format: "email", + Enum: []string{}, + Constraints: "maxLength: 255", + }, + }, + ParamFields: []ParamFieldContext{ + { + Name: "status", + Type: "string", + ParamIn: "query", + Enum: []string{"active", "inactive"}, + }, + }, + } + + if len(ctx.Tags) != 2 { + t.Errorf("Tags = %d, want 2", len(ctx.Tags)) + } + if ctx.ExistingSummary != "Get user" { + t.Errorf("ExistingSummary = %q, want %q", ctx.ExistingSummary, "Get user") + } + if ctx.Fields[0].Format != "email" { + t.Errorf("Field Format = %q, want %q", ctx.Fields[0].Format, "email") + } + if len(ctx.ParamFields[0].Enum) != 2 { + t.Errorf("Param Enum = %d, want 2", len(ctx.ParamFields[0].Enum)) + } +} + +func TestNewTemplateManager_RendersAllTypesWithEnrichedContext(t *testing.T) { + mgr := NewTemplateManager() + + types := []TemplateType{TemplateTypeAPI, TemplateTypeSchema, TemplateTypeParam, TemplateTypeResponse} + for _, tt := range types { + t.Run(string(tt), func(t *testing.T) { + tmpl, err := mgr.Get(tt) + if err != nil { + t.Fatalf("Get(%s) error = %v", tt, err) + } + + ctx := TemplateContext{ + Type: tt, + Language: "en", + Method: "GET", + Path: "/users/{id}", + Tags: []string{"users"}, + Fields: []FieldContext{ + {Name: "email", Type: "string", Required: true, Format: "email", Constraints: "maxLength: 255"}, + }, + ParamFields: []ParamFieldContext{ + {Name: "id", Type: "integer", ParamIn: "path", Required: true}, + }, + SchemaName: "User", + ResponseCode: "200", + } + + system, user, err := tmpl.Render(ctx) + if err != nil { + t.Fatalf("Render() error = %v", err) + } + if system == "" { + t.Errorf("%s: system prompt should not be empty", tt) + } + if user == "" { + t.Errorf("%s: user prompt should not be empty", tt) + } + }) + } +} + +func TestNewTemplateManager_APITemplateUsesTags(t *testing.T) { + mgr := NewTemplateManager() + tmpl, err := 
mgr.Get(TemplateTypeAPI) + if err != nil { + t.Fatalf("Get(API) error = %v", err) + } + + ctx := TemplateContext{ + Language: "en", + Method: "GET", + Path: "/users/{id}", + Tags: []string{"users", "admin"}, + ExistingSummary: "Get user", + ExistingDescription: "Returns a user by ID", + } + + _, user, err := tmpl.Render(ctx) + if err != nil { + t.Fatalf("Render() error = %v", err) + } + + if !containsAll(user, "users, admin", "Get user", "Returns a user by ID") { + t.Errorf("API user prompt should contain tags and existing descriptions, got: %s", user) + } +} + +func TestTemplate_RenderWithEnrichedFieldContext(t *testing.T) { + tmpl := &Template{ + User: `Schema: {{.SchemaName}} +{{range .Fields}}- {{.Name}} ({{.Type}}, {{if .Format}}format: {{.Format}}, {{end}}{{if .Required}}required{{else}}optional{{end}}{{if .Constraints}}, {{.Constraints}}{{end}}{{if .Enum}}, enum: [{{join .Enum ", "}}]{{end}}) +{{end}}`, + } + + ctx := TemplateContext{ + SchemaName: "User", + Fields: []FieldContext{ + {Name: "email", Type: "string", Required: true, Format: "email", Constraints: "maxLength: 255"}, + {Name: "role", Type: "string", Required: false, Enum: []string{"admin", "user", "guest"}}, + }, + } + + _, user, err := tmpl.Render(ctx) + if err != nil { + t.Fatalf("Render() error = %v", err) + } + + if !containsAll(user, "email", "format: email", "required", "maxLength: 255") { + t.Errorf("User prompt missing expected enriched content: %s", user) + } + if !containsAll(user, "role", "enum: [admin, user, guest]") { + t.Errorf("User prompt missing enum content: %s", user) + } }