Skip to content

Commit b770e66

Browse files
erraggy and claude
committed
fix(joiner): include documentation fields in semantic-dedup equivalence by default
Two schemas that differ only in title/description/example/examples are no longer treated as equivalent by the joiner's schema comparison. Semantic deduplication previously replaced the surviving canonical schema's docs at every $ref site, producing misleading API docs (a 403 response landing on a schema whose description said "The request is invalid").

- Add EquivalenceDocs type with "include" (default, strict) and "ignore" (legacy loose) values
- Add CompareSchemasWithOptions accepting CompareOptions; CompareSchemas delegates with the strict default
- Thread compareDocs bool through compareShallow/compareDeep and all recursive helpers
- Add JoinerConfig.EquivalenceDocs, WithEquivalenceDocs option, and --equivalence-docs CLI flag
- Update metadata-only tests to reflect the new strict default; add regression test mirroring issue #363 (BadRequest/Forbidden/NotFound error-response schemas)
- Update joiner/deep_dive.md with the new flag and default change

Callers needing the old loose behavior can opt in via WithEquivalenceDocs("ignore") or --equivalence-docs ignore.

Fixes #363

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent bdfe66f commit b770e66

14 files changed

Lines changed: 609 additions & 68 deletions

builder/builder_dedupe_test.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,13 @@ func TestBuilder_WithSemanticDeduplication_Disabled(t *testing.T) {
164164
assert.Len(t, doc.Components.Schemas, 2)
165165
}
166166

167-
func TestBuilder_DeduplicateSchemas_MetadataIgnored(t *testing.T) {
167+
func TestBuilder_DeduplicateSchemas_MetadataPreserved(t *testing.T) {
168+
// Regression for issue #363: under the strict equivalence default, the
169+
// builder must not consolidate schemas that differ in documentation.
170+
// Doing so would replace the surviving canonical schema's docs at every
171+
// reference site, producing misleading API documentation.
168172
b := New(parser.OASVersion320, WithSemanticDeduplication(true))
169173

170-
// Schemas differ only in metadata - should still be deduplicated
171174
b.addSchema("Address", &parser.Schema{
172175
Type: "object",
173176
Title: "An Address",
@@ -188,8 +191,11 @@ func TestBuilder_DeduplicateSchemas_MetadataIgnored(t *testing.T) {
188191
doc, err := b.BuildOAS3()
189192
require.NoError(t, err)
190193

191-
// Should deduplicate since structural properties are the same
192-
assert.Len(t, doc.Components.Schemas, 1)
194+
// Both schemas must survive because their documentation differs.
195+
assert.Len(t, doc.Components.Schemas, 2,
196+
"strict equivalence should preserve schemas with divergent metadata")
197+
assert.Equal(t, "An Address", doc.Components.Schemas["Address"].Title)
198+
assert.Equal(t, "A Location", doc.Components.Schemas["Location"].Title)
193199
}
194200

195201
func TestBuilder_DeduplicateSchemas_MultipleGroups(t *testing.T) {

cmd/oastools/commands/common.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,15 @@ func ValidateEquivalenceMode(value string) error {
7373
return nil
7474
}
7575

76+
// ValidateEquivalenceDocs validates the equivalence-docs flag and returns an
77+
// error if invalid. Empty values are accepted and resolved to the default.
78+
func ValidateEquivalenceDocs(value string) error {
79+
if value != "" && !joiner.IsValidEquivalenceDocs(value) {
80+
return fmt.Errorf("invalid equivalence-docs '%s'. Valid values: %v", value, joiner.ValidEquivalenceDocs())
81+
}
82+
return nil
83+
}
84+
7685
// ValidatePrimaryOperationPolicy validates the primary operation policy flag value.
7786
func ValidatePrimaryOperationPolicy(policy string) error {
7887
if policy == "" {

cmd/oastools/commands/common_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,31 @@ func TestValidateEquivalenceMode(t *testing.T) {
8989
}
9090
}
9191

92+
func TestValidateEquivalenceDocs(t *testing.T) {
93+
tests := []struct {
94+
name string
95+
value string
96+
wantErr bool
97+
}{
98+
{"empty value", "", false},
99+
{"valid include", "include", false},
100+
{"valid ignore", "ignore", false},
101+
{"invalid value", "strict", true},
102+
{"case sensitive INCLUDE", "INCLUDE", true},
103+
}
104+
105+
for _, tt := range tests {
106+
t.Run(tt.name, func(t *testing.T) {
107+
err := ValidateEquivalenceDocs(tt.value)
108+
if tt.wantErr {
109+
assert.Error(t, err)
110+
} else {
111+
assert.NoError(t, err)
112+
}
113+
})
114+
}
115+
}
116+
92117
func TestMarshalDocument(t *testing.T) {
93118
doc := map[string]string{"key": "value"}
94119

cmd/oastools/commands/join.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ type JoinFlags struct {
7878
// Advanced collision strategies
7979
RenameTemplate string
8080
EquivalenceMode string
81+
EquivalenceDocs string
8182
CollisionReport bool
8283
SemanticDedup bool
8384
// Namespace prefix configuration
@@ -114,6 +115,8 @@ func SetupJoinFlags() (*flag.FlagSet, *JoinFlags) {
114115
// Advanced collision strategies
115116
fs.StringVar(&flags.RenameTemplate, "rename-template", "{{.Name}}_{{.Source}}", "template for renamed schema names")
116117
fs.StringVar(&flags.EquivalenceMode, "equivalence-mode", "none", "schema comparison mode for deduplication (none, shallow, deep)")
118+
fs.StringVar(&flags.EquivalenceDocs, "equivalence-docs", "include",
119+
"whether title/description/example/examples participate in schema equivalence (include, ignore)")
117120
fs.BoolVar(&flags.CollisionReport, "collision-report", false, "generate detailed collision analysis report")
118121
fs.BoolVar(&flags.SemanticDedup, "semantic-dedup", false, "enable semantic deduplication to consolidate identical schemas")
119122

@@ -243,6 +246,9 @@ func HandleJoin(args []string) error {
243246
// Apply advanced collision strategy settings
244247
config.RenameTemplate = flags.RenameTemplate
245248
config.EquivalenceMode = flags.EquivalenceMode
249+
if flags.EquivalenceDocs != "" {
250+
config.EquivalenceDocs = flags.EquivalenceDocs
251+
}
246252
config.CollisionReport = flags.CollisionReport
247253
config.SemanticDeduplication = flags.SemanticDedup
248254

@@ -266,6 +272,9 @@ func HandleJoin(args []string) error {
266272
if err := ValidateEquivalenceMode(flags.EquivalenceMode); err != nil {
267273
return err
268274
}
275+
if err := ValidateEquivalenceDocs(flags.EquivalenceDocs); err != nil {
276+
return err
277+
}
269278
if err := ValidatePrimaryOperationPolicy(flags.PrimaryOperationPolicy); err != nil {
270279
return err
271280
}

docs/cli-reference.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,7 @@ oastools join [flags] <file1> <file2> [file3...]
599599
| `--primary-operation-policy` | | Policy for selecting primary operation: `first`, `most-specific`, `alphabetical` (default: `first`) |
600600
| `--semantic-dedup` | | Enable semantic deduplication to consolidate identical schemas |
601601
| `--equivalence-mode` | | Schema comparison mode for deduplication: `none`, `shallow`, `deep` (default: `none`) |
602+
| `--equivalence-docs` | | Whether `title`, `description`, `example`, and `examples` participate in schema equivalence: `include` (default, strict), `ignore` (legacy loose) |
602603
| `--collision-report` | | Generate detailed collision analysis report |
603604
| `--namespace-prefix` | | Namespace prefix for source file (format: source=prefix, can be repeated) |
604605
| `--always-prefix` | | Apply namespace prefix to all schemas, not just on collision |

joiner/deep_dive.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,6 +1374,45 @@ func main() {
13741374
}
13751375
```
13761376

1377+
### Preserving Documentation During Deduplication
1378+
1379+
Since v1.54.0, semantic equivalence is **strict by default**: two schemas that
1380+
differ only in `title`, `description`, `example`, or `examples` are treated
1381+
as **not equivalent** and are preserved as separate schemas. This prevents a
1382+
subtle documentation-clobbering bug where every `$ref` site to a
1383+
consolidated schema ended up pointing at a canonical schema whose prose
1384+
applied to a different context (for example, a 403 response landing on a
1385+
schema whose description said "The request is invalid").
1386+
1387+
Callers that explicitly want the legacy loose behavior — where schemas with
1388+
identical structure are merged regardless of docs — can opt in with
1389+
`WithEquivalenceDocs`:
1390+
1391+
```go
1392+
result, err := joiner.JoinWithOptions(
1393+
joiner.WithFilePaths("users-api.yaml", "admin-api.yaml"),
1394+
joiner.WithSemanticDeduplication(true),
1395+
// Accept that consolidated schemas' docs will be replaced
1396+
// at every $ref site.
1397+
joiner.WithEquivalenceDocs(string(joiner.EquivalenceDocsIgnore)),
1398+
)
1399+
```
1400+
1401+
The CLI exposes the same control via `--equivalence-docs`:
1402+
1403+
```bash
1404+
# Default: strict. Schemas with differing docs are preserved.
1405+
oastools join --semantic-dedup -o merged.yaml api1.yaml api2.yaml
1406+
1407+
# Loose: legacy behavior. Differing docs are discarded during dedup.
1408+
oastools join --semantic-dedup --equivalence-docs ignore \
1409+
-o merged.yaml api1.yaml api2.yaml
1410+
```
1411+
1412+
The same `WithEquivalenceDocs` option applies to the `deduplicate` collision
1413+
strategy (`SchemaStrategy = StrategyDeduplicateEquivalent`) and to the
1414+
`SemanticDeduplication` post-merge pass.
1415+
13771416
### High-Performance Joining with Pre-Parsed Documents
13781417

13791418
For integration with other oastools packages, use pre-parsed documents for 154x faster performance:

0 commit comments

Comments
 (0)