Skip to content

Commit 0bd0aca

Browse files
feat: add sanitize command to remove extensions and clean OpenAPI specs (#59)
Co-authored-by: Blake Preston <[email protected]>
1 parent eed3300 commit 0bd0aca

22 files changed

+1665
-7
lines changed

.github/workflows/ci.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ jobs:
166166
timeout 300 go test -coverprofile=main-coverage.out -covermode=atomic ./... > /dev/null 2>&1 || echo "Main branch tests failed or timed out"
167167
168168
if [ -f main-coverage.out ]; then
169+
# Filter out cmd and tests folders from main branch coverage (same as current branch)
170+
grep -v -E '/cmd/|/tests/' main-coverage.out > main-coverage.filtered.out || true
171+
mv main-coverage.filtered.out main-coverage.out
172+
169173
MAIN_COVERAGE=$(go tool cover -func=main-coverage.out | grep total | awk '{print $3}' || echo "0.0%")
170174
echo "main-coverage=$MAIN_COVERAGE" >> $GITHUB_OUTPUT
171175
echo "Main branch coverage: $MAIN_COVERAGE"

marshaller/coremodel.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,16 @@ type CoreModeler interface {
2424
SetConfig(config *yml.Config)
2525
GetConfig() *yml.Config
2626
Marshal(ctx context.Context, w io.Writer) error
27+
SetUnknownProperties(props []string)
28+
GetUnknownProperties() []string
2729
}
2830

2931
type CoreModel struct {
30-
RootNode *yaml.Node // RootNode is the node that was unmarshaled into this model
31-
Valid bool // Valid indicates whether the model passed validation, ie all its required fields were present and ValidYaml is true
32-
ValidYaml bool // ValidYaml indicates whether the model's underlying YAML representation is valid, for example a mapping node was received for a model
33-
Config *yml.Config // Generally only set on the top-level model that was unmarshaled
32+
RootNode *yaml.Node // RootNode is the node that was unmarshaled into this model
33+
Valid bool // Valid indicates whether the model passed validation, ie all its required fields were present and ValidYaml is true
34+
ValidYaml bool // ValidYaml indicates whether the model's underlying YAML representation is valid, for example a mapping node was received for a model
35+
Config *yml.Config // Generally only set on the top-level model that was unmarshaled
36+
UnknownProperties []string // UnknownProperties lists property keys that were present in the YAML but not defined in the model (excludes extensions which start with "x-")
3437
}
3538

3639
var _ CoreModeler = (*CoreModel)(nil)
@@ -86,6 +89,18 @@ func (c *CoreModel) GetConfig() *yml.Config {
8689
return c.Config
8790
}
8891

92+
func (c *CoreModel) SetUnknownProperties(props []string) {
93+
c.UnknownProperties = props
94+
}
95+
96+
func (c *CoreModel) GetUnknownProperties() []string {
97+
if c.UnknownProperties == nil {
98+
return []string{}
99+
}
100+
101+
return c.UnknownProperties
102+
}
103+
89104
// GetJSONPointer returns the JSON pointer path from the topLevelRootNode to this CoreModel's RootNode.
90105
// Returns an empty string if the node is not found or if either node is nil.
91106
// The returned pointer follows RFC6901 format (e.g., "/path/to/node").

marshaller/unmarshaller.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,10 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str
347347

348348
jobValidationErrs := make([][]error, numJobs)
349349

350+
// Track unknown properties (non-extension, non-field, non-embedded map properties)
351+
var unknownPropertiesMutex sync.Mutex
352+
unknownProperties := make([]string, 0, numJobs)
353+
350354
// Mutex to protect concurrent access to extensionsField
351355
var extensionsMutex sync.Mutex
352356

@@ -363,15 +367,16 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str
363367
// Direct field index lookup (eliminates map[string]Field allocation)
364368
fieldIndex, ok := fieldMap.FieldIndexes[key]
365369
if !ok {
366-
if strings.HasPrefix(key, "x-") && extensionsField != nil {
370+
switch {
371+
case strings.HasPrefix(key, "x-") && extensionsField != nil:
367372
// Lock access to extensionsField to prevent concurrent modification
368373
extensionsMutex.Lock()
369374
defer extensionsMutex.Unlock()
370375
err := UnmarshalExtension(keyNode, valueNode, *extensionsField)
371376
if err != nil {
372377
return err
373378
}
374-
} else if embeddedMap != nil {
379+
case embeddedMap != nil:
375380
// Skip alias definitions - these are nodes where:
376381
// 1. The value node has an anchor (e.g., &keyAlias)
377382
// 2. The key is not an alias reference (doesn't start with *)
@@ -381,6 +386,11 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str
381386
return nil
382387
}
383388
jobMapContent[i/2] = append(jobMapContent[i/2], keyNode, valueNode)
389+
default:
390+
// This is an unknown property (not a recognized field, not an extension, not in embedded map)
391+
unknownPropertiesMutex.Lock()
392+
unknownProperties = append(unknownProperties, key)
393+
unknownPropertiesMutex.Unlock()
384394
}
385395
} else {
386396
// Get field info from cache and field value directly
@@ -438,6 +448,11 @@ func unmarshalModel(ctx context.Context, parentName string, node *yaml.Node, str
438448
validationErrs = append(validationErrs, embeddedMapValidationErrs...)
439449
}
440450

451+
// Store unknown properties in the core model if any were found
452+
if len(unknownProperties) > 0 {
453+
unmarshallable.SetUnknownProperties(unknownProperties)
454+
}
455+
441456
// Use the errors to determine the validity of the model
442457
unmarshallable.DetermineValidity(validationErrs)
443458

mise-tasks/test-cli

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ $CLI spec validate --help > /dev/null
2626
$CLI spec upgrade --help > /dev/null
2727
$CLI spec inline --help > /dev/null
2828
$CLI spec clean --help > /dev/null
29+
$CLI spec sanitize --help > /dev/null
2930
$CLI spec bundle --help > /dev/null
3031
$CLI spec join --help > /dev/null
3132
$CLI spec bootstrap --help > /dev/null
@@ -122,6 +123,23 @@ if ! diff -q dist/test/test-cleaned-empty.yaml openapi/testdata/clean/clean_empt
122123
exit 1
123124
fi
124125

126+
# Test sanitize command with known test files
127+
echo " ✓ Testing sanitize command..."
128+
$CLI spec sanitize openapi/testdata/sanitize/sanitize_input.yaml dist/test/test-sanitized.yaml > /dev/null
129+
$CLI spec sanitize --config openapi/testdata/sanitize/sanitize_pattern_config.yaml openapi/testdata/sanitize/sanitize_pattern_input.yaml dist/test/test-sanitized-pattern.yaml > /dev/null
130+
131+
# Compare sanitize outputs with expected
132+
echo " ✓ Comparing sanitize outputs with expected..."
133+
if ! diff -q dist/test/test-sanitized.yaml openapi/testdata/sanitize/sanitize_expected.yaml > /dev/null; then
134+
echo " ❌ Sanitize output differs from expected"
135+
exit 1
136+
fi
137+
138+
if ! diff -q dist/test/test-sanitized-pattern.yaml openapi/testdata/sanitize/sanitize_pattern_expected.yaml > /dev/null; then
139+
echo " ❌ Sanitize pattern output differs from expected"
140+
exit 1
141+
fi
142+
125143
# Test join command with known test files
126144
echo " ✓ Testing join command..."
127145
$CLI spec join openapi/testdata/join/main.yaml openapi/testdata/join/subdir/second.yaml openapi/testdata/join/third.yaml dist/test/test-joined-counter.yaml > /dev/null
@@ -161,6 +179,8 @@ $CLI spec validate dist/test/test-joined-counter.yaml > /dev/null
161179
$CLI spec validate dist/test/test-joined-filepath.yaml > /dev/null
162180
$CLI spec validate dist/test/test-cleaned.yaml > /dev/null
163181
$CLI spec validate dist/test/test-cleaned-empty.yaml > /dev/null
182+
$CLI spec validate dist/test/test-sanitized.yaml > /dev/null
183+
$CLI spec validate dist/test/test-sanitized-pattern.yaml > /dev/null
164184
$CLI spec validate dist/test/test-joined-conflicts.yaml > /dev/null
165185

166186
# Test arazzo validation with known test files
@@ -232,7 +252,7 @@ echo "✅ All CLI integration tests passed!"
232252
echo "📊 Test summary:"
233253
echo " - Tested all command help outputs"
234254
echo " - Validated known good and bad files"
235-
echo " - Tested bootstrap, upgrade, inline, clean, bundle, join commands"
255+
echo " - Tested bootstrap, upgrade, inline, clean, sanitize, bundle, join commands"
236256
echo " - Compared outputs with expected results"
237257
echo " - Tested arazzo validation"
238258
echo " - Tested overlay validation, apply, and compare"

mise-tasks/test-coverage

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ if ! gotestsum --format testname -- -race -coverprofile=coverage.out -covermode=
77
exit 1
88
fi
99

10+
# Filter out cmd and tests folders from coverage report
11+
if [ -f coverage.out ]; then
12+
echo "🔧 Filtering cmd and tests folders from coverage report..."
13+
grep -v -E '/cmd/|/tests/' coverage.out > coverage.filtered.out || true
14+
# Overwrite the original profile with the filtered one so reporting uses the filtered data
15+
mv coverage.filtered.out coverage.out
16+
fi
17+
1018
echo ""
1119
echo "## 📊 Test Coverage Report"
1220
echo ""

openapi/cmd/README.md

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ OpenAPI specifications define REST APIs in a standard format. These commands hel
1212
- [`upgrade`](#upgrade)
1313
- [`inline`](#inline)
1414
- [`clean`](#clean)
15+
- [`sanitize`](#sanitize)
1516
- [`bundle`](#bundle)
1617
- [Bundle vs Inline](#bundle-vs-inline)
1718
- [`join`](#join)
@@ -216,6 +217,166 @@ components:
216217
- You're preparing a specification for publication or distribution
217218
- You want to reduce document size and complexity
218219
- You're maintaining a large specification with many components
220+
221+
### `sanitize`
222+
223+
Remove unwanted elements from an OpenAPI specification to create clean, standards-compliant documents.
224+
225+
```bash
226+
# Default sanitization (remove all extensions, unused components, and unknown properties)
227+
openapi spec sanitize ./spec.yaml
228+
229+
# Sanitize and write to new file
230+
openapi spec sanitize ./spec.yaml ./clean-spec.yaml
231+
232+
# Sanitize in-place
233+
openapi spec sanitize -w ./spec.yaml
234+
235+
# Use config file for selective sanitization
236+
openapi spec sanitize --config sanitize-config.yaml ./spec.yaml
237+
```
238+
239+
**Default Behavior (no config):**
240+
241+
By default, sanitize performs aggressive cleanup:
242+
243+
- Removes ALL x-* vendor extensions throughout the document
244+
- Removes unused components (schemas, responses, parameters, etc.)
245+
- Removes unknown properties not defined in the OpenAPI specification
246+
247+
**Configuration File Support:**
248+
249+
Create a YAML configuration file to control sanitization behavior:
250+
251+
```yaml
252+
# sanitize-config.yaml
253+
254+
# Only remove extensions that match these patterns. null removes ALL extensions; [] removes no extensions (default: null).
255+
extensionPatterns:
256+
- "x-go-*"
257+
- "x-internal-*"
258+
259+
# Keep unused components (default: false, removes them)
260+
keepUnusedComponents: true
261+
262+
# Keep unknown properties (default: false, removes them)
263+
keepUnknownProperties: true
264+
```
265+
266+
**What gets sanitized:**
267+
268+
- **Extensions**: All x-* vendor extensions (info, paths, operations, schemas, etc.)
269+
- **Unused Components**: Schemas, responses, parameters, examples, request bodies, headers, security schemes, links, callbacks, and path items that aren't referenced
270+
- **Unknown Properties**: Properties not defined in the OpenAPI specification
271+
272+
**Before sanitization:**
273+
274+
```yaml
275+
openapi: 3.1.0
276+
info:
277+
title: My API
278+
version: 1.0.0
279+
x-api-id: internal-123
280+
x-go-package: myapi
281+
paths:
282+
/users:
283+
get:
284+
operationId: listUsers
285+
x-go-name: ListUsers
286+
x-rate-limit: 100
287+
responses:
288+
'200':
289+
description: Success
290+
content:
291+
application/json:
292+
schema:
293+
$ref: '#/components/schemas/User'
294+
components:
295+
schemas:
296+
User:
297+
type: object
298+
x-go-type: User
299+
properties:
300+
id:
301+
type: string
302+
UnusedSchema:
303+
type: object
304+
description: Not referenced anywhere
305+
```
306+
307+
**After sanitization (default):**
308+
309+
```yaml
310+
openapi: 3.1.0
311+
info:
312+
title: My API
313+
version: 1.0.0
314+
paths:
315+
/users:
316+
get:
317+
operationId: listUsers
318+
responses:
319+
'200':
320+
description: Success
321+
content:
322+
application/json:
323+
schema:
324+
$ref: '#/components/schemas/User'
325+
components:
326+
schemas:
327+
User:
328+
type: object
329+
properties:
330+
id:
331+
type: string
332+
```
333+
334+
**After sanitization (with pattern config):**
335+
336+
Using config with `extensionPatterns: ["x-go-*"]`:
337+
338+
```yaml
339+
openapi: 3.1.0
340+
info:
341+
title: My API
342+
version: 1.0.0
343+
x-api-id: internal-123 # kept (doesn't match x-go-*)
344+
paths:
345+
/users:
346+
get:
347+
operationId: listUsers
348+
x-rate-limit: 100 # kept (doesn't match x-go-*)
349+
responses:
350+
'200':
351+
description: Success
352+
content:
353+
application/json:
354+
schema:
355+
$ref: '#/components/schemas/User'
356+
components:
357+
schemas:
358+
User:
359+
type: object
360+
properties:
361+
id:
362+
type: string
363+
```
364+
365+
**Benefits of sanitization:**
366+
367+
- **Standards compliance**: Remove vendor-specific extensions for clean, standard specs
368+
- **Clean distribution**: Prepare specifications for public sharing or publishing
369+
- **Reduced size**: Remove unnecessary extensions and unused components
370+
- **Selective cleanup**: Use patterns to target specific extension families
371+
- **Flexible control**: Config file allows fine-grained control over what to keep
372+
373+
**Use Sanitize when:**
374+
375+
- You want to remove all vendor extensions before publishing
376+
- You're preparing specifications for standards-compliant distribution
377+
- You need to clean up internal annotations before sharing externally
378+
- You want to remove specific extension families (e.g., x-go-*, x-internal-*)
379+
- You're combining extension removal with component cleanup in one operation
219380

220381
### `bundle`
221382

openapi/cmd/root.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ func Apply(rootCmd *cobra.Command) {
88
rootCmd.AddCommand(upgradeCmd)
99
rootCmd.AddCommand(inlineCmd)
1010
rootCmd.AddCommand(cleanCmd)
11+
rootCmd.AddCommand(sanitizeCmd)
1112
rootCmd.AddCommand(bundleCmd)
1213
rootCmd.AddCommand(joinCmd)
1314
rootCmd.AddCommand(bootstrapCmd)

0 commit comments

Comments
 (0)