Skip to content

Commit 4ad246b

Browse files
authored
Merge pull request #30 from DhruvGarg111/feat/add-json-schema-validation
Add JSON Schema validation for output files
2 parents 5bda190 + 8c4eb8e commit 4ad246b

File tree

6 files changed

+171
-2
lines changed

6 files changed

+171
-2
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ That's it. Scans your project and prints a risk-scored inventory of every AI com
4949
# CycloneDX SBOM for compliance
5050
ai-bom scan . -f cyclonedx -o ai-bom.cdx.json
5151

52+
# Validate JSON output against schema
53+
ai-bom scan . -f cyclonedx --validate
54+
5255
# SARIF for GitHub Code Scanning
5356
ai-bom scan . -f sarif -o results.sarif
5457

@@ -328,6 +331,15 @@ graph LR
328331
| CSV | `-f csv` | Spreadsheet analysis |
329332
| JUnit | `-f junit` | CI/CD test reporting |
330333

334+
## JSON Schema Validation
335+
336+
AI-BOM provides a built-in JSON Schema for validating scan results, ensuring they conform to the expected structure (CycloneDX 1.6 + Trusera extensions).
337+
338+
- **Schema file:** `src/ai_bom/schema/bom-schema.json`
339+
- **Validation command:** `ai-bom scan . --format cyclonedx --validate`
340+
341+
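This is particularly useful in CI/CD pipelines to ensure generated SBOMs are valid before ingestion into tools like Dependency-Track. The `--validate` flag takes effect only with the `json` and `cyclonedx` output formats; if validation fails, the command prints the validation error and exits with status 1.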
342+
331343
<details>
332344
<summary>CycloneDX output example</summary>
333345

src/ai_bom/cli.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from ai_bom.scanners import get_all_scanners
2626
from ai_bom.scanners.ast_scanner import ASTScanner
2727
from ai_bom.utils.risk_scorer import score_component
28+
from ai_bom.utils.validator import validate_output
2829

2930
# Exit codes
3031
EXIT_ERROR = 2 # Operational errors (bad path, network failure, parse error, etc.)
@@ -345,6 +346,11 @@ def scan(
345346
"--max-file-size",
346347
help="Max file size in MB (default: 10). Increase for large models.",
347348
),
349+
validate_schema: bool = typer.Option(
350+
False,
351+
"--validate",
352+
help="Validate JSON output against schema",
353+
),
348354
json_output: bool = typer.Option(
349355
False,
350356
"--json",
@@ -560,6 +566,17 @@ def scan(
560566
reporter = get_reporter(format)
561567
output_str = reporter.render(result)
562568

569+
# Validate schema if requested
570+
if validate_schema and format in ["json", "cyclonedx"]:
571+
try:
572+
data = json.loads(output_str)
573+
validate_output(data)
574+
if format == "table" and not quiet:
575+
console.print("[green]JSON Schema validation passed.[/green]")
576+
except Exception as e:
577+
console.print(f"[red]Schema validation failed: {e}[/red]")
578+
raise typer.Exit(1) from None
579+
563580
# Write to file if output specified
564581
if output:
565582
reporter.write(result, output)

src/ai_bom/schema/bom-schema.json

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"title": "AI-BOM CycloneDX Export",
4+
"description": "JSON Schema for AI-BOM's CycloneDX 1.6 output format with Trusera extensions.",
5+
"type": "object",
6+
"required": ["bomFormat", "specVersion", "version", "serialNumber", "metadata", "components"],
7+
"properties": {
8+
"bomFormat": {
9+
"type": "string",
10+
"const": "CycloneDX"
11+
},
12+
"specVersion": {
13+
"type": "string",
14+
"const": "1.6"
15+
},
16+
"version": {
17+
"type": "integer"
18+
},
19+
"serialNumber": {
20+
"type": "string",
21+
"pattern": "^urn:uuid:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
22+
},
23+
"metadata": {
24+
"type": "object",
25+
"required": ["timestamp", "tools", "properties"],
26+
"properties": {
27+
"timestamp": {
28+
"type": "string",
29+
"format": "date-time"
30+
},
31+
"tools": {
32+
"type": "object"
33+
},
34+
"properties": {
35+
"type": "array",
36+
"items": {
37+
"$ref": "#/definitions/property"
38+
}
39+
}
40+
}
41+
},
42+
"components": {
43+
"type": "array",
44+
"items": {
45+
"type": "object",
46+
"required": ["bom-ref", "type", "name", "description", "properties", "purl"],
47+
"properties": {
48+
"bom-ref": {
49+
"type": "string"
50+
},
51+
"type": {
52+
"type": "string",
53+
"enum": [
54+
"application",
55+
"framework",
56+
"library",
57+
"container",
58+
"machine-learning-model",
59+
"service",
60+
"file"
61+
]
62+
},
63+
"name": {
64+
"type": "string"
65+
},
66+
"version": {
67+
"type": "string"
68+
},
69+
"description": {
70+
"type": "string"
71+
},
72+
"purl": {
73+
"type": "string"
74+
},
75+
"properties": {
76+
"type": "array",
77+
"items": {
78+
"$ref": "#/definitions/property"
79+
}
80+
}
81+
}
82+
}
83+
}
84+
},
85+
"definitions": {
86+
"property": {
87+
"type": "object",
88+
"required": ["name", "value"],
89+
"properties": {
90+
"name": {
91+
"type": "string"
92+
},
93+
"value": {
94+
"type": "string"
95+
}
96+
}
97+
}
98+
}
99+
}

src/ai_bom/utils/validator.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""JSON Schema validation for AI-BOM output."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
from pathlib import Path
7+
from typing import Any
8+
9+
import jsonschema
10+
from jsonschema import validate
11+
12+
13+
def get_schema() -> dict[str, Any]:
    """Read and parse the JSON schema bundled with AI-BOM.

    The schema file is located at ``schema/bom-schema.json`` under the
    directory one level above this module's own directory.

    Returns:
        The parsed schema document.
    """
    base_dir = Path(__file__).parent.parent
    schema_file = base_dir / "schema" / "bom-schema.json"
    with open(schema_file, encoding="utf-8") as handle:
        schema = json.load(handle)
    return schema
18+
19+
20+
def validate_output(data: dict[str, Any]) -> None:
    """Check parsed scan output against the AI-BOM JSON schema.

    The schema is loaded via ``get_schema()`` on every call.

    Args:
        data: The JSON-compatible dict to check.

    Raises:
        jsonschema.ValidationError: If ``data`` does not conform to the schema.
    """
    validate(instance=data, schema=get_schema())

tests/test_cli.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,3 +248,13 @@ def test_scan_no_star_message_in_json_format():
248248
# JSON format should only contain JSON output, not the star message
249249
data = json.loads(result.output)
250250
assert "bomFormat" in data # Valid CycloneDX JSON
251+
252+
253+
def test_scan_validate_schema():
    """Scanning with --validate in JSON format exits 0 and still emits JSON."""
    examples_dir = Path(__file__).parent.parent / "examples"
    target = examples_dir / "demo-project" / "app.py"
    cli_args = ["scan", str(target), "--format", "json", "--validate"]

    result = runner.invoke(app, cli_args)

    assert result.exit_code == 0
    # The captured output must remain parseable JSON when validation is on
    parsed = json.loads(result.output)
    assert "bomFormat" in parsed

tests/test_output_schemas.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,15 +236,16 @@ def test_cyclonedx_component_properties(self, sample_scan_result):
236236
assert isinstance(prop["name"], str)
237237
assert isinstance(prop["value"], str)
238238

239-
@pytest.mark.skipif(not JSONSCHEMA_AVAILABLE, reason="jsonschema not installed")
240239
def test_cyclonedx_schema_validation(self, multi_component_result):
241240
"""Test that output validates against CycloneDX schema."""
241+
from ai_bom.utils.validator import get_schema
242242
reporter = CycloneDXReporter()
243243
output = reporter.render(multi_component_result)
244244
parsed = json.loads(output)
245245

246246
# Validate against schema
247-
validator = Draft7Validator(CYCLONEDX_SCHEMA)
247+
schema = get_schema()
248+
validator = Draft7Validator(schema)
248249
errors = list(validator.iter_errors(parsed))
249250
assert len(errors) == 0, f"Schema validation errors: {errors}"
250251

0 commit comments

Comments
 (0)