Skip to content

Commit 3037b5b

Browse files
committed
API: JSON Schema validator class
1 parent cb859d8 commit 3037b5b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+14039
-2
lines changed

API/fleece/JSONSchema.hh

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
//
2+
// JSONSchema.hh
3+
//
4+
// Copyright 2025-Present Couchbase, Inc.
5+
//
6+
// Use of this software is governed by the Business Source License included
7+
// in the file licenses/BSL-Couchbase.txt. As of the Change Date specified
8+
// in that file, in accordance with the Business Source License, use of this
9+
// software will be governed by the Apache License, Version 2.0, included in
10+
// the file licenses/APL2.txt.
11+
//
12+
13+
#pragma once
14+
#ifndef _FLEECE_JSONSCHEMA_HH
15+
#define _FLEECE_JSONSCHEMA_HH
16+
#include "fleece/Fleece.hh"
17+
#include "fleece/Mutable.hh"
18+
#include <stdexcept>
19+
#include <string>
20+
#include <string_view>
21+
22+
FL_ASSUME_NONNULL_BEGIN
23+
24+
namespace fleece {
25+
26+
/** Validates Values against a JSON Schema. (See https://json-schema.org )
27+
*
28+
* Unsupported features (will throw an `unsupported_schema` exception if detected):
29+
* - Path-relative `$ref`s (URIs that start with `/`)
30+
* - `$dynamicRef`, `$dynamicAnchor`, `$vocabulary`
31+
* - `format`, `contentEncoding`, `contentMediaType`
32+
* - `dependencies`, `dependentRequired`, `dependentSchemas`, `extends`
33+
* - `unevaluatedItems`, `unevaluatedProperties`
34+
*
35+
* Known bugs:
36+
* - JSON Schema's equality comparisons do not distinguish between integers and floats,
37+
* so `7` is equal to `7.0`. However, Fleece considers ints and floats distinct types.
38+
* This implementation conforms to JSON Schema equality when making direct comparisons
39+
* between numeric Values, but _not_ when the numbers are nested in collections.
40+
* So for example `[7]` will not match `[7.0]`.
41+
*
42+
* @note This class does not download schemas on demand; it does no I/O at all.
43+
* See the docs of \ref unknownSchemaID for how to handle external schema refs.
44+
* @note This class is thread-safe.
45+
*/
46+
class JSONSchema {
47+
public:
48+
49+
/** Thrown if errors are discovered in a schema. */
50+
class invalid_schema : public std::runtime_error { using runtime_error::runtime_error; };
51+
/** Thrown if a schema is found to use unsupported/unimplemented features. */
52+
class unsupported_schema : public std::runtime_error { using runtime_error::runtime_error; };
53+
54+
/// The result type returned by the \ref validate methods.
/// Only forward-declared here; the full class is defined after JSONSchema in this header.
class Validation;
55+
56+
57+
/// Constructor that takes a parsed JSON schema object.
58+
/// @note The Value will be retained, so the caller doesn't need to keep a reference.
59+
/// @param schemaRoot The parsed schema.
60+
/// @param id_uri The absolute URI identifying this schema. Optional.
61+
/// @throws invalid_schema if the schema is invalid.
62+
/// @throws unsupported_schema if the schema uses unsupported features.
63+
explicit JSONSchema(Value schemaRoot, std::string_view id_uri = "");
64+
65+
/// Convenience constructor that takes a JSON schema string and parses it.
66+
/// @param json The schema as JSON data.
67+
/// @param id_uri The absolute URI identifying this schema. Optional.
68+
/// @throws invalid_schema if the schema is invalid.
69+
/// @throws unsupported_schema if the schema uses unsupported features.
70+
explicit JSONSchema(std::string_view json, std::string_view id_uri = "");
71+
72+
/// Destructor. Declared here rather than defaulted inline because `Impl` is only
/// forward-declared in this header, so it has to be defined where `Impl` is complete.
~JSONSchema();
73+
74+
/// The root of the parsed schema. (Almost always a Dict.)
75+
Value schema() const;
76+
77+
/// Registers an external schema that the main schema may refer to.
78+
/// @note The Dict will be retained, so the caller doesn't need to keep a reference.
79+
/// @param schemaRoot The parsed schema.
80+
/// @param id_uri The absolute URI identifying this schema.
81+
/// @throws invalid_schema if the schema is invalid.
82+
/// @throws unsupported_schema if the schema uses unsupported features.
83+
void addSchema(Dict schemaRoot, std::string_view id_uri);
84+
85+
/// Validates a parsed Fleece value against the schema.
86+
/// @returns A \ref Validation object describing the result.
87+
/// @throws invalid_schema if the schema itself is invalid.
88+
/// @throws unsupported_schema if the schema uses unsupported features.
89+
Validation validate(Value value) const LIFETIMEBOUND;
90+
91+
/// Convenience method that parses JSON and then validates it against the schema.
92+
/// @returns A \ref Validation object describing the result.
93+
/// @throws std::invalid_argument if the JSON fails to parse.
94+
/// @throws invalid_schema if the schema itself is invalid.
95+
/// @throws unsupported_schema if the schema uses unsupported features.
96+
Validation validate(std::string_view json) const LIFETIMEBOUND;
97+
/// Overload of the JSON-string form that additionally takes a SharedKeys.
/// NOTE(review): presumably the SharedKeys is used when parsing `json` into Fleece,
/// and the same exceptions apply as for the overload above -- TODO confirm in the .cc file.
Validation validate(std::string_view json, SharedKeys) const LIFETIMEBOUND;
98+
99+
100+
/** Errors that can occur during validation. */
101+
enum class Error : unsigned {
102+
ok = 0,
103+
invalid, // value matched against a "false" in the schema
104+
typeMismatch, // value doesn't match "type" property in schema
105+
outOfRange, // Number is out of range of "minimum", etc.
106+
notMultiple, // Number is not a multiple of the "multipleOf"
107+
tooShort, // String is too short or collection has too few items
108+
tooLong, // String is too long or collection has too many items
109+
patternMismatch, // String doesn't match regex pattern
110+
missingProperty, // Dict is missing a required property
111+
unknownProperty, // Dict has an invalid property
112+
notEnum, // Value doesn't match any "enum" or "const" value
113+
tooFew, // Value doesn't match anything in an "anyOf" or "oneOf" array
114+
tooMany, // "oneOf" or "maxContains" failed
115+
notNot, // Value matched a "not" schema
116+
notUnique, // Array items are not unique
117+
invalidUTF8, // A string's length could not be checked because of invalid UTF-8
118+
unknownSchemaRef, // Reference to a schema URI that's not registered
119+
};
120+
121+
/// True if `e` is `Error::ok`, i.e. it does not denote a failure.
static bool ok(Error e) noexcept {return e == Error::ok;}
122+
/// Returns a string for the given Error value.
/// NOTE(review): the exact wording of the strings is defined in the .cc file, not visible here.
static std::string_view errorString(Error) noexcept;
123+
124+
private:
125+
// Opaque implementation type; only forward-declared in this header.
struct Impl;
126+
// Owning pointer to the implementation.
// NOTE(review): std::unique_ptr needs <memory>, which this header does not include directly;
// it presumably arrives transitively via the fleece headers -- consider including it explicitly.
std::unique_ptr<Impl> _impl;
127+
};
128+
129+
130+
/** The result of validating against a JSONSchema. */
131+
class JSONSchema::Validation {
132+
public:
133+
/// True if validation succeeded.
134+
bool ok() const noexcept {return _result.error == Error::ok;}
135+
/// Same as \ref ok. Being `explicit`, it applies in boolean contexts such as
/// `if (validation) ...` but does not allow implicit conversion to `bool` elsewhere.
explicit operator bool() const {return ok();}
136+
137+
/// The specific error. (Will be `Error::ok` if there was no error.)
138+
Error error() const noexcept { return _result.error; }
139+
140+
/// The specific error, as a string.
/// NOTE(review): this and the other `noexcept` accessors below that return `std::string`
/// by value would call std::terminate if building the string threw (e.g. std::bad_alloc)
/// -- confirm that is intended.
141+
std::string errorString() const noexcept;
142+
143+
/// The detected invalid Value; either the one passed to \ref validate
144+
/// or something nested in it. (Will be nullptr if there was no error.)
145+
Value errorValue() const noexcept {return _result.value;}
146+
147+
/// On error, this is the path to the detected invalid Value, in \ref KeyPath syntax.
148+
std::string errorPath() const noexcept;
149+
150+
/// The key and value of the item in the schema that caused the failure;
151+
/// e.g. `{"maxLength", 5}`.
152+
std::pair<slice,Value> errorSchema() const noexcept;
153+
154+
/// A URI pointing to the item in the schema that caused the failure.
155+
std::string errorSchemaURI() const noexcept;
156+
157+
/// If the error is `Error::unknownSchemaRef`, this is the URI of the unknown schema.
158+
/// If you can download or otherwise look up the schema, you can call \ref addSchema
159+
/// to register it, then call \ref validate again to retry.
160+
std::string const& unknownSchemaID() const noexcept {return _unknownSchema;}
161+
162+
/// Raw outcome of a check. `error` is what \ref error returns and `value` is what
/// \ref errorValue returns. `schema` and `schemaKey` presumably identify the schema item
/// reported by \ref errorSchema -- TODO confirm in the .cc file.
struct Result {Error error; Value value; Value schema; slice schemaKey;};
163+
/// True if the Result's `error` field is `Error::ok`.
static bool ok(Result const& e) noexcept {return e.error == Error::ok;}
164+
private:
165+
// JSONSchema is a friend so that it can call the private constructor below.
friend class JSONSchema;
166+
167+
// Private constructor: instances are not constructible by outside code, only by
// this class and its friend JSONSchema.
Validation(JSONSchema const& schema, Value value);
168+
// Checker methods, each returning a Result. Judging by their names they handle generic
// values, numbers, strings, arrays and dicts respectively; how they dispatch to one another
// and what `schemaBase` denotes is defined in the .cc file -- TODO confirm.
Result check(Value value, Value schema, Dict schemaBase);
169+
Result checkValue(Value value, Dict schema, Dict schemaBase);
170+
Result checkNumber(Value value, Dict schema, Dict schemaBase);
171+
Result checkString(Value value, Dict schema, Dict schemaBase);
172+
Result checkArray(Array, Dict schema, Dict schemaBase);
173+
Result checkDict(Dict, Dict schema, Dict schemaBase);
174+
175+
// Type tests taking the schema-side type either as a Value or as a slice.
// NOTE(review): presumably these test `value` against a schema "type" entry -- TODO confirm.
static bool isType(Value value, Value typeVal);
176+
static bool isType(Value value, slice schemaType);
177+
178+
// NOTE(review): held by reference, and `validate` is marked LIFETIMEBOUND, so a Validation
// presumably must not outlive the JSONSchema that produced it -- confirm.
Impl const& _schemaImpl; // The guts of the owning JSONSchema
179+
RetainedValue _value; // The root Value being validated (only after failure)
180+
Result _result {}; // Details of validation error
181+
std::string _unknownSchema; // Unknown schema ID found during validation
182+
};
183+
184+
}
185+
186+
FL_ASSUME_NONNULL_END
187+
188+
#endif // _FLEECE_JSONSCHEMA_HH

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,11 @@ add_executable(FleeceTests EXCLUDE_FROM_ALL
150150
${FLEECE_TEST_SRC}
151151
vendor/catch/catch_amalgamated.cpp
152152
vendor/catch/CaseListReporter.cc
153+
Tests/SchemaTests.cc
153154
)
154155
setup_test_build()
155156
target_include_directories(FleeceTests PRIVATE
157+
Experimental
156158
Tests
157159
vendor/catch
158160
)

Experimental/JSONSchema.cc

Whitespace-only changes.

Fleece.xcodeproj/project.pbxproj

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@
4747
2739971725CDBD8E000C1C1B /* SmallVectorBase.hh in Headers */ = {isa = PBXBuildFile; fileRef = 2739971625CDBD8E000C1C1B /* SmallVectorBase.hh */; };
4848
273CD2D825E874CD00B93C59 /* Base64.hh in Headers */ = {isa = PBXBuildFile; fileRef = 273CD2D625E874CD00B93C59 /* Base64.hh */; };
4949
273CD2D925E874CD00B93C59 /* Base64.cc in Sources */ = {isa = PBXBuildFile; fileRef = 273CD2D725E874CD00B93C59 /* Base64.cc */; };
50+
273F3BCE2D4AA26D00BFAD13 /* JSONSchema.hh in Headers */ = {isa = PBXBuildFile; fileRef = 273F3BC92D4AA26D00BFAD13 /* JSONSchema.hh */; };
51+
273F3BD02D4AA26D00BFAD13 /* JSONSchema.cc in Sources */ = {isa = PBXBuildFile; fileRef = 273F3BCA2D4AA26D00BFAD13 /* JSONSchema.cc */; };
52+
273F3BD42D4AA2A800BFAD13 /* SchemaTests.cc in Sources */ = {isa = PBXBuildFile; fileRef = 273F3BD22D4AA2A800BFAD13 /* SchemaTests.cc */; };
5053
274281A4262F7CBF00862700 /* slice+ObjC.mm in Sources */ = {isa = PBXBuildFile; fileRef = 274281A3262F7CBF00862700 /* slice+ObjC.mm */; };
5154
274D8244209A3A77008BB39F /* HeapDict.cc in Sources */ = {isa = PBXBuildFile; fileRef = 274D8242209A3A77008BB39F /* HeapDict.cc */; };
5255
274D8245209A3A77008BB39F /* HeapDict.hh in Headers */ = {isa = PBXBuildFile; fileRef = 274D8243209A3A77008BB39F /* HeapDict.hh */; };
@@ -86,7 +89,6 @@
8689
279AC5381C096B5C002C80DB /* libfleeceStatic.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 270FA25C1BF53CAD005DCB13 /* libfleeceStatic.a */; };
8790
279AC53C1C097941002C80DB /* Value+Dump.cc in Sources */ = {isa = PBXBuildFile; fileRef = 279AC53B1C097941002C80DB /* Value+Dump.cc */; };
8891
27A0E3DF24DCD86900380563 /* ConcurrentArena.hh in Headers */ = {isa = PBXBuildFile; fileRef = 27A0E3DD24DCD86900380563 /* ConcurrentArena.hh */; };
89-
27A0E3E024DCD86900380563 /* ConcurrentArena.cc in Sources */ = {isa = PBXBuildFile; fileRef = 27A0E3DE24DCD86900380563 /* ConcurrentArena.cc */; };
9092
27A1327B2C700D45008E84FA /* JSLexer.hh in Headers */ = {isa = PBXBuildFile; fileRef = 27A1327A2C700D45008E84FA /* JSLexer.hh */; };
9193
27A132812C73BF8C008E84FA /* FLEncoder.cc in Sources */ = {isa = PBXBuildFile; fileRef = 27A132802C73BF8C008E84FA /* FLEncoder.cc */; };
9294
27A2F73B21248DA50081927B /* FLSlice.h in Headers */ = {isa = PBXBuildFile; fileRef = 27A2F73A21248DA40081927B /* FLSlice.h */; };
@@ -350,6 +352,10 @@
350352
2739971625CDBD8E000C1C1B /* SmallVectorBase.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = SmallVectorBase.hh; sourceTree = "<group>"; };
351353
273CD2D625E874CD00B93C59 /* Base64.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Base64.hh; sourceTree = "<group>"; };
352354
273CD2D725E874CD00B93C59 /* Base64.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Base64.cc; sourceTree = "<group>"; };
355+
273F3BC92D4AA26D00BFAD13 /* JSONSchema.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = JSONSchema.hh; sourceTree = "<group>"; };
356+
273F3BCA2D4AA26D00BFAD13 /* JSONSchema.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = JSONSchema.cc; sourceTree = "<group>"; };
357+
273F3BD22D4AA2A800BFAD13 /* SchemaTests.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = SchemaTests.cc; sourceTree = "<group>"; };
358+
273F3BD32D4AA2A800BFAD13 /* travel-schema.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = "travel-schema.json"; sourceTree = "<group>"; };
353359
274281A3262F7CBF00862700 /* slice+ObjC.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = "slice+ObjC.mm"; sourceTree = "<group>"; };
354360
2746DD3B1D931BE9000517BC /* Benchmark.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Benchmark.hh; sourceTree = "<group>"; };
355361
2747D9841CFB9BC300C48211 /* 1person.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = 1person.json; sourceTree = "<group>"; };
@@ -560,6 +566,7 @@
560566
274C948B2150175700F9AEA9 /* Doxyfile */,
561567
274C948C215058BB00F9AEA9 /* Doxyfile_C++ */,
562568
271507DE212225B3005FE6E8 /* API */,
569+
273F3BCD2D4AA26D00BFAD13 /* Experimental */,
563570
270FA25E1BF53CAD005DCB13 /* Fleece */,
564571
279AC5321C096872002C80DB /* Tool */,
565572
272E5A441BF7FD1700848580 /* Tests */,
@@ -686,11 +693,13 @@
686693
272E5A5E1BF91DBE00848580 /* ObjCTests.mm */,
687694
27AEFAC4210913C500106ED8 /* DeltaTests.cc */,
688695
271507F6212349B8005FE6E8 /* API_ValueTests.cc */,
696+
273F3BD22D4AA2A800BFAD13 /* SchemaTests.cc */,
689697
278163B81CE6BB8C00B94E32 /* C_Test.c */,
690698
27EC8D5B1CEBA72E00199FE6 /* mn_wordlist.h */,
691699
2747D9841CFB9BC300C48211 /* 1person.json */,
692700
2776AA232086C94B004ACE85 /* 1person-deepIterOutput.txt */,
693701
2776AA242086CC1F004ACE85 /* 1person-shallowIterOutput.txt */,
702+
273F3BD32D4AA2A800BFAD13 /* travel-schema.json */,
694703
);
695704
path = Tests;
696705
sourceTree = "<group>";
@@ -712,6 +721,15 @@
712721
path = Integration;
713722
sourceTree = "<group>";
714723
};
724+
273F3BCD2D4AA26D00BFAD13 /* Experimental */ = {
725+
isa = PBXGroup;
726+
children = (
727+
273F3BC92D4AA26D00BFAD13 /* JSONSchema.hh */,
728+
273F3BCA2D4AA26D00BFAD13 /* JSONSchema.cc */,
729+
);
730+
path = Experimental;
731+
sourceTree = "<group>";
732+
};
715733
2760A4DA25E96DB000E2ECB2 /* wyhash */ = {
716734
isa = PBXGroup;
717735
children = (
@@ -969,6 +987,7 @@
969987
27AEFAC321090FF400106ED8 /* JSONDelta.hh in Headers */,
970988
27298E661C00F8A9000CFBA8 /* jsonsl.h in Headers */,
971989
277A06B420B36D1A00970354 /* FileUtils.hh in Headers */,
990+
273F3BCE2D4AA26D00BFAD13 /* JSONSchema.hh in Headers */,
972991
27E3DD4D1DB6C32400F2872D /* CatchHelper.hh in Headers */,
973992
27AEFAC921091A8C00106ED8 /* diff_match_patch.hh in Headers */,
974993
2734B8A51F8583FF00BE5249 /* MDict.hh in Headers */,
@@ -1000,7 +1019,6 @@
10001019
278343132A675A7000621050 /* function_ref.hh in Headers */,
10011020
270FA2801BF53CEA005DCB13 /* Writer.hh in Headers */,
10021021
270FA27D1BF53CEA005DCB13 /* Encoder.hh in Headers */,
1003-
27C8DF072084102900A99BFC /* MutableHashTree.hh in Headers */,
10041022
27A1327B2C700D45008E84FA /* JSLexer.hh in Headers */,
10051023
27A924D01D9C32E800086206 /* Path.hh in Headers */,
10061024
274D8245209A3A77008BB39F /* HeapDict.hh in Headers */,
@@ -1325,6 +1343,7 @@
13251343
buildActionMask = 2147483647;
13261344
files = (
13271345
270FA27E1BF53CEA005DCB13 /* Encoder+ObjC.mm in Sources */,
1346+
273F3BD02D4AA26D00BFAD13 /* JSONSchema.cc in Sources */,
13281347
270FA27B1BF53CEA005DCB13 /* Value+ObjC.mm in Sources */,
13291348
27C4ACAC1CE5146500938365 /* Array.cc in Sources */,
13301349
277A06B320B36D1A00970354 /* FileUtils.cc in Sources */,
@@ -1380,6 +1399,7 @@
13801399
272E5A5D1BF800A100848580 /* EncoderTests.cc in Sources */,
13811400
27E3DD531DB7DB1C00F2872D /* SharedKeysTests.cc in Sources */,
13821401
27AEFAC5210913C500106ED8 /* DeltaTests.cc in Sources */,
1402+
273F3BD42D4AA2A800BFAD13 /* SchemaTests.cc in Sources */,
13831403
274D824F209A8D01008BB39F /* MutableTests.cc in Sources */,
13841404
271507F7212349B8005FE6E8 /* API_ValueTests.cc in Sources */,
13851405
27298E781C01A461000CFBA8 /* PerfTests.cc in Sources */,

0 commit comments

Comments
 (0)