Skip to content

Commit 864875b

Browse files
Correct schema comparison when posting to subject
After refactoring schema storage to use non-parsed versions, an error was introduced into the schema comparison performed when posting to a subject. The original Avro schema string can differ from the parsed schema; for example, names are not needed for the comparison. Example: the stored schema string {"type":"int","name":"example_name"} parses to "int", and a new schema posted to the subject, {"type":"int"}, also parses to "int", so the two must be treated as the same schema.
1 parent 27f10b9 commit 864875b

2 files changed

Lines changed: 42 additions & 4 deletions

File tree

karapace/schema_registry_apis.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -741,19 +741,22 @@ async def subjects_schema_post(self, content_type, *, subject, request):
741741
status=HTTPStatus.INTERNAL_SERVER_ERROR,
742742
)
743743
for schema in subject_data["schemas"].values():
744-
typed_schema = schema["schema"]
745-
if typed_schema == new_schema:
744+
validated_typed_schema = ValidatedTypedSchema.parse(schema["schema"].schema_type, schema["schema"].schema_str)
745+
if (
746+
validated_typed_schema.schema_type == new_schema.schema_type
747+
and validated_typed_schema.schema == new_schema.schema
748+
):
746749
ret = {
747750
"subject": subject,
748751
"version": schema["version"],
749752
"id": schema["id"],
750-
"schema": typed_schema.schema_str,
753+
"schema": validated_typed_schema.schema_str,
751754
}
752755
if schema_type is not SchemaType.AVRO:
753756
ret["schemaType"] = schema_type
754757
self.r(ret, content_type)
755758
else:
756-
self.log.debug("Schema %r did not match %r", schema, typed_schema)
759+
self.log.debug("Schema %r did not match %r", schema, validated_typed_schema)
757760
self.r(
758761
body={
759762
"error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value,

tests/integration/test_schema.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1526,6 +1526,41 @@ async def test_schema_same_subject(registry_async_client: Client, trail: str) ->
15261526
assert json == {"id": schema_id, "subject": subject, "schema": ujson.loads(schema_str), "version": 1}
15271527

15281528

1529+
async def test_schema_same_subject_unnamed(registry_async_client: Client) -> None:
1530+
"""
1531+
The same schema JSON should be returned when checking the same schema str against the same subject
1532+
"""
1533+
subject_name_factory = create_subject_name_factory("test_schema_same_subject_unnamed")
1534+
schema_name = create_schema_name_factory("test_schema_same_subject_unnamed")()
1535+
1536+
schema_str = ujson.dumps(
1537+
{
1538+
"type": "int",
1539+
"name": schema_name,
1540+
}
1541+
)
1542+
subject = subject_name_factory()
1543+
res = await registry_async_client.post(
1544+
f"subjects/{subject}/versions",
1545+
json={"schema": schema_str},
1546+
)
1547+
assert res.status_code == 200
1548+
schema_id = res.json()["id"]
1549+
1550+
unnamed_schema_str = ujson.dumps({"type": "int"})
1551+
1552+
res = await registry_async_client.post(
1553+
f"subjects/{subject}",
1554+
json={"schema": unnamed_schema_str},
1555+
)
1556+
assert res.status_code == 200
1557+
1558+
# Switch the str schema to a dict for comparison
1559+
json = res.json()
1560+
json["schema"] = ujson.loads(json["schema"])
1561+
assert json == {"id": schema_id, "subject": subject, "schema": ujson.loads(schema_str), "version": 1}
1562+
1563+
15291564
@pytest.mark.parametrize("trail", ["", "/"])
15301565
async def test_schema_version_number_existing_schema(registry_async_client: Client, trail: str) -> None:
15311566
"""

0 commit comments

Comments
 (0)