Skip to content

Commit 6e61dfe

Browse files
Merge pull request #1027 from Aiven-Open/keejon/fix-delete-references
fix: delete references if schema is deleted
2 parents: 2da698f + 485a95c — commit 6e61dfe

7 files changed

Lines changed: 101 additions & 40 deletions

File tree

src/karapace/in_memory_database.py

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -125,18 +125,10 @@ def num_subjects(self) -> int:
125125
def num_schema_versions(self) -> tuple[int, int]:
126126
pass
127127

128-
@abstractmethod
129-
def insert_referenced_by(self, *, subject: Subject, version: Version, schema_id: SchemaId) -> None:
130-
pass
131-
132128
@abstractmethod
133129
def get_referenced_by(self, subject: Subject, version: Version) -> Referents | None:
134130
pass
135131

136-
@abstractmethod
137-
def remove_referenced_by(self, schema_id: SchemaId, references: Iterable[Reference]) -> None:
138-
pass
139-
140132

141133
class InMemoryDatabase(KarapaceDatabase):
142134
def __init__(self) -> None:
@@ -257,6 +249,9 @@ def insert_schema_version(
257249
schema=schema,
258250
schema_id=schema_id,
259251
)
252+
if references:
253+
for ref in references:
254+
self._insert_referenced_by(subject=ref.subject, version=ref.version, schema_id=schema_id)
260255
else:
261256
self._delete_from_schema_id_on_subject(
262257
subject=subject,
@@ -352,12 +347,19 @@ def delete_subject(self, *, subject: Subject, version: Version) -> None:
352347

353348
def delete_subject_hard(self, *, subject: Subject) -> None:
354349
with self.schema_lock_thread:
350+
for schema in self.subjects[subject].schemas.values():
351+
if schema.references:
352+
self._remove_referenced_by(schema.schema_id, schema.references)
355353
del self.subjects[subject]
356354
self._delete_subject_from_schema_id_on_subject(subject=subject)
357355

358356
def delete_subject_schema(self, *, subject: Subject, version: Version) -> None:
359357
with self.schema_lock_thread:
360-
self.subjects[subject].schemas.pop(version, None)
358+
schema = self.subjects[subject].schemas.pop(version, None)
359+
if schema:
360+
if schema.references:
361+
self._remove_referenced_by(schema.schema_id, schema.references)
362+
self._delete_from_schema_id_on_subject(subject=subject, schema=schema.schema)
361363

362364
def num_schemas(self) -> int:
363365
return len(self.schemas)
@@ -377,19 +379,19 @@ def num_schema_versions(self) -> tuple[int, int]:
377379
soft_deleted_versions += 1
378380
return (live_versions, soft_deleted_versions)
379381

380-
def insert_referenced_by(self, *, subject: Subject, version: Version, schema_id: SchemaId) -> None:
382+
def _insert_referenced_by(self, *, subject: Subject, version: Version, schema_id: SchemaId) -> None:
381383
with self.schema_lock_thread:
382384
referents = self.referenced_by.get((subject, version), None)
383385
if referents:
384-
referents.append(schema_id)
386+
referents.add(schema_id)
385387
else:
386-
self.referenced_by[(subject, version)] = Referents([schema_id])
388+
self.referenced_by[(subject, version)] = Referents({schema_id})
387389

388390
def get_referenced_by(self, subject: Subject, version: Version) -> Referents | None:
389391
with self.schema_lock_thread:
390392
return self.referenced_by.get((subject, version), None)
391393

392-
def remove_referenced_by(self, schema_id: SchemaId, references: Iterable[Reference]) -> None:
394+
def _remove_referenced_by(self, schema_id: SchemaId, references: Iterable[Reference]) -> None:
393395
with self.schema_lock_thread:
394396
for ref in references:
395397
key = (ref.subject, ref.version)

src/karapace/schema_reader.py

Lines changed: 1 addition & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@
4343
from karapace.schema_models import parse_protobuf_schema_definition, SchemaType, TypedSchema, ValidatedTypedSchema
4444
from karapace.schema_references import LatestVersionReference, Reference, reference_from_mapping, Referents
4545
from karapace.statsd import StatsClient
46-
from karapace.typing import JsonObject, SchemaId, SchemaReaderStoppper, Subject, Version
46+
from karapace.typing import JsonObject, SchemaReaderStoppper, Subject, Version
4747
from karapace.utils import json_decode, JSONDecodeError, shutdown
4848
from threading import Event, Lock, Thread
4949
from typing import Final
@@ -660,10 +660,6 @@ def _handle_msg_schema(self, key: dict, value: dict | None) -> None:
660660
references=resolved_references,
661661
)
662662

663-
if resolved_references:
664-
for ref in resolved_references:
665-
self.database.insert_referenced_by(subject=ref.subject, version=ref.version, schema_id=schema_id)
666-
667663
def handle_msg(self, key: dict, value: dict | None) -> None:
668664
if "keytype" in key:
669665
try:
@@ -687,13 +683,6 @@ def handle_msg(self, key: dict, value: dict | None) -> None:
687683
)
688684
raise InvalidSchema("Message key doesn't contain the `keytype` attribute")
689685

690-
def remove_referenced_by(
691-
self,
692-
schema_id: SchemaId,
693-
references: Sequence[Reference],
694-
) -> None:
695-
self.database.remove_referenced_by(schema_id, references)
696-
697686
def get_referenced_by(
698687
self,
699688
subject: Subject,

src/karapace/schema_references.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
from karapace.typing import JsonData, JsonObject, SchemaId, Subject, Version
1313
from typing import cast, NewType, TypeVar
1414

15-
Referents = NewType("Referents", list[SchemaId])
15+
Referents = NewType("Referents", set[SchemaId])
1616

1717
T = TypeVar("T")
1818

src/karapace/schema_registry.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -170,8 +170,6 @@ async def subject_delete_local(self, subject: Subject, permanent: bool) -> list[
170170
deleted=True,
171171
references=schema_version.references,
172172
)
173-
if schema_version.references and len(schema_version.references) > 0:
174-
self.schema_reader.remove_referenced_by(schema_version.schema_id, schema_version.references)
175173
else:
176174
try:
177175
schema_versions_live = self.subject_get(subject, include_deleted=False)
@@ -225,8 +223,6 @@ async def subject_version_delete_local(self, subject: Subject, version: Version,
225223
deleted=True,
226224
references=schema_version.references,
227225
)
228-
if schema_version.references and len(schema_version.references) > 0:
229-
self.schema_reader.remove_referenced_by(schema_version.schema_id, schema_version.references)
230226
return resolved_version
231227

232228
def subject_get(self, subject: Subject, include_deleted: bool = False) -> dict[Version, SchemaVersion]:

stubs/confluent_kafka/admin/_config.pyi

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from ._resource import ResourceType
22
from enum import Enum
3+
from typing import cast
34

45
class ConfigResource:
56
Type = ResourceType
@@ -12,7 +13,7 @@ class ConfigResource:
1213
) -> None: ...
1314

1415
class ConfigSource(Enum):
15-
UNKNOWN_CONFIG: int
16-
DYNAMIC_TOPIC_CONFIG: int
16+
UNKNOWN_CONFIG = cast(int, ...)
17+
DYNAMIC_TOPIC_CONFIG = cast(int, ...)
1718

1819
class ConfigEntry: ...
stubs/confluent_kafka/admin/_resource.pyi

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from enum import Enum
2+
from typing import cast
23

34
class ResourceType(Enum):
4-
TOPIC: int
5+
TOPIC = cast(int, ...)

tests/unit/test_in_memory_database.py

Lines changed: 79 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,21 +5,25 @@
55
from __future__ import annotations
66

77
from collections import defaultdict
8-
from collections.abc import Iterable, Sequence
8+
from collections.abc import Sequence
99
from confluent_kafka.cimpl import KafkaError
1010
from karapace.config import DEFAULTS
1111
from karapace.constants import DEFAULT_SCHEMA_TOPIC
1212
from karapace.in_memory_database import InMemoryDatabase, KarapaceDatabase, Subject, SubjectData
1313
from karapace.kafka.types import Timestamp
1414
from karapace.key_format import KeyFormatter
1515
from karapace.offset_watcher import OffsetWatcher
16+
from karapace.protobuf.schema import ProtobufSchema
1617
from karapace.schema_models import SchemaVersion, TypedSchema
1718
from karapace.schema_reader import KafkaSchemaReader
1819
from karapace.schema_references import Reference, Referents
20+
from karapace.schema_type import SchemaType
1921
from karapace.typing import SchemaId, Version
2022
from pathlib import Path
2123
from typing import Final
2224

25+
import pytest
26+
2327
TEST_DATA_FOLDER: Final = Path("tests/unit/test_data/")
2428

2529

@@ -176,15 +180,9 @@ def num_subjects(self) -> int:
176180
def num_schema_versions(self) -> tuple[int, int]:
177181
return self.db.num_schema_versions()
178182

179-
def insert_referenced_by(self, *, subject: Subject, version: Version, schema_id: SchemaId) -> None:
180-
return self.db.insert_referenced_by(subject=subject, version=version, schema_id=schema_id)
181-
182183
def get_referenced_by(self, subject: Subject, version: Version) -> Referents | None:
183184
return self.db.get_referenced_by(subject=subject, version=version)
184185

185-
def remove_referenced_by(self, schema_id: SchemaId, references: Iterable[Reference]) -> None:
186-
return self.db.remove_referenced_by(schema_id=schema_id, references=references)
187-
188186
def duplicates(self) -> dict[SchemaId, list[tuple[Subject, TypedSchema]]]:
189187
duplicate_data = defaultdict(list)
190188
for schema_id, schemas in self._duplicates.items():
@@ -259,3 +257,77 @@ def test_can_ingest_schemas_from_log() -> None:
259257
schema_id_to_duplicated_subjects = compute_schema_id_to_subjects(duplicates, database.subject_to_subject_data())
260258
assert schema_id_to_duplicated_subjects == {}, "there shouldn't be any duplicated schemas"
261259
assert duplicates == {}, "the schema database is broken. The id should be unique"
260+
261+
262+
@pytest.fixture(name="db_with_schemas")
263+
def fixture_in_memory_database_with_schemas() -> InMemoryDatabase:
264+
db = InMemoryDatabase()
265+
schema_str = "syntax = 'proto3'; message Test { string test = 1; }"
266+
267+
subject_a = Subject("subject_a")
268+
schema_a = TypedSchema(
269+
schema_type=SchemaType.PROTOBUF,
270+
schema_str=schema_str,
271+
schema=ProtobufSchema(schema=schema_str),
272+
)
273+
db.insert_subject(subject=subject_a)
274+
schema_id_a = db.get_schema_id(schema_a)
275+
db.insert_schema_version(
276+
subject=subject_a, schema_id=schema_id_a, version=Version(1), schema=schema_a, deleted=False, references=None
277+
)
278+
db.insert_schema_version(
279+
subject=subject_a, schema_id=schema_id_a, version=Version(2), schema=schema_a, deleted=False, references=None
280+
)
281+
282+
subject_b = Subject("subject_b")
283+
references_b = [Reference(name="test", subject=subject_a, version=Version(1))]
284+
schema_b = TypedSchema(
285+
schema_type=SchemaType.PROTOBUF,
286+
schema_str=schema_str,
287+
schema=ProtobufSchema(schema=schema_str),
288+
references=references_b,
289+
)
290+
db.insert_subject(subject=subject_b)
291+
schema_id_b = db.get_schema_id(schema_b)
292+
db.insert_schema_version(
293+
subject=subject_b,
294+
schema_id=schema_id_b,
295+
version=Version(1),
296+
schema=schema_b,
297+
deleted=False,
298+
references=references_b,
299+
)
300+
301+
return db
302+
303+
304+
def test_delete_schema_references(db_with_schemas: InMemoryDatabase) -> None:
305+
# Check that the schema is referenced by subject_b
306+
referents = db_with_schemas.get_referenced_by(subject=Subject("subject_a"), version=Version(1))
307+
assert referents is not None
308+
version = db_with_schemas.find_schema_versions_by_schema_id(schema_id=referents.pop(), include_deleted=False)[0]
309+
assert version.subject == Subject("subject_b")
310+
assert version.version == Version(1)
311+
312+
# Delete the schema from subject_b
313+
db_with_schemas.delete_subject_schema(subject=Subject("subject_b"), version=Version(1))
314+
315+
# Check that the schema is no longer referenced by subject_b
316+
referents = db_with_schemas.get_referenced_by(subject=Subject("subject_a"), version=Version(1))
317+
assert len(referents) == 0, "referents should be gone after deleting the schema"
318+
319+
320+
def test_delete_subject(db_with_schemas: InMemoryDatabase) -> None:
321+
# Check that the schema is referenced by subject_b
322+
referents = db_with_schemas.get_referenced_by(subject=Subject("subject_a"), version=Version(1))
323+
assert referents is not None
324+
version = db_with_schemas.find_schema_versions_by_schema_id(schema_id=referents.pop(), include_deleted=False)[0]
325+
assert version.subject == Subject("subject_b")
326+
assert version.version == Version(1)
327+
328+
# Hard delete subject_b
329+
db_with_schemas.delete_subject_hard(subject=Subject("subject_b"))
330+
331+
# Check that the schema is no longer referenced by subject_b
332+
referents = db_with_schemas.get_referenced_by(subject=Subject("subject_a"), version=Version(1))
333+
assert len(referents) == 0, "referents should be gone after hard deleting the subject"

0 commit comments

Comments
 (0)