Skip to content

Commit 4e7c1dc

Browse files
committed
pandaproxy/sr: Refactor store internal state to use unparsed schemas
- Replace canonical schemas for unparsed schemas stored in store and move all schema processing to sharded_store. This allows to have lazy schema validation on startup and relax requirements for valid schema. With this changes, a schema that had been imported before its dependencies will be found valid when it is requested. - Add backport to sharded_store to be able to access an unparsed schema as-is, without processing it. This is needed as now there might be an invalid schema loaded at startup. Operations like delete_subject_version, which needs to reference a schema before deleting it would be problematic.
1 parent ef0dfb0 commit 4e7c1dc

14 files changed

+891
-94
lines changed

src/v/pandaproxy/schema_registry/seq_writer.cc

+6-6
Original file line numberDiff line numberDiff line change
@@ -433,17 +433,17 @@ ss::future<std::optional<bool>> seq_writer::do_delete_subject_version(
433433
throw as_exception(has_references(sub, version));
434434
}
435435

436-
auto s_res = co_await _store.get_subject_schema(
437-
sub, version, include_deleted::yes);
438-
subject_schema ss = std::move(s_res);
436+
schema_id s_id = co_await _store.get_id(sub, version);
437+
unparsed_schema_definition schema
438+
= co_await _store.get_unparsed_schema_definition(s_id);
439439

440440
auto key = schema_key{
441441
.seq{write_at}, .node{_node_id}, .sub{sub}, .version{version}};
442442
vlog(plog.debug, "seq_writer::delete_subject_version {}", key);
443-
auto value = canonical_schema_value{
444-
.schema{std::move(ss.schema)},
443+
unparsed_schema_value value{
444+
.schema{unparsed_schema{sub, std::move(schema)}},
445445
.version{version},
446-
.id{ss.id},
446+
.id{s_id},
447447
.deleted{is_deleted::yes}};
448448

449449
batch_builder rb(write_at, sub);

src/v/pandaproxy/schema_registry/sharded_store.cc

+81-29
Original file line numberDiff line numberDiff line change
@@ -167,14 +167,10 @@ sharded_store::get_schema_version(subject_schema schema, normalize norm) {
167167
if (s_res.has_error()) {
168168
return ss::make_ready_future();
169169
}
170-
auto [raw, type, refs]
171-
= std::move(s_res.value()).destructure();
172170
return this
173171
->make_canonical_schema(
174172
unparsed_schema{
175-
subject{},
176-
unparsed_schema_definition{
177-
std::move(raw), type, std::move(refs)}},
173+
subject{}, std::move(s_res.value())},
178174
norm)
179175
.then([id, &ret_id, &canonical_def](
180176
canonical_schema processed) {
@@ -299,22 +295,6 @@ ss::future<bool> sharded_store::upsert(
299295
unparsed_schema schema,
300296
schema_id id,
301297
schema_version version,
302-
is_deleted deleted) {
303-
auto norm = normalize{
304-
config::shard_local_cfg().schema_registry_normalize_on_startup()};
305-
co_return co_await upsert(
306-
marker,
307-
co_await make_canonical_schema(std::move(schema), norm),
308-
id,
309-
version,
310-
deleted);
311-
}
312-
313-
ss::future<bool> sharded_store::upsert(
314-
seq_marker marker,
315-
canonical_schema schema,
316-
schema_id id,
317-
schema_version version,
318298
is_deleted deleted) {
319299
auto [sub, def] = std::move(schema).destructure();
320300
co_await upsert_schema(id, std::move(def));
@@ -365,6 +345,16 @@ ss::future<subject_schema> sharded_store::has_schema(
365345
if (
366346
e.code() == error_code::subject_not_found
367347
|| e.code() == error_code::subject_version_not_found) {
348+
} else if (
349+
// Stored schemas might be invalid if imported improperly
350+
e.code() == error_code::schema_invalid) {
351+
vlog(
352+
plog.warn,
353+
"Failed to parse stored schema, subject '{}', version {}. "
354+
"Error: ",
355+
schema.sub(),
356+
ver,
357+
e.what());
368358
} else {
369359
throw;
370360
}
@@ -378,10 +368,10 @@ ss::future<subject_schema> sharded_store::has_schema(
378368

379369
ss::future<std::optional<canonical_schema_definition>>
380370
sharded_store::maybe_get_schema_definition(schema_id id) {
381-
co_return co_await _store.invoke_on(
371+
auto unparsed = co_await _store.invoke_on(
382372
shard_for(id),
383373
_smp_opts,
384-
[id](store& s) -> std::optional<canonical_schema_definition> {
374+
[id](store& s) -> std::optional<unparsed_schema_definition> {
385375
auto s_res = s.get_schema_definition(id);
386376
if (
387377
s_res.has_error()
@@ -390,10 +380,49 @@ sharded_store::maybe_get_schema_definition(schema_id id) {
390380
}
391381
return std::move(s_res.value());
392382
});
383+
if (!unparsed.has_value()) {
384+
co_return std::nullopt;
385+
}
386+
387+
try {
388+
auto canonical = co_await make_canonical_schema(
389+
{{}, std::move(unparsed.value())});
390+
391+
co_return std::move(canonical).def();
392+
} catch (const exception& e) {
393+
vlog(
394+
plog.warn,
395+
"Failed to parse stored schema with id {}: Error {}",
396+
id,
397+
e.what());
398+
throw;
399+
}
393400
}
394401

395402
ss::future<canonical_schema_definition>
396403
sharded_store::get_schema_definition(schema_id id) {
404+
auto unparsed = co_await _store.invoke_on(
405+
shard_for(id), _smp_opts, [id](store& s) {
406+
return s.get_schema_definition(id).value();
407+
});
408+
409+
try {
410+
auto canonical = co_await make_canonical_schema(
411+
{{}, std::move(unparsed)});
412+
413+
co_return std::move(canonical).def();
414+
} catch (const exception& e) {
415+
vlog(
416+
plog.warn,
417+
"Failed to parse stored schema with id {}: Error {}",
418+
id,
419+
e.what());
420+
throw;
421+
}
422+
}
423+
424+
ss::future<unparsed_schema_definition>
425+
sharded_store::get_unparsed_schema_definition(schema_id id) {
397426
co_return co_await _store.invoke_on(
398427
shard_for(id), _smp_opts, [id](store& s) {
399428
return s.get_schema_definition(id).value();
@@ -428,6 +457,17 @@ sharded_store::get_schema_subjects(schema_id id, include_deleted inc_del) {
428457
co_return subs;
429458
}
430459

460+
ss::future<schema_id>
461+
sharded_store::get_id(subject sub, std::optional<schema_version> version) {
462+
auto v_id = co_await _store.invoke_on(
463+
shard_for(sub), _smp_opts, [sub, version](store& s) {
464+
return s.get_subject_version_id(sub, version, include_deleted::yes)
465+
.value();
466+
});
467+
468+
co_return v_id.id;
469+
}
470+
431471
ss::future<subject_schema> sharded_store::get_subject_schema(
432472
subject sub, std::optional<schema_version> version, include_deleted inc_del) {
433473
auto sub_shard{shard_for(sub)};
@@ -441,11 +481,23 @@ ss::future<subject_schema> sharded_store::get_subject_schema(
441481
return s.get_schema_definition(id).value();
442482
});
443483

444-
co_return subject_schema{
445-
.schema = {sub, std::move(def)},
446-
.version = v_id.version,
447-
.id = v_id.id,
448-
.deleted = v_id.deleted};
484+
try {
485+
auto canonical = co_await make_canonical_schema({sub, std::move(def)});
486+
487+
co_return subject_schema{
488+
.schema = std::move(canonical),
489+
.version = v_id.version,
490+
.id = v_id.id,
491+
.deleted = v_id.deleted};
492+
} catch (const exception& e) {
493+
vlog(
494+
plog.warn,
495+
"Failed to parse stored schema, subject {}, version {}: {}",
496+
sub,
497+
v_id.id,
498+
e.what());
499+
throw;
500+
}
449501
}
450502

451503
ss::future<chunked_vector<subject>> sharded_store::get_subjects(
@@ -703,7 +755,7 @@ sharded_store::clear_compatibility(seq_marker marker, subject sub) {
703755
}
704756

705757
ss::future<bool>
706-
sharded_store::upsert_schema(schema_id id, canonical_schema_definition def) {
758+
sharded_store::upsert_schema(schema_id id, unparsed_schema_definition def) {
707759
co_await maybe_update_max_schema_id(id);
708760
co_return co_await _store.invoke_on(
709761
shard_for(id), _smp_opts, [id, def{std::move(def)}](store& s) mutable {

src/v/pandaproxy/schema_registry/sharded_store.h

+9-8
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,6 @@ class sharded_store final : public schema_getter {
5656
};
5757
ss::future<insert_result> project_ids(subject_schema schema);
5858

59-
ss::future<bool> upsert(
60-
seq_marker marker,
61-
canonical_schema schema,
62-
schema_id id,
63-
schema_version version,
64-
is_deleted deleted);
65-
6659
ss::future<bool> upsert(
6760
seq_marker marker,
6861
unparsed_schema schema,
@@ -80,6 +73,10 @@ class sharded_store final : public schema_getter {
8073
ss::future<canonical_schema_definition>
8174
get_schema_definition(schema_id id) override;
8275

76+
///\brief Return a schema definition by id, without any processing.
77+
ss::future<unparsed_schema_definition>
78+
get_unparsed_schema_definition(schema_id id);
79+
8380
ss::future<std::optional<canonical_schema_definition>>
8481
maybe_get_schema_definition(schema_id id) override;
8582

@@ -97,6 +94,10 @@ class sharded_store final : public schema_getter {
9794
std::optional<schema_version> version,
9895
include_deleted inc_dec) final;
9996

97+
///\brief Return the id of a schema by subject and version (or latest).
98+
ss::future<schema_id>
99+
get_id(subject sub, std::optional<schema_version> version);
100+
100101
///\brief Return a list of subjects.
101102
ss::future<chunked_vector<subject>> get_subjects(
102103
include_deleted inc_del,
@@ -213,7 +214,7 @@ class sharded_store final : public schema_getter {
213214
schema_version version, canonical_schema new_schema, verbose is_verbose);
214215

215216
ss::future<bool>
216-
upsert_schema(schema_id id, canonical_schema_definition def);
217+
upsert_schema(schema_id id, unparsed_schema_definition def);
217218
ss::future<> delete_schema(schema_id id);
218219

219220
struct insert_subject_result {

src/v/pandaproxy/schema_registry/store.h

+8-8
Original file line numberDiff line numberDiff line change
@@ -87,15 +87,15 @@ class store {
8787
/// version.
8888
///
8989
/// return the schema_version and schema_id, and whether it's new.
90-
insert_result insert(canonical_schema schema) {
90+
insert_result insert(unparsed_schema schema) {
9191
auto [sub, def] = std::move(schema).destructure();
9292
auto id = insert_schema(std::move(def)).id;
9393
auto [version, inserted] = insert_subject(std::move(sub), id);
9494
return {version, id, inserted};
9595
}
9696

9797
///\brief Return a schema definition by id.
98-
result<canonical_schema_definition>
98+
result<unparsed_schema_definition>
9999
get_schema_definition(const schema_id& id) const {
100100
auto it = _schemas.find(id);
101101
if (it == _schemas.end()) {
@@ -157,7 +157,7 @@ class store {
157157
}
158158

159159
///\brief Return a schema by subject and version.
160-
result<subject_schema> get_subject_schema(
160+
result<unparsed_subject_schema> get_subject_schema(
161161
const subject& sub,
162162
std::optional<schema_version> version,
163163
include_deleted inc_del) const {
@@ -166,7 +166,7 @@ class store {
166166

167167
auto def = BOOST_OUTCOME_TRYX(get_schema_definition(v_id.id));
168168

169-
return subject_schema{
169+
return unparsed_subject_schema{
170170
.schema = {sub, std::move(def)},
171171
.version = v_id.version,
172172
.id = v_id.id,
@@ -614,7 +614,7 @@ class store {
614614
schema_id id;
615615
bool inserted;
616616
};
617-
insert_schema_result insert_schema(canonical_schema_definition def) {
617+
insert_schema_result insert_schema(unparsed_schema_definition def) {
618618
const auto s_it = std::find_if(
619619
_schemas.begin(), _schemas.end(), [&](const auto& s) {
620620
const auto& entry = s.second;
@@ -630,7 +630,7 @@ class store {
630630
return {id, inserted};
631631
}
632632

633-
bool upsert_schema(schema_id id, canonical_schema_definition def) {
633+
bool upsert_schema(schema_id id, unparsed_schema_definition def) {
634634
return _schemas.insert_or_assign(id, schema_entry(std::move(def)))
635635
.second;
636636
}
@@ -776,10 +776,10 @@ class store {
776776

777777
private:
778778
struct schema_entry {
779-
explicit schema_entry(canonical_schema_definition definition)
779+
explicit schema_entry(unparsed_schema_definition definition)
780780
: definition{std::move(definition)} {}
781781

782-
canonical_schema_definition definition;
782+
unparsed_schema_definition definition;
783783
};
784784

785785
class subject_entry {

0 commit comments

Comments
 (0)