Skip to content

Commit 850c7af

Browse files
iahsfacebook-github-bot
authored andcommitted
Implement schema merging
Summary: Merges deserialized schemas into one before storing. This lets us benefit from value deduplication and means we don't need to build a separate index of definitions and values, as well as reducing the number of allocations needed. Reviewed By: thedavekwon Differential Revision: D53835988 fbshipit-source-id: 430222f41878c1a104108b5a819c37eb4eda208f
1 parent 04a0d21 commit 850c7af

File tree

3 files changed

+62
-27
lines changed

3 files changed

+62
-27
lines changed

thrift/lib/cpp2/runtime/SchemaRegistry.cpp

+36-12
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,44 @@ void SchemaRegistry::registerSchema(
3838
}
3939

4040
#ifdef FBTHRIFT_HAS_SCHEMA
41-
folly::F14FastMap<type::ProgramId, type::Schema>& SchemaRegistry::getSchemas() {
42-
static folly::Indestructible<folly::F14FastMap<type::ProgramId, type::Schema>>
43-
schemas = [&] {
44-
folly::F14FastMap<type::ProgramId, type::Schema> schemaMap;
45-
accessed() = true;
46-
for (auto& [name, data] : getRawSchemas()) {
47-
auto schema = CompactSerializer::deserialize<type::Schema>(data.data);
48-
auto id = *schema.programs()[0].id();
49-
schemaMap[id] = std::move(schema);
41+
const type::Schema& SchemaRegistry::getMergedSchema() {
42+
static const folly::Indestructible<type::Schema> merged = [&] {
43+
accessed() = true;
44+
type::Schema mergedSchema;
45+
std::unordered_set<type::ProgramId> includedPrograms;
46+
47+
for (auto& [name, data] : getRawSchemas()) {
48+
auto schema = CompactSerializer::deserialize<type::Schema>(data.data);
49+
50+
for (auto& program : *schema.programs()) {
51+
auto id = *program.id();
52+
if (!includedPrograms.insert(id).second) {
53+
// We checked during registration that the program ids are unique,
54+
// so this file was already included by another program bundle.
55+
continue;
5056
}
51-
return schemaMap;
52-
}();
57+
mergedSchema.programs()->push_back(std::move(program));
58+
}
5359

54-
return *schemas;
60+
mergedSchema.valuesMap()->insert(
61+
std::make_move_iterator(schema.valuesMap()->begin()),
62+
std::make_move_iterator(schema.valuesMap()->end()));
63+
// This deduplicates common values.
64+
65+
auto ndefs = mergedSchema.definitionsMap()->size();
66+
mergedSchema.definitionsMap()->insert(
67+
std::make_move_iterator(schema.definitionsMap()->begin()),
68+
std::make_move_iterator(schema.definitionsMap()->end()));
69+
if (mergedSchema.definitionsMap()->size() !=
70+
ndefs + schema.definitionsMap()->size()) {
71+
throw std::runtime_error("DefinitionKey collision");
72+
}
73+
}
74+
75+
return mergedSchema;
76+
}();
77+
78+
return *merged;
5579
}
5680
#endif
5781

thrift/lib/cpp2/runtime/SchemaRegistry.h

+1-8
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,7 @@ class SchemaRegistry {
4242

4343
#ifdef FBTHRIFT_HAS_SCHEMA
4444
public:
45-
struct Iter {
46-
auto begin() const { return SchemaRegistry::getSchemas().begin(); }
47-
auto end() const { return SchemaRegistry::getSchemas().end(); }
48-
};
49-
static Iter iter() { return {}; }
50-
51-
private:
52-
static folly::F14FastMap<type::ProgramId, type::Schema>& getSchemas();
45+
static const type::Schema& getMergedSchema();
5346
#endif
5447
};
5548

thrift/test/SchemaTest.cpp

+25-7
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,31 @@
2222

2323
// These deps pull in the corresponding schemas when enabled.
2424
#include <thrift/annotation/gen-cpp2/thrift_types.h>
25+
#include <thrift/test/gen-cpp2/TopologicallySortObjectsTest_types.h>
2526
#include <thrift/test/gen-cpp2/schema_types.h>
2627

2728
using namespace apache::thrift;
2829

2930
TEST(SchemaTest, not_linked) {
30-
for (const auto& [id, schema] : SchemaRegistry::iter()) {
31-
EXPECT_NE(schema.programs()[0].path(), "thrift/annotation/thrift.thrift");
31+
const auto& schema = SchemaRegistry::getMergedSchema();
32+
for (const auto& program : *schema.programs()) {
33+
EXPECT_NE(
34+
program.path(), "thrift/test/TopologicallySortObjectsTest.thrift");
35+
}
36+
37+
// Use the types target
38+
(void)apache::thrift::test::IncompleteMap{};
39+
}
40+
41+
TEST(SchemaTest, not_linked_but_included) {
42+
const auto& schema = SchemaRegistry::getMergedSchema();
43+
for (const auto& program : *schema.programs()) {
44+
if (program.path() != "thrift/annotation/thrift.thrift") {
45+
continue;
46+
}
47+
for (const auto& def : *program.definitionKeys()) {
48+
EXPECT_FALSE(schema.definitionsMap()->contains(def));
49+
}
3250
}
3351

3452
// Use the types target
@@ -37,8 +55,9 @@ TEST(SchemaTest, not_linked) {
3755

3856
TEST(SchemaTest, linked) {
3957
bool found = false;
40-
for (const auto& [id, schema] : SchemaRegistry::iter()) {
41-
if (schema.programs()[0].path() == "thrift/test/schema.thrift") {
58+
const auto& schema = SchemaRegistry::getMergedSchema();
59+
for (const auto& program : *schema.programs()) {
60+
if (program.path() == "thrift/test/schema.thrift") {
4261
found = true;
4362
} else {
4463
continue;
@@ -47,15 +66,14 @@ TEST(SchemaTest, linked) {
4766
// Includes definitions from root program
4867
EXPECT_EQ(
4968
schema.definitionsMap()
50-
->at(schema.programs()[0].definitionKeys()[0])
69+
->at(program.definitionKeys()[0])
5170
.structDef_ref()
5271
->name(),
5372
"Empty");
5473

5574
// Only includes definitions from root program
5675
EXPECT_EQ(
57-
schema.definitionsMap()->size(),
58-
schema.programs()[0].definitionKeys()->size());
76+
schema.definitionsMap()->size(), program.definitionKeys()->size());
5977

6078
// Transitive includes are listed
6179
EXPECT_GT(schema.programs()->size(), 4);

0 commit comments

Comments
 (0)