Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Tiered Storage] Replace penultimate naming with proximal #13460

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 51 additions & 55 deletions db/compaction/compaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -338,16 +338,16 @@ Compaction::Compaction(
_blob_garbage_collection_age_cutoff > 1
? mutable_cf_options().blob_garbage_collection_age_cutoff
: _blob_garbage_collection_age_cutoff),
penultimate_level_(
// For simplicity, we don't support the concept of "penultimate level"
proximal_level_(
// For simplicity, we don't support the concept of "proximal level"
// with `CompactionReason::kExternalSstIngestion` and
// `CompactionReason::kRefitLevel`
_compaction_reason == CompactionReason::kExternalSstIngestion ||
_compaction_reason == CompactionReason::kRefitLevel
? Compaction::kInvalidLevel
: EvaluatePenultimateLevel(vstorage, mutable_cf_options_,
immutable_options_, start_level_,
output_level_)) {
: EvaluateProximalLevel(vstorage, mutable_cf_options_,
immutable_options_, start_level_,
output_level_)) {
MarkFilesBeingCompacted(true);
if (is_manual_compaction_) {
compaction_reason_ = CompactionReason::kManualCompaction;
Expand Down Expand Up @@ -405,10 +405,10 @@ Compaction::Compaction(
}
}

PopulatePenultimateLevelOutputRange();
PopulateProximalLevelOutputRange();
}

void Compaction::PopulatePenultimateLevelOutputRange() {
void Compaction::PopulateProximalLevelOutputRange() {
if (!SupportsPerKeyPlacement()) {
assert(keep_in_last_level_through_seqno_ == kMaxSequenceNumber);
return;
Expand All @@ -417,46 +417,42 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
// exclude the last level, the range of all input levels is the safe range
// of keys that can be moved up.
int exclude_level = number_levels_ - 1;
penultimate_output_range_type_ = PenultimateOutputRangeType::kNonLastRange;
proximal_output_range_type_ = ProximalOutputRangeType::kNonLastRange;

// For universal compaction, the penultimate_output_range could be extended if
// all penultimate level files are included in the compaction (which includes
// the case that the penultimate level is empty).
// For universal compaction, the proximal_output_range could be extended if
// all proximal level files are included in the compaction (which includes
// the case that the proximal level is empty).
if (immutable_options_.compaction_style == kCompactionStyleUniversal) {
exclude_level = kInvalidLevel;
penultimate_output_range_type_ = PenultimateOutputRangeType::kFullRange;
std::set<uint64_t> penultimate_inputs;
proximal_output_range_type_ = ProximalOutputRangeType::kFullRange;
std::set<uint64_t> proximal_inputs;
for (const auto& input_lvl : inputs_) {
if (input_lvl.level == penultimate_level_) {
if (input_lvl.level == proximal_level_) {
for (const auto& file : input_lvl.files) {
penultimate_inputs.emplace(file->fd.GetNumber());
proximal_inputs.emplace(file->fd.GetNumber());
}
}
}
auto penultimate_files = input_vstorage_->LevelFiles(penultimate_level_);
for (const auto& file : penultimate_files) {
if (penultimate_inputs.find(file->fd.GetNumber()) ==
penultimate_inputs.end()) {
auto proximal_files = input_vstorage_->LevelFiles(proximal_level_);
for (const auto& file : proximal_files) {
if (proximal_inputs.find(file->fd.GetNumber()) == proximal_inputs.end()) {
exclude_level = number_levels_ - 1;
penultimate_output_range_type_ =
PenultimateOutputRangeType::kNonLastRange;
proximal_output_range_type_ = ProximalOutputRangeType::kNonLastRange;
break;
}
}
}

// FIXME: should make use of `penultimate_output_range_type_`.
// FIXME: should make use of `proximal_output_range_type_`.
// FIXME: when last level's input range does not overlap with
// penultimate level, and penultimate level input is empty,
// this call will not set penultimate_level_smallest_ or
// penultimate_level_largest_. No keys will be compacted up.
GetBoundaryInternalKeys(input_vstorage_, inputs_,
&penultimate_level_smallest_,
&penultimate_level_largest_, exclude_level);

if (penultimate_output_range_type_ !=
PenultimateOutputRangeType::kFullRange) {
// If not full range in penultimate level, must keep everything already
// proximal level, and proximal level input is empty,
// this call will not set proximal_level_smallest_ or
// proximal_level_largest_. No keys will be compacted up.
GetBoundaryInternalKeys(input_vstorage_, inputs_, &proximal_level_smallest_,
&proximal_level_largest_, exclude_level);

if (proximal_output_range_type_ != ProximalOutputRangeType::kFullRange) {
// If not full range in proximal level, must keep everything already
// in the last level there, because moving it back up might cause
// overlap/placement issues that are difficult to resolve properly in the
// presence of range deletes
Expand Down Expand Up @@ -486,53 +482,53 @@ Compaction::~Compaction() {
}

bool Compaction::SupportsPerKeyPlacement() const {
return penultimate_level_ != kInvalidLevel;
return proximal_level_ != kInvalidLevel;
}

int Compaction::GetPenultimateLevel() const { return penultimate_level_; }
int Compaction::GetProximalLevel() const { return proximal_level_; }

// smallest_key and largest_key include timestamps if user-defined timestamp is
// enabled.
bool Compaction::OverlapPenultimateLevelOutputRange(
bool Compaction::OverlapProximalLevelOutputRange(
const Slice& smallest_key, const Slice& largest_key) const {
if (!SupportsPerKeyPlacement()) {
return false;
}

// See FIXME in Compaction::PopulatePenultimateLevelOutputRange().
// See FIXME in Compaction::PopulateProximalLevelOutputRange().
// We do not compact any key up in this case.
if (penultimate_level_smallest_.size() == 0 ||
penultimate_level_largest_.size() == 0) {
if (proximal_level_smallest_.size() == 0 ||
proximal_level_largest_.size() == 0) {
return false;
}

const Comparator* ucmp =
input_vstorage_->InternalComparator()->user_comparator();

return ucmp->CompareWithoutTimestamp(
smallest_key, penultimate_level_largest_.user_key()) <= 0 &&
smallest_key, proximal_level_largest_.user_key()) <= 0 &&
ucmp->CompareWithoutTimestamp(
largest_key, penultimate_level_smallest_.user_key()) >= 0;
largest_key, proximal_level_smallest_.user_key()) >= 0;
}

// key includes timestamp if user-defined timestamp is enabled.
void Compaction::TEST_AssertWithinPenultimateLevelOutputRange(
void Compaction::TEST_AssertWithinProximalLevelOutputRange(
const Slice& user_key, bool expect_failure) const {
#ifdef NDEBUG
(void)user_key;
(void)expect_failure;
#else
assert(SupportsPerKeyPlacement());

assert(penultimate_level_smallest_.size() > 0);
assert(penultimate_level_largest_.size() > 0);
assert(proximal_level_smallest_.size() > 0);
assert(proximal_level_largest_.size() > 0);

auto* cmp = input_vstorage_->user_comparator();

// op_type of a key can change during compaction, e.g. Merge -> Put.
if (!(cmp->Compare(user_key, penultimate_level_smallest_.user_key()) >= 0)) {
if (!(cmp->Compare(user_key, proximal_level_smallest_.user_key()) >= 0)) {
assert(expect_failure);
} else if (!(cmp->Compare(user_key, penultimate_level_largest_.user_key()) <=
} else if (!(cmp->Compare(user_key, proximal_level_largest_.user_key()) <=
0)) {
assert(expect_failure);
} else {
Expand Down Expand Up @@ -1018,7 +1014,7 @@ uint64_t Compaction::MinInputFileEpochNumber() const {
return min_epoch_number;
}

int Compaction::EvaluatePenultimateLevel(
int Compaction::EvaluateProximalLevel(
const VersionStorageInfo* vstorage,
const MutableCFOptions& mutable_cf_options,
const ImmutableOptions& immutable_options, const int start_level,
Expand All @@ -1033,21 +1029,21 @@ int Compaction::EvaluatePenultimateLevel(
return kInvalidLevel;
}

int penultimate_level = output_level - 1;
assert(penultimate_level < immutable_options.num_levels);
if (penultimate_level <= 0) {
int proximal_level = output_level - 1;
assert(proximal_level < immutable_options.num_levels);
if (proximal_level <= 0) {
return kInvalidLevel;
}

// If the penultimate level is not within input level -> output level range
// check if the penultimate output level is empty, if it's empty, it could
// also be locked for the penultimate output.
// If the proximal level is not within the input level -> output level range,
// check whether the proximal output level is empty; if it is empty, it can
// also be locked for the proximal output.
// TODO: ideally, it only needs to check if there's a file within the
// compaction output key range. For simplicity, it just checks if there's any
// file on the penultimate level.
// file on the proximal level.
if (start_level == immutable_options.num_levels - 1 &&
(immutable_options.compaction_style != kCompactionStyleUniversal ||
!vstorage->LevelFiles(penultimate_level).empty())) {
!vstorage->LevelFiles(proximal_level).empty())) {
return kInvalidLevel;
}

Expand All @@ -1061,7 +1057,7 @@ int Compaction::EvaluatePenultimateLevel(
return kInvalidLevel;
}

return penultimate_level;
return proximal_level;
}

void Compaction::FilterInputsForCompactionIterator() {
Expand Down
86 changes: 43 additions & 43 deletions db/compaction/compaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,13 @@ class Compaction {
BlobGarbageCollectionPolicy::kUseDefault,
double blob_garbage_collection_age_cutoff = -1);

// The type of the penultimate level output range
enum class PenultimateOutputRangeType : int {
kNotSupported, // it cannot output to the penultimate level
kFullRange, // any data could be output to the penultimate level
// The type of the proximal level output range
enum class ProximalOutputRangeType : int {
kNotSupported, // it cannot output to the proximal level
kFullRange, // any data could be output to the proximal level
kNonLastRange, // only the keys within non_last_level compaction inputs can
// be outputted to the penultimate level
kDisabled, // no data can be outputted to the penultimate level
// be outputted to the proximal level
kDisabled, // no data can be outputted to the proximal level
};

// No copying allowed
Expand Down Expand Up @@ -370,29 +370,29 @@ class Compaction {

Slice GetLargestUserKey() const { return largest_user_key_; }

PenultimateOutputRangeType GetPenultimateOutputRangeType() const {
return penultimate_output_range_type_;
ProximalOutputRangeType GetProximalOutputRangeType() const {
return proximal_output_range_type_;
}

// Return true if the compaction supports per_key_placement
bool SupportsPerKeyPlacement() const;

// Get per_key_placement penultimate output level, which is `last_level - 1`
// Get per_key_placement proximal output level, which is `last_level - 1`
// if per_key_placement feature is supported. Otherwise, return -1.
int GetPenultimateLevel() const;
int GetProximalLevel() const;

// Return true if the given range is overlap with penultimate level output
// Return true if the given range overlaps with the proximal level output
// range.
// Both smallest_key and largest_key include timestamps if user-defined
// timestamp is enabled.
bool OverlapPenultimateLevelOutputRange(const Slice& smallest_key,
const Slice& largest_key) const;
bool OverlapProximalLevelOutputRange(const Slice& smallest_key,
const Slice& largest_key) const;

// For testing purposes, check that a key is within penultimate level
// For testing purposes, check that a key is within proximal level
// output range for per_key_placement feature, which is safe to place the key
// to the penultimate level. Different compaction strategies have different
// to the proximal level. Different compaction strategies have different
// rules. `user_key` includes timestamp if user-defined timestamp is enabled.
void TEST_AssertWithinPenultimateLevelOutputRange(
void TEST_AssertWithinProximalLevelOutputRange(
const Slice& user_key, bool expect_failure = false) const;

CompactionReason compaction_reason() const { return compaction_reason_; }
Expand Down Expand Up @@ -441,20 +441,20 @@ class Compaction {

static constexpr int kInvalidLevel = -1;

// Evaluate penultimate output level. If the compaction supports
// per_key_placement feature, it returns the penultimate level number.
// Evaluate proximal output level. If the compaction supports
// per_key_placement feature, it returns the proximal level number.
// Otherwise, it's set to kInvalidLevel (-1), which means
// output_to_penultimate_level is not supported.
// Note: even the penultimate level output is supported (PenultimateLevel !=
// output_to_proximal_level is not supported.
// Note: even if proximal level output is supported (ProximalLevel !=
// kInvalidLevel), some key ranges may be unsafe to output to the
// penultimate level. The safe key range is populated by
// `PopulatePenultimateLevelOutputRange()`.
// Which could potentially disable all penultimate level output.
static int EvaluatePenultimateLevel(
const VersionStorageInfo* vstorage,
const MutableCFOptions& mutable_cf_options,
const ImmutableOptions& immutable_options, const int start_level,
const int output_level);
// proximal level. The safe key range is populated by
// `PopulateProximalLevelOutputRange()`,
// which could potentially disable all proximal level output.
static int EvaluateProximalLevel(const VersionStorageInfo* vstorage,
const MutableCFOptions& mutable_cf_options,
const ImmutableOptions& immutable_options,
const int start_level,
const int output_level);

// If some data cannot be safely migrated "up" the LSM tree due to a change
// in the preclude_last_level_data_seconds setting, this indicates a sequence
Expand Down Expand Up @@ -482,10 +482,10 @@ class Compaction {
InternalKey* smallest_key, InternalKey* largest_key,
int exclude_level = -1);

// populate penultimate level output range, which will be used to determine if
// a key is safe to output to the penultimate level (details see
// `Compaction::WithinPenultimateLevelOutputRange()`.
void PopulatePenultimateLevelOutputRange();
// Populate the proximal level output range, which is used to determine
// whether a key is safe to output to the proximal level (for details, see
// `Compaction::WithinProximalLevelOutputRange()`).
void PopulateProximalLevelOutputRange();

// If oldest snapshot is specified at Compaction construction time, we have
// an opportunity to optimize inputs for compaction iterator for this case:
Expand Down Expand Up @@ -616,37 +616,37 @@ class Compaction {

// only set when per_key_placement feature is enabled, -1 (kInvalidLevel)
// means not supported.
const int penultimate_level_;
const int proximal_level_;

// Key range for penultimate level output
// Key range for proximal level output
// includes timestamp if user-defined timestamp is enabled.
// penultimate_output_range_type_ shows the range type
InternalKey penultimate_level_smallest_;
InternalKey penultimate_level_largest_;
PenultimateOutputRangeType penultimate_output_range_type_ =
PenultimateOutputRangeType::kNotSupported;
// proximal_output_range_type_ shows the range type
InternalKey proximal_level_smallest_;
InternalKey proximal_level_largest_;
ProximalOutputRangeType proximal_output_range_type_ =
ProximalOutputRangeType::kNotSupported;
};

#ifndef NDEBUG
// Helper struct only for tests, which contains the data to decide if a key
// should be output to the penultimate level.
// should be output to the proximal level.
// TODO: remove this when the public feature knob is available
struct PerKeyPlacementContext {
const int level;
const Slice key;
const Slice value;
const SequenceNumber seq_num;

bool& output_to_penultimate_level;
bool& output_to_proximal_level;

PerKeyPlacementContext(int _level, Slice _key, Slice _value,
SequenceNumber _seq_num,
bool& _output_to_penultimate_level)
bool& _output_to_proximal_level)
: level(_level),
key(_key),
value(_value),
seq_num(_seq_num),
output_to_penultimate_level(_output_to_penultimate_level) {}
output_to_proximal_level(_output_to_proximal_level) {}
};
#endif /* !NDEBUG */

Expand Down
Loading
Loading