Skip to content

import: write RPC will check region epoch before continue && br: pre-check TiKV disk space before download (#17238) (#17569) #401

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/pr-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ on:
# - 'raftstore-proxy*'
pull_request:
branches:
- 'raftstore-proxy*'
- 'raftstore-proxy-6.*'
- 'raftstore-proxy-5.*'

jobs:
build-check-old:
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ unset-override:

pre-format: unset-override
@rustup component add rustfmt
@cargo install --force --locked -q cargo-sort
@cargo install --force --locked --version=1.0.9 -q cargo-sort

pre-format-fast: unset-override
@rustup component add rustfmt
Expand Down Expand Up @@ -249,3 +249,4 @@ clippy: pre-clippy
@./scripts/check-redact-log
@./scripts/check-docker-build
@./scripts/clippy-all

8 changes: 7 additions & 1 deletion components/error_code/src/sst_importer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,11 @@ define_error_codes!(
TTL_LEN_NOT_EQUALS_TO_PAIRS => ("TtlLenNotEqualsToPairs", "", ""),
INCOMPATIBLE_API_VERSION => ("IncompatibleApiVersion", "", ""),
INVALID_KEY_MODE => ("InvalidKeyMode", "", ""),
RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", "")
RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", ""),
SUSPENDED => ("Suspended",
"this request has been suspended.",
"Probably there are some export tools don't support exporting data inserted by `ingest`(say, snapshot backup). Check the user manual and stop them."),
REQUEST_TOO_NEW => ("RequestTooNew", "", ""),
REQUEST_TOO_OLD => ("RequestTooOld", "", ""),
DISK_SPACE_NOT_ENOUGH => ("DiskSpaceNotEnough", "", "")
);
9 changes: 7 additions & 2 deletions components/raftstore-v2/src/operation/command/write/ingest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,21 @@ impl Store {
ctx: &mut StoreContext<EK, ER, T>,
) -> Result<()> {
let ssts = box_try!(ctx.sst_importer.list_ssts());
// filter old version SSTs
let ssts: Vec<_> = ssts
.into_iter()
.filter(|sst| sst.api_version >= sst_importer::API_VERSION_2)
.collect();
if ssts.is_empty() {
return Ok(());
}

let mut region_ssts: HashMap<_, Vec<_>> = HashMap::default();
for sst in ssts {
region_ssts
.entry(sst.get_region_id())
.entry(sst.meta.get_region_id())
.or_default()
.push(sst);
.push(sst.meta);
}

let ranges = ctx.sst_importer.ranges_in_import();
Expand Down
83 changes: 13 additions & 70 deletions components/raftstore/src/store/fsm/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ use futures::{compat::Future01CompatExt, FutureExt};
use grpcio_health::HealthService;
use keys::{self, data_end_key, data_key, enc_end_key, enc_start_key};
use kvproto::{
import_sstpb::{SstMeta, SwitchMode},
metapb::{self, Region, RegionEpoch},
pdpb::{self, QueryStats, StoreStats},
raft_cmdpb::{AdminCmdType, AdminRequest},
Expand Down Expand Up @@ -814,9 +813,6 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport>
}
}
StoreMsg::CompactedEvent(event) => self.on_compaction_finished(event),
StoreMsg::ValidateSstResult { invalid_ssts } => {
self.on_validate_sst_result(invalid_ssts)
}
StoreMsg::ClearRegionSizeInRange { start_key, end_key } => {
self.clear_region_size_in_range(&start_key, &end_key)
}
Expand Down Expand Up @@ -1656,12 +1652,7 @@ impl<EK: KvEngine, ER: RaftEngine> RaftBatchSystem<EK, ER> {
);

let compact_runner = CompactRunner::new(engines.kv.clone());
let cleanup_sst_runner = CleanupSstRunner::new(
meta.get_id(),
self.router.clone(),
Arc::clone(&importer),
Arc::clone(&pd_client),
);
let cleanup_sst_runner = CleanupSstRunner::new(Arc::clone(&importer));
let gc_snapshot_runner = GcSnapshotRunner::new(
meta.get_id(),
self.router.clone(), // RaftRouter
Expand Down Expand Up @@ -2760,42 +2751,8 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER
}

impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER, T> {
fn on_validate_sst_result(&mut self, ssts: Vec<SstMeta>) {
if ssts.is_empty() || self.ctx.importer.get_mode() == SwitchMode::Import {
return;
}
// A stale peer can still ingest a stale Sst before it is
// destroyed. We need to make sure that no stale peer exists.
let mut delete_ssts = Vec::new();
{
let meta = self.ctx.store_meta.lock().unwrap();
for sst in ssts {
if !meta.regions.contains_key(&sst.get_region_id()) {
delete_ssts.push(sst);
}
}
}
if delete_ssts.is_empty() {
return;
}

let task = CleanupSstTask::DeleteSst { ssts: delete_ssts };
if let Err(e) = self
.ctx
.cleanup_scheduler
.schedule(CleanupTask::CleanupSst(task))
{
error!(
"schedule to delete ssts failed";
"store_id" => self.fsm.store.id,
"err" => ?e,
);
}
}

fn on_cleanup_import_sst(&mut self) -> Result<()> {
let mut delete_ssts = Vec::new();
let mut validate_ssts = Vec::new();

let ssts = box_try!(self.ctx.importer.list_ssts());
if ssts.is_empty() {
Expand All @@ -2804,15 +2761,22 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER
{
let meta = self.ctx.store_meta.lock().unwrap();
for sst in ssts {
if let Some(r) = meta.regions.get(&sst.get_region_id()) {
if sst.api_version < sst_importer::API_VERSION_2 {
// SST of old versions are created by old TiKV and have different prerequisite
// we can't delete them here. They can only be deleted manually
continue;
}
if let Some(r) = meta.regions.get(&sst.meta.get_region_id()) {
let region_epoch = r.get_region_epoch();
if util::is_epoch_stale(sst.get_region_epoch(), region_epoch) {
if util::is_epoch_stale(sst.meta.get_region_epoch(), region_epoch) {
// If the SST epoch is stale, it will not be ingested anymore.
delete_ssts.push(sst);
delete_ssts.push(sst.meta);
}
} else {
// If the peer doesn't exist, we need to validate the SST through PD.
validate_ssts.push(sst);
// The write RPC of import sst service have make sure the region do exist at the
// write time, and now the region is not found, sst can be
// deleted because it won't be used by ingest in future.
delete_ssts.push(sst.meta);
}
}
}
Expand All @@ -2832,27 +2796,6 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER
}
}

// When there is an import job running, the region which this sst belongs may
// has not been split from the origin region because the apply thread is so busy
// that it can not apply SplitRequest as soon as possible. So we can not
// delete this sst file.
if !validate_ssts.is_empty() && self.ctx.importer.get_mode() != SwitchMode::Import {
let task = CleanupSstTask::ValidateSst {
ssts: validate_ssts,
};
if let Err(e) = self
.ctx
.cleanup_scheduler
.schedule(CleanupTask::CleanupSst(task))
{
error!(
"schedule to validate ssts failed";
"store_id" => self.fsm.store.id,
"err" => ?e,
);
}
}

Ok(())
}

Expand Down
6 changes: 0 additions & 6 deletions components/raftstore/src/store/msg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use engine_traits::{CompactedEvent, KvEngine, Snapshot};
use futures::channel::mpsc::UnboundedSender;
use kvproto::{
brpb::CheckAdminResponse,
import_sstpb::SstMeta,
kvrpcpb::{DiskFullOpt, ExtraOp as TxnExtraOp},
metapb,
metapb::RegionEpoch,
Expand Down Expand Up @@ -823,10 +822,6 @@ where
{
RaftMessage(InspectedRaftMessage),

ValidateSstResult {
invalid_ssts: Vec<SstMeta>,
},

// Clear region size and keys for all regions in the range, so we can force them to
// re-calculate their size later.
ClearRegionSizeInRange {
Expand Down Expand Up @@ -883,7 +878,6 @@ where
write!(fmt, "Store {} is unreachable", store_id)
}
StoreMsg::CompactedEvent(ref event) => write!(fmt, "CompactedEvent cf {}", event.cf()),
StoreMsg::ValidateSstResult { .. } => write!(fmt, "Validate SST Result"),
StoreMsg::ClearRegionSizeInRange {
ref start_key,
ref end_key,
Expand Down
19 changes: 6 additions & 13 deletions components/raftstore/src/store/worker/cleanup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,13 @@
use std::fmt::{self, Display, Formatter};

use engine_traits::{KvEngine, RaftEngine};
use pd_client::PdClient;
use tikv_util::worker::Runnable;

use super::{
cleanup_snapshot::{Runner as GcSnapshotRunner, Task as GcSnapshotTask},
cleanup_sst::{Runner as CleanupSstRunner, Task as CleanupSstTask},
compact::{Runner as CompactRunner, Task as CompactTask},
};
use crate::store::StoreRouter;

pub enum Task {
Compact(CompactTask),
Expand All @@ -29,29 +27,26 @@ impl Display for Task {
}
}

pub struct Runner<E, R, C, S>
pub struct Runner<E, R>
where
E: KvEngine,
R: RaftEngine,
S: StoreRouter<E>,
{
compact: CompactRunner<E>,
cleanup_sst: CleanupSstRunner<E, C, S>,
cleanup_sst: CleanupSstRunner,
gc_snapshot: GcSnapshotRunner<E, R>,
}

impl<E, R, C, S> Runner<E, R, C, S>
impl<E, R> Runner<E, R>
where
E: KvEngine,
R: RaftEngine,
C: PdClient,
S: StoreRouter<E>,
{
pub fn new(
compact: CompactRunner<E>,
cleanup_sst: CleanupSstRunner<E, C, S>,
cleanup_sst: CleanupSstRunner,
gc_snapshot: GcSnapshotRunner<E, R>,
) -> Runner<E, R, C, S> {
) -> Runner<E, R> {
Runner {
compact,
cleanup_sst,
Expand All @@ -60,12 +55,10 @@ where
}
}

impl<E, R, C, S> Runnable for Runner<E, R, C, S>
impl<E, R> Runnable for Runner<E, R>
where
E: KvEngine,
R: RaftEngine,
C: PdClient,
S: StoreRouter<E>,
{
type Task = Task;

Expand Down
Loading