Skip to content

Delete old snapshots with constant memory usage #9668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion rust/cubestore/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions rust/cubestore/cubestore/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ chrono-tz = "0.8.2"
lazy_static = "1.4.0"
mockall = "0.8.1"
async-std = "0.99"
async-stream = "0.3.6"
itertools = "0.11.0"
bigdecimal = { version = "0.2.0", features = ["serde"] }
# Right now, it's not possible to use the 0.33 release because it has bugs
Expand Down
21 changes: 18 additions & 3 deletions rust/cubestore/cubestore/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::remotefs::gcs::GCSRemoteFs;
use crate::remotefs::minio::MINIORemoteFs;
use crate::remotefs::queue::QueueRemoteFs;
use crate::remotefs::s3::S3RemoteFs;
use crate::remotefs::{LocalDirRemoteFs, RemoteFs};
use crate::remotefs::{ExtendedRemoteFs, LocalDirRemoteFs, RemoteFs};
use crate::scheduler::SchedulerImpl;
use crate::sql::cache::SqlResultCache;
use crate::sql::{SqlService, SqlServiceImpl};
Expand Down Expand Up @@ -518,6 +518,8 @@ pub trait ConfigObj: DIService {

fn dump_dir(&self) -> &Option<PathBuf>;

fn snapshots_deletion_batch_size(&self) -> u64;

fn minimum_metastore_snapshots_count(&self) -> u64;

fn metastore_snapshots_lifetime(&self) -> u64;
Expand Down Expand Up @@ -630,6 +632,7 @@ pub struct ConfigObjImpl {
pub drop_ws_processing_messages_after_secs: u64,
pub drop_ws_complete_messages_after_secs: u64,
pub skip_kafka_parsing_errors: bool,
pub snapshots_deletion_batch_size: u64,
pub minimum_metastore_snapshots_count: u64,
pub metastore_snapshots_lifetime: u64,
pub minimum_cachestore_snapshots_count: u64,
Expand Down Expand Up @@ -953,6 +956,10 @@ impl ConfigObj for ConfigObjImpl {
&self.dump_dir
}

/// Returns the configured `snapshots_deletion_batch_size` value.
// NOTE(review): presumably the number of old snapshot files removed per
// deletion batch — confirm against the snapshot cleanup code.
fn snapshots_deletion_batch_size(&self) -> u64 {
self.snapshots_deletion_batch_size
}

/// Returns the configured `minimum_metastore_snapshots_count` value.
// NOTE(review): presumably the number of metastore snapshots that are always
// retained regardless of age — confirm against the snapshot cleanup code.
fn minimum_metastore_snapshots_count(&self) -> u64 {
self.minimum_metastore_snapshots_count
}
Expand Down Expand Up @@ -1486,6 +1493,11 @@ impl Config {
10 * 60,
),
skip_kafka_parsing_errors: env_parse("CUBESTORE_SKIP_KAFKA_PARSING_ERRORS", false),
// Presently, it is not useful to make this more than upload_concurrency times a constant.
snapshots_deletion_batch_size: env_parse(
"CUBESTORE_SNAPSHOTS_DELETION_BATCH_SIZE",
80,
),
minimum_metastore_snapshots_count: env_parse(
"CUBESTORE_MINIMUM_METASTORE_SNAPSHOTS_COUNT",
5,
Expand Down Expand Up @@ -1652,6 +1664,7 @@ impl Config {
drop_ws_processing_messages_after_secs: 60,
drop_ws_complete_messages_after_secs: 10,
skip_kafka_parsing_errors: false,
snapshots_deletion_batch_size: 80,
minimum_metastore_snapshots_count: 3,
metastore_snapshots_lifetime: 24 * 3600,
minimum_cachestore_snapshots_count: 3,
Expand Down Expand Up @@ -1894,7 +1907,8 @@ impl Config {
self.injector
.register("cachestore_fs", async move |i| {
// TODO metastore works with non queue remote fs as it requires loops to be started prior to load_from_remote call
let original_remote_fs = i.get_service("original_remote_fs").await;
let original_remote_fs: Arc<dyn ExtendedRemoteFs> =
i.get_service("original_remote_fs").await;
let arc: Arc<dyn DIService> = BaseRocksStoreFs::new_for_cachestore(
original_remote_fs,
i.get_service_typed().await,
Expand Down Expand Up @@ -1969,7 +1983,8 @@ impl Config {
self.injector
.register("metastore_fs", async move |i| {
// TODO metastore works with non queue remote fs as it requires loops to be started prior to load_from_remote call
let original_remote_fs = i.get_service("original_remote_fs").await;
let original_remote_fs: Arc<dyn ExtendedRemoteFs> =
i.get_service("original_remote_fs").await;
let arc: Arc<dyn DIService> = BaseRocksStoreFs::new_for_metastore(
original_remote_fs,
i.get_service_typed().await,
Expand Down
15 changes: 12 additions & 3 deletions rust/cubestore/cubestore/src/metastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6546,8 +6546,9 @@ mod tests {

#[tokio::test]
async fn delete_old_snapshots() {
let metastore_snapshots_lifetime_secs = 1;
let config = Config::test("delete_old_snapshots").update_config(|mut obj| {
obj.metastore_snapshots_lifetime = 1;
obj.metastore_snapshots_lifetime = metastore_snapshots_lifetime_secs;
obj.minimum_metastore_snapshots_count = 2;
obj
});
Expand Down Expand Up @@ -6616,14 +6617,22 @@ mod tests {
.await
.unwrap();

assert_eq!(uploaded3.len(), 3);
assert_eq!(
uploaded3.len(),
3,
"uploaded3 keys: {}",
uploaded3.keys().join(", ")
);

meta_store
.create_schema("foo4".to_string(), false)
.await
.unwrap();

tokio::time::sleep(Duration::from_millis(1100)).await;
tokio::time::sleep(Duration::from_millis(
metastore_snapshots_lifetime_secs * 1000 + 100,
))
.await;
meta_store.upload_check_point().await.unwrap();

let uploaded4 =
Expand Down
Loading
Loading