Skip to content

Commit 4b723ea

Browse files
authored
Upgrade to cr-sqlite 0.16.0 + a few performance improvements (#75)
Upgrading to cr-sqlite 0.16.0 (unreleased, for now) gives a significant disk space reduction by adding a "lookaside" table for primary keys. Instead of repeating a whole primary key for each row in the clock table (there's 1 row per column + 1 sentinel row), this assigns an integer that can be looked up. For consul services and checks, the savings are significant (about 30% in our case).

This PR also adds a few performance improvements:
- Process all "empty" (cleared) db versions out of band, in bigger transactions
- Buffer more data when synchronizing changes before sending them
1 parent b4dac80 commit 4b723ea

14 files changed

+351
-175
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
## Unreleased
44

5+
- Upgraded to cr-sqlite 0.16.0 (unreleased) ([#75](../../pull/75))
6+
- Rewrite compaction logic to be more correct and efficient ([#74](../../pull/74))
57
- `corrosion consul sync` will now bundle services and checks in a single transaction (changeset) ([#73](../../pull/73))
68
- (**BREAKING**) Persist subscriptions across reboots, including many reliability improvements ([#69](../../pull/69))
79
- Support existing tables being added to the schema ([#64](../../pull/64))

crates/corro-agent/src/agent.rs

+170-37
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,15 @@ const COMPACT_BOOKED_INTERVAL: Duration = Duration::from_secs(300);
8484
const ANNOUNCE_INTERVAL: Duration = Duration::from_secs(300);
8585

8686
pub struct AgentOptions {
87-
actor_id: ActorId,
88-
gossip_server_endpoint: quinn::Endpoint,
89-
transport: Transport,
90-
api_listener: TcpListener,
91-
rx_bcast: Receiver<BroadcastInput>,
92-
rx_apply: Receiver<(ActorId, i64)>,
93-
rtt_rx: Receiver<(SocketAddr, Duration)>,
94-
tripwire: Tripwire,
87+
pub actor_id: ActorId,
88+
pub gossip_server_endpoint: quinn::Endpoint,
89+
pub transport: Transport,
90+
pub api_listener: TcpListener,
91+
pub rx_bcast: Receiver<BroadcastInput>,
92+
pub rx_apply: Receiver<(ActorId, i64)>,
93+
pub rx_empty: Receiver<(ActorId, RangeInclusive<i64>)>,
94+
pub rtt_rx: Receiver<(SocketAddr, Duration)>,
95+
pub tripwire: Tripwire,
9596
}
9697

9798
pub async fn setup(conf: Config, tripwire: Tripwire) -> eyre::Result<(Agent, AgentOptions)> {
@@ -261,13 +262,16 @@ pub async fn setup(conf: Config, tripwire: Tripwire) -> eyre::Result<(Agent, Age
261262

262263
let (tx_bcast, rx_bcast) = channel(10240);
263264

265+
let (tx_empty, rx_empty) = channel(10240);
266+
264267
let opts = AgentOptions {
265268
actor_id,
266269
gossip_server_endpoint,
267270
transport,
268271
api_listener,
269272
rx_bcast,
270273
rx_apply,
274+
rx_empty,
271275
rtt_rx,
272276
tripwire: tripwire.clone(),
273277
};
@@ -283,6 +287,7 @@ pub async fn setup(conf: Config, tripwire: Tripwire) -> eyre::Result<(Agent, Age
283287
bookie,
284288
tx_bcast,
285289
tx_apply,
290+
tx_empty,
286291
schema: RwLock::new(schema),
287292
tripwire,
288293
});
@@ -307,6 +312,7 @@ pub async fn run(agent: Agent, opts: AgentOptions) -> eyre::Result<()> {
307312
mut tripwire,
308313
rx_bcast,
309314
rx_apply,
315+
rx_empty,
310316
rtt_rx,
311317
} = opts;
312318

@@ -893,8 +899,14 @@ pub async fn run(agent: Agent, opts: AgentOptions) -> eyre::Result<()> {
893899
);
894900

895901
spawn_counted(
896-
sync_loop(agent.clone(), transport.clone(), rx_apply, tripwire.clone())
897-
.inspect(|_| info!("corrosion agent sync loop is done")),
902+
sync_loop(
903+
agent.clone(),
904+
transport.clone(),
905+
rx_apply,
906+
rx_empty,
907+
tripwire.clone(),
908+
)
909+
.inspect(|_| info!("corrosion agent sync loop is done")),
898910
);
899911

900912
let mut db_cleanup_interval = tokio::time::interval(Duration::from_secs(60 * 15));
@@ -987,7 +999,15 @@ async fn clear_overwritten_versions(agent: Agent) {
987999
{
9881000
let booked = bookie.read().await;
9891001
for (actor_id, booked) in booked.iter() {
990-
let versions = booked.read().await.current_versions();
1002+
let versions = {
1003+
match timeout(Duration::from_secs(1), booked.read()).await {
1004+
Ok(booked) => booked.current_versions(),
1005+
Err(_) => {
1006+
info!(%actor_id, "timed out acquiring read lock on bookkeeping, skipping for now");
1007+
continue;
1008+
}
1009+
}
1010+
};
9911011
if versions.is_empty() {
9921012
continue;
9931013
}
@@ -1081,7 +1101,7 @@ async fn clear_overwritten_versions(agent: Agent) {
10811101
db_version = NULL,
10821102
last_seq = NULL,
10831103
ts = NULL
1084-
WHERE end_version != excluded.end_version
1104+
WHERE end_version < excluded.end_version
10851105
",
10861106
)?
10871107
.execute(params![actor_id, range.start(), range.end()])?;
@@ -1271,12 +1291,16 @@ fn find_cleared_db_versions(tx: &Transaction) -> rusqlite::Result<BTreeSet<i64>>
12711291
.query_map([], |row| row.get(0))?
12721292
.collect::<Result<BTreeSet<String>, _>>()?;
12731293

1294+
if tables.is_empty() {
1295+
return Ok(BTreeSet::new());
1296+
}
1297+
12741298
let to_clear_query = format!(
12751299
"SELECT DISTINCT(db_version) FROM __corro_bookkeeping WHERE db_version IS NOT NULL
12761300
EXCEPT SELECT db_version FROM ({});",
12771301
tables
12781302
.iter()
1279-
.map(|table| format!("SELECT DISTINCT(__crsql_db_version) AS db_version FROM {table}"))
1303+
.map(|table| format!("SELECT DISTINCT(db_version) FROM {table}"))
12801304
.collect::<Vec<_>>()
12811305
.join(" UNION ")
12821306
);
@@ -1585,7 +1609,12 @@ fn store_empty_changeset(
15851609
"
15861610
INSERT INTO __corro_bookkeeping (actor_id, start_version, end_version, db_version, ts)
15871611
VALUES (?, ?, ?, ?, ?)
1588-
ON CONFLICT (actor_id, start_version) DO NOTHING;
1612+
ON CONFLICT (actor_id, start_version) DO UPDATE SET
1613+
end_version = excluded.end_version,
1614+
db_version = NULL,
1615+
last_seq = NULL,
1616+
ts = NULL
1617+
WHERE end_version < excluded.end_version;
15891618
",
15901619
)?
15911620
.execute(params![
@@ -1830,17 +1859,36 @@ pub async fn process_multiple_changes(
18301859
continue;
18311860
}
18321861

1833-
let tx = conn.transaction()?;
1834-
1835-
let (known, changeset) = match process_single_version(&tx, change) {
1836-
Ok(res) => res,
1837-
Err(e) => {
1838-
error!(%actor_id, ?versions, "could not process single change: {e}");
1839-
continue;
1862+
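// fast path: a complete, empty change is queued on the empties channel and written to the database later; only in-memory bookkeeping is updated here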
1863+
let (known, changeset) = if change.is_complete() && change.is_empty() {
1864+
if let Err(e) = agent
1865+
.tx_empty()
1866+
.blocking_send((actor_id, change.versions()))
1867+
{
1868+
error!("could not send empty changed versions into channel: {e}");
18401869
}
1841-
};
1870+
// insert into in-memory bookkeeping right away
1871+
booked_write.insert_many(change.versions(), KnownDbVersion::Cleared);
1872+
(
1873+
KnownDbVersion::Cleared,
1874+
Changeset::Empty {
1875+
versions: change.versions(),
1876+
},
1877+
)
1878+
} else {
1879+
let tx = conn.transaction()?;
18421880

1843-
tx.commit()?;
1881+
let (known, changeset) = match process_single_version(&tx, change) {
1882+
Ok(res) => res,
1883+
Err(e) => {
1884+
error!(%actor_id, ?versions, "could not process single change: {e}");
1885+
continue;
1886+
}
1887+
};
1888+
1889+
tx.commit()?;
1890+
(known, changeset)
1891+
};
18441892

18451893
seen.insert(versions.clone(), known.clone());
18461894

@@ -1976,23 +2024,15 @@ fn process_complete_version(
19762024
tx: &Transaction,
19772025
actor_id: ActorId,
19782026
versions: RangeInclusive<i64>,
1979-
parts: Option<ChangesetParts>,
2027+
parts: ChangesetParts,
19802028
) -> rusqlite::Result<(KnownDbVersion, Changeset)> {
19812029
let ChangesetParts {
19822030
version,
19832031
changes,
19842032
seqs,
19852033
last_seq,
19862034
ts,
1987-
} = match parts {
1988-
None => {
1989-
store_empty_changeset(tx, actor_id, versions.clone())?;
1990-
info!(%actor_id, ?versions, "cleared empty versions range");
1991-
// booked_write.insert_many(versions.clone(), KnownDbVersion::Cleared);
1992-
return Ok((KnownDbVersion::Cleared, Changeset::Empty { versions }));
1993-
}
1994-
Some(parts) => parts,
1995-
};
2035+
} = parts;
19962036

19972037
info!(%actor_id, version, "complete change, applying right away! seqs: {seqs:?}, last_seq: {last_seq}");
19982038

@@ -2099,7 +2139,14 @@ fn process_single_version(
20992139
let versions = changeset.versions();
21002140

21012141
let (known, changeset) = if changeset.is_complete() {
2102-
process_complete_version(tx, actor_id, versions, changeset.into_parts())?
2142+
process_complete_version(
2143+
tx,
2144+
actor_id,
2145+
versions,
2146+
changeset
2147+
.into_parts()
2148+
.expect("no changeset parts, this shouldn't be happening!"),
2149+
)?
21032150
} else {
21042151
let parts = changeset.into_parts().unwrap();
21052152
let known = process_incomplete_version(tx, actor_id, &parts)?;
@@ -2285,10 +2332,13 @@ async fn handle_sync(agent: &Agent, transport: &Transport) -> Result<(), SyncCli
22852332
Ok(())
22862333
}
22872334

2335+
/// Delay of the periodic timer in `sync_loop`'s empties task: when it fires and
/// there are buffered empty version ranges, they are processed even if the
/// batch-size threshold has not been reached yet.
const CHECK_EMPTIES_TO_INSERT_AFTER: Duration = Duration::from_secs(120);
2336+
22882337
async fn sync_loop(
22892338
agent: Agent,
22902339
transport: Transport,
22912340
mut rx_apply: Receiver<(ActorId, i64)>,
2341+
mut rx_empty: Receiver<(ActorId, RangeInclusive<i64>)>,
22922342
mut tripwire: Tripwire,
22932343
) {
22942344
let mut sync_backoff = backoff::Backoff::new(0)
@@ -2297,6 +2347,62 @@ async fn sync_loop(
22972347
let next_sync_at = tokio::time::sleep(sync_backoff.next().unwrap());
22982348
tokio::pin!(next_sync_at);
22992349

2350+
spawn_counted({
2351+
let mut tripwire = tripwire.clone();
2352+
let agent = agent.clone();
2353+
async move {
2354+
let mut inserted_empties = 0;
2355+
let mut empties: BTreeMap<ActorId, Vec<RangeInclusive<i64>>> = BTreeMap::new();
2356+
2357+
let next_empties_check = tokio::time::sleep(CHECK_EMPTIES_TO_INSERT_AFTER);
2358+
tokio::pin!(next_empties_check);
2359+
2360+
loop {
2361+
tokio::select! {
2362+
maybe_empty = rx_empty.recv() => match maybe_empty {
2363+
Some((actor_id, versions)) => {
2364+
empties.entry(actor_id).or_default().push(versions);
2365+
inserted_empties += 1;
2366+
2367+
if inserted_empties < 1000 {
2368+
continue;
2369+
}
2370+
},
2371+
None => {
2372+
debug!("empties queue is done");
2373+
break;
2374+
}
2375+
},
2376+
_ = &mut next_empties_check => {
2377+
next_empties_check.as_mut().reset(tokio::time::Instant::now() + CHECK_EMPTIES_TO_INSERT_AFTER);
2378+
if empties.is_empty() {
2379+
continue;
2380+
}
2381+
},
2382+
_ = &mut tripwire => break
2383+
}
2384+
2385+
inserted_empties = 0;
2386+
2387+
if let Err(e) = process_completed_empties(&agent, &mut empties).await {
2388+
error!("could not process empties: {e}");
2389+
}
2390+
}
2391+
info!("Draining empty versions to process...");
2392+
// drain empties channel
2393+
while let Ok((actor_id, versions)) = rx_empty.try_recv() {
2394+
empties.entry(actor_id).or_default().push(versions);
2395+
}
2396+
2397+
if !empties.is_empty() {
2398+
info!("inserting last unprocessed empties before shut down");
2399+
if let Err(e) = process_completed_empties(&agent, &mut empties).await {
2400+
error!("could not process empties: {e}");
2401+
}
2402+
}
2403+
}
2404+
});
2405+
23002406
loop {
23012407
enum Branch {
23022408
Tick,
@@ -2362,6 +2468,33 @@ async fn sync_loop(
23622468
}
23632469
}
23642470

2471+
/// Persists the queued "empty" version ranges of every actor in `empties`
/// inside a single write transaction.
///
/// `empties` is drained actor by actor with `pop_first`. For each actor, its
/// bookkeeping is write-locked, every queued range's start is looked up in it
/// with `get_key_value`, and each range key found there is passed to
/// `store_empty_changeset`. Queued ranges whose start has no entry in the
/// bookkeeping are skipped.
///
/// # Errors
///
/// Returns an error if the write connection cannot be acquired, or if opening
/// the transaction, storing a range, or committing fails.
///
/// NOTE(review): on an early `?` return, the actors already popped from
/// `empties` are not put back. Presumably the uncommitted transaction is
/// rolled back when dropped — confirm against rusqlite's drop behavior.
async fn process_completed_empties(
2472+
agent: &Agent,
2473+
empties: &mut BTreeMap<ActorId, Vec<RangeInclusive<i64>>>,
2474+
) -> eyre::Result<()> {
2475+
let mut conn = agent.pool().write_normal().await?;
2476+
2477+
// everything below uses blocking calls (`for_actor_blocking`, `blocking_write`, SQLite),
// hence the `block_in_place` wrapper
block_in_place(|| {
2478+
let tx = conn.transaction()?;
2479+
// note: the inner `empties` (one actor's ranges) shadows the map being drained
while let Some((actor_id, empties)) = empties.pop_first() {
2480+
let booked = agent.bookie().for_actor_blocking(actor_id);
2481+
let bookedw = booked.blocking_write();
2482+
2483+
// the stored range is the key found in bookkeeping, not the queued range itself
for (range, _) in empties
2484+
.iter()
2485+
.filter_map(|range| bookedw.get_key_value(range.start()))
2486+
// drop consecutive duplicates, e.g. several queued ranges resolving to the same key
// (presumably itertools' `dedup` — confirm)
.dedup()
2487+
{
2488+
store_empty_changeset(&tx, actor_id, range.clone())?;
2489+
}
2490+
}
2491+
2492+
tx.commit()?;
2493+
2494+
Ok(())
2495+
})
2496+
}
2497+
23652498
pub fn migrate(conn: &mut Connection) -> rusqlite::Result<()> {
23662499
let migrations: Vec<Box<dyn Migration>> = vec![
23672500
Box::new(init_migration as fn(&Transaction) -> rusqlite::Result<()>),
@@ -2916,10 +3049,10 @@ pub mod tests {
29163049

29173050
conn.execute_batch(
29183051
"
2919-
CREATE TABLE foo (a INTEGER PRIMARY KEY, b INTEGER);
3052+
CREATE TABLE foo (a INTEGER NOT NULL PRIMARY KEY, b INTEGER);
29203053
SELECT crsql_as_crr('foo');
29213054
2922-
CREATE TABLE foo2 (a INTEGER PRIMARY KEY, b INTEGER);
3055+
CREATE TABLE foo2 (a INTEGER NOT NULL PRIMARY KEY, b INTEGER);
29233056
SELECT crsql_as_crr('foo2');
29243057
",
29253058
)?;
@@ -2950,7 +3083,7 @@ pub mod tests {
29503083
}
29513084

29523085
{
2953-
let mut prepped = conn.prepare("SELECT DISTINCT(__crsql_db_version) AS db_version FROM foo2__crsql_clock UNION SELECT DISTINCT(__crsql_db_version) AS db_version FROM foo__crsql_clock;")?;
3086+
let mut prepped = conn.prepare("SELECT DISTINCT(db_version) FROM foo2__crsql_clock UNION SELECT DISTINCT(db_version) FROM foo__crsql_clock;")?;
29543087
let mut rows = prepped.query([])?;
29553088

29563089
while let Ok(Some(row)) = rows.next() {

0 commit comments

Comments
 (0)