Skip to content

Commit e343d92

Browse files
authored
feat: improve download speed with the in-built downloader (#6632)
1 parent 2d942c9 commit e343d92

File tree

9 files changed

+744
-42
lines changed

9 files changed

+744
-42
lines changed

.config/forest.dic

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
75
1+
78
22
Algorand/M
33
API/M
44
APIs
5+
aria2c
56
args
67
arities
78
arity
@@ -81,6 +82,7 @@ libp2p
8182
liveness
8283
localhost
8384
mainnet
85+
MD5
8486
MDNS
8587
mempool
8688
Merkle
@@ -131,6 +133,7 @@ TOML
131133
trie
132134
truthy
133135
TTY
136+
UI
134137
uncompress
135138
unrepresentable
136139
untrusted

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929

3030
### Added
3131

32+
- [#3715](https://github.com/ChainSafe/forest/issues/3715): Implemented parallel HTTP downloads for snapshots with 5 concurrent connections by default (configurable via `FOREST_DOWNLOAD_CONNECTIONS`), bringing significant performance improvements for snapshot downloads (on par with a manual `aria2c -x5`).
33+
3234
### Changed
3335

3436
### Removed

docs/docs/users/knowledge_base/network_upgrades_state_migrations.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ Sample output:
3737
location: /archive/mainnet/latest/forest_snapshot_mainnet_2024-08-06_height_415650.forest.car.zst
3838
```
3939

40-
You see that the snapshot is past the upgrade epoch by ten epochs. You download the snapshot with `aria2c` because it's significantly faster than a raw `curl`.
40+
You see that the snapshot is past the upgrade epoch by ten epochs. You download the snapshot with the in-built tool, which is faster than raw `cURL`.
4141

4242
```bash
43-
aria2c -x5 https://forest-archive.chainsafe.dev/latest/mainnet/
43+
forest-tool snapshot fetch --chain mainnet
4444
```
4545

4646
You start your node with `--import-snapshot <snapshot-path>` and enjoy the new, fancy NV23 features. Hooray!

docs/docs/users/reference/env_variables.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ process.
5757
| `FOREST_RPC_BACKFILL_FULL_TIPSET_FROM_NETWORK` | 1 or true | false | 1 | Whether or not to backfill full tipsets from the p2p network |
5858
| `FOREST_STRICT_JSON` | 1 or true | false | 1 | Enable strict JSON validation to detect duplicate keys in RPC requests |
5959
| `FOREST_AUTO_DOWNLOAD_SNAPSHOT_PATH` | URL or file path | empty | `/var/tmp/forest_snapshot_calibnet.forest.car.zst` | Override snapshot path for `--auto-download-snapshot` |
60+
| `FOREST_DOWNLOAD_CONNECTIONS` | positive integer | 5 | 10 | Number of parallel HTTP connections for downloading snapshots |
6061

6162
### `FOREST_F3_SIDECAR_FFI_BUILD_OPT_OUT`
6263

src/cli_shared/snapshot.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ pub async fn fetch(
6767
.date_and_height_and_forest();
6868
let filename = filename(vendor, chain, date, height, forest_format);
6969

70+
tracing::info!("Downloading snapshot: {filename}");
71+
7072
download_file_with_retry(
7173
&url,
7274
directory,

src/daemon/mod.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -718,14 +718,19 @@ async fn maybe_set_snapshot_path(
718718
config.client.snapshot_path = Some(path.into());
719719
}
720720
_ => {
721-
let url = crate::cli_shared::snapshot::stable_url(vendor, chain)?;
722-
config.client.snapshot_path = Some(url.to_string().into());
721+
// Resolve the redirect URL to get the actual snapshot URL
722+
// This ensures all chunks download from the same snapshot even if
723+
// a new snapshot is published during the download
724+
let (resolved_url, _num_bytes, filename) =
725+
crate::cli_shared::snapshot::peek(vendor, chain).await?;
726+
tracing::info!("Downloading snapshot: {filename}");
727+
config.client.snapshot_path = Some(resolved_url.to_string().into());
723728
}
724729
}
725730
}
726731
(true, false, false) => {
727732
// we need a snapshot, don't have one, and don't have permission to download one, so ask the user
728-
let (url, num_bytes, _path) = crate::cli_shared::snapshot::peek(vendor, chain)
733+
let (url, num_bytes, filename) = crate::cli_shared::snapshot::peek(vendor, chain)
729734
.await
730735
.context("couldn't get snapshot size")?;
731736
// dialoguer will double-print long lines, so manually print the first clause ourselves,
@@ -751,6 +756,7 @@ async fn maybe_set_snapshot_path(
751756
"Forest requires a snapshot to sync with the network, but automatic fetching is disabled."
752757
)
753758
}
759+
tracing::info!("Downloading snapshot: {filename}");
754760
config.client.snapshot_path = Some(url.to_string().into());
755761
}
756762
};

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ pub mod doctest_private {
9494
version::NetworkVersion,
9595
},
9696
utils::io::progress_log::WithProgress,
97+
utils::net::{DownloadFileOption, download_to},
9798
utils::{encoding::blake2b_256, encoding::keccak_256, io::read_toml},
9899
};
99100
}

src/utils/net.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,10 @@ pub async fn reader(
105105
}
106106
};
107107

108-
Ok(tokio::io::BufReader::new(
108+
// Use a larger buffer (512 KiB) for better throughput on large files
109+
const DOWNLOAD_BUFFER_SIZE: usize = 512 * 1024;
110+
Ok(tokio::io::BufReader::with_capacity(
111+
DOWNLOAD_BUFFER_SIZE,
109112
WithProgress::wrap_sync_read_with_callback("Loading", stream, content_length, callback)
110113
.bytes(),
111114
))

0 commit comments

Comments
 (0)