Skip to content

Commit e40df82

Browse files
tarkahermo
authored and committed
boulder: Cache upstreams fetched via recipe new & update
1 parent 7fe47ac commit e40df82

File tree

6 files changed

+106
-40
lines changed

6 files changed

+106
-40
lines changed

boulder/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ mailparse.workspace = true
4848
walkdir = "2.5.0"
4949
zstd.workspace = true
5050
astr.workspace = true
51+
tempfile.workspace = true
5152

5253
[dev-dependencies]
5354
tempfile.workspace = true

boulder/src/build.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ pub mod git;
3333
pub mod job;
3434
pub mod pgo;
3535
mod root;
36-
mod upstream;
36+
pub mod upstream;
3737

3838
pub struct Builder {
3939
pub targets: Vec<Target>,

boulder/src/cli/recipe.rs

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,16 @@ use std::{
99

1010
use boulder::{
1111
Env, Macros, architecture,
12-
draft::{self, Drafter},
12+
draft::{self, Drafter, upstream::fetched_upstream_cache_path},
1313
macros, recipe,
1414
};
1515
use clap::Parser;
16-
use fs_err as fs;
16+
use fs_err::{self as fs};
1717
use futures_util::StreamExt;
1818
use itertools::Itertools;
19-
use moss::{request, runtime};
19+
use moss::{request, runtime, util};
2020
use sha2::{Digest, Sha256};
21+
use tempfile::NamedTempFile;
2122
use thiserror::Error;
2223
use tokio::io::AsyncWriteExt;
2324
use tui::{
@@ -107,14 +108,14 @@ fn parse_upstream(s: &str) -> Result<Upstream, String> {
107108
pub fn handle(command: Command, env: Env) -> Result<(), Error> {
108109
match command.subcommand {
109110
Subcommand::Bump { recipe, release } => bump(recipe, release),
110-
Subcommand::New { output, upstreams } => new(output, upstreams, env),
111+
Subcommand::New { output, upstreams } => new(env, output, upstreams),
111112
Subcommand::Update {
112113
recipe,
113114
overwrite,
114115
version,
115116
upstreams,
116117
no_bump,
117-
} => update(recipe, overwrite, version, upstreams, no_bump),
118+
} => update(env, recipe, overwrite, version, upstreams, no_bump),
118119
Subcommand::Macros { _macro } => macros(_macro, env),
119120
}
120121
}
@@ -145,11 +146,11 @@ fn bump(recipe: PathBuf, release: Option<u64>) -> Result<(), Error> {
145146
Ok(())
146147
}
147148

148-
fn new(output: PathBuf, upstreams: Vec<Url>, env: Env) -> Result<(), Error> {
149+
fn new(env: Env, output: PathBuf, upstreams: Vec<Url>) -> Result<(), Error> {
149150
const RECIPE_FILE: &str = "stone.yaml";
150151
const MONITORING_FILE: &str = "monitoring.yaml";
151152

152-
let drafter = Drafter::new(upstreams, env.data_dir);
153+
let drafter = Drafter::new(env, upstreams);
153154
let draft = drafter.run()?;
154155

155156
if !output.is_dir() {
@@ -165,6 +166,7 @@ fn new(output: PathBuf, upstreams: Vec<Url>, env: Env) -> Result<(), Error> {
165166
}
166167

167168
fn update(
169+
env: Env,
168170
recipe: Option<PathBuf>,
169171
overwrite: bool,
170172
version: String,
@@ -245,7 +247,7 @@ fn update(
245247
updater.update_value(version, |root| root / "version");
246248
}
247249
Update::PlainUpstream(i, key, new_uri) => {
248-
let hash = runtime::block_on(fetch_hash(new_uri.clone(), &mpb))?;
250+
let hash = runtime::block_on(fetch_and_cache_upstream(&env, new_uri.clone(), &mpb))?;
249251

250252
let path = |root| root / "upstreams" / i / key.as_str().unwrap_or_default();
251253

@@ -281,7 +283,13 @@ fn update(
281283
Ok(())
282284
}
283285

284-
async fn fetch_hash(uri: Url, mpb: &MultiProgress) -> Result<String, Error> {
286+
/// Fetches the upstream at `uri` and caches it so it doesn't need to be refetched
287+
/// when this recipe is finally built.
288+
///
289+
/// Returns the sha256 hash of the fetched upstream
290+
async fn fetch_and_cache_upstream(env: &Env, uri: Url, mpb: &MultiProgress) -> Result<String, Error> {
291+
use fs_err::tokio::{self as fs, File};
292+
285293
let pb = mpb.add(
286294
ProgressBar::new(u64::MAX)
287295
.with_message(format!("{} {}", "Fetching".blue(), uri.as_str().bold()))
@@ -293,11 +301,13 @@ async fn fetch_hash(uri: Url, mpb: &MultiProgress) -> Result<String, Error> {
293301
);
294302
pb.enable_steady_tick(Duration::from_millis(150));
295303

296-
let mut stream = request::stream(uri).await?;
304+
let mut stream = request::stream(uri.clone()).await?;
297305

306+
let (temp_file, temp_file_path) = NamedTempFile::with_prefix("boulder-")
307+
.map_err(Error::CreateTempFile)?
308+
.into_parts();
298309
let mut hasher = Sha256::new();
299-
// Discard bytes
300-
let mut out = tokio::io::sink();
310+
let mut out = File::from_std(fs_err::File::from_parts(temp_file, &temp_file_path));
301311

302312
while let Some(chunk) = stream.next().await {
303313
let bytes = &chunk?;
@@ -312,6 +322,22 @@ async fn fetch_hash(uri: Url, mpb: &MultiProgress) -> Result<String, Error> {
312322

313323
let hash = hex::encode(hasher.finalize());
314324

325+
// Move fetched asset to cache dir so we don't need to refetch it
326+
// when the user finally builds this new recipe
327+
{
328+
let cache_path = fetched_upstream_cache_path(env, &uri, &hash);
329+
330+
if let Some(parent) = cache_path.parent() {
331+
fs::create_dir_all(parent).await.map_err(Error::CreateDir)?;
332+
}
333+
334+
util::async_hardlink_or_copy(&temp_file_path, &cache_path)
335+
.await
336+
.map_err(Error::MoveTempFile)?;
337+
338+
drop(temp_file_path);
339+
}
340+
315341
pb.finish();
316342
mpb.remove(&pb);
317343

@@ -419,6 +445,10 @@ pub enum Error {
419445
CreateDir(#[source] io::Error),
420446
#[error("deserializing recipe")]
421447
Deser(#[from] serde_yaml::Error),
448+
#[error("create temp file")]
449+
CreateTempFile(#[source] io::Error),
450+
#[error("move temp file")]
451+
MoveTempFile(#[source] io::Error),
422452
#[error("fetch upstream")]
423453
Fetch(#[from] request::Error),
424454
#[error("fetch upstream")]

boulder/src/draft.rs

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
use std::path::Path;
66
use std::{io, path::PathBuf};
77

8-
use fs_err as fs;
98
use itertools::Itertools;
109
use licenses::match_licences;
1110
use moss::{Dependency, util};
1211
use thiserror::Error;
1312
use tui::Styled;
1413
use url::Url;
1514

15+
use crate::Env;
16+
1617
use self::metadata::Metadata;
1718
use self::monitoring::Monitoring;
1819
use self::upstream::Upstream;
@@ -21,11 +22,11 @@ mod build;
2122
mod licenses;
2223
mod metadata;
2324
mod monitoring;
24-
mod upstream;
25+
pub mod upstream;
2526

2627
pub struct Drafter {
28+
env: Env,
2729
upstreams: Vec<Url>,
28-
datadir: PathBuf,
2930
}
3031

3132
pub struct Draft {
@@ -34,16 +35,16 @@ pub struct Draft {
3435
}
3536

3637
impl Drafter {
37-
pub fn new(upstreams: Vec<Url>, datadir: PathBuf) -> Self {
38-
Self { upstreams, datadir }
38+
pub fn new(env: Env, upstreams: Vec<Url>) -> Self {
39+
Self { env, upstreams }
3940
}
4041

4142
pub fn run(&self) -> Result<Draft, Error> {
42-
// TODO: Use tempdir
43-
let extract_root = PathBuf::from("/tmp/boulder-new");
43+
let temp_dir = tempfile::tempdir()?;
44+
let extract_root = temp_dir.as_ref();
4445

4546
// Fetch and extract all upstreams
46-
let extracted = upstream::fetch_and_extract(&self.upstreams, &extract_root)?;
47+
let extracted = upstream::fetch_and_extract(&self.env, &self.upstreams, extract_root)?;
4748

4849
// Build metadata from extracted upstreams
4950
let metadata = Metadata::new(extracted);
@@ -52,23 +53,20 @@ impl Drafter {
5253
let monitoring_result = monitoring.run()?;
5354

5455
// Enumerate all extracted files
55-
let files = util::enumerate_files(&extract_root, |_| true)?
56+
let files = util::enumerate_files(extract_root, |_| true)?
5657
.into_iter()
57-
.map(|path| File {
58-
path,
59-
extract_root: &extract_root,
60-
})
58+
.map(|path| File { path, extract_root })
6159
.collect::<Vec<_>>();
6260

6361
// Analyze files to determine build system / collect deps
6462
let build = build::analyze(&files).map_err(Error::AnalyzeBuildSystem)?;
6563

66-
let licences_dir = &self.datadir.join("licenses");
64+
let licences_dir = &self.env.data_dir.join("licenses");
6765

68-
let licenses = format_licenses(match_licences(&extract_root, licences_dir).unwrap_or_default());
66+
let licenses = format_licenses(match_licences(extract_root, licences_dir).unwrap_or_default());
6967

7068
// Remove temp extract dir
71-
fs::remove_dir_all(extract_root)?;
69+
drop(temp_dir);
7270

7371
let build_system = build.detected_system.unwrap_or_else(|| {
7472
println!(

boulder/src/draft/upstream.rs

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,28 @@ use fs_err::tokio::{self as fs, File};
99
use futures_util::{StreamExt, TryStreamExt, stream};
1010
use moss::{environment, request, runtime, util};
1111
use sha2::{Digest, Sha256};
12+
use tempfile::NamedTempFile;
1213
use thiserror::Error;
1314
use tokio::{io::AsyncWriteExt, process::Command};
1415
use tui::{MultiProgress, ProgressBar, ProgressStyle, Styled};
1516
use url::Url;
1617

18+
use crate::Env;
19+
1720
pub struct Upstream {
1821
pub uri: Url,
1922
pub hash: String,
2023
}
2124

2225
/// Fetch and extract the provided upstreams under `extract_root`
23-
pub fn fetch_and_extract(upstreams: &[Url], extract_root: &Path) -> Result<Vec<Upstream>, Error> {
24-
util::recreate_dir(extract_root)?;
25-
26+
pub fn fetch_and_extract(env: &Env, upstreams: &[Url], extract_root: &Path) -> Result<Vec<Upstream>, Error> {
2627
let mpb = MultiProgress::new();
2728

2829
let ret = runtime::block_on(
2930
stream::iter(upstreams)
3031
.map(|uri| async {
31-
let name = util::uri_file_name(uri);
32-
let archive_path = extract_root.join(name);
32+
let (temp_file, temp_path) = NamedTempFile::with_prefix("boulder-")?.into_parts();
33+
let archive_file = File::from_std(fs_err::File::from_parts(temp_file, &temp_path));
3334

3435
let pb = mpb.add(
3536
ProgressBar::new_spinner()
@@ -42,13 +43,26 @@ pub fn fetch_and_extract(upstreams: &[Url], extract_root: &Path) -> Result<Vec<U
4243
);
4344
pb.enable_steady_tick(Duration::from_millis(150));
4445

45-
let hash = fetch(uri, &archive_path).await?;
46+
let hash = fetch(uri, archive_file).await?;
47+
48+
// Hardlink or copy fetched asset to cache dir so we don't need
49+
// to refetch it when the user finally builds this new recipe
50+
{
51+
let cache_path = fetched_upstream_cache_path(env, uri, &hash);
52+
53+
if let Some(parent) = cache_path.parent() {
54+
fs::create_dir_all(parent).await?;
55+
}
56+
57+
util::async_hardlink_or_copy(&temp_path, &cache_path).await?;
58+
}
4659

4760
pb.set_message(format!("{} {}", "Extracting".yellow(), *uri));
4861

49-
extract(&archive_path, extract_root).await?;
62+
extract(&temp_path, extract_root).await?;
5063

51-
fs::remove_file(archive_path).await?;
64+
// Cleanup temp path
65+
drop(temp_path);
5266

5367
pb.suspend(|| println!("{} {}", "Fetched".green(), *uri));
5468

@@ -63,11 +77,9 @@ pub fn fetch_and_extract(upstreams: &[Url], extract_root: &Path) -> Result<Vec<U
6377
ret
6478
}
6579

66-
async fn fetch(url: &Url, output: &Path) -> Result<String, Error> {
80+
async fn fetch(url: &Url, mut file: File) -> Result<String, Error> {
6781
let mut stream = request::stream(url.clone()).await?;
6882

69-
let mut file = File::create(&output).await?;
70-
7183
let mut hasher = Sha256::new();
7284

7385
while let Some(bytes) = stream.next().await {
@@ -100,6 +112,22 @@ async fn extract(archive: &Path, destination: &Path) -> Result<(), Error> {
100112
}
101113
}
102114

115+
/// Returns the location in the cache where an upstream fetched during
/// `recipe new` / `recipe update` is stored, so a later build can reuse it.
///
/// The entry is keyed on both the URI and the content hash, so the same
/// URL serving different content yields distinct cache entries.
pub fn fetched_upstream_cache_path(env: &Env, uri: &Url, hash: &str) -> PathBuf {
    let digest = {
        let mut hasher = Sha256::new();
        hasher.update(uri.as_str());
        hasher.update(hash);
        hex::encode(hasher.finalize())
    };

    // Shard entries across two directory levels to keep directory sizes small.
    // A sha256 hex digest is always 64 characters, so these slices cannot panic.
    let (head, _) = digest.split_at(5);
    let tail = &digest[digest.len() - 5..];

    let mut path = env.cache_dir.join("upstreams");
    path.push("fetched");
    path.push(head);
    path.push(tail);
    path.push(&digest);
    path
}
130+
103131
#[derive(Debug, Error)]
104132
pub enum Error {
105133
#[error("failed to run `bsdtar`")]

moss/src/util.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,15 @@ pub fn hardlink_or_copy(from: &Path, to: &Path) -> io::Result<()> {
106106
Ok(())
107107
}
108108

109+
pub async fn async_hardlink_or_copy(from: &Path, to: &Path) -> io::Result<()> {
110+
let from = from.to_owned();
111+
let to = to.to_owned();
112+
113+
tokio::task::spawn_blocking(move || hardlink_or_copy(&from, &to))
114+
.await
115+
.expect("join handle")
116+
}
117+
109118
pub fn uri_file_name(uri: &Url) -> &str {
110119
let path = uri.path();
111120

0 commit comments

Comments
 (0)