Skip to content

Commit 917b6b0

Browse files
committed
feat(git): SHA256 repository support
This wires the unstable libgit2 SHA256 support into Cargo. Use of SHA256 repositories is gated behind `-Zgit=sha256`. Before looking at a repo, Cargo now tries to guess whether the git dependency is SHA1 or SHA256 from these places (in this order): * the locked rev in Cargo.lock * the local db (with or without the `-sha256` suffix) * a detached remote created to probe its object format What works and what doesn't: * Git CLI and libgit2 interop works * SHA1 and SHA256 git dbs coexist (via the `-sha256` dir suffix) * `-Zgit=sha256` is checked during the early fetch paths, so even if a local db is already cached you cannot use it without the Z flag * gitoxide does not yet support SHA256 repositories Some known issues and regressions: * Probing adds a silent extra round-trip on every first fetch, even for SHA1 repos. An alternative is to assume SHA1 and retry on mismatch, though that comes at the cost of a wasted fetch attempt for SHA256 repos.
1 parent f80b099 commit 917b6b0

9 files changed

Lines changed: 340 additions & 413 deletions

File tree

src/cargo/sources/git/source.rs

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::sources::IndexSummary;
88
use crate::sources::RecursivePathSource;
99
use crate::sources::git::utils::GitDatabase;
1010
use crate::sources::git::utils::GitRemote;
11+
use crate::sources::git::utils::probe_remote_object_format;
1112
use crate::sources::git::utils::rev_to_oid;
1213
use crate::sources::source::MaybePackage;
1314
use crate::sources::source::QueryKind;
@@ -47,14 +48,17 @@ use url::Url;
4748
/// │ │ └── e33d1ac/
4849
/// │ ├── log-c58e1db3de7c154d-shallow/
4950
/// │ │ └── 11eda98/
51+
/// │ └── foo-9f1e8cfc50c5ba7d-sha256/
52+
/// │ └── cfc50c5/
5053
/// └── db/
5154
/// ├── gimli-a0d193bd15a5ed96/
52-
/// └── log-c58e1db3de7c154d-shallow/
55+
/// ├── log-c58e1db3de7c154d-shallow/
56+
/// └── foo-9f1e8cfc50c5ba7d-sha256/
5357
/// ```
5458
///
5559
/// For more on Git cache directory, see ["Cargo Home"] in The Cargo Book.
5660
///
57-
/// For more on the directory format `<pkg>-<hash>[-shallow]`, see [`ident`]
61+
/// For more on the directory format `<pkg>-<hash>[-shallow][-sha256]`, see [`ident`]
5862
/// and [`ident_shallow`].
5963
///
6064
/// ## Locked to a revision
@@ -171,23 +175,81 @@ impl<'gctx> GitSource<'gctx> {
171175
}
172176

173177
fn mark_used(&self) -> CargoResult<()> {
178+
let format = match &*self.locked_rev.borrow() {
179+
Revision::Locked(oid) => oid.object_format(),
180+
_ => unreachable!("locked_rev must be resolved before mark_used"),
181+
};
182+
let ident = self.ident_for_format(format);
174183
self.gctx
175184
.deferred_global_last_use()?
176185
.mark_git_checkout_used(global_cache_tracker::GitCheckout {
177-
encoded_git_name: self.ident,
186+
encoded_git_name: ident,
178187
short_name: self.short_id.borrow().expect("update before download"),
179188
size: None,
180189
});
181190
Ok(())
182191
}
183192

193+
fn ident_for_format(&self, format: git2::ObjectFormat) -> InternedString {
194+
match format {
195+
git2::ObjectFormat::Sha1 => self.ident,
196+
git2::ObjectFormat::Sha256 => format!("{}-sha256", self.ident).into(),
197+
}
198+
}
199+
200+
/// Determines the Git object format for this remote.
201+
///
202+
/// This may probe the remote repository if needed.
203+
fn object_format_hint(&self) -> CargoResult<Option<git2::ObjectFormat>> {
204+
if let Revision::Locked(oid) = &*self.locked_rev.borrow() {
205+
return Ok(Some(oid.object_format()));
206+
}
207+
208+
let git_db_path = self.gctx.git_db_path();
209+
self.gctx
210+
.assert_package_cache_locked(CacheLockMode::DownloadExclusive, &git_db_path);
211+
let git_db_path = git_db_path.as_path_unlocked();
212+
213+
for format in [git2::ObjectFormat::Sha1, git2::ObjectFormat::Sha256] {
214+
let ident = self.ident_for_format(format);
215+
let path = git_db_path.join(ident);
216+
if path.exists()
217+
&& let Ok(db) = self.remote.db_at(&path)
218+
{
219+
return Ok(Some(db.object_format()));
220+
}
221+
}
222+
223+
Ok(None)
224+
}
225+
184226
/// Fetch and return a [`GitDatabase`] with the resolved revision
185227
/// for this source,
186228
///
187229
/// This won't fetch anything if the required revision is
188230
/// already available locally.
189231
pub(crate) fn fetch_db(&self, is_submodule: bool) -> CargoResult<(GitDatabase, git2::Oid)> {
190-
let db_path = self.gctx.git_db_path().join(&self.ident);
232+
let mut is_update_status_shown = false;
233+
234+
let format = if let Some(format) = self.object_format_hint()? {
235+
format
236+
} else {
237+
if !is_update_status_shown {
238+
is_update_status_shown = true;
239+
self.show_update_status(is_submodule)?;
240+
}
241+
if let Some(offline_flag) = self.gctx.offline_flag() {
242+
anyhow::bail!(
243+
"can't checkout from '{}': you are in the offline mode ({offline_flag})",
244+
self.remote.url()
245+
);
246+
}
247+
248+
trace!("probing git source `{:?}`", self.remote);
249+
probe_remote_object_format(self.source_id.borrow().url(), self.gctx)?
250+
};
251+
252+
let db_path = self.gctx.git_db_path().join(self.ident_for_format(format));
191253
let db_path = db_path.into_path_unlocked();
192254

193255
let db = self.remote.db_at(&db_path).ok();
@@ -226,14 +288,22 @@ impl<'gctx> GitSource<'gctx> {
226288
);
227289
}
228290

229-
self.show_update_status(is_submodule)?;
291+
if !is_update_status_shown {
292+
self.show_update_status(is_submodule)?;
293+
}
230294

231295
trace!("updating git source `{:?}`", self.remote);
232296

233297
let locked_rev = locked_rev.clone().into();
234298
let manifest_reference = self.source_id.borrow().git_reference().unwrap();
235-
self.remote
236-
.checkout(&db_path, db, manifest_reference, &locked_rev, self.gctx)?
299+
self.remote.checkout(
300+
&db_path,
301+
db,
302+
manifest_reference,
303+
&locked_rev,
304+
format,
305+
self.gctx,
306+
)?
237307
}
238308
};
239309
Ok((db, actual_rev))
@@ -287,10 +357,11 @@ impl<'gctx> GitSource<'gctx> {
287357
// Check out `actual_rev` from the database to a scoped location on the
288358
// filesystem. This will use hard links and such to ideally make the
289359
// checkout operation here pretty fast.
360+
let ident = self.ident_for_format(actual_rev.object_format());
290361
let checkout_path = self
291362
.gctx
292363
.git_checkouts_path()
293-
.join(&self.ident)
364+
.join(ident)
294365
.join(short_id.as_str());
295366
let checkout_path = checkout_path.into_path_unlocked();
296367
db.copy_to(actual_rev, &checkout_path, self.gctx, self.quiet)?;
@@ -365,6 +436,7 @@ fn ident(id: &SourceId) -> String {
365436

366437
/// Like [`ident()`], but appends `-shallow` to it, turning
367438
/// `proto://host/path/repo` into `repo-<hash-of-url>-shallow`.
439+
/// SHA256 repositories add the `-sha256` suffix on top of this identifier.
368440
///
369441
/// It's important to separate shallow from non-shallow clones for reasons of
370442
/// backwards compatibility --- older cargo's aren't necessarily handling

src/cargo/sources/git/utils.rs

Lines changed: 98 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network};
1414
use anyhow::{Context as _, anyhow};
1515
use cargo_util::{ProcessBuilder, paths};
1616
use cargo_util_terminal::Verbosity;
17-
use git2::{ErrorClass, ObjectType, Oid};
17+
use git2::ErrorClass;
18+
use git2::ObjectType;
19+
use git2::Oid;
1820
use http::{Request, StatusCode};
1921
use tracing::{debug, info};
2022
use url::Url;
@@ -112,6 +114,7 @@ impl GitRemote {
112114
db: Option<GitDatabase>,
113115
manifest_reference: &GitReference,
114116
reference: &GitReference,
117+
object_format: git2::ObjectFormat,
115118
gctx: &GlobalContext,
116119
) -> CargoResult<(GitDatabase, git2::Oid)> {
117120
if let Some(mut db) = db {
@@ -137,7 +140,7 @@ impl GitRemote {
137140
paths::remove_dir_all(into)?;
138141
}
139142
paths::create_dir_all(into)?;
140-
let mut repo = init(into, true)?;
143+
let mut repo = init(into, true, object_format)?;
141144
fetch(
142145
&mut repo,
143146
self.url(),
@@ -212,6 +215,11 @@ impl GitDatabase {
212215
self.repo.revparse_single(&oid.to_string()).is_ok()
213216
}
214217

218+
/// Gets the object format for this database.
219+
pub fn object_format(&self) -> git2::ObjectFormat {
220+
self.repo.object_format()
221+
}
222+
215223
/// [`resolve_ref`]s this reference with this database.
216224
pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
217225
resolve_ref(r, &self.repo)
@@ -471,7 +479,10 @@ impl<'a> GitCheckout<'a> {
471479
Err(..) => {
472480
let path = parent.workdir().unwrap().join(child.path());
473481
let _ = paths::remove_dir_all(&path);
474-
init(&path, false)?
482+
// Inherit the parent repo's object format.
483+
// Git does not support mixed-format submodules as of Git 2.54.
484+
// https://github.com/git/git/blob/94f057755b7941b321fd11fec1b2e3ca5313a4e0/object-file.c#L1747
485+
init(&path, false, head.object_format())?
475486
}
476487
};
477488
// Fetch submodule database and checkout to target revision
@@ -890,6 +901,9 @@ fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, gctx: &GlobalContext)
890901
///
891902
/// The callback is provided a fetch options, which can be used by the actual
892903
/// git fetch.
904+
///
905+
/// NOTE: The auth/certificate_check setup is duplicated in
906+
/// [`probe_remote_object_format`]. Keep them in sync.
893907
pub fn with_fetch_options(
894908
git_config: &git2::Config,
895909
url: &str,
@@ -981,6 +995,67 @@ pub fn with_fetch_options(
981995
})
982996
}
983997

998+
/// Probes a remote for its object format (SHA-1 vs SHA-256) without fetching.
999+
///
1000+
/// NOTE: The auth/certificate_check setup is duplicated from
1001+
/// [`with_fetch_options`] above. Keep them in sync.
1002+
pub(crate) fn probe_remote_object_format(
1003+
remote_url: &Url,
1004+
gctx: &GlobalContext,
1005+
) -> CargoResult<git2::ObjectFormat> {
1006+
let git_config = git2::Config::open_default()?;
1007+
let ssh_config = gctx.net_config()?.ssh.as_ref();
1008+
let config_known_hosts = ssh_config.and_then(|ssh| ssh.known_hosts.as_ref());
1009+
let diagnostic_home_config = gctx.diagnostic_home_config();
1010+
let remote_url = remote_url.as_str();
1011+
network::retry::with_retry(gctx, || {
1012+
// Hack: libgit2 disallows overriding the error from check_cb since v1.8.0,
1013+
// so we store the error additionally and unwrap it later
1014+
let mut check_cb_result = Ok(());
1015+
let auth_result = with_authentication(gctx, remote_url, &git_config, |f| {
1016+
let port = Url::parse(remote_url).ok().and_then(|url| url.port());
1017+
let mut rcb = git2::RemoteCallbacks::new();
1018+
rcb.credentials(f);
1019+
rcb.certificate_check(|cert, host| {
1020+
match super::known_hosts::certificate_check(
1021+
gctx,
1022+
cert,
1023+
host,
1024+
port,
1025+
config_known_hosts,
1026+
&diagnostic_home_config,
1027+
) {
1028+
Ok(status) => Ok(status),
1029+
Err(e) => {
1030+
check_cb_result = Err(e);
1031+
// This is not really used because it'll be overridden by libgit2
1032+
// See https://github.com/libgit2/libgit2/commit/9a9f220119d9647a352867b24b0556195cb26548
1033+
Err(git2::Error::from_str(
1034+
"invalid or unknown remote ssh hostkey",
1035+
))
1036+
}
1037+
}
1038+
});
1039+
1040+
let proxy_options = git2::ProxyOptions::new();
1041+
// FIXME: Remove this comment when https://github.com/libgit2/libgit2/pull/7195 merges
1042+
// This doesn't respect insteadOf from global gitconfig
1043+
// If libgit2 can't get this fixed in a timely manner, we need to switch to
1044+
// probing a temporary on-disk repo.
1045+
let mut remote = git2::Remote::create_detached(remote_url)?;
1046+
let mut conn =
1047+
remote.connect_auth(git2::Direction::Fetch, Some(rcb), Some(proxy_options))?;
1048+
// Query while connected — libgit2's local transport frees the
1049+
// repo handle on disconnect, causing a SIGSEGV if queried after.
1050+
Ok(conn.remote().object_format()?)
1051+
});
1052+
if auth_result.is_err() {
1053+
check_cb_result?;
1054+
}
1055+
auth_result
1056+
})
1057+
}
1058+
9841059
/// Attempts to fetch the given git `reference` for a Git repository.
9851060
///
9861061
/// This is the main entry for git clone/fetch. It does the following:
@@ -1084,11 +1159,17 @@ pub fn fetch(
10841159
}
10851160

10861161
debug!("doing a fetch for {remote_url}");
1162+
let format = repo.object_format();
10871163
let result = if let Some(true) = gctx.net_config()?.git_fetch_with_cli {
1164+
ensure_sha256_allowed(format, gctx)?;
10881165
fetch_with_cli(repo, remote_url, &refspecs, tags, shallow, gctx)
10891166
} else if gctx.cli_unstable().gitoxide.map_or(false, |git| git.fetch) {
1167+
if matches!(format, git2::ObjectFormat::Sha256) {
1168+
anyhow::bail!("gitoxide does not yet support SHA256 repositories");
1169+
}
10901170
fetch_with_gitoxide(repo, remote_url, refspecs, tags, shallow, gctx)
10911171
} else {
1172+
ensure_sha256_allowed(format, gctx)?;
10921173
fetch_with_libgit2(repo, remote_url, refspecs, tags, shallow, gctx)
10931174
};
10941175

@@ -1100,6 +1181,15 @@ pub fn fetch(
11001181
result
11011182
}
11021183

1184+
fn ensure_sha256_allowed(format: git2::ObjectFormat, gctx: &GlobalContext) -> CargoResult<()> {
1185+
if matches!(format, git2::ObjectFormat::Sha256)
1186+
&& !gctx.cli_unstable().git.map_or(false, |git| git.sha256)
1187+
{
1188+
anyhow::bail!("SHA256 git repositories require `-Zgit=sha256` to be enabled");
1189+
}
1190+
Ok(())
1191+
}
1192+
11031193
/// `gitoxide` uses shallow locks to assure consistency when fetching and to avoid races, and to write
11041194
/// files atomically.
11051195
/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
@@ -1438,6 +1528,7 @@ fn clean_repo_temp_files(repo: &git2::Repository) {
14381528
/// Reinitializes a given Git repository. This is useful when a Git repository
14391529
/// seems corrupted and we want to start over.
14401530
fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
1531+
let format = repo.object_format();
14411532
// Here we want to drop the current repository object pointed to by `repo`,
14421533
// so we initialize temporary repository in a sub-folder, blow away the
14431534
// existing git folder, and then recreate the git repo. Finally we blow away
@@ -1446,7 +1537,7 @@ fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
14461537
debug!("reinitializing git repo at {:?}", path);
14471538
let tmp = path.join("tmp");
14481539
let bare = !repo.path().ends_with(".git");
1449-
*repo = init(&tmp, false)?;
1540+
*repo = init(&tmp, false, format)?;
14501541
for entry in path.read_dir()? {
14511542
let entry = entry?;
14521543
if entry.file_name().to_str() == Some("tmp") {
@@ -1455,19 +1546,20 @@ fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
14551546
let path = entry.path();
14561547
drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
14571548
}
1458-
*repo = init(&path, bare)?;
1549+
*repo = init(&path, bare, format)?;
14591550
paths::remove_dir_all(&tmp)?;
14601551
Ok(())
14611552
}
14621553

14631554
/// Initializes a Git repository at `path`.
1464-
fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
1555+
fn init(path: &Path, bare: bool, format: git2::ObjectFormat) -> CargoResult<git2::Repository> {
14651556
let mut opts = git2::RepositoryInitOptions::new();
14661557
// Skip anything related to templates, they just cause all sorts of issues as
14671558
// we really don't want to use them yet they insist on being used. See #6240
14681559
// for an example issue that comes up.
14691560
opts.external_template(false);
14701561
opts.bare(bare);
1562+
opts.object_format(format);
14711563
Ok(git2::Repository::init_opts(&path, &opts)?)
14721564
}
14731565

0 commit comments

Comments
 (0)