Skip to content

Commit fef1285

Browse files
fix: Support mixture of flat and sharded repodata (#4227)
Signed-off-by: Julien Jerphanion <git@jjerphan.xyz> Co-authored-by: Johan Mabille <johan.mabille@gmail.com>
1 parent 3a97afd commit fef1285

File tree

2 files changed

+127
-12
lines changed

2 files changed

+127
-12
lines changed

libmamba/include/mamba/api/channel_loader.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,12 @@ namespace mamba
7272
* for subdirs that will not use shards.
7373
* 4. Optionally, when offline, add repos from local `pkgs_dir`.
7474
* 5. For each subdir, load it into the database:
75-
* - when sharded repodata is enabled and up to date (and `root_packages` non-empty),
76-
* prefer `load_subdir_with_shards` and fall back to full repodata on failure;
77-
* - otherwise, load from full repodata (cached or freshly downloaded).
75+
* - when sharded repodata is enabled with non-empty `root_packages`, full-repodata
76+
* subdirs (no shard index) load first; dependency names from those repos extend
77+
* `root_packages` so shard-based loads stay complete across channels; then shard
78+
* subdirs load, with fallback to full `repodata.json` if shard loading fails.
79+
* - otherwise (shards disabled or `root_packages` empty), shards are never used:
80+
* every subdir loads from full repodata in a single pass.
7881
* Recoverable errors are aggregated and, when cache corruption is detected, a single
7982
* retry with cache invalidation is performed before reporting failure.
8083
*

libmamba/src/api/channel_loader.cpp

Lines changed: 121 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <optional>
99
#include <set>
1010
#include <sstream>
11+
#include <unordered_set>
1112

1213
#include "mamba/api/channel_loader.hpp"
1314
#include "mamba/core/channel_context.hpp"
@@ -23,6 +24,7 @@
2324
#include "mamba/solver/libsolv/database.hpp"
2425
#include "mamba/solver/libsolv/repo_info.hpp"
2526
#include "mamba/specs/error.hpp"
27+
#include "mamba/specs/match_spec.hpp"
2628
#include "mamba/specs/package_info.hpp"
2729
#include "mamba/specs/version.hpp"
2830

@@ -225,10 +227,13 @@ namespace mamba
225227
* Load a single subdir into the database, using shards when available.
226228
*
227229
* When shards are enabled and up to date, this:
228-
* - attempts `load_subdir_with_shards`,
230+
* - attempts ``load_subdir_with_shards``,
229231
* - falls back to cached full repodata when shard loading fails,
230-
* - optionally fetches fresh full repodata and loads it if there is no cache.
231-
* Otherwise, it calls `load_subdir_in_database` directly.
232+
* - optionally fetches fresh full ``repodata.json`` and loads it if there is no cache.
233+
* Subdirs without a shard index load full ``repodata.json`` directly.
234+
*
235+
* When ``repodata_use_shards`` is false, or ``root_packages`` is empty, loads full
236+
* repodata (or native solv cache) as usual.
232237
*/
233238
expected_t<solver::libsolv::RepoInfo> load_single_subdir(
234239
Context& ctx,
@@ -418,9 +423,74 @@ namespace mamba
418423
}
419424
}
420425

426+
/**
427+
* Extend ``root_packages`` with dependency (and constrain) names reachable from current
428+
* roots via repos loaded from full repodata (e.g. labels without shards), using a name BFS
429+
* so the whole subdir is not scanned. Feeds shard BFS on sharded channels (e.g.
430+
* conda-forge). For example, ``conda-forge/label/mamba_prerelease`` has no
431+
* ``repodata_shards`` on anaconda.org while main ``conda-forge`` is sharded; prerelease
432+
* ``mamba`` / ``libmamba`` there may depend on e.g. ``libmsgpack-c`` resolved from shards,
433+
* which roots like ``mamba`` alone would not reach without this pass.
434+
*/
435+
void expand_shard_root_packages_from_full_repodata_repos(
436+
const solver::libsolv::Database& database,
437+
const std::vector<solver::libsolv::RepoInfo>& full_repos,
438+
std::vector<std::string>& root_packages
439+
)
440+
{
441+
std::unordered_set<std::string> seen(root_packages.begin(), root_packages.end());
442+
std::vector<std::string> frontier(root_packages.begin(), root_packages.end());
443+
auto add_from_spec = [&](const std::string& dep_str)
444+
{
445+
if (auto name = specs::MatchSpec::extract_name(dep_str))
446+
{
447+
if (!name->empty() && *name != "*")
448+
{
449+
if (seen.insert(*name).second)
450+
{
451+
frontier.push_back(*name);
452+
root_packages.push_back(*name);
453+
}
454+
}
455+
}
456+
};
457+
458+
while (!frontier.empty())
459+
{
460+
const std::string pkg_name = std::move(frontier.back());
461+
frontier.pop_back();
462+
463+
for (const auto& repo : full_repos)
464+
{
465+
database.for_each_package_in_repo(
466+
repo,
467+
[&](const specs::PackageInfo& pkg)
468+
{
469+
if (pkg.name != pkg_name)
470+
{
471+
return;
472+
}
473+
for (const auto& dep : pkg.dependencies)
474+
{
475+
add_from_spec(dep);
476+
}
477+
for (const auto& c : pkg.constrains)
478+
{
479+
add_from_spec(c);
480+
}
481+
}
482+
);
483+
}
484+
}
485+
}
486+
421487
/**
422488
* Load all subdirs into the database, with a single retry on cache corruption.
423489
*
490+
* When sharded repodata is enabled with non-empty ``root_packages``, subdirs that load from
491+
* full repodata (no shard index) run first; roots are expanded from those repos so shard
492+
* loads on other channels stay complete. Otherwise a single pass is used.
493+
*
424494
* For each `SubdirIndexLoader`, this:
425495
* - skips subdirs already loaded via shards,
426496
* - verifies cache or skips when cache is missing and shards are not used,
@@ -434,7 +504,7 @@ namespace mamba
434504
bool load_all_subdirs(
435505
Context& ctx,
436506
solver::libsolv::Database& database,
437-
const std::vector<std::string>& root_packages,
507+
std::vector<std::string>& root_packages,
438508
std::vector<SubdirIndexLoader>& subdirs,
439509
const std::vector<solver::libsolv::Priorities>& priorities,
440510
const SubdirDownloadParams& subdir_params,
@@ -445,18 +515,29 @@ namespace mamba
445515
{
446516
std::set<std::string> loaded_subdirs_with_shards;
447517
bool loading_failed = false;
518+
const bool shard_then_expand = ctx.repodata_use_shards && !root_packages.empty();
519+
std::vector<solver::libsolv::RepoInfo> full_repos_for_shard_roots;
448520

449-
for (std::size_t i = 0; i < subdirs.size(); ++i)
521+
auto try_load = [&](std::size_t i, bool full_repodata_only_pass) -> void
450522
{
451523
auto& subdir = subdirs[i];
452524
bool use_shards = ctx.repodata_use_shards
453525
&& subdir.metadata().has_up_to_date_shards(ctx.repodata_shards_ttl)
454526
&& !root_packages.empty();
455527

528+
if (full_repodata_only_pass && use_shards)
529+
{
530+
return;
531+
}
532+
if (!full_repodata_only_pass && shard_then_expand && !use_shards)
533+
{
534+
return;
535+
}
536+
456537
// Skip if this subdir was already loaded as part of a sharded same-channel load.
457538
if (loaded_subdirs_with_shards.contains(subdir.name()))
458539
{
459-
continue;
540+
return;
460541
}
461542

462543
// When using shards we don't require valid cache here; the shard path may load
@@ -470,7 +551,7 @@ namespace mamba
470551
mamba_error_code::subdirdata_not_loaded
471552
));
472553
}
473-
continue;
554+
return;
474555
}
475556

476557
auto result = load_single_subdir(
@@ -487,11 +568,16 @@ namespace mamba
487568

488569
if (result)
489570
{
571+
auto repo = std::move(result).value();
490572
// `load_subdir_with_shards` already sets priorities for all repos it adds.
491573
// Avoid overriding another repo when this subdir has no direct match.
492574
if (!use_shards)
493575
{
494-
database.set_repo_priority(std::move(result).value(), priorities[i]);
576+
database.set_repo_priority(repo, priorities[i]);
577+
}
578+
if (shard_then_expand && full_repodata_only_pass && !use_shards)
579+
{
580+
full_repos_for_shard_roots.push_back(repo);
495581
}
496582
}
497583
else if (is_retry)
@@ -513,6 +599,31 @@ namespace mamba
513599
subdir.clear_valid_cache_files();
514600
loading_failed = true;
515601
}
602+
};
603+
604+
if (shard_then_expand)
605+
{
606+
const std::size_t roots_before = root_packages.size();
607+
for (std::size_t i = 0; i < subdirs.size(); ++i)
608+
{
609+
try_load(i, /*full_repodata_only_pass=*/true);
610+
}
611+
expand_shard_root_packages_from_full_repodata_repos(
612+
database,
613+
full_repos_for_shard_roots,
614+
root_packages
615+
);
616+
if (root_packages.size() > roots_before)
617+
{
618+
LOG_DEBUG << "Shard root packages expanded by "
619+
<< (root_packages.size() - roots_before)
620+
<< " name(s) from full-repodata subdirs (cross-channel closure seeds).";
621+
}
622+
}
623+
624+
for (std::size_t i = 0; i < subdirs.size(); ++i)
625+
{
626+
try_load(i, /*full_repodata_only_pass=*/false);
516627
}
517628

518629
return loading_failed;
@@ -823,10 +934,11 @@ namespace mamba
823934

824935
add_repos_from_pks_dir(ctx, channel_context, database);
825936

937+
std::vector<std::string> effective_shard_root_packages(root_packages);
826938
bool loading_failed = load_all_subdirs(
827939
ctx,
828940
database,
829-
root_packages,
941+
effective_shard_root_packages,
830942
subdirs,
831943
priorities,
832944
subdir_params,

0 commit comments

Comments
 (0)