Skip to content

Commit 8c09112

Browse files
feat: Prefilter Shard Records on Python Minor Version (#4214)
Signed-off-by: Julien Jerphanion <git@jjerphan.xyz> Co-authored-by: Johan Mabille <johan.mabille@gmail.com>
1 parent db8deaf commit 8c09112

16 files changed

+1078
-59
lines changed

libmamba/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ set(
247247
${LIBMAMBA_SOURCE_DIR}/core/query.cpp
248248
${LIBMAMBA_SOURCE_DIR}/core/repo_checker_store.cpp
249249
${LIBMAMBA_SOURCE_DIR}/core/run.cpp
250+
${LIBMAMBA_SOURCE_DIR}/core/shard_python_minor_prefilter.hpp
251+
${LIBMAMBA_SOURCE_DIR}/core/shard_python_minor_prefilter.cpp
250252
${LIBMAMBA_SOURCE_DIR}/core/shell_init.cpp
251253
${LIBMAMBA_SOURCE_DIR}/core/shards.cpp
252254
${LIBMAMBA_SOURCE_DIR}/core/shard_index_loader.cpp

libmamba/include/mamba/api/channel_loader.hpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@
77
#ifndef MAMBA_API_CHANNEL_LOADER_HPP
88
#define MAMBA_API_CHANNEL_LOADER_HPP
99

10+
#include <optional>
1011
#include <set>
1112
#include <string>
1213
#include <vector>
1314

1415
#include "mamba/core/error_handling.hpp"
16+
#include "mamba/specs/version.hpp"
1517

1618
namespace mamba
1719
{
@@ -41,6 +43,8 @@ namespace mamba
4143
* @param subdir_idx Index of the subdir to load in \p subdirs.
4244
* @param loaded_subdirs_with_shards Set of subdir names already loaded via shards (updated).
4345
* @param priorities Repo priorities aligned with \p subdirs.
46+
* @param python_minor_version_for_prefilter Optional Python minor version used to prefilter shard records
47+
* (from \c prepare_solver_context).
4448
* @return The repo for the requested subdir, or unexpected mamba_error on failure.
4549
*/
4650
auto load_subdir_with_shards(
@@ -50,7 +54,8 @@ namespace mamba
5054
std::vector<SubdirIndexLoader>& subdirs,
5155
std::size_t subdir_idx,
5256
std::set<std::string>& loaded_subdirs_with_shards,
53-
const std::vector<solver::libsolv::Priorities>& priorities
57+
const std::vector<solver::libsolv::Priorities>& priorities,
58+
std::optional<specs::Version> python_minor_version_for_prefilter = std::nullopt
5459
) -> expected_t<solver::libsolv::RepoInfo>;
5560

5661
class ChannelContext;
@@ -86,7 +91,8 @@ namespace mamba
8691
ChannelContext& channel_context,
8792
solver::libsolv::Database& database,
8893
MultiPackageCache& package_caches,
89-
const std::vector<std::string>& root_packages = {}
94+
const std::vector<std::string>& root_packages = {},
95+
std::optional<specs::Version> python_minor_version_for_prefilter = std::nullopt
9096
) -> expected_t<void, mamba_aggregated_error>;
9197

9298
/* Brief Creates channels and mirrors objects,

libmamba/include/mamba/core/shards.hpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "mamba/fs/filesystem.hpp"
2323
#include "mamba/specs/authentication_info.hpp"
2424
#include "mamba/specs/channel.hpp"
25+
#include "mamba/specs/version.hpp"
2526

2627
namespace mamba
2728
{
@@ -30,6 +31,12 @@ namespace mamba
3031
*
3132
* This class manages fetching and caching of individual shards from
3233
* a sharded repodata index.
34+
*
35+
* **Python minor prefilter:** When constructed with ``python_minor_version_for_prefilter``
36+
* (e.g. 3.12), parsing a shard msgpack drops package records whose ``depends`` list constrains
37+
* ``python`` to a range that does not contain that minor, reducing work for the solver.
38+
* When that optional is unset, no such filtering is applied and all records in the shard
39+
* are parsed (python compatibility is left to the solver).
3340
*/
3441
class Shards
3542
{
@@ -47,6 +54,9 @@ namespace mamba
4754
* @param mirrors Optional base mirrors for channel-based downloads. When provided,
4855
* extend_mirrors in fetch_shards will be initialized from these before adding
4956
* absolute-URL mirrors.
57+
* @param python_minor_version_for_prefilter If set, shard parsing filters out records whose
58+
* ``depends`` python constraints are incompatible with this minor; if unset,
59+
* no python-minor-based record filtering is performed.
5060
*/
5161
Shards(
5262
ShardsIndexDict shards_index,
@@ -56,7 +66,8 @@ namespace mamba
5666
download::RemoteFetchParams remote_fetch_params,
5767
// 0 means: auto; value is normalized with normalize_to_affinity_concurrency().
5868
std::size_t download_threads = 0,
59-
std::optional<std::reference_wrapper<const download::mirror_map>> mirrors = std::nullopt
69+
std::optional<std::reference_wrapper<const download::mirror_map>> mirrors = std::nullopt,
70+
std::optional<specs::Version> python_minor_version_for_prefilter = std::nullopt
6071
);
6172

6273
/** Return the names of all packages available in this shard collection. */
@@ -119,6 +130,13 @@ namespace mamba
119130
/** Optional base mirrors for channel-based downloads. */
120131
std::optional<std::reference_wrapper<const download::mirror_map>> m_mirrors;
121132

133+
/**
134+
* Environment python minor used when parsing shards to prefilter package records
135+
* (see ``record_depends_on_python_minor_version_for_prefilter`` in shards.cpp).
136+
* Empty means the prefilter is disabled.
137+
*/
138+
std::optional<specs::Version> m_python_minor_version_for_prefilter;
139+
122140
/** Visited shards, keyed by package name. */
123141
std::map<std::string, ShardDict> m_visited;
124142

libmamba/src/api/channel_loader.cpp

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
#include "mamba/solver/libsolv/repo_info.hpp"
2525
#include "mamba/specs/error.hpp"
2626
#include "mamba/specs/package_info.hpp"
27+
#include "mamba/specs/version.hpp"
28+
29+
#include "utils.hpp"
2730

2831
namespace mamba
2932
{
@@ -235,7 +238,8 @@ namespace mamba
235238
std::size_t subdir_idx,
236239
std::set<std::string>& loaded_subdirs_with_shards,
237240
const SubdirDownloadParams& subdir_params,
238-
const std::vector<solver::libsolv::Priorities>& priorities
241+
const std::vector<solver::libsolv::Priorities>& priorities,
242+
std::optional<specs::Version> python_minor_version_for_prefilter
239243
)
240244
{
241245
auto& subdir = subdirs[subdir_idx];
@@ -253,7 +257,8 @@ namespace mamba
253257
subdirs,
254258
subdir_idx,
255259
loaded_subdirs_with_shards,
256-
priorities
260+
priorities,
261+
python_minor_version_for_prefilter
257262
);
258263

259264
if (!res)
@@ -434,7 +439,8 @@ namespace mamba
434439
const std::vector<solver::libsolv::Priorities>& priorities,
435440
const SubdirDownloadParams& subdir_params,
436441
bool is_retry,
437-
std::vector<mamba_error>& error_list
442+
std::vector<mamba_error>& error_list,
443+
std::optional<specs::Version> python_minor_version_for_prefilter
438444
)
439445
{
440446
std::set<std::string> loaded_subdirs_with_shards;
@@ -475,7 +481,8 @@ namespace mamba
475481
i,
476482
loaded_subdirs_with_shards,
477483
subdir_params,
478-
priorities
484+
priorities,
485+
python_minor_version_for_prefilter
479486
);
480487

481488
if (result)
@@ -556,7 +563,11 @@ namespace mamba
556563
continue;
557564
}
558565
SubdirIndexLoader subdir_index_loader = std::move(subdir_index_loader_result).value();
559-
if (subdir_index_loader.valid_cache_found() && Console::can_report_status())
566+
567+
// Only show flat repodata cache status if we're not using shards and we have a
568+
// valid cache
569+
if (!ctx.repodata_use_shards && subdir_index_loader.valid_cache_found()
570+
&& Console::can_report_status())
560571
{
561572
Console::stream()
562573
<< fmt::format("{:<50} {:>20}", subdir_index_loader.name(), "Using cache");
@@ -641,7 +652,8 @@ namespace mamba
641652
std::vector<SubdirIndexLoader>& subdirs,
642653
std::size_t subdir_idx,
643654
std::set<std::string>& loaded_subdirs_with_shards,
644-
const std::vector<solver::libsolv::Priorities>& priorities
655+
const std::vector<solver::libsolv::Priorities>& priorities,
656+
std::optional<specs::Version> python_minor_version_for_prefilter
645657
) -> expected_t<solver::libsolv::RepoInfo>
646658
{
647659
auto& subdir = subdirs[subdir_idx];
@@ -670,6 +682,15 @@ namespace mamba
670682
LOG_DEBUG << "Shard index fetched for " << subdir.name();
671683
const auto& channel = subdir.channel();
672684
std::string current_repodata_url = subdir.repodata_url().str();
685+
if (python_minor_version_for_prefilter.has_value())
686+
{
687+
LOG_DEBUG << "Shard prefilter on python minor version enabled with "
688+
<< python_minor_version_for_prefilter.value().to_string();
689+
}
690+
else
691+
{
692+
LOG_DEBUG << "Shard prefilter on python minor version disabled.";
693+
}
673694

674695
// For all subdirs sharing the same channel URL, fetch their shard indices and build
675696
// a Shards instance per subdir; collect them into a RepodataSubset.
@@ -702,7 +723,8 @@ namespace mamba
702723
ctx.authentication_info(),
703724
ctx.remote_fetch_params,
704725
normalize_to_affinity_concurrency(static_cast<int>(ctx.repodata_shards_threads)),
705-
std::cref(ctx.mirrors)
726+
std::cref(ctx.mirrors),
727+
python_minor_version_for_prefilter
706728
);
707729
url_to_subdir_idx[sdir_url] = j;
708730
}
@@ -761,7 +783,8 @@ namespace mamba
761783
solver::libsolv::Database& database,
762784
MultiPackageCache& package_caches,
763785
const std::vector<std::string>& root_packages,
764-
bool is_retry
786+
bool is_retry,
787+
std::optional<specs::Version> python_minor_version_for_prefilter
765788
)
766789
{
767790
std::vector<SubdirIndexLoader> subdirs;
@@ -808,7 +831,8 @@ namespace mamba
808831
priorities,
809832
subdir_params,
810833
is_retry,
811-
error_list
834+
error_list,
835+
python_minor_version_for_prefilter
812836
);
813837

814838
if (loading_failed)
@@ -824,7 +848,8 @@ namespace mamba
824848
database,
825849
package_caches,
826850
root_packages,
827-
retry
851+
retry,
852+
python_minor_version_for_prefilter
828853
);
829854
}
830855
error_list.emplace_back(
@@ -843,11 +868,20 @@ namespace mamba
843868
ChannelContext& channel_context,
844869
solver::libsolv::Database& database,
845870
MultiPackageCache& package_caches,
846-
const std::vector<std::string>& root_packages
871+
const std::vector<std::string>& root_packages,
872+
std::optional<specs::Version> python_minor_version_for_prefilter
847873
) -> expected_t<void, mamba_aggregated_error>
848874
{
849875
bool retry = false;
850-
return load_channels_impl(ctx, channel_context, database, package_caches, root_packages, retry);
876+
return load_channels_impl(
877+
ctx,
878+
channel_context,
879+
database,
880+
package_caches,
881+
root_packages,
882+
retry,
883+
std::move(python_minor_version_for_prefilter)
884+
);
851885
}
852886

853887
void init_channels(Context& context, ChannelContext& channel_context)

libmamba/src/api/install.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,33 @@ namespace mamba
376376
{
377377
using Request = solver::Request;
378378

379+
// When the user explicitly asks for ``python`` in the requested specs, also inject a
380+
// plain ``pip`` request unless it is already present. This complements
381+
// ``add_pip_as_python_dependency`` at the repo level and makes sure that the Request
382+
// is consistent with the root packages, which include both ``python`` and ``pip`` when requested.
383+
bool wants_python = false;
384+
bool wants_pip = false;
385+
for (const auto& s : specs)
386+
{
387+
const auto maybe_name = specs::MatchSpec::extract_name(s);
388+
if (!maybe_name.has_value())
389+
{
390+
continue;
391+
}
392+
if (maybe_name.value() == "python")
393+
{
394+
wants_python = true;
395+
}
396+
else if (maybe_name.value() == "pip")
397+
{
398+
wants_pip = true;
399+
}
400+
}
401+
if (wants_python && !wants_pip)
402+
{
403+
specs.emplace_back("pip");
404+
}
405+
379406
const auto& prefix_pkgs = prefix_data.records();
380407

381408
auto request = Request();
@@ -555,7 +582,13 @@ namespace mamba
555582
auto& no_env = config.at("no_env").value<bool>();
556583

557584
validate_target_prefix_and_channels(ctx, create_env);
558-
auto [db, package_caches] = prepare_solver_context(ctx, channel_context, raw_specs);
585+
auto [db, package_caches] = prepare_solver_context(
586+
ctx,
587+
channel_context,
588+
raw_specs,
589+
is_retry,
590+
no_py_pin
591+
);
559592

560593
auto prefix_data = load_prefix_data_and_installed(ctx, channel_context, db);
561594

libmamba/src/api/update.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44
//
55
// The full license is in the file LICENSE, distributed with this software.
66

7+
#include <cctype>
8+
9+
#include <fmt/format.h>
10+
11+
#include "mamba/api/channel_loader.hpp"
712
#include "mamba/api/configuration.hpp"
813
#include "mamba/api/install.hpp"
914
#include "mamba/api/update.hpp"
@@ -147,7 +152,13 @@ namespace mamba
147152
auto& retry_clean_cache = config.at("retry_clean_cache").value<bool>();
148153

149154
validate_target_prefix_and_channels(ctx, /* create_env= */ false);
150-
auto [db, package_caches] = prepare_solver_context(ctx, channel_context, raw_update_specs);
155+
auto [db, package_caches] = prepare_solver_context(
156+
ctx,
157+
channel_context,
158+
raw_update_specs,
159+
is_retry,
160+
no_py_pin
161+
);
151162

152163
auto prefix_data = load_prefix_data_and_installed(ctx, channel_context, db);
153164

0 commit comments

Comments
 (0)