@@ -1528,160 +1528,216 @@ std::string ModelManager::resolve_model_path(const ModelInfo& info, const std::s
15281528 return files;
15291529 };
15301530
1531- std::vector<std::string> all_gguf_files = collect_gguf_files (active_hf_snapshot_path (model_cache_path_fs));
1532- if (all_gguf_files.empty ()) {
1533- // Backward-compatible fallback for caches without refs/main and for
1534- // partially migrated/manual HF cache layouts.
1535- all_gguf_files = collect_gguf_files (model_cache_path_fs);
1536- }
1531+ // Resolve the requested GGUF variant within a candidate list of files.
1532+ // Returns the matched absolute path, or "" if this candidate set does not
1533+ // contain the variant. Factored into a lambda so the search can be retried
1534+ // against a broader set of snapshots (see #2300 below) without duplicating
1535+ // the matching logic.
1536+ auto resolve_gguf_variant =
1537+ [&](const std::vector<std::string>& gguf_files) -> std::string {
1538+ if (gguf_files.empty ()) {
1539+ return " " ;
1540+ }
15371541
1538- if (all_gguf_files.empty ()) {
1539- return model_cache_path; // Return directory if no GGUF found
1540- }
1542+ // Case 0: Wildcard (*) - return first file (llama-server will auto-load shards)
1543+ if (variant == " *" ) {
1544+ return gguf_files[0 ];
1545+ }
15411546
1542- // Sort files for consistent ordering (important for sharded models)
1543- std::sort (all_gguf_files.begin (), all_gguf_files.end ());
1547+ // Case 1: Empty variant - return first file
1548+ if (variant.empty ()) {
1549+ return gguf_files[0 ];
1550+ }
15441551
1545- // Case 0: Wildcard (*) - return first file (llama-server will auto-load shards)
1546- if (variant == " *" ) {
1547- return all_gguf_files[0 ];
1548- }
1552+ // Case 2: Exact filename match (variant ends with .gguf)
1553+ if (variant.find (" .gguf" ) != std::string::npos) {
1554+ for (const auto & filepath : gguf_files) {
1555+ std::string filename = path_from_utf8 (filepath).filename ().string ();
1556+ if (filename == variant) {
1557+ return filepath;
1558+ }
1559+ }
1560+ return " " ; // Exact variant not found in this candidate set
1561+ }
15491562
1550- // Case 1: Empty variant - return first file
1551- if (variant. empty ()) {
1552- return all_gguf_files[ 0 ] ;
1553- }
1563+ // Case 3: Files ending with {variant}.gguf (case insensitive)
1564+ std::string variant_lower = variant;
1565+ std::transform (variant_lower. begin (), variant_lower. end (), variant_lower. begin (), ::tolower) ;
1566+ std::string suffix = variant_lower + " .gguf " ;
15541567
1555- // Case 2: Exact filename match (variant ends with .gguf)
1556- if (variant.find (" .gguf" ) != std::string::npos) {
1557- for (const auto & filepath : all_gguf_files) {
1568+ std::vector<std::string> matching_files;
1569+ for (const auto & filepath : gguf_files) {
15581570 std::string filename = path_from_utf8 (filepath).filename ().string ();
1559- if (filename == variant) {
1560- return filepath;
1571+ std::string filename_lower = filename;
1572+ std::transform (filename_lower.begin (), filename_lower.end (), filename_lower.begin (), ::tolower);
1573+
1574+ if (filename_lower.size () >= suffix.size () &&
1575+ filename_lower.substr (filename_lower.size () - suffix.size ()) == suffix) {
1576+ matching_files.push_back (filepath);
15611577 }
15621578 }
1563- return " " ; // Exact variant not found — signal not downloaded
1564- }
15651579
1566- // Case 3: Files ending with {variant}.gguf (case insensitive)
1567- std::string variant_lower = variant;
1568- std::transform (variant_lower.begin (), variant_lower.end (), variant_lower.begin (), ::tolower);
1569- std::string suffix = variant_lower + " .gguf" ;
1580+ if (!matching_files.empty ()) {
1581+ return matching_files[0 ];
1582+ }
15701583
1571- std::vector<std::string> matching_files;
1572- for (const auto & filepath : all_gguf_files) {
1573- std::string filename = path_from_utf8 (filepath).filename ().string ();
1574- std::string filename_lower = filename;
1575- std::transform (filename_lower.begin (), filename_lower.end (), filename_lower.begin (), ::tolower);
1584+ // Case 4: Folder-based sharding (files in variant/ folder)
1585+ std::string folder_prefix_lower = variant_lower + " /" ;
15761586
1577- if (filename_lower.size () >= suffix.size () &&
1578- filename_lower.substr (filename_lower.size () - suffix.size ()) == suffix) {
1579- matching_files.push_back (filepath);
1580- }
1581- }
1587+ for (const auto & filepath : gguf_files) {
1588+ // Get relative path from model cache path
1589+ std::string relative_path = path_to_utf8 (
1590+ path_from_utf8 (filepath).lexically_relative (model_cache_path_fs));
1591+ std::string relative_lower = relative_path;
1592+ // Normalize path separators and case so folder-variant matching works cross-platform.
1593+ std::transform (relative_lower.begin (), relative_lower.end (), relative_lower.begin (), ::tolower);
1594+ std::replace (relative_lower.begin (), relative_lower.end (), ' \\ ' , ' /' );
15821595
1583- if (!matching_files.empty ()) {
1584- return matching_files[0 ];
1585- }
1596+ if (relative_lower.find (folder_prefix_lower) != std::string::npos) {
1597+ return filepath;
1598+ }
1599+ }
15861600
1587- // Case 4: Folder-based sharding (files in variant/ folder)
1588- std::string folder_prefix_lower = variant_lower + " /" ;
1601+ // Case 5: Local quant-token fallback.
1602+ //
1603+ // Keep the existing resolver cases above as the primary logic: exact
1604+ // filenames, suffix matches, and folder-based sharding are more
1605+ // specific and preserve the CHECKPOINT:VARIANT contract.
1606+ //
1607+ // Some GGUF repositories name files with the quant token in the middle,
1608+ // for example:
1609+ // Qwen3.6-27B-MTP-IMAT-IQ4_XS-Q8nextn.gguf
1610+ // for variant:
1611+ // IQ4_XS
1612+ // That file does not end with IQ4_XS.gguf, so mirror the downloader's
1613+ // GGUF variant enumeration over the files that are already present in
1614+ // the local HF cache before declaring the model missing.
1615+ //
1616+ // HF cache paths have an extra snapshots/<revision>/ prefix that is not
1617+ // part of the repository-relative filename. Strip it before calling
1618+ // enumerate_gguf_variants(); otherwise the enumerator treats
1619+ // "snapshots" as a top-level sharded-folder variant and never extracts
1620+ // the quant token from the actual GGUF filename.
1621+ std::vector<std::string> relative_gguf_files;
1622+ std::map<std::string, std::string> absolute_by_relative;
1623+ auto repo_relative_from_cache_relative = [](std::string rel) {
1624+ std::replace (rel.begin (), rel.end (), ' \\ ' , ' /' );
1625+
1626+ static const std::string snapshots_prefix = " snapshots/" ;
1627+ if (rel.rfind (snapshots_prefix, 0 ) == 0 ) {
1628+ size_t revision_end = rel.find (' /' , snapshots_prefix.size ());
1629+ if (revision_end != std::string::npos && revision_end + 1 < rel.size ()) {
1630+ rel = rel.substr (revision_end + 1 );
1631+ }
1632+ }
15891633
1590- for (const auto & filepath : all_gguf_files) {
1591- // Get relative path from model cache path
1592- std::string relative_path = path_to_utf8 (
1593- path_from_utf8 (filepath).lexically_relative (model_cache_path_fs));
1594- std::string relative_lower = relative_path;
1595- // Normalize path separators and case so folder-variant matching works cross-platform.
1596- std::transform (relative_lower.begin (), relative_lower.end (), relative_lower.begin (), ::tolower);
1597- std::replace (relative_lower.begin (), relative_lower.end (), ' \\ ' , ' /' );
1634+ return rel;
1635+ };
15981636
1599- if (relative_lower. find (folder_prefix_lower) != std::string::npos ) {
1600- return filepath;
1601- }
1602- }
1637+ for ( const auto & filepath : gguf_files ) {
1638+ std::string relative_path = path_to_utf8 (
1639+ path_from_utf8 (filepath). lexically_relative (model_cache_path_fs));
1640+ relative_path = repo_relative_from_cache_relative (relative_path);
16031641
1604- // Case 5: Local quant-token fallback.
1605- //
1606- // Keep the existing resolver cases above as the primary logic: exact
1607- // filenames, suffix matches, and folder-based sharding are more
1608- // specific and preserve the CHECKPOINT:VARIANT contract.
1609- //
1610- // Some GGUF repositories name files with the quant token in the middle,
1611- // for example:
1612- // Qwen3.6-27B-MTP-IMAT-IQ4_XS-Q8nextn.gguf
1613- // for variant:
1614- // IQ4_XS
1615- // That file does not end with IQ4_XS.gguf, so mirror the downloader's
1616- // GGUF variant enumeration over the files that are already present in
1617- // the local HF cache before declaring the model missing.
1618- //
1619- // HF cache paths have an extra snapshots/<revision>/ prefix that is not
1620- // part of the repository-relative filename. Strip it before calling
1621- // enumerate_gguf_variants(); otherwise the enumerator treats
1622- // "snapshots" as a top-level sharded-folder variant and never extracts
1623- // the quant token from the actual GGUF filename.
1624- std::vector<std::string> relative_gguf_files;
1625- std::map<std::string, std::string> absolute_by_relative;
1626- auto repo_relative_from_cache_relative = [](std::string rel) {
1627- std::replace (rel.begin (), rel.end (), ' \\ ' , ' /' );
1628-
1629- static const std::string snapshots_prefix = " snapshots/" ;
1630- if (rel.rfind (snapshots_prefix, 0 ) == 0 ) {
1631- size_t revision_end = rel.find (' /' , snapshots_prefix.size ());
1632- if (revision_end != std::string::npos && revision_end + 1 < rel.size ()) {
1633- rel = rel.substr (revision_end + 1 );
1642+ // Multiple HF snapshots can contain the same repo-relative file.
1643+ // Keep the first absolute path from the sorted gguf_files list
1644+ // so duplicates do not create false ambiguity.
1645+ if (absolute_by_relative.emplace (relative_path, filepath).second ) {
1646+ relative_gguf_files.push_back (relative_path);
16341647 }
16351648 }
16361649
1637- return rel;
1638- };
1650+ std::vector<std::string> enumerated_matches;
1651+ auto local_variants = lemon::enumerate_gguf_variants (relative_gguf_files);
1652+ for (const auto & local_variant : local_variants.variants ) {
1653+ if (to_lower (local_variant.name ) != variant_lower) {
1654+ continue ;
1655+ }
16391656
1640- for (const auto & filepath : all_gguf_files) {
1641- std::string relative_path = path_to_utf8 (
1642- path_from_utf8 (filepath).lexically_relative (model_cache_path_fs));
1643- relative_path = repo_relative_from_cache_relative (relative_path);
1657+ auto it = absolute_by_relative.find (local_variant.primary_file );
1658+ if (it != absolute_by_relative.end ()) {
1659+ enumerated_matches.push_back (it->second );
1660+ }
1661+ }
16441662
1645- // Multiple HF snapshots can contain the same repo-relative file.
1646- // Keep the first absolute path from the sorted all_gguf_files list
1647- // so duplicates do not create false ambiguity.
1648- if (absolute_by_relative. emplace (relative_path, filepath). second ) {
1649- relative_gguf_files. push_back (relative_path) ;
1663+ if (enumerated_matches. size () == 1 ) {
1664+ LOG ( INFO , " ModelManager " )
1665+ << " Resolved local GGUF variant ' " << variant
1666+ << " ' via quant-token fallback: " << enumerated_matches[ 0 ] << std::endl;
1667+ return enumerated_matches[ 0 ] ;
16501668 }
1651- }
16521669
1653- std::vector<std::string> enumerated_matches;
1654- auto local_variants = lemon::enumerate_gguf_variants (relative_gguf_files);
1655- for ( const auto & local_variant : local_variants. variants ) {
1656- if ( to_lower (local_variant. name ) != variant_lower) {
1657- continue ;
1670+ if ( enumerated_matches. size () > 1 ) {
1671+ LOG ( WARNING , " ModelManager " )
1672+ << " Multiple local GGUF files matched variant ' " << variant
1673+ << " ' via quant-token fallback; refusing to guess " << std::endl;
1674+ return " " ;
16581675 }
16591676
1660- auto it = absolute_by_relative.find (local_variant.primary_file );
1661- if (it != absolute_by_relative.end ()) {
1662- enumerated_matches.push_back (it->second );
1677+ // No match in this candidate set. Do not fall back to another
1678+ // quantization in the same Hugging Face repo; otherwise a custom
1679+ // download with a different quant can make a built-in model appear
1680+ // downloaded and allow deleting the wrong file.
1681+ return " " ;
1682+ };
1683+
1684+ // Prefer the active refs/main snapshot so that when upstream only changed
1685+ // README/metadata Lemonade keeps using the previous snapshot's artifacts.
1686+ // (Sorted for consistent ordering, important for sharded models.)
1687+ std::vector<std::string> active_gguf_files =
1688+ collect_gguf_files (active_hf_snapshot_path (model_cache_path_fs));
1689+ std::sort (active_gguf_files.begin (), active_gguf_files.end ());
1690+
1691+ // Whole-repo-cache candidates spanning every snapshot, populated on demand.
1692+ std::vector<std::string> all_cache_gguf_files;
1693+ bool all_cache_collected = false ;
1694+ auto whole_cache_gguf_files = [&]() -> const std::vector<std::string>& {
1695+ if (!all_cache_collected) {
1696+ all_cache_gguf_files = collect_gguf_files (model_cache_path_fs);
1697+ std::sort (all_cache_gguf_files.begin (), all_cache_gguf_files.end ());
1698+ all_cache_collected = true ;
16631699 }
1664- }
1700+ return all_cache_gguf_files;
1701+ };
16651702
1666- if (enumerated_matches.size () == 1 ) {
1667- LOG (INFO , " ModelManager" )
1668- << " Resolved local GGUF variant '" << variant
1669- << " ' via quant-token fallback: " << enumerated_matches[0 ] << std::endl;
1670- return enumerated_matches[0 ];
1703+ if (active_gguf_files.empty () && whole_cache_gguf_files ().empty ()) {
1704+ return model_cache_path; // Return directory if no GGUF found anywhere
16711705 }
16721706
1673- if (enumerated_matches.size () > 1 ) {
1674- LOG (WARNING , " ModelManager" )
1675- << " Multiple local GGUF files matched variant '" << variant
1676- << " ' via quant-token fallback; refusing to guess" << std::endl;
1677- return " " ;
1707+ std::string resolved_path = resolve_gguf_variant (active_gguf_files);
1708+
1709+ // #2300: a sibling variant that shares this HF repo can live in a snapshot
1710+ // other than the one refs/main points at. refs/main advances to the
1711+ // snapshot of whichever variant was pulled or updated last, leaving the
1712+ // other variants' symlinks behind in earlier snapshots; after a restart the
1713+ // refs/main-only search above then reports them as missing. If the active
1714+ // snapshot did not contain the requested variant, broaden the search to
1715+ // every snapshot in this repo's cache before declaring it missing. Blobs are
1716+ // content-addressed and shared, so reading an older snapshot is safe, and
1717+ // resolving against the active snapshot first preserves the CHECKPOINT:VARIANT
1718+ // contract (a different quant is never substituted while the exact one exists).
1719+ //
1720+ // The whole-cache set is a superset of the active set (it recurses the repo
1721+ // cache, which contains the active snapshot dir), so the two are equal only
1722+ // when refs/main's snapshot is the sole snapshot holding GGUFs — in which case
1723+ // the broader search is identical and skipped. Comparing the (sorted) sets,
1724+ // rather than just their sizes, makes that intent explicit and stays correct
1725+ // even if that superset relationship ever changes.
1726+ //
1727+ // When more than one inactive snapshot holds the requested variant, the
1728+ // existing first-by-sorted-path dedup (see Case 5) picks one deterministically;
1729+ // every such copy is a valid GGUF of that quant, so this is safe for the
1730+ // resolve/downloaded-status purpose. Preferring the newest snapshot per variant
1731+ // would need per-variant snapshot state the HF cache does not record today and
1732+ // is left as a follow-up (out of scope for this fix).
1733+ if (resolved_path.empty ()) {
1734+ const std::vector<std::string>& all_files = whole_cache_gguf_files ();
1735+ if (all_files != active_gguf_files) {
1736+ resolved_path = resolve_gguf_variant (all_files);
1737+ }
16781738 }
16791739
1680- // No match found for the requested GGUF variant. Do not fall back to
1681- // another quantization in the same Hugging Face repo; otherwise a
1682- // custom download with a different quant can make a built-in model
1683- // appear downloaded and allow deleting the wrong file.
1684- return " " ;
1740+ return resolved_path;
16851741 }
16861742
16871743 // Everything else
0 commit comments