diff --git a/.github/workflows/CI-wheels.yaml b/.github/workflows/CI-wheels.yaml index f941ebe..754b925 100644 --- a/.github/workflows/CI-wheels.yaml +++ b/.github/workflows/CI-wheels.yaml @@ -7,7 +7,7 @@ on: - main env: - LIBZIM_DL_VERSION: "9.2.3-2" + LIBZIM_DL_VERSION: "9.3.0-1" MACOSX_DEPLOYMENT_TARGET: "13.0" CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION" CIBW_BUILD_VERBOSITY: "3" diff --git a/.github/workflows/Publish.yaml b/.github/workflows/Publish.yaml index 700081a..0330344 100644 --- a/.github/workflows/Publish.yaml +++ b/.github/workflows/Publish.yaml @@ -6,7 +6,7 @@ on: - published env: - LIBZIM_DL_VERSION: "9.2.3-2" + LIBZIM_DL_VERSION: "9.3.0-1" MACOSX_DEPLOYMENT_TARGET: "13.0" CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION" # APPLE_SIGNING_KEYCHAIN_PATH set in prepare keychain step diff --git a/.github/workflows/QA.yaml b/.github/workflows/QA.yaml index ac17adc..3be3e0c 100644 --- a/.github/workflows/QA.yaml +++ b/.github/workflows/QA.yaml @@ -2,7 +2,7 @@ name: QA on: [push] env: - LIBZIM_DL_VERSION: "9.2.3-2" + LIBZIM_DL_VERSION: "9.3.0-1" MACOSX_DEPLOYMENT_TARGET: "13.0" jobs: diff --git a/.github/workflows/Tests.yaml b/.github/workflows/Tests.yaml index 75eacda..7a8c746 100644 --- a/.github/workflows/Tests.yaml +++ b/.github/workflows/Tests.yaml @@ -2,7 +2,7 @@ name: Tests on: [push] env: - LIBZIM_DL_VERSION: "9.2.3-2" + LIBZIM_DL_VERSION: "9.3.0-1" MACOSX_DEPLOYMENT_TARGET: "13.0" # we want cython traces for coverage PROFILE: "1" diff --git a/CHANGELOG.md b/CHANGELOG.md index 9339d29..af4d38a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Set up documentation using `mkdocs`, published on readthedocs.com (#186) - `Archive.get_random_entry()` +- libzim 9.3.0 Cache Control API: + - `Archive.cluster_cache_max_size` + - `Archive.cluster_cache_current_size` + - `Archive.dirent_cache_max_size` + - `Archive.dirent_cache_current_size` + - 
`Archive.dirent_lookup_cache_max_size` + +### Changed + +- Using C++ libzim 9.3.0-1 ## [3.6.0] - 2024-10-15 diff --git a/libzim/libwrapper.h b/libzim/libwrapper.h index 2736ee0..7da1b47 100644 --- a/libzim/libwrapper.h +++ b/libzim/libwrapper.h @@ -172,6 +172,14 @@ class Archive : public Wrapper FORWARD(bool, hasTitleIndex) FORWARD(bool, hasChecksum) FORWARD(bool, check) + FORWARD(zim::size_type, getClusterCacheMaxSize) + FORWARD(zim::size_type, getClusterCacheCurrentSize) + FORWARD(void, setClusterCacheMaxSize) + FORWARD(zim::size_type, getDirentCacheMaxSize) + FORWARD(zim::size_type, getDirentCacheCurrentSize) + FORWARD(void, setDirentCacheMaxSize) + FORWARD(zim::size_type, getDirentLookupCacheMaxSize) + FORWARD(void, setDirentLookupCacheMaxSize) }; class SearchResultSet : public Wrapper diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index c895f59..4744829 100644 --- a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -1333,6 +1333,95 @@ cdef class Archive: except RuntimeError as e: raise KeyError(str(e)) + @property + def cluster_cache_max_size(self) -> pyint: + """Maximum size of the cluster cache. + + Returns: + (int): maximum number of clusters stored in the cache. + """ + return self.c_archive.getClusterCacheMaxSize() + + @cluster_cache_max_size.setter + def cluster_cache_max_size(self, nb_clusters: pyint): + """Set the size of the cluster cache. + + If the new size is lower than the number of currently stored clusters + some clusters will be dropped from cache to respect the new size. + + Args: + nb_clusters (int): maximum number of clusters stored in the cache + """ + + self.c_archive.setClusterCacheMaxSize(nb_clusters) + + @property + def cluster_cache_current_size(self) -> pyint: + """Size of the cluster cache. + + Returns: + (int): number of clusters currently stored in the cache. + """ + return self.c_archive.getClusterCacheCurrentSize() + + @property + def dirent_cache_max_size(self) -> pyint: + """Maximum size of the dirent cache. 
+ + Returns: + (int): maximum number of dirents stored in the cache. + """ + return self.c_archive.getDirentCacheMaxSize() + + @dirent_cache_max_size.setter + def dirent_cache_max_size(self, nb_dirents: pyint): + """Set the size of the dirent cache. + + If the new size is lower than the number of currently stored dirents + some dirents will be dropped from cache to respect the new size. + + Args: + nb_dirents (int): maximum number of dirents stored in the cache. + """ + self.c_archive.setDirentCacheMaxSize(nb_dirents) + + @property + def dirent_cache_current_size(self) -> pyint: + """Size of the dirent cache. + + Returns: + (int): number of dirents currently stored in the cache. + """ + return self.c_archive.getDirentCacheCurrentSize() + + @property + def dirent_lookup_cache_max_size(self) -> pyint: + """Maximum size of the dirent lookup cache. + + The returned value is the default size or the last size that was set. + This may not correspond to the actual size of the dirent lookup cache. + See the dirent_lookup_cache_max_size setter for more information. + + Returns: + (int): maximum number of sub ranges created in the lookup cache. + """ + return self.c_archive.getDirentLookupCacheMaxSize() + + @dirent_lookup_cache_max_size.setter + def dirent_lookup_cache_max_size(self, nb_ranges: pyint): + """Set the size of the dirent lookup cache. + + Contrary to the other *_cache_max_size setters, this one has no effect + once the lookup cache is created. + The lookup cache is created on first access to an entry in the archive. + So this property must be set before any access to content (including metadata). + It is best to set it first, right after the archive is created. + + Args: + nb_ranges (int): maximum number of sub ranges created in the lookup cache. 
+ """ + self.c_archive.setDirentLookupCacheMaxSize(nb_ranges) + def __repr__(self) -> str: return f"{self.__class__.__name__}(filename={self.filename})" diff --git a/libzim/reader.pyi b/libzim/reader.pyi index 43cfc6e..e0f1cb5 100644 --- a/libzim/reader.pyi +++ b/libzim/reader.pyi @@ -77,4 +77,20 @@ class Archive: def get_illustration_sizes(self) -> set[int]: ... def has_illustration(self, size: int | None = None) -> bool: ... def get_illustration_item(self, size: int | None = None) -> Item: ... + @property + def cluster_cache_max_size(self) -> int: ... + @cluster_cache_max_size.setter + def cluster_cache_max_size(self, nb_clusters: int): ... + @property + def cluster_cache_current_size(self) -> int: ... + @property + def dirent_cache_max_size(self) -> int: ... + @dirent_cache_max_size.setter + def dirent_cache_max_size(self, nb_dirents: int): ... + @property + def dirent_cache_current_size(self) -> int: ... + @property + def dirent_lookup_cache_max_size(self) -> int: ... + @dirent_lookup_cache_max_size.setter + def dirent_lookup_cache_max_size(self, nb_ranges: int): ... def __repr__(self) -> str: ... 
diff --git a/libzim/zim.pxd b/libzim/zim.pxd index 02aab2a..3ab094e 100644 --- a/libzim/zim.pxd +++ b/libzim/zim.pxd @@ -178,6 +178,15 @@ cdef extern from "libwrapper.h" namespace "wrapper": bool hasChecksum() except + bool check() except + + uint64_t getClusterCacheMaxSize() except + + uint64_t getClusterCacheCurrentSize() except + + void setClusterCacheMaxSize(uint64_t nbClusters) except + + uint64_t getDirentCacheMaxSize() except + + uint64_t getDirentCacheCurrentSize() except + + void setDirentCacheMaxSize(uint64_t nbDirents) except + + uint64_t getDirentLookupCacheMaxSize() except + + void setDirentLookupCacheMaxSize(uint64_t nbRanges) except + + cdef cppclass Searcher: Searcher() Searcher(const Archive& archive) except + diff --git a/setup.py b/setup.py index 81d9114..0368baf 100755 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ class Config: - libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.2.3-2") + libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.3.0-1") use_system_libzim: bool = bool(os.getenv("USE_SYSTEM_LIBZIM") or False) download_libzim: bool = not bool(os.getenv("DONT_DOWNLOAD_LIBZIM") or False) @@ -258,7 +258,7 @@ def _download_and_extract(self, filename: str) -> pathlib.Path: if not fpath.exists(): print(f"> from {url}") with ( - urllib.request.urlopen(url) as response, # noqa: S310 + urllib.request.urlopen(url) as response, # noqa: S310 # nosec B310 open(fpath, "wb") as fh, # nosec ): # nosec fh.write(response.read()) diff --git a/tests/test_libzim_creator.py b/tests/test_libzim_creator.py index cc9ef27..2478997 100644 --- a/tests/test_libzim_creator.py +++ b/tests/test_libzim_creator.py @@ -866,7 +866,7 @@ def test_accented_search_from_libzim(fpath): assert zim.entry_count == 2 assert zim.article_count == 2 - assert zim.all_entry_count == 7 + assert zim.all_entry_count == 6 ascii_query = Query().set_query("test article") ascii_searcher = Searcher(zim) diff --git a/tests/test_libzim_reader.py b/tests/test_libzim_reader.py 
index 54fa64b..2e0ca28 100644 --- a/tests/test_libzim_reader.py +++ b/tests/test_libzim_reader.py @@ -19,7 +19,7 @@ ZIMS_DATA = { "blank.zim": { "filename": "blank.zim", - "filesize": 2197, + "filesize": 2189, "new_ns": True, "mutlipart": False, "zim_uuid": None, @@ -615,3 +615,82 @@ def test_reader_get_random_entry(all_zims): zim_2 = Archive(all_zims / "example.zim") with pytest.raises(KeyError): zim_2.get_random_entry() + + +@skip_if_offline +@pytest.mark.parametrize(*parametrize_for(["filename"])) +def test_cluster_cache(all_zims, filename): + zim = Archive(all_zims / filename) + default_value = 16 + new_value = 1 + empty_value = 0 + + assert zim.cluster_cache_max_size == default_value + + zim.cluster_cache_max_size = new_value + assert zim.cluster_cache_max_size == new_value + + # test index access + for index in range(0, zim.entry_count - 1): + bytes(zim._get_entry_by_id(index).get_item().content) + + assert zim.cluster_cache_current_size <= new_value + + zim.cluster_cache_max_size = empty_value + assert zim.cluster_cache_max_size == empty_value + + for index in range(0, zim.entry_count - 1): + bytes(zim._get_entry_by_id(index).get_item().content) + + assert zim.cluster_cache_current_size == empty_value + + +@skip_if_offline +@pytest.mark.parametrize(*parametrize_for(["filename"])) +def test_dirent_cache(all_zims, filename): + zim = Archive(all_zims / filename) + default_value = 512 + new_value = 2 + empty_value = 0 + + assert zim.dirent_cache_max_size == default_value + + zim.dirent_cache_max_size = new_value + assert zim.dirent_cache_max_size == new_value + + # test index access + for index in range(0, zim.entry_count - 1): + bytes(zim._get_entry_by_id(index).get_item().content) + + assert zim.dirent_cache_current_size <= new_value + + zim.dirent_cache_max_size = empty_value + assert zim.dirent_cache_max_size == empty_value + assert zim.dirent_cache_current_size == empty_value + + for index in range(0, zim.entry_count - 1): + 
+ bytes(zim._get_entry_by_id(index).get_item().content) + + assert zim.dirent_cache_current_size == empty_value + + +@skip_if_offline +@pytest.mark.parametrize(*parametrize_for(["filename"])) +def test_dirent_lookup_cache(all_zims, filename): + zim = Archive(all_zims / filename) + default_value = 1024 + new_value = 2 + empty_value = 0 + + assert zim.dirent_lookup_cache_max_size == default_value + + zim.dirent_lookup_cache_max_size = new_value + assert zim.dirent_lookup_cache_max_size == new_value + + # test index access + for index in range(0, zim.entry_count - 1): + bytes(zim._get_entry_by_id(index).get_item().content) + + # setting after reading records the value but it has no effect + zim.dirent_lookup_cache_max_size = empty_value + assert zim.dirent_lookup_cache_max_size == empty_value