Skip to content

Use libzim 9.3.0 #221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/CI-wheels.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
- main

env:
LIBZIM_DL_VERSION: "9.2.3-2"
LIBZIM_DL_VERSION: "9.3.0-1"
MACOSX_DEPLOYMENT_TARGET: "13.0"
CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION"
CIBW_BUILD_VERBOSITY: "3"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/Publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:
- published

env:
LIBZIM_DL_VERSION: "9.2.3-2"
LIBZIM_DL_VERSION: "9.3.0-1"
MACOSX_DEPLOYMENT_TARGET: "13.0"
CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION"
# APPLE_SIGNING_KEYCHAIN_PATH set in prepare keychain step
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/QA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: QA
on: [push]

env:
LIBZIM_DL_VERSION: "9.2.3-2"
LIBZIM_DL_VERSION: "9.3.0-1"
MACOSX_DEPLOYMENT_TARGET: "13.0"

jobs:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/Tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Tests
on: [push]

env:
LIBZIM_DL_VERSION: "9.2.3-2"
LIBZIM_DL_VERSION: "9.3.0-1"
MACOSX_DEPLOYMENT_TARGET: "13.0"
# we want cython traces for coverage
PROFILE: "1"
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Set up documentation using `mkdocs`, published on readthedocs.com (#186)
- `Archive.get_random_entry()`
- libzim 9.3.0 Cache Control API:
- `Archive.cluster_cache_max_size`
- `Archive.cluster_cache_current_size`
- `Archive.dirent_cache_max_size`
- `Archive.dirent_cache_current_size`
- `Archive.dirent_lookup_cache_max_size`

### Changed

- Using C++ libzim 9.3.0-1

## [3.6.0] - 2024-10-15

Expand Down
8 changes: 8 additions & 0 deletions libzim/libwrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,14 @@ class Archive : public Wrapper<zim::Archive>
FORWARD(bool, hasTitleIndex)
FORWARD(bool, hasChecksum)
FORWARD(bool, check)
FORWARD(zim::size_type, getClusterCacheMaxSize)
FORWARD(zim::size_type, getClusterCacheCurrentSize)
FORWARD(void, setClusterCacheMaxSize)
FORWARD(zim::size_type, getDirentCacheMaxSize)
FORWARD(zim::size_type, getDirentCacheCurrentSize)
FORWARD(void, setDirentCacheMaxSize)
FORWARD(zim::size_type, getDirentLookupCacheMaxSize)
FORWARD(void, setDirentLookupCacheMaxSize)
};

class SearchResultSet : public Wrapper<zim::SearchResultSet>
Expand Down
89 changes: 89 additions & 0 deletions libzim/libzim.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,95 @@ cdef class Archive:
except RuntimeError as e:
raise KeyError(str(e))

@property
def cluster_cache_max_size(self) -> pyint:
    """Upper bound of the cluster cache, counted in clusters.

    Returns:
        (int): how many clusters the cache may hold at most.
    """
    return self.c_archive.getClusterCacheMaxSize()

@cluster_cache_max_size.setter
def cluster_cache_max_size(self, nb_clusters: pyint):
    """Change the upper bound of the cluster cache.

    Lowering the bound below the number of clusters currently cached
    drops clusters from the cache until the new bound is respected.

    Args:
        nb_clusters (int): how many clusters the cache may hold at most.
    """
    self.c_archive.setClusterCacheMaxSize(nb_clusters)

@property
def cluster_cache_current_size(self) -> pyint:
    """Current occupancy of the cluster cache (read-only).

    Returns:
        (int): how many clusters the cache holds right now.
    """
    return self.c_archive.getClusterCacheCurrentSize()

@property
def dirent_cache_max_size(self) -> pyint:
    """Upper bound of the dirent cache, counted in dirents.

    Returns:
        (int): how many dirents the cache may hold at most.
    """
    return self.c_archive.getDirentCacheMaxSize()

@dirent_cache_max_size.setter
def dirent_cache_max_size(self, nb_dirents: pyint):
    """Change the upper bound of the dirent cache.

    Lowering the bound below the number of dirents currently cached
    drops dirents from the cache until the new bound is respected.

    Args:
        nb_dirents (int): how many dirents the cache may hold at most.
    """
    self.c_archive.setDirentCacheMaxSize(nb_dirents)

@property
def dirent_cache_current_size(self) -> pyint:
    """Current occupancy of the dirent cache (read-only).

    Returns:
        (int): how many dirents the cache holds right now.
    """
    return self.c_archive.getDirentCacheCurrentSize()

@property
def dirent_lookup_cache_max_size(self) -> pyint:
    """Maximum size of the dirent lookup cache.

    The value is either the default size or the last size that was set.
    It may not correspond to the actual size of the dirent lookup cache:
    see the setter of this property for more information.

    Returns:
        (int): maximum number of sub ranges created in the lookup cache.
    """
    return self.c_archive.getDirentLookupCacheMaxSize()

@dirent_lookup_cache_max_size.setter
def dirent_lookup_cache_max_size(self, nb_ranges: pyint):
    """Set the maximum size of the dirent lookup cache.

    Contrary to the other ``*_cache_max_size`` setters, setting this value
    has no effect once the lookup cache is created.
    The lookup cache is created at first access to an entry in the archive,
    so this value must be set before any access to content (including
    metadata). It is best to set it first, just after the archive creation.

    Args:
        nb_ranges (int): maximum number of sub ranges created in the lookup cache.
    """
    self.c_archive.setDirentLookupCacheMaxSize(nb_ranges)

def __repr__(self) -> str:
    """Return ``<ClassName>(filename=<filename>)`` for this object."""
    return f"{self.__class__.__name__}(filename={self.filename})"

Expand Down
16 changes: 16 additions & 0 deletions libzim/reader.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,20 @@ class Archive:
# Illustration accessors. `size` is optional and defaults to None.
# NOTE(review): what `size=None` selects is decided by the implementation,
# which is not visible from this stub — confirm before relying on it.
def get_illustration_sizes(self) -> set[int]: ...
def has_illustration(self, size: int | None = None) -> bool: ...
def get_illustration_item(self, size: int | None = None) -> Item: ...
# Read/write: maximum number of clusters stored in the cluster cache.
@property
def cluster_cache_max_size(self) -> int: ...
@cluster_cache_max_size.setter
def cluster_cache_max_size(self, nb_clusters: int) -> None: ...
# Read-only: number of clusters currently stored in the cluster cache.
@property
def cluster_cache_current_size(self) -> int: ...
# Read/write: maximum number of dirents stored in the dirent cache.
@property
def dirent_cache_max_size(self) -> int: ...
@dirent_cache_max_size.setter
def dirent_cache_max_size(self, nb_dirents: int) -> None: ...
# Read-only: number of dirents currently stored in the dirent cache.
@property
def dirent_cache_current_size(self) -> int: ...
# Read/write: maximum number of sub ranges created in the dirent lookup cache.
# Setting it is only useful before the first access to an entry of the archive.
@property
def dirent_lookup_cache_max_size(self) -> int: ...
@dirent_lookup_cache_max_size.setter
def dirent_lookup_cache_max_size(self, nb_ranges: int) -> None: ...
# String representation of the archive object.
def __repr__(self) -> str: ...
9 changes: 9 additions & 0 deletions libzim/zim.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,15 @@ cdef extern from "libwrapper.h" namespace "wrapper":
bool hasChecksum() except +
bool check() except +

uint64_t getClusterCacheMaxSize() except +
uint64_t getClusterCacheCurrentSize() except +
void setClusterCacheMaxSize(uint64_t nbClusters) except +
uint64_t getDirentCacheMaxSize() except +
uint64_t getDirentCacheCurrentSize() except +
void setDirentCacheMaxSize(uint64_t nbDirents) except +
uint64_t getDirentLookupCacheMaxSize() except +
void setDirentLookupCacheMaxSize(uint64_t nbRanges) except +

cdef cppclass Searcher:
Searcher()
Searcher(const Archive& archive) except +
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@


class Config:
libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.2.3-2")
libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.3.0-1")
use_system_libzim: bool = bool(os.getenv("USE_SYSTEM_LIBZIM") or False)
download_libzim: bool = not bool(os.getenv("DONT_DOWNLOAD_LIBZIM") or False)

Expand Down Expand Up @@ -258,7 +258,7 @@ def _download_and_extract(self, filename: str) -> pathlib.Path:
if not fpath.exists():
print(f"> from {url}")
with (
urllib.request.urlopen(url) as response, # noqa: S310
urllib.request.urlopen(url) as response, # noqa: S310 # nosec B310
open(fpath, "wb") as fh, # nosec
): # nosec
fh.write(response.read())
Expand Down
2 changes: 1 addition & 1 deletion tests/test_libzim_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ def test_accented_search_from_libzim(fpath):

assert zim.entry_count == 2
assert zim.article_count == 2
assert zim.all_entry_count == 7
assert zim.all_entry_count == 6

ascii_query = Query().set_query("test article")
ascii_searcher = Searcher(zim)
Expand Down
81 changes: 80 additions & 1 deletion tests/test_libzim_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
ZIMS_DATA = {
"blank.zim": {
"filename": "blank.zim",
"filesize": 2197,
"filesize": 2189,
"new_ns": True,
"mutlipart": False,
"zim_uuid": None,
Expand Down Expand Up @@ -615,3 +615,82 @@ def test_reader_get_random_entry(all_zims):
zim_2 = Archive(all_zims / "example.zim")
with pytest.raises(KeyError):
zim_2.get_random_entry()


@skip_if_offline
@pytest.mark.parametrize(*parametrize_for(["filename"]))
def test_cluster_cache(all_zims, filename):
    """Cluster cache limit is readable, writable and bounds the cache usage."""
    archive = Archive(all_zims / filename)
    expected_default = 16
    reduced_limit = 1
    disabled_limit = 0

    # a freshly opened archive exposes the default limit
    assert archive.cluster_cache_max_size == expected_default

    archive.cluster_cache_max_size = reduced_limit
    assert archive.cluster_cache_max_size == reduced_limit

    # read content by index so that clusters get loaded
    # NOTE(review): the range stops one short of entry_count, so the last
    # entry is never read — confirm whether this is intended.
    for entry_id in range(archive.entry_count - 1):
        bytes(archive._get_entry_by_id(entry_id).get_item().content)

    assert archive.cluster_cache_current_size <= reduced_limit

    archive.cluster_cache_max_size = disabled_limit
    assert archive.cluster_cache_max_size == disabled_limit

    # with a zero limit, reading content must leave the cache empty
    for entry_id in range(archive.entry_count - 1):
        bytes(archive._get_entry_by_id(entry_id).get_item().content)

    assert archive.cluster_cache_current_size == disabled_limit


@skip_if_offline
@pytest.mark.parametrize(*parametrize_for(["filename"]))
def test_dirent_cache(all_zims, filename):
    """Dirent cache limit is readable, writable and bounds the cache usage."""
    archive = Archive(all_zims / filename)
    expected_default = 512
    reduced_limit = 2
    disabled_limit = 0

    # a freshly opened archive exposes the default limit
    assert archive.dirent_cache_max_size == expected_default

    archive.dirent_cache_max_size = reduced_limit
    assert archive.dirent_cache_max_size == reduced_limit

    # read content by index so that dirents get loaded
    # NOTE(review): the range stops one short of entry_count, so the last
    # entry is never read — confirm whether this is intended.
    for entry_id in range(archive.entry_count - 1):
        bytes(archive._get_entry_by_id(entry_id).get_item().content)

    assert archive.dirent_cache_current_size <= reduced_limit

    # a zero limit is applied immediately: the cache is emptied right away
    archive.dirent_cache_max_size = disabled_limit
    assert archive.dirent_cache_max_size == disabled_limit
    assert archive.dirent_cache_current_size == disabled_limit

    # ... and reading content afterwards must leave it empty
    for entry_id in range(archive.entry_count - 1):
        bytes(archive._get_entry_by_id(entry_id).get_item().content)

    assert archive.dirent_cache_current_size == disabled_limit


@skip_if_offline
@pytest.mark.parametrize(*parametrize_for(["filename"]))
def test_dirent_lookup_cache(all_zims, filename):
    """Dirent lookup cache limit is readable and writable, even after reads."""
    archive = Archive(all_zims / filename)
    expected_default = 1024
    reduced_limit = 2
    disabled_limit = 0

    # a freshly opened archive exposes the default limit
    assert archive.dirent_lookup_cache_max_size == expected_default

    archive.dirent_lookup_cache_max_size = reduced_limit
    assert archive.dirent_lookup_cache_max_size == reduced_limit

    # read content by index
    # NOTE(review): the range stops one short of entry_count, so the last
    # entry is never read — confirm whether this is intended.
    for entry_id in range(archive.entry_count - 1):
        bytes(archive._get_entry_by_id(entry_id).get_item().content)

    # setting after reading records the value but it has no use
    archive.dirent_lookup_cache_max_size = disabled_limit
    assert archive.dirent_lookup_cache_max_size == disabled_limit