Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,15 @@ for path in crawler.go(token=TimeoutToken(0.0001)): # Limit the iteration time t

> ↑ Follow these rules to avoid accidentally "baking" an expired token inside a crawler object.

By default, cancellation stops iteration silently — the caller cannot tell it apart from natural exhaustion. Pass `raise_on_cancel=...` to make the crawler raise an exception on cancellation instead:

```python
for path in Crawler('.', token=TimeoutToken(0.0001), raise_on_cancel=True):
    print(path)
```

> ↑ `raise_on_cancel=True` re-raises the native `cantok` exception; `raise_on_cancel=MyError("...")` raises that exact instance; `raise_on_cancel=MyError` instantiates the class with the `cantok` message as its single positional argument and raises the new instance. The default is `False` (iteration stops silently).


## Combination

Expand Down
107 changes: 85 additions & 22 deletions dirstree/crawlers/crawler.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,36 @@
from pathlib import Path
from typing import Any, Callable, Collection, Dict, Generator, List, Optional, Union
from typing import (
Any,
Callable,
Collection,
Dict,
Generator,
List,
Optional,
Type,
Union,
)

import pathspec
from cantok import AbstractToken, DefaultToken
from cantok import AbstractToken, CancellationError, DefaultToken
from printo import describe_data_object, not_none
from sigmatch import PossibleCallMatcher
from sigmatch.errors import SignatureMismatchError, SignatureNotFoundError

from dirstree.crawlers.abstract import AbstractCrawler
from dirstree.errors import IncompatibleCrawlerOptionsError


def _exception_class_accepts_single_positional(cls: type) -> bool:
    """
    Tell whether ``cls`` can be called with the ``'.'`` call pattern.

    The check is delegated to ``PossibleCallMatcher('.')``. A
    ``SignatureMismatchError`` is reported as ``False``; a
    ``SignatureNotFoundError`` is treated as acceptance (presumably the
    signature could not be inspected at all, so the class gets the benefit of
    the doubt -- confirm against the sigmatch documentation).
    """
    single_argument_call = PossibleCallMatcher('.')

    # The order of the handlers mirrors the original on purpose: the relationship
    # between the two sigmatch error classes is not visible from this module.
    try:
        single_argument_call.match(cls, raise_exception=True)
    except SignatureNotFoundError:
        accepted = True
    except SignatureMismatchError:
        accepted = False
    else:
        accepted = True

    return accepted


# TODO: add typing tests
class Crawler(AbstractCrawler):
"""
Expand Down Expand Up @@ -40,6 +61,7 @@ def __init__( # noqa: PLR0913
token: AbstractToken = DefaultToken(), # noqa: B008
only_files: bool = True,
freeze: bool = False,
raise_on_cancel: Union[bool, BaseException, Type[BaseException]] = False,
) -> None:
if extensions is not None and not only_files:
raise IncompatibleCrawlerOptionsError(
Expand All @@ -56,6 +78,19 @@ def __init__( # noqa: PLR0913
if filter is not None:
PossibleCallMatcher('.').match(filter, raise_exception=True)

if not (
isinstance(raise_on_cancel, (bool, BaseException))
or (
isinstance(raise_on_cancel, type)
and issubclass(raise_on_cancel, BaseException)
and _exception_class_accepts_single_positional(raise_on_cancel)
)
):
raise TypeError(
'raise_on_cancel must be a bool, a BaseException instance, '
'or a BaseException subclass whose constructor accepts a single positional argument.',
)

self.paths = paths
self.extensions = extensions
self.exclude = exclude if exclude is not None else []
Expand All @@ -64,6 +99,13 @@ def __init__( # noqa: PLR0913
self.only_files = only_files
self.frozen = freeze

if isinstance(raise_on_cancel, bool):
self.raise_on_cancel: bool = raise_on_cancel
self.cancellation_exception: Optional[Union[BaseException, Type[BaseException]]] = None
else:
self.raise_on_cancel = True
self.cancellation_exception = raise_on_cancel

self.addictional_repr_filters: Dict[str, Callable[[Any], bool]] = {}

def __repr__(self) -> str:
Expand All @@ -74,9 +116,14 @@ def __repr__(self) -> str:
'token': lambda x: not isinstance(x, DefaultToken),
'only_files': lambda x: x is False,
'freeze': lambda x: x is True,
'raise_on_cancel': lambda x: x is not False,
}
filters.update(self.addictional_repr_filters)

displayed_raise_on_cancel: Union[bool, BaseException, Type[BaseException]] = (
self.cancellation_exception if self.cancellation_exception is not None else self.raise_on_cancel
)

return describe_data_object(
self.__class__.__name__,
self.paths,
Expand All @@ -87,41 +134,57 @@ def __repr__(self) -> str:
'token': self.token,
'only_files': self.only_files,
'freeze': self.frozen,
'raise_on_cancel': displayed_raise_on_cancel,
},
filters=filters, # type: ignore[arg-type]
)

def _check_token(self, token: AbstractToken) -> bool:
    """
    Report whether iteration may continue under ``token``.

    Returns ``True`` while ``token`` is truthy. Once it is falsy the outcome
    depends on the crawler's settings:

    - ``self.raise_on_cancel`` is false: return ``False`` so the caller can
      stop silently;
    - ``self.cancellation_exception`` is ``None``: let the ``CancellationError``
      raised by ``token.check()`` propagate unchanged;
    - it is an exception class: raise a new instance built from the original
      message, chained to the original exception;
    - it is an exception instance: raise that very instance, chained to the
      original exception.

    ``False`` is also returned when ``token.check()`` does not raise even
    though the token is falsy.
    """
    if token:
        return True

    if not self.raise_on_cancel:
        return False

    try:
        token.check()
    except CancellationError as original_exception:
        replacement = self.cancellation_exception
        if replacement is None:
            raise
        if isinstance(replacement, type):
            raise replacement(str(original_exception)) from original_exception
        # The stored instance is reused for every cancellation of this crawler.
        # Python appends new frames to an exception's existing ``__traceback__``
        # when it is raised again, so without a reset each later cancellation
        # would carry (and keep alive) the frames of all earlier ones.
        raise replacement.with_traceback(None) from original_exception

    return False

def _traverse(self, token: AbstractToken) -> Generator[Path, None, None]:
# Walk every root in self.paths recursively and yield the paths that pass the
# only_files / exclude / extensions / filter checks, consulting the token along the way.
# NOTE(review): this span is a rendered diff with no +/- markers and no indentation.
# The `if token:` ... `else: break` section and the section built on `_check_token`
# look like the removed and the added variant of the same loop -- confirm against the
# repository before relying on either reading.
# Exclusion patterns are compiled once, using pathspec's 'gitwildmatch' syntax.
excludes_spec = pathspec.PathSpec.from_lines('gitwildmatch', self.exclude)

for path in self.paths:
# Consult the token before starting on the next root; _check_token may raise instead of returning False.
if not self._check_token(token):
return
base_path = Path(path)
if token:
for child_path in base_path.rglob('*'):
if (
(not self.only_files or child_path.is_file())
and not (
excludes_spec.match_file(child_path)
or (child_path.is_dir() and excludes_spec.match_file(f'{child_path}/'))
)
and (self.extensions is None or child_path.suffix in self.extensions)
and (self.filter is None or self.filter(child_path))
):
yield child_path

if not token:
break
else:
break
for child_path in base_path.rglob('*'):
if (
# Directories are yielded only when only_files is switched off.
(not self.only_files or child_path.is_file())
and not (
excludes_spec.match_file(child_path)
# Directories are matched a second time with a trailing slash appended.
or (child_path.is_dir() and excludes_spec.match_file(f'{child_path}/'))
)
and (self.extensions is None or child_path.suffix in self.extensions)
and (self.filter is None or self.filter(child_path))
):
yield child_path

if not self._check_token(token):
return
# Final consultation of the token; the return value is ignored, so its only visible effect is a possible raise.
# NOTE(review): the nesting level of this call cannot be recovered from this view.
self._check_token(token)

def go(self, token: AbstractToken = DefaultToken()) -> Generator[Path, None, None]: # noqa: B008
# Iterate over the crawler's paths under the per-call token combined (via `+`) with the
# token stored on the instance.
# NOTE(review): this span is a rendered diff with no +/- markers and no indentation.
# `token = token + self.token` and the two-line `instance_token` form look like the old
# and new spelling of the same statement, as do `if not token: break` and the
# `_check_token` pair below -- confirm against the repository.
token = token + self.token
instance_token = self.token
token = token + instance_token

if self.frozen:
# Frozen mode: the whole traversal is materialised into a list before anything is yielded.
snapshot = list(self._traverse(token))
for path in snapshot:
if not token:
break
# The token is consulted again before each snapshot entry is handed out.
if not self._check_token(token):
return
yield path
else:
# Non-frozen mode: paths are streamed straight from _traverse.
yield from self._traverse(token)
5 changes: 3 additions & 2 deletions dirstree/crawlers/python_crawler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Callable, List, Optional, Union
from typing import Callable, List, Optional, Type, Union

from cantok import AbstractToken, DefaultToken

Expand All @@ -14,9 +14,10 @@ def __init__(
filter: Optional[Callable[[Path], bool]] = None, # noqa: A002
token: AbstractToken = DefaultToken(), # noqa: B008
freeze: bool = False,
raise_on_cancel: Union[bool, BaseException, Type[BaseException]] = False,
) -> None:
super().__init__(
*paths, extensions=('.py',), exclude=exclude, filter=filter, token=token, freeze=freeze,
*paths, extensions=('.py',), exclude=exclude, filter=filter, token=token, freeze=freeze, raise_on_cancel=raise_on_cancel,
)
self.addictional_repr_filters = {
'extensions': lambda x: False, # noqa: ARG005
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "dirstree"
version = "0.0.11"
version = "0.0.12"
authors = [{ name = "Evgeniy Blinov", email = "zheni-b@yandex.ru" }]
description = 'Another library for iterating through the contents of a directory'
readme = "README.md"
Expand Down
51 changes: 51 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,58 @@
import os
from pathlib import Path
from typing import Tuple, Type, Union

import pytest
from cantok import CancellationError, SimpleToken


def extract_cancellation_message(token_class: Type[SimpleToken]) -> str:
    """Return the message of the ``CancellationError`` raised by a cancelled token.

    A token is created with ``token_class(cancelled=True)`` and its ``.check()``
    method is called; the text of the resulting ``CancellationError`` is
    returned. ``token_class`` therefore has to accept ``cancelled=True`` as a
    constructor argument.

    If ``.check()`` returns without raising, an ``AssertionError`` is raised:
    this helper treats that outcome as a violation of cantok's contract.
    """
    try:
        cancelled_token = token_class(cancelled=True)
        cancelled_token.check()
    except CancellationError as cancellation:
        message = str(cancellation)
    else:
        raise AssertionError('cantok contract violation: .check() on a cancelled token must raise')
    return message


def predict_raised_exception(
    raise_on_cancel_value: Union[bool, BaseException, Type[BaseException]],
    native_message: str,
) -> Tuple[Type[BaseException], str]:
    """Predict ``(expected_type, expected_message)`` for a ``raise_on_cancel`` value.

    The three forms that lead to a raise are mapped as follows:

    - ``True`` -> ``CancellationError`` together with ``native_message``;
    - an exception class -> that class together with ``native_message``;
    - an exception instance -> its type together with ``str(instance)``.

    ``native_message`` is the message the caller expects cantok to produce for
    the token used in the test.

    Any other value (notably ``False``, which the annotation admits only so
    that parametrize lists can share one type) is a programming error and ends
    in an ``AssertionError``.
    """
    flag = raise_on_cancel_value
    prediction: Tuple[Type[BaseException], str]

    if flag is True:
        prediction = (CancellationError, native_message)
    elif isinstance(flag, type):
        # Checked before the instance branch, exactly as in the original ordering.
        prediction = (flag, native_message)
    elif isinstance(flag, BaseException):
        prediction = (type(flag), str(flag))
    else:
        raise AssertionError(f'predict_raised_exception is not meaningful for {raise_on_cancel_value!r}')

    return prediction


@pytest.fixture(params=[str, Path])
Expand Down
Loading
Loading