From 2fd08040bbda5f33cec695a1038c5e08b9ea64f1 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Mon, 15 Jul 2024 13:29:59 -0400 Subject: [PATCH 1/7] Distinguish between filepath and filepath component lengths, deprecate max_len --- pathvalidate/_base.py | 88 ++++++++++++++++++++-------- pathvalidate/_filename.py | 117 ++++++++++++++++++++++++++++---------- pathvalidate/_filepath.py | 116 +++++++++++++++++++++++++++---------- 3 files changed, 237 insertions(+), 84 deletions(-) diff --git a/pathvalidate/_base.py b/pathvalidate/_base.py index 20eeb18..98d9ce5 100644 --- a/pathvalidate/_base.py +++ b/pathvalidate/_base.py @@ -6,6 +6,7 @@ import os import sys from typing import ClassVar, Optional, Sequence, Tuple +import warnings from ._common import normalize_platform, unprintable_ascii_chars from ._const import DEFAULT_MIN_LEN, Platform @@ -21,6 +22,7 @@ class BaseFile: _INVALID_WIN_FILENAME_CHARS: ClassVar[str] = ( _INVALID_FILENAME_CHARS + _INVALID_WIN_PATH_CHARS + "\\" ) + _DEFAULT_MAX_FILENAME_LEN = 255 @property def platform(self) -> Platform: @@ -32,14 +34,28 @@ def reserved_keywords(self) -> Tuple[str, ...]: @property def max_len(self) -> int: - return self._max_len + warnings.warn( + "'max_len' is deprecated. 
Use 'max_filepath_len' instead.", + DeprecationWarning, + ) + return self._max_filepath_len + + @property + def max_filename_len(self) -> int: + return self._max_filename_len + + @property + def max_filepath_len(self) -> int: + return self._max_filepath_len def __init__( self, - max_len: int, fs_encoding: Optional[str], + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, additional_reserved_names: Optional[Sequence[str]] = None, - platform_max_len: Optional[int] = None, + platform_max_filename_len: Optional[int] = None, + platform_max_filepath_len: Optional[int] = None, platform: Optional[PlatformType] = None, ) -> None: if additional_reserved_names is None: @@ -48,15 +64,25 @@ def __init__( self.__platform = normalize_platform(platform) - if platform_max_len is None: - platform_max_len = self._get_default_max_path_len() + # determine max filepath length + if platform_max_filepath_len is None: + platform_max_filepath_len = self._get_default_max_path_len() - if max_len <= 0: - self._max_len = platform_max_len + if max_filepath_len is None or max_filepath_len <= 0: + self._max_filepath_len = platform_max_filepath_len else: - self._max_len = max_len + self._max_filepath_len = min(max_filepath_len, platform_max_filepath_len) - self._max_len = min(self._max_len, platform_max_len) + # determine max filename length + if platform_max_filename_len is None: + platform_max_filename_len = self._get_default_max_name_len() + + if max_filename_len is None or max_filename_len <= 0: + self._max_filename_len = platform_max_filename_len + else: + self._max_filename_len = min(max_filename_len, platform_max_filename_len) + # name cannot be longer than max path length + self._max_filename_len = min(self._max_filename_len, self._max_filepath_len) if fs_encoding: self._fs_encoding = fs_encoding @@ -99,24 +125,29 @@ def _get_default_max_path_len(self) -> int: return 260 # universal + def _get_default_max_name_len(self) -> int: + return 
self._DEFAULT_MAX_FILENAME_LEN + class AbstractValidator(BaseFile, metaclass=abc.ABCMeta): def __init__( self, - max_len: int, fs_encoding: Optional[str], check_reserved: bool, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, additional_reserved_names: Optional[Sequence[str]] = None, - platform_max_len: Optional[int] = None, + platform_max_filepath_len: Optional[int] = None, platform: Optional[PlatformType] = None, ) -> None: self._check_reserved = check_reserved super().__init__( - max_len, fs_encoding, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, additional_reserved_names=additional_reserved_names, - platform_max_len=platform_max_len, + platform_max_filepath_len=platform_max_filepath_len, platform=platform, ) @@ -145,21 +176,23 @@ class AbstractSanitizer(BaseFile, metaclass=abc.ABCMeta): def __init__( self, validator: AbstractValidator, - max_len: int, fs_encoding: Optional[str], validate_after_sanitize: bool, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, null_value_handler: Optional[ValidationErrorHandler] = None, reserved_name_handler: Optional[ValidationErrorHandler] = None, additional_reserved_names: Optional[Sequence[str]] = None, - platform_max_len: Optional[int] = None, + platform_max_filepath_len: Optional[int] = None, platform: Optional[PlatformType] = None, ) -> None: super().__init__( - max_len=max_len, fs_encoding=fs_encoding, additional_reserved_names=additional_reserved_names, - platform_max_len=platform_max_len, + platform_max_filepath_len=platform_max_filepath_len, platform=platform, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, ) if null_value_handler is None: @@ -187,11 +220,12 @@ def min_len(self) -> int: def __init__( self, min_len: int, - max_len: int, fs_encoding: Optional[str], check_reserved: bool, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, additional_reserved_names: 
Optional[Sequence[str]] = None, - platform_max_len: Optional[int] = None, + platform_max_filepath_len: Optional[int] = None, platform: Optional[PlatformType] = None, ) -> None: if min_len <= 0: @@ -199,11 +233,12 @@ def __init__( self._min_len = max(min_len, 1) super().__init__( - max_len=max_len, fs_encoding=fs_encoding, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, - platform_max_len=platform_max_len, + platform_max_filepath_len=platform_max_filepath_len, platform=platform, ) @@ -227,11 +262,14 @@ def _validate_reserved_keywords(self, name: str) -> None: ) def _validate_max_len(self) -> None: - if self.max_len < 1: - raise ValueError("max_len must be greater or equal to one") + if self.max_filename_len < 1: + raise ValueError("max_filename_len must be greater or equal to one") + + if self.max_filepath_len < 1: + raise ValueError("max_filepath_len must be greater or equal to one") - if self.min_len > self.max_len: - raise ValueError("min_len must be lower than max_len") + if self.min_len > self.max_filename_len: + raise ValueError("min_len must be lower than max_filename_len") @staticmethod def __extract_root_name(path: str) -> str: diff --git a/pathvalidate/_filename.py b/pathvalidate/_filename.py index 1b2168f..e825ec6 100644 --- a/pathvalidate/_filename.py +++ b/pathvalidate/_filename.py @@ -18,7 +18,6 @@ from .handler import ReservedNameHandler, ValidationErrorHandler -_DEFAULT_MAX_FILENAME_LEN = 255 _RE_INVALID_FILENAME = re.compile(f"[{re.escape(BaseFile._INVALID_FILENAME_CHARS):s}]", re.UNICODE) _RE_INVALID_WIN_FILENAME = re.compile( f"[{re.escape(BaseFile._INVALID_WIN_FILENAME_CHARS):s}]", re.UNICODE @@ -28,7 +27,7 @@ class FileNameSanitizer(AbstractSanitizer): def __init__( self, - max_len: int = _DEFAULT_MAX_FILENAME_LEN, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, platform: Optional[PlatformType] = None, 
null_value_handler: Optional[ValidationErrorHandler] = None, @@ -36,13 +35,21 @@ def __init__( additional_reserved_names: Optional[Sequence[str]] = None, validate_after_sanitize: bool = False, validator: Optional[AbstractValidator] = None, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> None: + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. Use 'max_filename_len' instead.", + DeprecationWarning, + ) + max_filename_len = max_len if validator: fname_validator = validator else: fname_validator = FileNameValidator( min_len=DEFAULT_MIN_LEN, - max_len=max_len, + max_len=max_filename_len, fs_encoding=fs_encoding, check_reserved=True, additional_reserved_names=additional_reserved_names, @@ -50,12 +57,12 @@ def __init__( ) super().__init__( - max_len=max_len, fs_encoding=fs_encoding, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, null_value_handler=null_value_handler, reserved_name_handler=reserved_name_handler, additional_reserved_names=additional_reserved_names, - platform_max_len=_DEFAULT_MAX_FILENAME_LEN, platform=platform, validate_after_sanitize=validate_after_sanitize, validator=fname_validator, @@ -75,7 +82,9 @@ def sanitize(self, value: PathType, replacement_text: str = "") -> PathType: raise sanitized_filename = self._sanitize_regexp.sub(replacement_text, str(value)) - sanitized_filename = truncate_str(sanitized_filename, self._fs_encoding, self.max_len) + sanitized_filename = truncate_str( + sanitized_filename, self._fs_encoding, self.max_filename_len + ) try: self._validator.validate(sanitized_filename) @@ -149,19 +158,27 @@ def reserved_keywords(self) -> Tuple[str, ...]: def __init__( self, min_len: int = DEFAULT_MIN_LEN, - max_len: int = _DEFAULT_MAX_FILENAME_LEN, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, platform: Optional[PlatformType] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, + 
max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> None: + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. Use 'max_filename_len' instead.", + DeprecationWarning, + ) + max_filename_len = max_len super().__init__( min_len=min_len, - max_len=max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, - platform_max_len=_DEFAULT_MAX_FILENAME_LEN, platform=platform, ) @@ -179,10 +196,10 @@ def validate(self, value: PathType) -> None: ErrorAttrKey.FS_ENCODING: self._fs_encoding, ErrorAttrKey.BYTE_COUNT: byte_ct, } - if byte_ct > self.max_len: + if byte_ct > self.max_filename_len: raise ValidationError( [ - f"filename is too long: expected<={self.max_len:d} bytes, actual={byte_ct:d} bytes" + f"filename is too long: expected<={self.max_filename_len:d} bytes, actual={byte_ct:d} bytes" ], **err_kwargs, ) @@ -266,10 +283,12 @@ def validate_filename( filename: PathType, platform: Optional[PlatformType] = None, min_len: int = DEFAULT_MIN_LEN, - max_len: int = _DEFAULT_MAX_FILENAME_LEN, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> None: """Verifying whether the ``filename`` is a valid file name or not. @@ -284,14 +303,7 @@ def validate_filename( Minimum byte length of the ``filename``. The value must be greater or equal to one. Defaults to ``1``. max_len: - Maximum byte length of the ``filename``. The value must be lower than: - - - ``Linux``: 4096 - - ``macOS``: 1024 - - ``Windows``: 260 - - ``universal``: 260 - - Defaults to ``255``. + [Deprecated] Use 'max_filename_len' instead. fs_encoding: Filesystem encoding that used to calculate the byte length of the filename. 
If |None|, get the value from the execution environment. @@ -300,10 +312,21 @@ def validate_filename( additional_reserved_names: Additional reserved names to check. Case insensitive. + max_filename_len: + Maximum byte length of the ``filename``. + Defaults to ``255``. + max_filepath_len: + Maximum byte length of the file path. + Defaults to: + + - ``Linux``: 4096 + - ``macOS``: 1024 + - ``Windows``: 260 + - ``universal``: 260 Raises: ValidationError (ErrorReason.INVALID_LENGTH): - If the ``filename`` is longer than ``max_len`` characters. + If the ``filename`` is longer than ``max_filename_len`` characters. ValidationError (ErrorReason.INVALID_CHARACTER): If the ``filename`` includes invalid character(s) for a filename: |invalid_filename_chars|. @@ -321,11 +344,17 @@ def validate_filename( `Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs `__ """ - + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. Use 'max_filename_len' instead.", + DeprecationWarning, + ) + max_filename_len = max_len FileNameValidator( platform=platform, min_len=min_len, - max_len=max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, @@ -336,10 +365,12 @@ def is_valid_filename( filename: PathType, platform: Optional[PlatformType] = None, min_len: int = DEFAULT_MIN_LEN, - max_len: Optional[int] = None, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> bool: """Check whether the ``filename`` is a valid name or not. @@ -355,11 +386,18 @@ def is_valid_filename( See Also: :py:func:`.validate_filename()` """ + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. 
Use 'max_filename_len' instead.", + DeprecationWarning, + ) + max_filename_len = max_len return FileNameValidator( platform=platform, min_len=min_len, - max_len=-1 if max_len is None else max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, @@ -370,13 +408,15 @@ def sanitize_filename( filename: PathType, replacement_text: str = "", platform: Optional[PlatformType] = None, - max_len: Optional[int] = _DEFAULT_MAX_FILENAME_LEN, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, check_reserved: Optional[bool] = None, null_value_handler: Optional[ValidationErrorHandler] = None, reserved_name_handler: Optional[ValidationErrorHandler] = None, additional_reserved_names: Optional[Sequence[str]] = None, validate_after_sanitize: bool = False, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> PathType: """Make a valid filename from a string. @@ -401,9 +441,7 @@ def sanitize_filename( .. include:: platform.txt max_len: - Maximum byte length of the ``filename``. - Truncate the name length if the ``filename`` length exceeds this value. - Defaults to ``255``. + [Deprecated] Use 'max_filename_len' instead. fs_encoding: Filesystem encoding that used to calculate the byte length of the filename. If |None|, get the value from the execution environment. @@ -433,6 +471,18 @@ def sanitize_filename( Case insensitive. validate_after_sanitize: Execute validation after sanitization to the file name. + max_filename_len: + Maximum byte length of the ``filename``. + Truncate the name length if the ``filename`` length exceeds this value. + Defaults to ``255``. + max_filepath_len: + Maximum byte length of the file path. 
+ Defaults to: + + - ``Linux``: 4096 + - ``macOS``: 1024 + - ``Windows``: 260 + - ``universal``: 260 Returns: Same type as the ``filename`` (str or PathLike object): @@ -445,6 +495,12 @@ def sanitize_filename( Example: :ref:`example-sanitize-filename` """ + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. Use 'max_filename_len' instead.", + DeprecationWarning, + ) + max_filename_len = max_len if check_reserved is not None: warnings.warn( @@ -457,7 +513,8 @@ def sanitize_filename( return FileNameSanitizer( platform=platform, - max_len=-1 if max_len is None else max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, null_value_handler=null_value_handler, reserved_name_handler=reserved_name_handler, diff --git a/pathvalidate/_filepath.py b/pathvalidate/_filepath.py index 38192a8..69ef656 100644 --- a/pathvalidate/_filepath.py +++ b/pathvalidate/_filepath.py @@ -26,7 +26,7 @@ class FilePathSanitizer(AbstractSanitizer): def __init__( self, - max_len: int = -1, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, platform: Optional[PlatformType] = None, null_value_handler: Optional[ValidationErrorHandler] = None, @@ -35,20 +35,31 @@ def __init__( normalize: bool = True, validate_after_sanitize: bool = False, validator: Optional[AbstractValidator] = None, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> None: + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. 
Use 'max_filepath_len' instead.", + DeprecationWarning, + ) + max_filepath_len = max_len + if validator: fpath_validator = validator else: fpath_validator = FilePathValidator( min_len=DEFAULT_MIN_LEN, - max_len=max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, check_reserved=True, additional_reserved_names=additional_reserved_names, platform=platform, ) super().__init__( - max_len=max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, validator=fpath_validator, null_value_handler=null_value_handler, @@ -60,7 +71,8 @@ def __init__( self._sanitize_regexp = self._get_sanitize_regexp() self.__fname_sanitizer = FileNameSanitizer( - max_len=self.max_len, + max_filename_len=self.max_filename_len, + max_filepath_len=self.max_filepath_len, fs_encoding=fs_encoding, null_value_handler=null_value_handler, reserved_name_handler=reserved_name_handler, @@ -161,15 +173,24 @@ def reserved_keywords(self) -> Tuple[str, ...]: def __init__( self, min_len: int = DEFAULT_MIN_LEN, - max_len: int = -1, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, platform: Optional[PlatformType] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> None: + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. 
Use 'max_filepath_len' instead.", + DeprecationWarning, + ) + max_filepath_len = max_len super().__init__( min_len=min_len, - max_len=max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, @@ -206,10 +227,10 @@ def validate(self, value: PathType) -> None: ErrorAttrKey.BYTE_COUNT: byte_ct, } - if byte_ct > self.max_len: + if byte_ct > self.max_filepath_len: raise ValidationError( [ - f"file path is too long: expected<={self.max_len:d} bytes, actual={byte_ct:d} bytes" + f"file path is too long: expected<={self.max_filepath_len:d} bytes, actual={byte_ct:d} bytes" ], **err_kwargs, ) @@ -307,10 +328,12 @@ def validate_filepath( file_path: PathType, platform: Optional[PlatformType] = None, min_len: int = DEFAULT_MIN_LEN, - max_len: Optional[int] = None, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> None: """Verifying whether the ``file_path`` is a valid file path or not. @@ -325,13 +348,7 @@ def validate_filepath( Minimum byte length of the ``file_path``. The value must be greater or equal to one. Defaults to ``1``. max_len (Optional[int], optional): - Maximum byte length of the ``file_path``. If the value is |None| or minus, - automatically determined by the ``platform``: - - - ``Linux``: 4096 - - ``macOS``: 1024 - - ``Windows``: 260 - - ``universal``: 260 + [Deprecated] Use 'max_filepath_len' instead. fs_encoding (Optional[str], optional): Filesystem encoding that used to calculate the byte length of the file path. If |None|, get the value from the execution environment. @@ -340,6 +357,17 @@ def validate_filepath( Defaults to |True|. 
additional_reserved_names (Optional[Sequence[str]], optional): Additional reserved names to check. + max_filename_len: + Maximum byte length of each component of the ``file_path``. + Defaults to ``255``. + max_filepath_len: + Maximum byte length of the ``file_path``. + Defaults to: + + - ``Linux``: 4096 + - ``macOS``: 1024 + - ``Windows``: 260 + - ``universal``: 260 Raises: ValidationError (ErrorReason.INVALID_CHARACTER): @@ -359,11 +387,18 @@ def validate_filepath( `Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs `__ """ + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. Use 'max_filepath_len' instead.", + DeprecationWarning, + ) + max_filepath_len = max_len FilePathValidator( platform=platform, min_len=min_len, - max_len=-1 if max_len is None else max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, @@ -374,10 +409,12 @@ def is_valid_filepath( file_path: PathType, platform: Optional[PlatformType] = None, min_len: int = DEFAULT_MIN_LEN, - max_len: Optional[int] = None, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> bool: """Check whether the ``file_path`` is a valid name or not. @@ -393,11 +430,18 @@ def is_valid_filepath( See Also: :py:func:`.validate_filepath()` """ + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. 
Use 'max_filepath_len' instead.", + DeprecationWarning, + ) + max_filepath_len = max_len return FilePathValidator( platform=platform, min_len=min_len, - max_len=-1 if max_len is None else max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, @@ -408,7 +452,7 @@ def sanitize_filepath( file_path: PathType, replacement_text: str = "", platform: Optional[PlatformType] = None, - max_len: Optional[int] = None, + max_len: Optional[int] = None, # deprecated fs_encoding: Optional[str] = None, check_reserved: Optional[bool] = None, null_value_handler: Optional[ValidationErrorHandler] = None, @@ -416,6 +460,8 @@ def sanitize_filepath( additional_reserved_names: Optional[Sequence[str]] = None, normalize: bool = True, validate_after_sanitize: bool = False, + max_filename_len: Optional[int] = None, + max_filepath_len: Optional[int] = None, ) -> PathType: """Make a valid file path from a string. @@ -442,14 +488,7 @@ def sanitize_filepath( .. include:: platform.txt max_len: - Maximum byte length of the file path. - Truncate the path if the value length exceeds the `max_len`. - If the value is |None| or minus, ``max_len`` will automatically determined by the ``platform``: - - - ``Linux``: 4096 - - ``macOS``: 1024 - - ``Windows``: 260 - - ``universal``: 260 + [Deprecated] Use 'max_filepath_len' instead. fs_encoding: Filesystem encoding that used to calculate the byte length of the file path. If |None|, get the value from the execution environment. @@ -481,6 +520,18 @@ def sanitize_filepath( If |True|, normalize the the file path. validate_after_sanitize: Execute validation after sanitization to the file path. + max_filename_len: + Maximum byte length of each component of the ``file_path``. + Truncate each component if the length exceeds this value. + Defaults to ``255``. + max_filepath_len: + Maximum byte length of the ``file_path``. 
+ Defaults to: + + - ``Linux``: 4096 + - ``macOS``: 1024 + - ``Windows``: 260 + - ``universal``: 260 Returns: Same type as the argument (str or PathLike object): @@ -493,6 +544,12 @@ def sanitize_filepath( Example: :ref:`example-sanitize-file-path` """ + if max_len is not None: + warnings.warn( + "'max_len' is deprecated. Use 'max_filepath_len' instead.", + DeprecationWarning, + ) + max_filepath_len = max_len if check_reserved is not None: warnings.warn( @@ -505,7 +562,8 @@ def sanitize_filepath( return FilePathSanitizer( platform=platform, - max_len=-1 if max_len is None else max_len, + max_filename_len=max_filename_len, + max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, normalize=normalize, null_value_handler=null_value_handler, From 27af09d4360a72cea1d7465f256863417d260f8b Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Mon, 15 Jul 2024 14:11:05 -0400 Subject: [PATCH 2/7] Fix filepath validation --- pathvalidate/_base.py | 4 ++-- pathvalidate/_filepath.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pathvalidate/_base.py b/pathvalidate/_base.py index 98d9ce5..06d855c 100644 --- a/pathvalidate/_base.py +++ b/pathvalidate/_base.py @@ -5,8 +5,8 @@ import abc import os import sys -from typing import ClassVar, Optional, Sequence, Tuple import warnings +from typing import ClassVar, Optional, Sequence, Tuple from ._common import normalize_platform, unprintable_ascii_chars from ._const import DEFAULT_MIN_LEN, Platform @@ -80,7 +80,7 @@ def __init__( if max_filename_len is None or max_filename_len <= 0: self._max_filename_len = platform_max_filename_len else: - self._max_filename_len = min(max_filename_len, platform_max_filename_len) + self._max_filename_len = max_filename_len # name cannot be longer than max path length self._max_filename_len = min(self._max_filename_len, self._max_filepath_len) diff --git a/pathvalidate/_filepath.py b/pathvalidate/_filepath.py index 69ef656..e613e4c 100644 --- a/pathvalidate/_filepath.py +++ 
b/pathvalidate/_filepath.py @@ -199,7 +199,9 @@ def __init__( self.__fname_validator = FileNameValidator( min_len=min_len, - max_len=max_len, + max_filename_len=self.max_filename_len, + max_filepath_len=self.max_filepath_len, + fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, platform=platform, @@ -249,8 +251,7 @@ def validate(self, value: PathType) -> None: for entry in unicode_filepath.split("/"): if not entry or entry in (".", ".."): continue - - self.__fname_validator._validate_reserved_keywords(entry) + self.__fname_validator.validate(entry) if self._is_windows(include_universal=True): self.__validate_win_filepath(unicode_filepath) From a58201cd2b20eaa15cbd171fd90d4a35f812206c Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Mon, 15 Jul 2024 14:21:11 -0400 Subject: [PATCH 3/7] Fix tests --- test/test_filepath.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/test/test_filepath.py b/test/test_filepath.py index ece45cc..9fb56da 100644 --- a/test/test_filepath.py +++ b/test/test_filepath.py @@ -4,6 +4,7 @@ .. 
codeauthor:: Tsuyoshi Hombashi """ +import math import platform as m_platform import random import sys @@ -216,7 +217,8 @@ def test_normal_min_len(self, value, min_len, expected): def test_normal_max_len(self, value, platform, max_len, expected): kwargs = { "platform": platform, - "max_len": max_len, + "max_filepath_len": max_len, + "max_filename_len": math.inf, # ignore filename checks } if expected is None: @@ -376,8 +378,13 @@ def test_relative_path(self, test_platform, value, expected): ], ) def test_normal_space_or_period_at_tail(self, platform, value): - validate_filepath(value, platform=platform) - assert is_valid_filepath(value, platform=platform) + if platform == "windows" or platform == "universal": + with pytest.raises(ValidationError): + validate_filepath(value, platform=platform) + assert not is_valid_filepath(value, platform=platform) + else: + validate_filepath(value, platform=platform) + assert is_valid_filepath(value, platform=platform) @pytest.mark.skipif(not is_faker_installed(), reason="requires faker") @pytest.mark.parametrize( From e3fdafcf9e2de59f75b7dcb5df069ded746a984c Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Mon, 15 Jul 2024 14:43:04 -0400 Subject: [PATCH 4/7] Update tests for max filename and max filepath lengths --- test/test_filepath.py | 47 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/test/test_filepath.py b/test/test_filepath.py index 9fb56da..5dfd0e6 100644 --- a/test/test_filepath.py +++ b/test/test_filepath.py @@ -218,7 +218,52 @@ def test_normal_max_len(self, value, platform, max_len, expected): kwargs = { "platform": platform, "max_filepath_len": max_len, - "max_filename_len": math.inf, # ignore filename checks + "max_filename_len": math.inf, # ignore filename length checks + } + + if expected is None: + validate_filepath(value, **kwargs) + assert is_valid_filepath(value, **kwargs) + return + + with pytest.raises(ValidationError) as e: + validate_filepath(value, 
**kwargs) + assert e.value.reason == ErrorReason.INVALID_LENGTH + assert e.value.fs_encoding + assert e.value.byte_count + assert e.value.byte_count > 0 + + @pytest.mark.parametrize( + ["value", "platform", "max_path_len", "max_name_len", "expected"], + [ + ["a/" + "a" * 255, "linux", None, None, None], + ["a/" + "a" * 256, "linux", None, None, ErrorReason.INVALID_LENGTH], + ["a/" + "a" * 255, "windows", None, None, None], + ["a/" + "a" * 256, "windows", None, None, ErrorReason.INVALID_LENGTH], + ["a/" + "a" * 255, "universal", None, None, None], + ["a/" + "a" * 256, "universal", None, None, ErrorReason.INVALID_LENGTH], + ["/".join("a" * 255 for _ in range(16)), "linux", None, None, None], + [ + "/".join("a" * 255 for _ in range(17)), + "linux", + None, + None, + ErrorReason.INVALID_LENGTH, + ], + ["a/" + "a" * 255 + "/aa", "windows", None, None, None], + ["a/" + "a" * 255 + "/aa", "universal", None, None, None], + ["a/" + "a" * 255 + "/aaa", "windows", None, None, ErrorReason.INVALID_LENGTH], + ["a/" + "a" * 255 + "/aaa", "universal", None, None, ErrorReason.INVALID_LENGTH], + ["/".join("a" * 10 for _ in range(5)), "universal", 54, 10, None], + ["/".join("a" * 10 for _ in range(5)), "universal", 53, 10, ErrorReason.INVALID_LENGTH], + ["/".join("a" * 10 for _ in range(5)), "universal", 54, 9, ErrorReason.INVALID_LENGTH], + ], + ) + def test_max_name_max_path_len(self, value, platform, max_path_len, max_name_len, expected): + kwargs = { + "platform": platform, + "max_filepath_len": max_path_len, + "max_filename_len": max_name_len, } if expected is None: From c8cc600fb69f772f531f9f81cbeeaf2283023b48 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sun, 28 Jul 2024 10:14:37 -0400 Subject: [PATCH 5/7] Undeprecate max_len, remove max_filepath_len for filename classes --- pathvalidate/_filename.py | 62 ++++++--------------------------------- pathvalidate/_filepath.py | 36 +++++------------------ 2 files changed, 16 insertions(+), 82 deletions(-) diff --git 
a/pathvalidate/_filename.py b/pathvalidate/_filename.py index e825ec6..0203a77 100644 --- a/pathvalidate/_filename.py +++ b/pathvalidate/_filename.py @@ -27,7 +27,7 @@ class FileNameSanitizer(AbstractSanitizer): def __init__( self, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, platform: Optional[PlatformType] = None, null_value_handler: Optional[ValidationErrorHandler] = None, @@ -36,13 +36,8 @@ def __init__( validate_after_sanitize: bool = False, validator: Optional[AbstractValidator] = None, max_filename_len: Optional[int] = None, - max_filepath_len: Optional[int] = None, ) -> None: if max_len is not None: - warnings.warn( - "'max_len' is deprecated. Use 'max_filename_len' instead.", - DeprecationWarning, - ) max_filename_len = max_len if validator: fname_validator = validator @@ -59,7 +54,7 @@ def __init__( super().__init__( fs_encoding=fs_encoding, max_filename_len=max_filename_len, - max_filepath_len=max_filepath_len, + max_filepath_len=max_filename_len, null_value_handler=null_value_handler, reserved_name_handler=reserved_name_handler, additional_reserved_names=additional_reserved_names, @@ -158,24 +153,19 @@ def reserved_keywords(self) -> Tuple[str, ...]: def __init__( self, min_len: int = DEFAULT_MIN_LEN, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, platform: Optional[PlatformType] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, max_filename_len: Optional[int] = None, - max_filepath_len: Optional[int] = None, ) -> None: if max_len is not None: - warnings.warn( - "'max_len' is deprecated. 
Use 'max_filename_len' instead.", - DeprecationWarning, - ) max_filename_len = max_len super().__init__( min_len=min_len, max_filename_len=max_filename_len, - max_filepath_len=max_filepath_len, + max_filepath_len=max_filename_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, @@ -283,12 +273,11 @@ def validate_filename( filename: PathType, platform: Optional[PlatformType] = None, min_len: int = DEFAULT_MIN_LEN, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, max_filename_len: Optional[int] = None, - max_filepath_len: Optional[int] = None, ) -> None: """Verifying whether the ``filename`` is a valid file name or not. @@ -303,7 +292,7 @@ def validate_filename( Minimum byte length of the ``filename``. The value must be greater or equal to one. Defaults to ``1``. max_len: - [Deprecated] Use 'max_filename_len' instead. + Alias for ``max_filename_len``. fs_encoding: Filesystem encoding that used to calculate the byte length of the filename. If |None|, get the value from the execution environment. @@ -315,14 +304,6 @@ def validate_filename( max_filename_len: Maximum byte length of the ``filename``. Defaults to ``255``. - max_filepath_len: - Maximum byte length of the file path. - Defaults to: - - - ``Linux``: 4096 - - ``macOS``: 1024 - - ``Windows``: 260 - - ``universal``: 260 Raises: ValidationError (ErrorReason.INVALID_LENGTH): @@ -345,16 +326,11 @@ def validate_filename( `__ """ if max_len is not None: - warnings.warn( - "'max_len' is deprecated. 
Use 'max_filename_len' instead.", - DeprecationWarning, - ) max_filename_len = max_len FileNameValidator( platform=platform, min_len=min_len, max_filename_len=max_filename_len, - max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, @@ -365,12 +341,11 @@ def is_valid_filename( filename: PathType, platform: Optional[PlatformType] = None, min_len: int = DEFAULT_MIN_LEN, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, max_filename_len: Optional[int] = None, - max_filepath_len: Optional[int] = None, ) -> bool: """Check whether the ``filename`` is a valid name or not. @@ -387,17 +362,12 @@ def is_valid_filename( :py:func:`.validate_filename()` """ if max_len is not None: - warnings.warn( - "'max_len' is deprecated. Use 'max_filename_len' instead.", - DeprecationWarning, - ) max_filename_len = max_len return FileNameValidator( platform=platform, min_len=min_len, max_filename_len=max_filename_len, - max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, @@ -408,7 +378,7 @@ def sanitize_filename( filename: PathType, replacement_text: str = "", platform: Optional[PlatformType] = None, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, check_reserved: Optional[bool] = None, null_value_handler: Optional[ValidationErrorHandler] = None, @@ -416,7 +386,6 @@ def sanitize_filename( additional_reserved_names: Optional[Sequence[str]] = None, validate_after_sanitize: bool = False, max_filename_len: Optional[int] = None, - max_filepath_len: Optional[int] = None, ) -> PathType: """Make a valid filename from a string. @@ -441,7 +410,7 @@ def sanitize_filename( .. 
include:: platform.txt max_len: - [Deprecated] Use 'max_filename_len' instead. + Alias for ``max_filename_len``. fs_encoding: Filesystem encoding that used to calculate the byte length of the filename. If |None|, get the value from the execution environment. @@ -475,14 +444,6 @@ def sanitize_filename( Maximum byte length of the ``filename``. Truncate the name length if the ``filename`` length exceeds this value. Defaults to ``255``. - max_filepath_len: - Maximum byte length of the file path. - Defaults to: - - - ``Linux``: 4096 - - ``macOS``: 1024 - - ``Windows``: 260 - - ``universal``: 260 Returns: Same type as the ``filename`` (str or PathLike object): @@ -496,10 +457,6 @@ def sanitize_filename( :ref:`example-sanitize-filename` """ if max_len is not None: - warnings.warn( - "'max_len' is deprecated. Use 'max_filename_len' instead.", - DeprecationWarning, - ) max_filename_len = max_len if check_reserved is not None: @@ -514,7 +471,6 @@ def sanitize_filename( return FileNameSanitizer( platform=platform, max_filename_len=max_filename_len, - max_filepath_len=max_filepath_len, fs_encoding=fs_encoding, null_value_handler=null_value_handler, reserved_name_handler=reserved_name_handler, diff --git a/pathvalidate/_filepath.py b/pathvalidate/_filepath.py index e613e4c..ccae212 100644 --- a/pathvalidate/_filepath.py +++ b/pathvalidate/_filepath.py @@ -26,7 +26,7 @@ class FilePathSanitizer(AbstractSanitizer): def __init__( self, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, platform: Optional[PlatformType] = None, null_value_handler: Optional[ValidationErrorHandler] = None, @@ -39,10 +39,6 @@ def __init__( max_filepath_len: Optional[int] = None, ) -> None: if max_len is not None: - warnings.warn( - "'max_len' is deprecated. 
Use 'max_filepath_len' instead.", - DeprecationWarning, - ) max_filepath_len = max_len if validator: @@ -72,7 +68,6 @@ def __init__( self._sanitize_regexp = self._get_sanitize_regexp() self.__fname_sanitizer = FileNameSanitizer( max_filename_len=self.max_filename_len, - max_filepath_len=self.max_filepath_len, fs_encoding=fs_encoding, null_value_handler=null_value_handler, reserved_name_handler=reserved_name_handler, @@ -173,7 +168,7 @@ def reserved_keywords(self) -> Tuple[str, ...]: def __init__( self, min_len: int = DEFAULT_MIN_LEN, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, platform: Optional[PlatformType] = None, check_reserved: bool = True, @@ -182,10 +177,6 @@ def __init__( max_filepath_len: Optional[int] = None, ) -> None: if max_len is not None: - warnings.warn( - "'max_len' is deprecated. Use 'max_filepath_len' instead.", - DeprecationWarning, - ) max_filepath_len = max_len super().__init__( min_len=min_len, @@ -200,7 +191,6 @@ def __init__( self.__fname_validator = FileNameValidator( min_len=min_len, max_filename_len=self.max_filename_len, - max_filepath_len=self.max_filepath_len, fs_encoding=fs_encoding, check_reserved=check_reserved, additional_reserved_names=additional_reserved_names, @@ -329,7 +319,7 @@ def validate_filepath( file_path: PathType, platform: Optional[PlatformType] = None, min_len: int = DEFAULT_MIN_LEN, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, @@ -349,7 +339,7 @@ def validate_filepath( Minimum byte length of the ``file_path``. The value must be greater or equal to one. Defaults to ``1``. max_len (Optional[int], optional): - [Deprecated] Use 'max_filepath_len' instead. + Alias for ``max_filepath_len``. 
fs_encoding (Optional[str], optional): Filesystem encoding that used to calculate the byte length of the file path. If |None|, get the value from the execution environment. @@ -389,10 +379,6 @@ def validate_filepath( `__ """ if max_len is not None: - warnings.warn( - "'max_len' is deprecated. Use 'max_filepath_len' instead.", - DeprecationWarning, - ) max_filepath_len = max_len FilePathValidator( @@ -410,7 +396,7 @@ def is_valid_filepath( file_path: PathType, platform: Optional[PlatformType] = None, min_len: int = DEFAULT_MIN_LEN, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, check_reserved: bool = True, additional_reserved_names: Optional[Sequence[str]] = None, @@ -432,10 +418,6 @@ def is_valid_filepath( :py:func:`.validate_filepath()` """ if max_len is not None: - warnings.warn( - "'max_len' is deprecated. Use 'max_filepath_len' instead.", - DeprecationWarning, - ) max_filepath_len = max_len return FilePathValidator( @@ -453,7 +435,7 @@ def sanitize_filepath( file_path: PathType, replacement_text: str = "", platform: Optional[PlatformType] = None, - max_len: Optional[int] = None, # deprecated + max_len: Optional[int] = None, fs_encoding: Optional[str] = None, check_reserved: Optional[bool] = None, null_value_handler: Optional[ValidationErrorHandler] = None, @@ -489,7 +471,7 @@ def sanitize_filepath( .. include:: platform.txt max_len: - [Deprecated] Use 'max_filepath_len' instead. + Alias for ``max_filepath_len``. fs_encoding: Filesystem encoding that used to calculate the byte length of the file path. If |None|, get the value from the execution environment. @@ -546,10 +528,6 @@ def sanitize_filepath( :ref:`example-sanitize-file-path` """ if max_len is not None: - warnings.warn( - "'max_len' is deprecated. 
Use 'max_filepath_len' instead.", - DeprecationWarning, - ) max_filepath_len = max_len if check_reserved is not None: From 871df1ebe0f8203d8c7c6f7a58099f03f80ef355 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sun, 28 Jul 2024 10:17:08 -0400 Subject: [PATCH 6/7] Remove deprecation warning --- pathvalidate/_base.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pathvalidate/_base.py b/pathvalidate/_base.py index 06d855c..dc42116 100644 --- a/pathvalidate/_base.py +++ b/pathvalidate/_base.py @@ -5,7 +5,6 @@ import abc import os import sys -import warnings from typing import ClassVar, Optional, Sequence, Tuple from ._common import normalize_platform, unprintable_ascii_chars @@ -34,10 +33,6 @@ def reserved_keywords(self) -> Tuple[str, ...]: @property def max_len(self) -> int: - warnings.warn( - "'max_len' is deprecated. Use 'max_filepath_len' instead.", - DeprecationWarning, - ) return self._max_filepath_len @property From b029675f903ffd751deda9660028d6f962246b90 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sun, 28 Jul 2024 10:26:06 -0400 Subject: [PATCH 7/7] Make documentation similar to what it was before --- pathvalidate/_filepath.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pathvalidate/_filepath.py b/pathvalidate/_filepath.py index ccae212..2cef0c6 100644 --- a/pathvalidate/_filepath.py +++ b/pathvalidate/_filepath.py @@ -352,8 +352,8 @@ def validate_filepath( Maximum byte length of each component of the ``file_path``. Defaults to ``255``. max_filepath_len: - Maximum byte length of the ``file_path``. - Defaults to: + Maximum byte length of the ``file_path``. If the value is |None| or not positive, + it is automatically determined by the ``platform``: - ``Linux``: 4096 - ``macOS``: 1024 @@ -508,8 +508,8 @@ def sanitize_filepath( Truncate each component if the length exceeds this value. Defaults to ``255``. max_filepath_len: - Maximum byte length of the ``file_path``. - Defaults to: + Maximum byte length of the ``file_path``. 
If the value is |None| or not positive, + it is automatically determined by the ``platform``: - ``Linux``: 4096 - ``macOS``: 1024