Skip to content

Commit 3743c20

Browse files
authored
Merge pull request #120 from jaraco/list-perf
Listdir performance optimization - separate class
2 parents 093eca8 + 4b9e000 commit 3743c20

3 files changed

Lines changed: 217 additions & 8 deletions

File tree

CHANGES.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
10.3
2+
----
3+
4+
- #115: Added a new performance-optimized implementation
5+
for listdir operations, optimizing ``listdir``, ``walk``,
6+
``walkfiles``, ``walkdirs``, and ``fnmatch``, presented
7+
as the ``FastPath`` class.
8+
9+
Please direct feedback on this implementation to the ticket,
10+
especially if the performance benefits justify it replacing
11+
the default ``Path`` class.
12+
113
10.2
214
----
315

path.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1712,3 +1712,189 @@ class CaseInsensitivePattern(text_type):
17121712
@property
def normcase(self):
    """
    Return the ``normcase`` function of the ``ntpath`` module.

    ``ntpath`` is imported by name at access time via ``__import__``.
    """
    ntpath_module = __import__('ntpath')
    return ntpath_module.normcase
1715+
1716+
1717+
class FastPath(Path):
    """
    Performance optimized version of Path for use
    on embedded platforms and other systems with limited
    CPU. See #115 and #116 for background.

    The filename pattern and its ``normcase`` function are prepared once
    per public call and then handed down through the recursion, instead
    of being recomputed for every entry that is visited.
    """

    def listdir(self, pattern=None):
        """
        Return the entries of this directory as path objects.

        `pattern` - (optional) A filename pattern with wildcards, for
        example ``'*.py'``. When given, only entries whose name matches
        the pattern are returned.
        """
        children = os.listdir(self)
        if pattern is None:
            return [self / child for child in children]

        pattern, normcase = self.__prepare(pattern)
        return [
            self / child
            for child in children
            if self._next_class(child).__fnmatch(pattern, normcase)
        ]

    def walk(self, pattern=None, errors='strict'):
        """
        Iterate recursively over the files and directories below this one.

        `pattern` - (optional) A filename pattern with wildcards. Only
        entries whose name matches are yielded; directories are descended
        into whether or not they match.

        `errors` - What to do when a directory cannot be listed or an
        entry cannot be accessed: ``'strict'`` re-raises the exception,
        ``'warn'`` issues a :class:`TreeWalkWarning` and continues,
        ``'ignore'`` continues silently. A callable may be passed
        instead; it is invoked with the error message.

        Raises :class:`ValueError` if `errors` is neither callable nor
        one of the three names above.
        """
        def strict(msg):
            # Only ever invoked from inside an ``except`` block, so the
            # bare raise re-raises the exception being handled.
            raise

        def warn(msg):
            warnings.warn(msg, TreeWalkWarning)

        def ignore(msg):
            pass

        # An explicit table: only these three names are valid, so stray
        # keys such as '__module__' can no longer pass validation.
        handlers = {'strict': strict, 'warn': warn, 'ignore': ignore}

        if not callable(errors):
            if errors not in handlers:
                raise ValueError("invalid errors parameter")
            errors = handlers[errors]

        pattern, normcase = self.__prepare_optional(pattern)
        return self.__walk(pattern, normcase, errors)

    def __walk(self, pattern, normcase, errors):
        """ Prepared version of walk """
        try:
            children = self.listdir()
        except Exception:
            exc = sys.exc_info()[1]
            errors("Unable to list directory '%s': %s" % (self, exc))
            return

        for child in children:
            if pattern is None or child.__fnmatch(pattern, normcase):
                yield child
            try:
                isdir = child.isdir()
            except Exception:
                exc = sys.exc_info()[1]
                errors("Unable to access '%s': %s" % (child, exc))
                isdir = False

            if isdir:
                for item in child.__walk(pattern, normcase, errors):
                    yield item

    def walkdirs(self, pattern=None, errors='strict'):
        """
        Iterate recursively over the subdirectories below this one.

        `pattern` - (optional) A filename pattern with wildcards. Only
        directories whose name matches are yielded; every directory is
        descended into whether or not it matches.

        `errors` - ``'strict'``, ``'warn'`` or ``'ignore'``; any other
        value raises :class:`ValueError`.
        """
        if errors not in ('strict', 'warn', 'ignore'):
            raise ValueError("invalid errors parameter")

        pattern, normcase = self.__prepare_optional(pattern)
        return self.__walkdirs(pattern, normcase, errors)

    def __walkdirs(self, pattern, normcase, errors):
        """ Prepared version of walkdirs """
        try:
            dirs = self.dirs()
        except Exception:
            if errors == 'ignore':
                return
            elif errors == 'warn':
                warnings.warn(
                    "Unable to list directory '%s': %s"
                    % (self, sys.exc_info()[1]),
                    TreeWalkWarning)
                return
            else:
                raise

        for child in dirs:
            if pattern is None or child.__fnmatch(pattern, normcase):
                yield child
            for subsubdir in child.__walkdirs(pattern, normcase, errors):
                yield subsubdir

    def walkfiles(self, pattern=None, errors='strict'):
        """
        Iterate recursively over the files below this directory.

        `pattern` - (optional) A filename pattern with wildcards. Only
        files whose name matches are yielded; every directory is
        descended into regardless of its name.

        `errors` - ``'strict'``, ``'warn'`` or ``'ignore'``; any other
        value raises :class:`ValueError`.
        """
        if errors not in ('strict', 'warn', 'ignore'):
            raise ValueError("invalid errors parameter")

        pattern, normcase = self.__prepare_optional(pattern)
        return self.__walkfiles(pattern, normcase, errors)

    def __walkfiles(self, pattern, normcase, errors):
        """ Prepared version of walkfiles """
        try:
            children = self.listdir()
        except Exception:
            if errors == 'ignore':
                return
            elif errors == 'warn':
                warnings.warn(
                    "Unable to list directory '%s': %s"
                    % (self, sys.exc_info()[1]),
                    TreeWalkWarning)
                return
            else:
                raise

        for child in children:
            try:
                isfile = child.isfile()
                isdir = not isfile and child.isdir()
            except Exception:
                # ``Exception`` rather than a bare except, so that
                # KeyboardInterrupt and SystemExit are never swallowed.
                if errors == 'ignore':
                    continue
                elif errors == 'warn':
                    # Report the entry that failed, not its parent.
                    warnings.warn(
                        "Unable to access '%s': %s"
                        % (child, sys.exc_info()[1]),
                        TreeWalkWarning)
                    continue
                else:
                    raise

            if isfile:
                if pattern is None or child.__fnmatch(pattern, normcase):
                    yield child
            elif isdir:
                for f in child.__walkfiles(pattern, normcase, errors):
                    yield f

    def __fnmatch(self, pattern, normcase):
        """ Return ``True`` if `self.name` matches the given `pattern`,
        prepared version.

        `pattern` - A filename pattern with wildcards,
        for example ``'*.py'``. The pattern is expected to be normcase'd
        already.

        `normcase` - A function used to normalize the filename before
        matching.

        .. seealso:: :func:`Path.fnmatch`
        """
        return fnmatch.fnmatchcase(normcase(self.name), pattern)

    def __prepare(self, pattern, normcase=None):
        """ Prepare an fnmatch pattern for use with ``FastPath.__fnmatch``.

        `pattern` - A filename pattern with wildcards,
        for example ``'*.py'``. If the pattern has a `normcase`
        attribute, that function is used in preference to the default.

        `normcase` - (optional) A function used to normalize the pattern and
        filename before matching. Defaults to the pattern's own `normcase`
        attribute if it has one, otherwise to ``self.module.normcase``.

        Returns the tuple ``(normalized pattern, normcase function)``.

        .. seealso:: :func:`FastPath.__fnmatch`
        """
        if not normcase:
            normcase = getattr(pattern, 'normcase', self.module.normcase)
        pattern = normcase(pattern)
        return pattern, normcase

    def __prepare_optional(self, pattern):
        """ Like ``__prepare``, but map a `pattern` of ``None`` to
        ``(None, None)``, meaning "no filtering".

        Only ``None`` disables filtering, which is the same test
        ``listdir`` and the recursive helpers apply. An empty pattern is
        prepared like any other, so it no longer reaches the helpers with
        a ``None`` normcase function.
        """
        if pattern is None:
            return None, None
        return self.__prepare(pattern)

    def fnmatch(self, pattern, normcase=None):
        """ Return ``True`` if `self.name` matches the given `pattern`.

        `pattern` - A filename pattern with wildcards,
        for example ``'*.py'``.

        `normcase` - (optional) A function used to normalize the pattern
        and filename before matching.

        Raises :class:`ValueError` if `pattern` is empty or ``None``.
        """
        if not pattern:
            raise ValueError("No pattern provided")

        pattern, normcase = self.__prepare(pattern, normcase)
        return self.__fnmatch(pattern, normcase)

test_path.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,27 @@
3131
import pytest
3232

3333
import path
34-
from path import Path, tempdir
34+
from path import tempdir
3535
from path import CaseInsensitivePattern as ci
3636
from path import SpecialResolver
3737
from path import Multi
3838

39+
Path = None
40+
3941

4042
def p(**choices):
    """
    Pick the keyword argument whose name equals ``os.name``.

    Raises :class:`KeyError` when no value was supplied for the
    current ``os.name``.
    """
    platform_key = os.name
    return choices[platform_key]
4345

4446

47+
@pytest.fixture(autouse=True, params=[path.Path, path.FastPath])
def path_class(request, monkeypatch):
    """
    Run each test once per Path implementation listed in ``params``.

    The module-level name ``Path`` is rebound through
    ``monkeypatch.setitem`` to the class selected for the current run.
    """
    module_namespace = globals()
    selected_class = request.param
    monkeypatch.setitem(module_namespace, 'Path', selected_class)
53+
54+
4555
class TestBasics:
4656
def test_relpath(self):
4757
root = Path(p(nt='C:\\', posix='/'))
@@ -789,17 +799,17 @@ def test_chdir_or_cd(self, tmpdir):
789799

790800

791801
class TestSubclass:
792-
class PathSubclass(Path):
793-
pass
794802

795803
def test_subclass_produces_same_class(self):
796804
"""
797805
When operations are invoked on a subclass, they should produce another
798806
instance of that subclass.
799807
"""
800-
p = self.PathSubclass('/foo')
808+
class PathSubclass(Path):
809+
pass
810+
p = PathSubclass('/foo')
801811
subdir = p / 'bar'
802-
assert isinstance(subdir, self.PathSubclass)
812+
assert isinstance(subdir, PathSubclass)
803813

804814

805815
class TestTempDir:
@@ -809,7 +819,7 @@ def test_constructor(self):
809819
One should be able to readily construct a temporary directory
810820
"""
811821
d = tempdir()
812-
assert isinstance(d, Path)
822+
assert isinstance(d, path.Path)
813823
assert d.exists()
814824
assert d.isdir()
815825
d.rmdir()
@@ -822,7 +832,7 @@ def test_next_class(self):
822832
"""
823833
d = tempdir()
824834
sub = d / 'subdir'
825-
assert isinstance(sub, Path)
835+
assert isinstance(sub, path.Path)
826836
d.rmdir()
827837

828838
def test_context_manager(self):
@@ -1084,7 +1094,8 @@ def test_for_class(self):
10841094
cls = Multi.for_class(Path)
10851095
assert issubclass(cls, Path)
10861096
assert issubclass(cls, Multi)
1087-
assert cls.__name__ == 'MultiPath'
1097+
expected_name = 'Multi' + Path.__name__
1098+
assert cls.__name__ == expected_name
10881099

10891100
def test_detect_no_pathsep(self):
10901101
"""

0 commit comments

Comments
 (0)