|
1 | | -from collections import deque, namedtuple |
2 | 1 | from functools import wraps |
3 | 2 | from inspect import Parameter, signature |
4 | 3 | import logging |
5 | 4 | import os |
6 | 5 | import os.path as op |
7 | 6 | import shutil |
8 | | -import time |
9 | 7 | import appdirs |
10 | 8 | import joblib |
| 9 | +from .fastio import walk |
| 10 | +from .util import DirFingerprint, FileFingerprint |
11 | 11 |
|
12 | 12 | lgr = logging.getLogger(__name__) |
13 | 13 |
|
@@ -114,7 +114,7 @@ def fingerprinter(*args, **kwargs): |
114 | 114 | if op.isdir(path): |
115 | 115 | fprint = self._get_dir_fingerprint(path) |
116 | 116 | else: |
117 | | - fprint = self._get_file_fingerprint(path) |
| 117 | + fprint = FileFingerprint.for_file(path) |
118 | 118 | if fprint is None: |
119 | 119 | lgr.debug("Calling %s directly since no fingerprint for %r", f, path) |
120 | 120 | # just call the function -- we have no fingerprint, |
@@ -143,67 +143,8 @@ def fingerprinter(*args, **kwargs): |
143 | 143 | return fingerprinter |
144 | 144 |
|
145 | 145 | @staticmethod |
146 | | - def _get_file_fingerprint(path): |
147 | | - """Simplistic generic file fingerprinting based on ctime, mtime, and size |
148 | | - """ |
149 | | - try: |
150 | | - # we can't take everything, since atime can change, etc. |
151 | | - # So let's take some |
152 | | - s = os.stat(path, follow_symlinks=True) |
153 | | - fprint = FileFingerprint.from_stat(s) |
154 | | - lgr.log(5, "Fingerprint for %s: %s", path, fprint) |
155 | | - return fprint |
156 | | - except Exception as exc: |
157 | | - lgr.debug(f"Cannot fingerprint {path}: {exc}") |
158 | | - |
159 | | - @staticmethod |
160 | | - def _get_dir_fingerprint(path): |
161 | | - fprint = DirFingerprint() |
162 | | - dirqueue = deque([path]) |
163 | | - try: |
164 | | - while dirqueue: |
165 | | - d = dirqueue.popleft() |
166 | | - with os.scandir(d) as entries: |
167 | | - for e in entries: |
168 | | - if e.is_dir(follow_symlinks=True): |
169 | | - dirqueue.append(e.path) |
170 | | - else: |
171 | | - s = e.stat(follow_symlinks=True) |
172 | | - fprint.add_file(e.path, FileFingerprint.from_stat(s)) |
173 | | - except Exception as exc: |
174 | | - lgr.debug(f"Cannot fingerprint {path}: {exc}") |
175 | | - return None |
176 | | - else: |
177 | | - return fprint |
178 | | - |
179 | | - |
180 | | -class FileFingerprint(namedtuple("FileFingerprint", "mtime_ns ctime_ns size inode")): |
181 | | - @classmethod |
182 | | - def from_stat(cls, s): |
183 | | - return cls(s.st_mtime_ns, s.st_ctime_ns, s.st_size, s.st_ino) |
184 | | - |
185 | | - def modified_in_window(self, min_dtime): |
186 | | - return abs(time.time() - self.mtime_ns * 1e-9) < min_dtime |
187 | | - |
188 | | - def to_tuple(self): |
189 | | - return tuple(self) |
190 | | - |
191 | | - |
192 | | -class DirFingerprint: |
193 | | - def __init__(self): |
194 | | - self.last_modified = None |
195 | | - self.tree_fprints = {} |
196 | | - |
197 | | - def add_file(self, path, fprint: FileFingerprint): |
198 | | - self.tree_fprints[path] = fprint |
199 | | - if self.last_modified is None or self.last_modified < fprint.mtime_ns: |
200 | | - self.last_modified = fprint.mtime_ns |
201 | | - |
202 | | - def modified_in_window(self, min_dtime): |
203 | | - if self.last_modified is None: |
204 | | - return False |
205 | | - else: |
206 | | - return abs(time.time() - self.last_modified * 1e-9) < min_dtime |
207 | | - |
208 | | - def to_tuple(self): |
209 | | - return sum(sorted(self.tree_fprints.items()), ()) |
| 146 | + def _get_dir_fingerprint(dirpath): |
| 147 | + dprint = DirFingerprint() |
| 148 | + for path, fprint in walk(dirpath): |
| 149 | + dprint.add_file(path, fprint) |
| 150 | + return dprint |
0 commit comments