|
1 | 1 | import io
|
2 | 2 | import os
|
3 | 3 | import re
|
4 |
| -import six |
5 | 4 | import tarfile
|
6 | 5 | import tempfile
|
7 | 6 |
|
| 7 | +import six |
| 8 | + |
| 9 | +from .fnmatch import fnmatch |
8 | 10 | from ..constants import IS_WINDOWS_PLATFORM
|
9 |
| -from fnmatch import fnmatch |
10 |
| -from itertools import chain |
11 | 11 |
|
12 | 12 |
|
13 | 13 | _SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/')
|
@@ -44,92 +44,9 @@ def exclude_paths(root, patterns, dockerfile=None):
|
44 | 44 | if dockerfile is None:
|
45 | 45 | dockerfile = 'Dockerfile'
|
46 | 46 |
|
47 |
| - def split_path(p): |
48 |
| - return [pt for pt in re.split(_SEP, p) if pt and pt != '.'] |
49 |
| - |
50 |
| - def normalize(p): |
51 |
| - # Leading and trailing slashes are not relevant. Yes, |
52 |
| - # "foo.py/" must exclude the "foo.py" regular file. "." |
53 |
| - # components are not relevant either, even if the whole |
54 |
| - # pattern is only ".", as the Docker reference states: "For |
55 |
| - # historical reasons, the pattern . is ignored." |
56 |
| - # ".." component must be cleared with the potential previous |
57 |
| - # component, regardless of whether it exists: "A preprocessing |
58 |
| - # step [...] eliminates . and .. elements using Go's |
59 |
| - # filepath.". |
60 |
| - i = 0 |
61 |
| - split = split_path(p) |
62 |
| - while i < len(split): |
63 |
| - if split[i] == '..': |
64 |
| - del split[i] |
65 |
| - if i > 0: |
66 |
| - del split[i - 1] |
67 |
| - i -= 1 |
68 |
| - else: |
69 |
| - i += 1 |
70 |
| - return split |
71 |
| - |
72 |
| - patterns = ( |
73 |
| - (True, normalize(p[1:])) |
74 |
| - if p.startswith('!') else |
75 |
| - (False, normalize(p)) |
76 |
| - for p in patterns) |
77 |
| - patterns = list(reversed(list(chain( |
78 |
| - # Exclude empty patterns such as "." or the empty string. |
79 |
| - filter(lambda p: p[1], patterns), |
80 |
| - # Always include the Dockerfile and .dockerignore |
81 |
| - [(True, split_path(dockerfile)), (True, ['.dockerignore'])])))) |
82 |
| - return set(walk(root, patterns)) |
83 |
| - |
84 |
| - |
85 |
| -def walk(root, patterns, default=True): |
86 |
| - """ |
87 |
| - A collection of file lying below root that should be included according to |
88 |
| - patterns. |
89 |
| - """ |
90 |
| - |
91 |
| - def match(p): |
92 |
| - if p[1][0] == '**': |
93 |
| - rec = (p[0], p[1][1:]) |
94 |
| - return [p] + (match(rec) if rec[1] else [rec]) |
95 |
| - elif fnmatch(f, p[1][0]): |
96 |
| - return [(p[0], p[1][1:])] |
97 |
| - else: |
98 |
| - return [] |
99 |
| - |
100 |
| - for f in os.listdir(root): |
101 |
| - cur = os.path.join(root, f) |
102 |
| - # The patterns if recursing in that directory. |
103 |
| - sub = list(chain(*(match(p) for p in patterns))) |
104 |
| - # Whether this file is explicitely included / excluded. |
105 |
| - hit = next((p[0] for p in sub if not p[1]), None) |
106 |
| - # Whether this file is implicitely included / excluded. |
107 |
| - matched = default if hit is None else hit |
108 |
| - sub = list(filter(lambda p: p[1], sub)) |
109 |
| - if os.path.isdir(cur) and not os.path.islink(cur): |
110 |
| - # Entirely skip directories if there are no chance any subfile will |
111 |
| - # be included. |
112 |
| - if all(not p[0] for p in sub) and not matched: |
113 |
| - continue |
114 |
| - # I think this would greatly speed up dockerignore handling by not |
115 |
| - # recursing into directories we are sure would be entirely |
116 |
| - # included, and only yielding the directory itself, which will be |
117 |
| - # recursively archived anyway. However the current unit test expect |
118 |
| - # the full list of subfiles and I'm not 100% sure it would make no |
119 |
| - # difference yet. |
120 |
| - # if all(p[0] for p in sub) and matched: |
121 |
| - # yield f |
122 |
| - # continue |
123 |
| - children = False |
124 |
| - for r in (os.path.join(f, p) for p in walk(cur, sub, matched)): |
125 |
| - yield r |
126 |
| - children = True |
127 |
| - # The current unit tests expect directories only under those |
128 |
| - # conditions. It might be simplifiable though. |
129 |
| - if (not sub or not children) and hit or hit is None and default: |
130 |
| - yield f |
131 |
| - elif matched: |
132 |
| - yield f |
| 47 | + patterns.append('!' + dockerfile) |
| 48 | + pm = PatternMatcher(patterns) |
| 49 | + return set(pm.walk(root)) |
133 | 50 |
|
134 | 51 |
|
135 | 52 | def build_file_list(root):
|
@@ -217,3 +134,122 @@ def mkbuildcontext(dockerfile):
|
217 | 134 | t.close()
|
218 | 135 | f.seek(0)
|
219 | 136 | return f
|
| 137 | + |
| 138 | + |
def split_path(p):
    """Break a path string into its meaningful components.

    The string is split on the module-level ``_SEP`` pattern. Empty
    fragments (produced by leading, trailing or repeated separators) and
    ``.`` components are discarded.

    :param p: path or pattern string to split
    :return: list of the remaining components, in order
    """
    components = []
    for piece in _SEP.split(p):
        # Skip empty fragments and "current directory" markers.
        if piece and piece != '.':
            components.append(piece)
    return components
| 141 | + |
| 142 | + |
def normalize_slashes(p):
    """Return ``p`` using ``/`` as the separator on Windows.

    When ``IS_WINDOWS_PLATFORM`` is false the input is returned untouched.
    On Windows the path is rebuilt from ``split_path(p)``, so empty and
    ``.`` components are dropped as a side effect of the conversion.

    :param p: path string
    :return: the (possibly rewritten) path string
    """
    if not IS_WINDOWS_PLATFORM:
        return p
    return '/'.join(split_path(p))
| 147 | + |
| 148 | + |
def walk(root, patterns, default=True):
    """Iterate over the paths below ``root`` not excluded by ``patterns``.

    Thin wrapper that builds a ``PatternMatcher`` from ``patterns`` and
    delegates to its ``walk`` method.

    :param root: directory to traverse
    :param patterns: iterable of pattern strings
    :param default: accepted but not used by this implementation
    :return: whatever ``PatternMatcher.walk(root)`` returns
    """
    return PatternMatcher(patterns).walk(root)
| 152 | + |
| 153 | + |
| 154 | +# Heavily based on |
| 155 | +# https://github.com/moby/moby/blob/master/pkg/fileutils/fileutils.go |
class PatternMatcher(object):
    """Evaluate an ordered list of ``Pattern`` objects against paths.

    Patterns are applied in order and the last one that matches a path
    decides the outcome. A ``!.dockerignore`` pattern is always appended
    last.
    """

    def __init__(self, patterns):
        """Compile ``patterns`` (an iterable of pattern strings).

        Patterns whose normalized component list is empty are dropped.
        """
        compiled = (Pattern(raw) for raw in patterns)
        self.patterns = [pat for pat in compiled if pat.dirs]
        self.patterns.append(Pattern('!.dockerignore'))

    def matches(self, filepath):
        """Return True if ``filepath`` ends up matched by the pattern list.

        Each pattern is tried against the path itself and, failing that,
        against the leading part of the parent directory that has as many
        components as the pattern. A matching ``!`` pattern resets the
        result to False; any other matching pattern sets it to True.
        """
        parent = os.path.dirname(filepath)
        parent_parts = split_path(parent)
        has_parent = parent != ''

        excluded = False
        for pattern in self.patterns:
            hit = pattern.match(filepath)
            if not hit and has_parent:
                depth = len(pattern.dirs)
                if depth <= len(parent_parts):
                    # Retry against the ancestor directory of equal depth.
                    ancestor = os.path.sep.join(parent_parts[:depth])
                    hit = pattern.match(ancestor)
            if hit:
                excluded = not pattern.exclusion
        return excluded

    def walk(self, root):
        """Return a generator of paths under ``root`` that are not matched.

        Paths are yielded relative to ``root``. Symlinks are never
        descended into. A matched directory is still descended into when
        some ``!`` pattern's text starts with that directory's path.
        """
        def has_exception_below(rel_path):
            # True if any "!" pattern textually begins with this path,
            # e.g. "!dir/file" for the directory "dir".
            prefix = normalize_slashes(rel_path)
            return any(
                pat.exclusion and pat.cleaned_pattern.startswith(prefix)
                for pat in self.patterns
            )

        def rec_walk(current_dir):
            for entry in os.listdir(current_dir):
                fpath = os.path.join(
                    os.path.relpath(current_dir, root), entry
                )
                # Entries directly under root come back as "./name".
                if fpath.startswith('.' + os.path.sep):
                    fpath = fpath[2:]

                excluded = self.matches(fpath)
                if not excluded:
                    yield fpath

                cur = os.path.join(root, fpath)
                is_real_dir = os.path.isdir(cur) and not os.path.islink(cur)
                if not is_real_dir:
                    continue
                if excluded and not has_exception_below(fpath):
                    # Nothing below this directory can be re-included.
                    continue

                for sub in rec_walk(cur):
                    yield sub

        return rec_walk(root)
| 218 | + |
| 219 | + |
class Pattern(object):
    """A single parsed ignore pattern.

    Attributes set by the constructor:
      exclusion        -- True when the pattern string began with ``!``
      dirs             -- normalized list of path components
      cleaned_pattern  -- ``dirs`` joined with ``/``
    """

    def __init__(self, pattern_str):
        self.exclusion = pattern_str.startswith('!')
        if self.exclusion:
            # Drop the leading "!" before normalizing.
            pattern_str = pattern_str[1:]

        self.dirs = self.normalize(pattern_str)
        self.cleaned_pattern = '/'.join(self.dirs)

    @classmethod
    def normalize(cls, p):
        """Split ``p`` into components and resolve ``..`` elements.

        Leading and trailing slashes carry no meaning ("foo.py/" must
        still exclude the regular file "foo.py"), and "." components are
        ignored, even when the pattern is just ".": the Docker reference
        says "For historical reasons, the pattern . is ignored."
        A ".." component cancels the component before it, if there is
        one, whether or not that path exists, mirroring the preprocessing
        step that "eliminates . and .. elements using Go's filepath.".
        """
        resolved = []
        for part in split_path(p):
            if part != '..':
                resolved.append(part)
            elif resolved:
                # ".." removes the component that precedes it.
                resolved.pop()
        return resolved

    def match(self, filepath):
        """Return the result of ``fnmatch`` for ``filepath`` vs. this pattern.

        The path is passed through ``normalize_slashes`` first so it is
        compared against the ``/``-joined ``cleaned_pattern``.
        """
        candidate = normalize_slashes(filepath)
        return fnmatch(candidate, self.cleaned_pattern)
0 commit comments