|
# Fails a pull request when any tracked file path contains a character that
# Go's module zip format rejects, so the problem is caught before release.
name: Pull Request File Path Check
on: [pull_request]
jobs:

  filepath-check:
    name: Check for invalid characters in file paths
    runs-on: ubuntu-latest
    steps:

      - name: Check out the code
        uses: actions/checkout@v6

      - name: Validate file paths for Go module compatibility
        run: |
          # Go's module zip rejects filenames containing certain characters.
          # See golang.org/x/mod/module fileNameOK() for the full specification.
          #
          # Allowed ASCII: letters, digits, and: !#$%&()+,-.=@[]^_{}~ and space
          # Allowed non-ASCII: unicode letters only
          # Rejected: " ' * < > ? ` | / \ : ; and any non-letter unicode (control
          # chars, format chars like U+200E LEFT-TO-RIGHT MARK, etc.)
          #
          # This check catches issues like the U+200E incident in PR #9552.

          EXIT_STATUS=0

          # NOTE: the Python program below lives inside a double-quoted shell
          # string. Double quotes must be written as \" and the program must not
          # contain backticks or shell variable expansions.
          git ls-files -z | python3 -c "
          import shlex
          import sys
          import unicodedata

          # ASCII punctuation (plus space) accepted by the Go fileNameOK function.
          # Validation is allow-list based: anything not listed here and not
          # alphanumeric is rejected.
          allowed_ascii = set('!#$%&()+,-.=@[]^_{}~ ')

          def is_ok(ch):
              # True when ch may appear inside a single path element.
              if ch.isascii():
                  return ch.isalnum() or ch in allowed_ascii
              return ch.isalpha()

          def escape_data(text):
              # Percent-escape the message part of a workflow command.
              return text.replace('%', '%25').replace('\r', '%0D').replace('\n', '%0A')

          def escape_property(text):
              # Property values (file=...) must also escape ':' and ','.
              return escape_data(text).replace(':', '%3A').replace(',', '%2C')

          data = sys.stdin.buffer.read()

          bad_files = []  # list of (original_path, clean_path, char_desc)
          for raw in data.split(b'\x00'):
              if not raw:
                  continue
              try:
                  name = raw.decode('utf-8')
              except UnicodeDecodeError:
                  print(f'::error::Non-UTF-8 bytes in filename: {escape_data(repr(raw))}')
                  bad_files.append((repr(raw), None, 'non-UTF-8 bytes'))
                  continue

              # '/' only separates path elements, so it is never validated itself.
              # Stop at the first offending character so that every file is
              # reported exactly once, even if several elements are invalid.
              bad = next((ch for ch in name if ch != '/' and not is_ok(ch)), None)
              if bad is None:
                  continue

              cp = ord(bad)
              char_name = unicodedata.name(bad, f'U+{cp:04X}')
              char_desc = f'U+{cp:04X} ({char_name})'
              # Build cleaned path by stripping invalid chars
              clean = ''.join(ch for ch in name if ch == '/' or is_ok(ch))
              prop = escape_property(name)
              shown = escape_data(name)
              print(f'::error file={prop}::File \"{shown}\" contains invalid char {char_desc}')
              bad_files.append((name, clean, char_desc))

          if bad_files:
              print()
              print('The following files have characters that are invalid in Go module zip archives:')
              print()
              for original, clean, desc in bad_files:
                  print(f'  {original} — {desc}')
              print()
              print('To fix, rename the files to remove the problematic characters:')
              print()
              for original, clean, desc in bad_files:
                  if clean:
                      # Offending names often contain shell metacharacters, so
                      # quote both paths to keep the suggestion safe to paste.
                      src = shlex.quote(original)
                      dst = shlex.quote(clean)
                      print(f'  mv -- {src} {dst} && git add -- {dst}')
                      print(f'  # or: git mv -- {src} {dst}')
                  else:
                      print(f'  # {original} — cannot auto-suggest rename (non-UTF-8)')
              print()
              print('See https://github.com/vmware-tanzu/velero/pull/9552 for context.')
              sys.exit(1)
          else:
              print('All file paths are valid for Go module zip.')
          " || EXIT_STATUS=1

          exit $EXIT_STATUS
0 commit comments