|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. |
| 4 | +# SPDX-License-Identifier: MIT |
| 5 | + |
| 6 | +""" |
| 7 | +Purpose: |
| 8 | + Normalize and enforce AMD two-line copyright + SPDX headers across files. |
| 9 | +
|
| 10 | +Target files: |
| 11 | + - C/C++-style: .cpp, .hpp, .inc -> uses "//" comment style |
| 12 | + - Hash-style: .py, .cmake, .sh, and CMakeLists.txt -> uses "#" style |
| 13 | +
|
| 14 | +Header formats inserted (top of file, followed by exactly one blank line): |
| 15 | + C/C++ : |
| 16 | + // Copyright (c) Advanced Micro Devices, Inc., or its affiliates. |
| 17 | + // SPDX-License-Identifier: MIT |
| 18 | + <blank> |
| 19 | + Hash : the same two lines with "#" in place of "//", then |
| 20 | + <blank> |
| 21 | +
|
| 22 | +Shebang special case (hash-style only): |
| 23 | + - If line 1 starts with "#!", keep shebang, then a blank line, then the |
| 24 | + two hash-style header lines, then a blank line. |
| 25 | +
|
| 26 | +Removal rules: |
| 27 | + - Remove any comment lines (anywhere in file) containing the keywords |
| 28 | + "copyright" or "spdx" (case-insensitive). Blank lines are preserved. |
| 29 | + - Remove long-form MIT license block comment when: |
| 30 | + a) The file starts with the block (absolute top), OR |
| 31 | + b) The block appears immediately after the AMD header position |
| 32 | + (i.e., when remainder at insertion point begins with "/*" and |
| 33 | + the first content line is "* The MIT License (MIT)"). |
| 34 | +
|
| 35 | +Blank-line normalization: |
| 36 | + - Enforce exactly ONE blank line immediately after the AMD header. |
| 37 | + (Drop only the leading blank lines at the insertion point before |
| 38 | + re-inserting the header.) |
| 39 | + - Do not change blank lines between other non-copyright comments. |
| 40 | +
|
| 41 | +Preservation: |
| 42 | + - Preserve original newline style: CRLF (\r\n) vs LF (\n). |
| 43 | + - Preserve UTF-8 BOM if present. |
| 44 | + - Do not modify non-comment code lines. |
| 45 | +
|
| 46 | +Idempotency: |
| 47 | + - Running this script multiple times does not further modify files. |
| 48 | +""" |
| 49 | + |
| 50 | +from __future__ import annotations |
| 51 | +import re |
| 52 | +import sys |
| 53 | +from pathlib import Path |
| 54 | +from typing import List, Tuple |
| 55 | + |
# The two notice lines, without any comment leader. Both header variants are
# derived from this single tuple so their wording cannot drift apart.
_AMD_NOTICE_LINES = (
    "Copyright (c) Advanced Micro Devices, Inc., or its affiliates.",
    "SPDX-License-Identifier: MIT",
)
AMD_CPP_HEADER_TEXT = ["// " + notice for notice in _AMD_NOTICE_LINES]
AMD_HASH_HEADER_TEXT = ["# " + notice for notice in _AMD_NOTICE_LINES]

# Lower-case file suffixes handled with "//" and "#" comment leaders.
CPP_EXTS = {".cpp", ".hpp", ".inc"}
HASH_EXTS = {".py", ".cmake", ".sh"}
| 67 | + |
| 68 | +# --- Encoding helpers ------------------------------------------------------- |
| 69 | + |
| 70 | + |
def has_bom(raw: bytes) -> bool:
    """Return True when *raw* begins with the three-byte UTF-8 byte-order mark."""
    return raw[:3] == b"\xef\xbb\xbf"
| 74 | + |
def decode_text(raw: bytes) -> str:
    """Decode file bytes as UTF-8, dropping a leading BOM if one is present.

    Bytes that are not valid UTF-8 are mapped to lone surrogates
    (``surrogateescape``) instead of being replaced with U+FFFD, so the
    original bytes are never silently lost. Text holding such surrogates
    encodes back to the same bytes only with the same error handler; a
    strict encode raises ``UnicodeEncodeError``.
    """
    return raw.decode("utf-8-sig", errors="surrogateescape")
| 77 | + |
| 78 | + |
def encode_text(text: str, bom: bool) -> bytes:
    """Encode *text* as UTF-8, prefixing the UTF-8 BOM when *bom* is true.

    ``surrogateescape`` turns lone surrogates U+DC80..U+DCFF back into the raw
    bytes they stand for, so text decoded with that handler is written out
    byte-for-byte instead of raising ``UnicodeEncodeError``.
    """
    data = text.encode("utf-8", errors="surrogateescape")
    return b"\xef\xbb\xbf" + data if bom else data
| 82 | + |
| 83 | + |
| 84 | +# --- Newline detection ------------------------------------------------------ |
| 85 | + |
| 86 | + |
def detect_newline_sequence(raw: bytes) -> str:
    """Return the newline sequence to use for lines added to the file.

    A single CRLF anywhere in *raw* selects "\\r\\n"; every other input,
    including data with no newline at all, selects "\\n".
    """
    return "\r\n" if b"\r\n" in raw else "\n"
| 94 | + |
| 95 | + |
| 96 | +# --- Utilities -------------------------------------------------------------- |
| 97 | + |
| 98 | + |
def is_comment_line(line: str, style: str) -> bool:
    """Tell whether *line* looks like a comment line for the given *style*.

    The check is purely textual and looks only at the first non-blank
    characters: "//", "/*" or "*" for style "cpp", and "#" for style "hash".
    Any other style yields False. Because a leading "*" counts for "cpp",
    a code line such as ``*ptr = 0;`` is also reported as a comment.
    """
    # "*/" needs no entry of its own: it already starts with "*".
    leaders_by_style = {"cpp": ("//", "/*", "*"), "hash": ("#",)}
    leaders = leaders_by_style.get(style)
    if leaders is None:
        return False
    return line.lstrip().startswith(leaders)
| 111 | + |
| 112 | + |
def has_keywords(line: str) -> bool:
    """Return True if *line* contains "copyright" or "spdx", ignoring case."""
    lowered = line.lower()
    return any(keyword in lowered for keyword in ("copyright", "spdx"))
| 116 | + |
| 117 | + |
| 118 | +# --- MIT License banner detection ------------------------------ |
# Both patterns recognise the title line of the long-form MIT banner. The
# C-style one expects the "*" continuation prefix used inside a block comment;
# the hash-style one expects a leading "#".
MIT_C_FIRST_LINE_RE = re.compile(r"^\s*\*\s*The MIT License \(MIT\)")
MIT_HASH_FIRST_LINE_RE = re.compile(r"^\s*#\s*The MIT License \(MIT\)")


def remove_top_mit_block(lines: List[str]) -> Tuple[List[str], bool]:
    """Strip a long-form MIT banner that starts on the first entry of *lines*.

    Two banner shapes are recognised:

    - C-style: the first line opens a block comment with "/*". The block runs
      to the first line containing "*/" and is removed only when one of its
      lines matches MIT_C_FIRST_LINE_RE.
    - Hash-style: the contiguous run of lines starting with "#" at the top.
      The run is removed only when one of its lines matches
      MIT_HASH_FIRST_LINE_RE.

    Nothing is removed when the C-style comment already closes on its first
    line, or when anything other than whitespace follows the closing "*/";
    both guards keep real code from being deleted together with the banner.

    Args:
        lines: File content split into lines, line endings included.

    Returns:
        ``(new_lines, removed)``. When nothing is removed, *lines* itself is
        returned together with False; otherwise the remaining tail is
        returned with True. Line endings are left untouched.
    """
    if not lines:
        return lines, False

    first = lines[0].lstrip()

    # C-style block
    if first.startswith("/*"):
        # A comment that opens and closes on line one cannot hold the banner;
        # scanning on would reach some later, unrelated "*/" and delete the
        # code in between.
        if "*/" in first[2:]:
            return lines, False

        saw_mit = False
        for i, line in enumerate(lines[1:], 1):
            if not saw_mit and MIT_C_FIRST_LINE_RE.match(line):
                saw_mit = True
            close_at = line.find("*/")
            if close_at == -1:
                continue
            # The first "*/" always ends the block. Text after it on the same
            # line is not part of the banner, so refuse to drop that line.
            if saw_mit and not line[close_at + 2:].strip():
                return lines[i + 1:], True
            return lines, False
        # Unterminated block comment: leave the input alone.
        return lines, False

    # Hash-style contiguous banner
    if first.startswith("#"):
        end_idx, saw_mit = 0, False
        for i, line in enumerate(lines):
            if not line.lstrip().startswith("#"):
                break
            if not saw_mit and MIT_HASH_FIRST_LINE_RE.match(line):
                saw_mit = True
            end_idx = i + 1
        if saw_mit:
            return lines[end_idx:], True

    return lines, False
| 167 | + |
| 168 | + |
| 169 | +# --- Removal + normalization helpers --------------------------------------- |
| 170 | + |
| 171 | + |
def remove_keyword_comment_lines_globally(lines: List[str], style: str) -> List[str]:
    """Drop comment lines that mention the licence keywords, file-wide.

    A line is dropped when ``is_comment_line`` and ``has_keywords`` both hold
    for it. Blank lines and every other line are kept exactly as they are.

    For style "cpp" a dropped line may also carry a block-comment delimiter.
    Deleting it whole would leave the rest of the comment dangling, so the
    delimiter is kept:

    - a line that opens a block comment without closing it is reduced to its
      opener ("/*", "/**", "/*!" ...) plus the original line ending;
    - a continuation line that closes the comment is reduced to everything
      from the first "*/" onward.

    Args:
        lines: File content split into lines, line endings included.
        style: "cpp" or "hash"; passed through to ``is_comment_line``.

    Returns:
        A new list holding the lines that remain.
    """
    out: List[str] = []
    for line in lines:
        if not (is_comment_line(line, style) and has_keywords(line)):
            out.append(line)
            continue
        if style != "cpp":
            continue

        body = line.lstrip()
        indent = line[: len(line) - len(body)]
        if body.startswith("//"):
            continue
        if body.startswith("/*"):
            if "*/" not in body[2:]:
                # Keep the whole opener token so a doc-comment marker such as
                # "/**" or "/*!" is not downgraded to a plain "/*".
                tail = body[2:].lstrip("*!")
                opener = body[: len(body) - len(tail)]
                eol = line[len(line.rstrip("\r\n")):]
                out.append(indent + opener + eol)
            continue
        close_at = body.find("*/")
        if close_at != -1:
            # The slice still ends with the line's own terminator.
            out.append(indent + body[close_at:])
    return out
| 181 | + |
| 182 | + |
def drop_leading_blank_lines(lines: List[str]) -> List[str]:
    """Return *lines* without its leading run of blank (whitespace-only) lines.

    Blank lines further down are left in place. The result is always a new
    slice of the input, never the input object itself.
    """
    for position, line in enumerate(lines):
        if line.strip():
            return lines[position:]
    # Nothing but blank lines (or no lines at all): an empty tail remains.
    return lines[len(lines):]
| 189 | + |
| 190 | + |
| 191 | +# --- Header builder --------------------------------------------------------- |
| 192 | + |
| 193 | + |
def build_header_lines(style: str, nl: str) -> List[str]:
    """Return the AMD header as three lines: two notice lines and one blank.

    Style "cpp" selects AMD_CPP_HEADER_TEXT; any other value selects
    AMD_HASH_HEADER_TEXT. Each returned line ends with *nl*.
    """
    if style == "cpp":
        notice = AMD_CPP_HEADER_TEXT
    else:
        notice = AMD_HASH_HEADER_TEXT
    first_line, second_line = notice[0], notice[1]
    # The trailing bare newline is the single blank line after the header.
    return [first_line + nl, second_line + nl, nl]
| 197 | + |
| 198 | + |
| 199 | +# --- Main transforms -------------------------------------------------------- |
| 200 | + |
| 201 | + |
def process_cpp(text: str, nl: str) -> str:
    """Return *text* with the "//"-style AMD header normalised at the top.

    Steps, in order:
      1. remove a long-form MIT block comment sitting at the absolute top;
      2. remove keyword-bearing comment lines throughout the file;
      3. drop leading blank lines, then remove an MIT block that has now
         surfaced at the top (one that followed an old header);
      4. drop the blank lines that such a removed block leaves behind, so the
         header is followed by exactly one blank line on the first run and a
         second run changes nothing;
      5. prepend the header.

    Args:
        text: Decoded file content.
        nl: Newline sequence used for the inserted header lines.

    Returns:
        The rewritten file content.
    """
    lines = text.splitlines(True)

    lines, _ = remove_top_mit_block(lines)
    lines = remove_keyword_comment_lines_globally(lines, style="cpp")

    lines = drop_leading_blank_lines(lines)
    lines, removed_after_header = remove_top_mit_block(lines)
    if removed_after_header:
        # Blank lines that trailed the block would otherwise stack up under
        # the single blank line the header already provides.
        lines = drop_leading_blank_lines(lines)

    return "".join(build_header_lines("cpp", nl) + lines)
| 217 | + |
| 218 | + |
def process_hash(text: str, nl: str) -> str:
    """Return *text* with the "#"-style AMD header normalised at the top.

    A first line starting with "#!" is kept as line one, followed by one blank
    line and then the header. Otherwise the header goes at the very top.
    Keyword-bearing comment lines are removed from the rest of the file, and
    an MIT banner at the insertion point is removed together with the blank
    lines around it, so exactly one blank line follows the header.

    Args:
        text: Decoded file content.
        nl: Newline sequence used for the inserted lines.

    Returns:
        The rewritten file content. Empty input yields just the header.
    """
    lines = text.splitlines(True)
    if not lines:
        return "".join(build_header_lines("hash", nl))

    prefix: List[str] = []
    body = lines
    if lines[0].startswith("#!"):
        shebang_line = lines[0]
        # A file consisting of only a shebang may lack a final newline.
        # Terminate the line first, otherwise the separator below would be
        # consumed as its line ending and the blank line would disappear.
        if not shebang_line.endswith(("\n", "\r")):
            shebang_line += nl
        prefix = [shebang_line, nl]
        body = lines[1:]

    remainder = remove_keyword_comment_lines_globally(body, style="hash")
    remainder = drop_leading_blank_lines(remainder)
    remainder, removed_banner = remove_top_mit_block(remainder)
    if removed_banner:
        # Blank lines that trailed the banner would otherwise stack up under
        # the single blank line the header already provides.
        remainder = drop_leading_blank_lines(remainder)

    return "".join(prefix + build_header_lines("hash", nl) + remainder)
| 237 | + |
| 238 | + |
| 239 | +# --- File processing & CLI -------------------------------------------------- |
| 240 | + |
| 241 | + |
def process_file(path: Path) -> bool:
    """Normalise the header of one file in place.

    The comment style comes from the lower-cased suffix (CPP_EXTS or
    HASH_EXTS) or from the exact file name "CMakeLists.txt". Files of any
    other kind are not read at all.

    Args:
        path: File to process.

    Returns:
        True when the file was rewritten; False when its type is unsupported
        or its content already matched the normalised form.
    """
    suffix = path.suffix.lower()
    if suffix in CPP_EXTS:
        transform = process_cpp
    elif suffix in HASH_EXTS or path.name == "CMakeLists.txt":
        transform = process_hash
    else:
        return False

    raw = path.read_bytes()
    text = decode_text(raw)
    updated = transform(text, detect_newline_sequence(raw))
    if updated == text:
        return False

    # The BOM is stripped while decoding, so it is re-attached on the way out.
    path.write_bytes(encode_text(updated, has_bom(raw)))
    return True
| 262 | + |
| 263 | + |
def main(argv: List[str]) -> int:
    """Command-line entry point: process every file named after the program.

    Args:
        argv: Argument vector; element 0 is the program name and the rest are
            file paths.

    Returns:
        2 when no file was given (the module docstring is printed as usage),
        1 when at least one path produced an error, 0 otherwise.
    """
    targets = argv[1:]
    if not targets:
        print(__doc__)
        return 2

    changed = skipped = 0
    errors: List[str] = []
    for arg in targets:
        path = Path(arg)
        try:
            if not path.exists():
                errors.append(f"Not found: {path}")
            elif path.is_dir():
                errors.append(f"Is a directory (pass specific files): {path}")
            elif process_file(path):
                changed += 1
                print(f"Updated: {path}")
            else:
                skipped += 1
                print(f"Skipped (no change needed or unsupported type): {path}")
        except Exception as exc:
            # Top-level boundary: record the failure and move on to the
            # remaining files instead of aborting the whole run.
            errors.append(f"Error processing {path}: {exc}")

    print(f"\nSummary: {changed} updated, {skipped} skipped, {len(errors)} errors")
    for message in errors:
        print(f" - {message}")
    return 1 if errors else 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
0 commit comments