Skip to content

Commit 86ad7d2

Browse files
committed
feat: add script to automate copyright addition
1 parent cc1e20e commit 86ad7d2

File tree

1 file changed

+295
-0
lines changed

1 file changed

+295
-0
lines changed
Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
4+
# SPDX-License-Identifier: MIT
5+
6+
"""
7+
Purpose:
8+
Normalize and enforce AMD two-line copyright + SPDX headers across files.
9+
10+
Target files:
11+
- C/C++-style: .cpp, .hpp, .inc -> uses "//" comment style
12+
- Hash-style: .py, .cmake, .sh, and CMakeLists.txt -> uses "#" style
13+
14+
Header formats inserted (top of file, followed by exactly one blank line):
15+
C/C++ :
16+
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
17+
// SPDX-License-Identifier: MIT
18+
<blank>
19+
Hash :
(the same two lines as the C/C++ form, with "#" in place of "//")
<blank>
21+
22+
Shebang special case (hash-style only):
23+
- If line 1 starts with "#!", keep shebang, then a blank line, then the
24+
two hash-style header lines, then a blank line.
25+
26+
Removal rules:
27+
- Remove any comment lines (anywhere in file) containing the keywords
28+
"copyright" or "spdx" (case-insensitive). Blank lines are preserved.
29+
- Remove long-form MIT license block comment when:
30+
a) The file starts with the block (absolute top), OR
31+
b) The block appears immediately after the AMD header position
32+
(i.e., when remainder at insertion point begins with "/*" and
33+
the first content line is "* The MIT License (MIT)").
34+
35+
Blank-line normalization:
36+
- Enforce exactly ONE blank line immediately after the AMD header.
37+
(Drop only the leading blank lines at the insertion point before
38+
re-inserting the header.)
39+
- Do not change blank lines between other non-copyright comments.
40+
41+
Preservation:
42+
- Preserve original newline style: CRLF (\r\n) vs LF (\n).
43+
- Preserve UTF-8 BOM if present.
44+
- Do not modify non-comment code lines.
45+
46+
Idempotency:
47+
- Running this script multiple times does not further modify files.
48+
"""
49+
50+
from __future__ import annotations
51+
import re
52+
import sys
53+
from pathlib import Path
54+
from typing import List, Tuple
55+
56+
# Canonical two-line AMD header, one variant per comment syntax.
# Index 0 is the ownership line and index 1 the license-identifier line;
# neither carries a line terminator.
AMD_CPP_HEADER_TEXT = [
    "// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.",
    "// SPDX-License-Identifier: MIT",
]
AMD_HASH_HEADER_TEXT = [
    "# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.",
    "# SPDX-License-Identifier: MIT",
]

# Lower-case file suffixes handled by each comment style.
CPP_EXTS = {".cpp", ".hpp", ".inc"}
HASH_EXTS = {".py", ".cmake", ".sh"}
67+
68+
# --- Encoding helpers -------------------------------------------------------
69+
70+
71+
def has_bom(raw: bytes) -> bool:
    """Report whether *raw* begins with the three-byte UTF-8 byte-order mark."""
    utf8_bom = b"\xef\xbb\xbf"
    return raw[: len(utf8_bom)] == utf8_bom
73+
74+
75+
def decode_text(raw: bytes) -> str:
    """Decode file bytes as UTF-8, dropping one leading BOM if present.

    Bytes that are not valid UTF-8 are mapped to lone surrogates via the
    ``surrogateescape`` error handler rather than being replaced with
    U+FFFD. Replacement is lossy: once the text is written back, the
    original bytes are gone. With ``surrogateescape`` the bytes can be
    restored by encoding with the same handler, and a strict UTF-8 encode
    raises instead of silently writing altered content.

    Args:
        raw: The complete file content as read from disk.

    Returns:
        The decoded text without a leading BOM.
    """
    return raw.decode("utf-8-sig", errors="surrogateescape")
77+
78+
79+
def encode_text(text: str, bom: bool) -> bytes:
    """Encode *text* as UTF-8, optionally prefixed with the UTF-8 BOM.

    Lone surrogates in the U+DC80..U+DCFF range (the form produced when
    bytes are decoded with ``surrogateescape``) are written back as the
    single bytes they stand for instead of raising ``UnicodeEncodeError``.
    Ordinary text encodes exactly as plain UTF-8.

    Args:
        text: The text to serialise.
        bom: When true, prepend the three-byte UTF-8 byte-order mark.

    Returns:
        The encoded bytes.
    """
    data = text.encode("utf-8", errors="surrogateescape")
    return b"\xef\xbb\xbf" + data if bom else data
82+
83+
84+
# --- Newline detection ------------------------------------------------------
85+
86+
87+
def detect_newline_sequence(raw: bytes) -> str:
    """Return the newline sequence to use for lines inserted into *raw*.

    The dominant style wins: ``"\\r\\n"`` is returned only when CRLF
    terminators are at least as frequent as bare LF terminators, so one
    stray CRLF in an otherwise LF file no longer forces CRLF headers.
    Input without any newline defaults to ``"\\n"``.

    Args:
        raw: The complete, still-encoded file content.

    Returns:
        Either ``"\\r\\n"`` or ``"\\n"``.
    """
    crlf_count = raw.count(b"\r\n")
    # Every CRLF also contains an LF, so subtract to get the bare-LF count.
    bare_lf_count = raw.count(b"\n") - crlf_count
    if crlf_count and crlf_count >= bare_lf_count:
        return "\r\n"
    return "\n"
94+
95+
96+
# --- Utilities --------------------------------------------------------------
97+
98+
99+
def is_comment_line(line: str, style: str) -> bool:
    """Tell whether *line* looks like a comment in the given comment style.

    The check is purely textual and looks only at the first non-blank
    characters. For ``"cpp"``, any line starting with ``//``, ``/*``, ``*``
    or ``*/`` qualifies; for ``"hash"``, any line starting with ``#``.
    Every other style value yields ``False``.
    """
    prefixes_by_style = {
        "cpp": ("//", "/*", "*", "*/"),
        "hash": ("#",),
    }
    prefixes = prefixes_by_style.get(style)
    if prefixes is None:
        return False
    return line.lstrip().startswith(prefixes)
111+
112+
113+
def has_keywords(line: str) -> bool:
    """Return True when *line* mentions "copyright" or "spdx", ignoring case."""
    lowered = line.lower()
    return any(keyword in lowered for keyword in ("copyright", "spdx"))
116+
117+
118+
# --- MIT License banner detection ------------------------------
119+
# Title line of the long-form MIT banner, in block-comment and "#" form.
MIT_C_FIRST_LINE_RE = re.compile(r"^\s*\*\s*The MIT License \(MIT\)")
MIT_HASH_FIRST_LINE_RE = re.compile(r"^\s*#\s*The MIT License \(MIT\)")


def remove_top_mit_block(lines: List[str]) -> Tuple[List[str], bool]:
    """Strip a long-form MIT license banner from the very start of *lines*.

    Two banner shapes are recognised:

    - C-style: line 0 opens a ``/*`` comment that is closed on a later
      line, and some line after the opener matches MIT_C_FIRST_LINE_RE.
    - Hash-style: the contiguous run of ``#`` lines starting at line 0,
      when any line of that run matches MIT_HASH_FIRST_LINE_RE.

    Nothing is removed in the C-style case when:

    - the comment is opened and closed on line 0 (it cannot contain the
      banner, and scanning on would run into unrelated code), or
    - the closing ``*/`` is followed by other text on the same line
      (dropping that line would delete the trailing text too).

    Args:
        lines: File lines, each still carrying its line terminator.

    Returns:
        ``(new_lines, removed)``. When nothing is removed the input list
        itself is returned and ``removed`` is ``False``.
    """
    if not lines:
        return lines, False

    first = lines[0].lstrip()

    # C-style block
    if first.startswith("/*"):
        if "*/" in first[2:]:
            # One-line comment: the block ends here, so stop before code.
            return lines, False

        saw_mit = False
        for i, line in enumerate(lines[1:], 1):
            if not saw_mit and MIT_C_FIRST_LINE_RE.match(line):
                saw_mit = True
            close_at = line.find("*/")
            if close_at == -1:
                continue
            if line[close_at + 2:].strip():
                # Text shares the closing line; leave the block in place.
                return lines, False
            if saw_mit:
                return lines[i + 1:], True
            return lines, False
        # Unterminated comment: leave the input untouched.
        return lines, False

    # Hash-style contiguous banner
    if first.startswith("#"):
        end_idx, saw_mit = 0, False
        for i, line in enumerate(lines):
            if not line.lstrip().startswith("#"):
                break
            if not saw_mit and MIT_HASH_FIRST_LINE_RE.match(line):
                saw_mit = True
            end_idx = i + 1
        if saw_mit:
            return lines[end_idx:], True

    return lines, False
167+
168+
169+
# --- Removal + normalization helpers ---------------------------------------
170+
171+
172+
def _block_delimiter_to_keep(line: str) -> str:
    """Return the block-comment delimiter that must survive removing *line*.

    Returns the indented opener (``/*``, ``/**``, ``/*!`` ...) when the line
    opens a comment it does not close, the indented ``*/`` when the line
    closes a comment it did not open, and ``""`` when the whole line can go.
    The original line terminator is preserved on the returned text.
    """
    stripped = line.lstrip()
    indent = line[: len(line) - len(stripped)]
    body = stripped.rstrip("\r\n")
    eol = stripped[len(body):]

    if body.startswith("/*"):
        if "*/" in body[2:]:
            return ""  # opened and closed on this line
        end = 2
        # Keep doc-comment markers such as "/**" or "/*!" intact.
        while end < len(body) and body[end] in "*!":
            end += 1
        return indent + body[:end] + eol

    if body.startswith("*") and "/*" not in body and body.rstrip().endswith("*/"):
        return indent + "*/" + eol

    return ""


def remove_keyword_comment_lines_globally(lines: List[str], style: str) -> List[str]:
    """Remove comment lines that mention the header keywords, file-wide.

    A line is removed when ``is_comment_line`` accepts it for *style* and
    ``has_keywords`` finds a keyword in it. Blank lines and every other
    line are passed through unchanged.

    For the ``"cpp"`` style, a removed line that opens or closes a
    multi-line ``/* ... */`` comment is replaced by just its delimiter.
    Deleting such a line outright would leave the rest of the comment
    unbalanced and break the source file.

    Args:
        lines: File lines, each still carrying its line terminator.
        style: ``"cpp"`` or ``"hash"``.

    Returns:
        A new list with the matching lines removed or reduced.
    """
    out: List[str] = []
    for line in lines:
        if not (is_comment_line(line, style) and has_keywords(line)):
            out.append(line)
            continue
        if style == "cpp":
            delimiter = _block_delimiter_to_keep(line)
            if delimiter:
                out.append(delimiter)
    return out
181+
182+
183+
def drop_leading_blank_lines(lines: List[str]) -> List[str]:
    """Return *lines* without the whitespace-only lines at its start.

    Only the leading run is skipped; blank lines further down are kept.
    """
    for index, line in enumerate(lines):
        if line.strip():
            # First line with visible content: keep it and all that follows.
            return lines[index:]
    return []
189+
190+
191+
# --- Header builder ---------------------------------------------------------
192+
193+
194+
def build_header_lines(style: str, nl: str) -> List[str]:
    """Build the header lines for *style*, each terminated with *nl*.

    ``"cpp"`` selects AMD_CPP_HEADER_TEXT; any other value selects
    AMD_HASH_HEADER_TEXT. The result is the two header lines followed by
    one blank line.
    """
    if style == "cpp":
        template = AMD_CPP_HEADER_TEXT
    else:
        template = AMD_HASH_HEADER_TEXT
    first_line = template[0] + nl
    second_line = template[1] + nl
    return [first_line, second_line, nl]  # trailing nl is the single blank line
197+
198+
199+
# --- Main transforms --------------------------------------------------------
200+
201+
202+
def process_cpp(text: str, nl: str) -> str:
    """Return *text* rewritten to start with the "//"-style AMD header.

    Steps, in order:

    1. Remove a long-form MIT block at the absolute top of the file.
    2. Remove keyword-bearing comment lines anywhere in the file.
    3. Drop leading blank lines, then remove an MIT block that is now at
       the top (one that originally followed an old header).
    4. If step 3 removed a block, drop the blank lines it left behind.
       Without this the output has two blank lines after the header and a
       second run changes the file again.
    5. Prepend the header followed by exactly one blank line.

    Args:
        text: Decoded file content.
        nl: Newline sequence to use for the inserted header lines.

    Returns:
        The rewritten file content.
    """
    lines = text.splitlines(True)

    # MIT block at the absolute top.
    lines, _ = remove_top_mit_block(lines)

    # Keyword-bearing comment lines, file-wide (blank lines are kept).
    lines = remove_keyword_comment_lines_globally(lines, style="cpp")

    # Normalise the insertion point, then catch an MIT block exposed by
    # the removals above.
    lines = drop_leading_blank_lines(lines)
    lines, removed = remove_top_mit_block(lines)
    if removed:
        lines = drop_leading_blank_lines(lines)

    return "".join(build_header_lines("cpp", nl) + lines)
217+
218+
219+
def process_hash(text: str, nl: str) -> str:
    """Return *text* rewritten to carry the "#"-style AMD header.

    A shebang on line 1 is kept first, followed by one blank line, then
    the header. Otherwise the header goes at the very top. In both cases
    the rest of the file has keyword-bearing comment lines removed, leading
    blank lines dropped, and a long-form MIT banner at the insertion point
    removed.

    Two fixes over the straightforward implementation:

    - Blank lines left behind by a removed MIT banner are dropped, so the
      header is followed by exactly one blank line and a second run makes
      no further change.
    - A shebang line with no line terminator (a file consisting only of
      the shebang) is terminated before the blank separator is added.

    Args:
        text: Decoded file content.
        nl: Newline sequence to use for inserted lines.

    Returns:
        The rewritten file content.
    """
    lines = text.splitlines(True)
    if not lines:
        return "".join(build_header_lines("hash", nl))

    prefix: List[str] = []
    if lines[0].startswith("#!"):
        shebang_line = lines[0]
        if not shebang_line.endswith(("\n", "\r")):
            shebang_line += nl
        # The shebang is never subject to keyword removal.
        prefix = [shebang_line, nl]
        lines = lines[1:]

    remainder = remove_keyword_comment_lines_globally(lines, style="hash")
    remainder = drop_leading_blank_lines(remainder)
    remainder, removed = remove_top_mit_block(remainder)
    if removed:
        remainder = drop_leading_blank_lines(remainder)

    return "".join(prefix + build_header_lines("hash", nl) + remainder)
237+
238+
239+
# --- File processing & CLI --------------------------------------------------
240+
241+
242+
def process_file(path: Path) -> bool:
    """Rewrite the header of the file at *path* in place when needed.

    The comment style comes from the lower-cased suffix (CPP_EXTS or
    HASH_EXTS) or from the exact file name ``CMakeLists.txt``. Files of
    any other type are not read.

    Returns:
        ``True`` when the file was rewritten; ``False`` when the type is
        unsupported or the content already matches the desired output.
    """
    ext = path.suffix.lower()
    if ext in CPP_EXTS:
        transform = process_cpp
    elif ext in HASH_EXTS or path.name == "CMakeLists.txt":
        transform = process_hash
    else:
        return False

    original_bytes = path.read_bytes()
    original_text = decode_text(original_bytes)
    rewritten = transform(original_text, detect_newline_sequence(original_bytes))

    if rewritten == original_text:
        return False

    # Write back with the same BOM state the file had on disk.
    path.write_bytes(encode_text(rewritten, has_bom(original_bytes)))
    return True
262+
263+
264+
def main(argv: List[str]) -> int:
    """Command-line entry point: process every file path in ``argv[1:]``.

    With no file arguments, prints the module docstring and returns 2.
    Otherwise each path is handled independently. Missing paths,
    directories and exceptions raised while processing are collected as
    errors. A summary is printed at the end.

    Returns:
        0 when no errors were recorded, 1 otherwise (2 for usage).
    """
    targets = argv[1:]
    if not targets:
        print(__doc__)
        return 2

    updated_count = 0
    skipped_count = 0
    problems: List[str] = []

    for raw_arg in targets:
        target = Path(raw_arg)
        try:
            if not target.exists():
                problems.append(f"Not found: {target}")
            elif target.is_dir():
                problems.append(f"Is a directory (pass specific files): {target}")
            elif process_file(target):
                updated_count += 1
                print(f"Updated: {target}")
            else:
                skipped_count += 1
                print(f"Skipped (no change needed or unsupported type): {target}")
        except Exception as exc:
            # Top-level boundary: one failing file must not stop the batch.
            problems.append(f"Error processing {target}: {exc}")

    print(f"\nSummary: {updated_count} updated, {skipped_count} skipped, {len(problems)} errors")
    for message in problems:
        print(f" - {message}")
    return 1 if problems else 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

0 commit comments

Comments
 (0)