Skip to content

Commit 4e0446b

Browse files
feat: add SPDX license identifiers to all Python files (issue #4) (#31)
Implement comprehensive SPDX licensing compliance across the codebase for improved license clarity and machine-readable attribution.

Changes:
- Add 'SPDX-License-Identifier: MIT' to all 77 Python files
- Create scripts/check-spdx.py for automated SPDX validation
- Integrate SPDX check into local quality checks (run-quality-checks.sh)
- Add SPDX validation to CI/CD pipeline (.github/workflows/ci.yml)
- Update contribution guidelines with SPDX requirements
- Add logging consistency check to CI/CD pipeline

Benefits:
- Aligns with Python packaging standards (PEP 639)
- Enables machine-readable license detection
- Supports automated license compliance checking
- Improves clarity for contributors and package managers

Closes #4

Co-authored-by: florath-ai-assistant[bot] <Andreas.Florath@telekom.de>
1 parent f529664 commit 4e0446b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

81 files changed

+654
-457
lines changed

.github/community/CONTRIBUTING.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,35 @@ Then create a Pull Request on GitHub.
7777
- Maximum line length: 88 characters (Black default)
7878
- Use descriptive variable names
7979
- Add docstrings for classes and functions
80+
- **REQUIRED**: Include SPDX license identifier at the top of every Python file
81+
82+
### SPDX License Requirements
83+
84+
All Python source files must include an SPDX license identifier for licensing clarity and compliance:
85+
86+
**For files without shebang lines:**
87+
```python
88+
# SPDX-License-Identifier: MIT
89+
"""Module docstring here."""
90+
```
91+
92+
**For script files with shebang:**
93+
```python
94+
#!/usr/bin/env python3
95+
# SPDX-License-Identifier: MIT
96+
"""Script docstring here."""
97+
```
98+
99+
**Why SPDX identifiers are required:**
100+
- Provides machine-readable licensing information
101+
- Aligns with Python packaging standards (PEP 639)
102+
- Enables automated license compliance checking
103+
- Improves clarity for contributors and users
104+
105+
The SPDX check runs automatically in CI/CD and can be tested locally:
106+
```bash
107+
python scripts/check-spdx.py
108+
```
80109

81110
### Commit Messages
82111
Use conventional commit format:

.github/workflows/ci.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ jobs:
4747
run: |
4848
mypy src/ --strict
4949
50+
- name: Check SPDX license identifiers
51+
run: |
52+
python scripts/check-spdx.py
53+
54+
- name: Check logging consistency
55+
run: |
56+
python scripts/check-logging.py
57+
5058
test:
5159
name: Tests
5260
runs-on: ubuntu-latest

scripts/bump_version.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/usr/bin/env python3
2+
# SPDX-License-Identifier: MIT
23
"""
34
Version bumping utility for aletheia-probe.
45

scripts/check-logging.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/usr/bin/env python3
2+
# SPDX-License-Identifier: MIT
23
"""Script to enforce consistent logging practices in the codebase.
34
45
This script checks that no files use direct logging.getLogger() and instead

scripts/check-spdx.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/usr/bin/env python3
2+
# SPDX-License-Identifier: MIT
3+
"""Script to verify SPDX license identifiers are present in all Python files."""
4+
5+
import sys
6+
from pathlib import Path
7+
8+
9+
def check_spdx_header(file_path: Path) -> tuple[bool, str]:
    """Check if a Python file has a valid SPDX license identifier.

    Only the first 10 lines of the file are inspected. The first of those
    lines that contains ``SPDX-License-Identifier:`` decides the outcome: it
    is valid only if it is a ``#`` comment whose entire text is
    ``SPDX-License-Identifier: MIT``.

    Args:
        file_path: Path of the Python file to inspect.

    Returns:
        Tuple of (has_valid_spdx, error_message). ``error_message`` is the
        empty string when the header is valid.
    """
    tag = "SPDX-License-Identifier:"
    expected = f"{tag} MIT"

    try:
        with open(file_path, encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        # Unreadable or non-UTF-8 files are reported, not raised, so one bad
        # file does not abort the whole run.
        return False, f"Error reading file: {e}"

    if not lines:
        return False, "File is empty"

    # Check first few lines for SPDX identifier
    for line in lines[:10]:
        if tag not in line:
            continue

        stripped = line.strip()

        # Compare the whole comment text. A substring test would also accept
        # other SPDX licenses whose identifier merely starts with "MIT"
        # (for example "MIT-0" or "MIT-CMU").
        if stripped.startswith("#") and stripped.lstrip("#").strip() == expected:
            return True, ""

        # Distinguish "right license, bad formatting" from "different license"
        # by looking at the first token that follows the tag.
        declared = stripped.partition(tag)[2].split()
        if declared and declared[0] == "MIT":
            return (
                False,
                f"SPDX header found but incorrectly formatted: '{stripped}'",
            )
        return False, f"SPDX header found but wrong license: '{stripped}'"

    return False, "No SPDX license identifier found"
39+
40+
41+
def main() -> int:
    """Check SPDX headers in all Python files of the project.

    The project root is the parent of the directory that contains this
    script. Every ``*.py`` file below it is checked with
    ``check_spdx_header``, except this script itself and files that live in
    version-control, virtual-environment, build or cache directories.

    Returns:
        0 if every checked file has a valid SPDX header, 1 otherwise
        (suitable as a process exit code).
    """
    # Directories that hold third-party or generated code, not project
    # sources. Without this filter a local virtualenv or build tree makes the
    # check fail on files the project does not own.
    excluded_dirs = {
        ".git",
        ".venv",
        "venv",
        ".tox",
        ".nox",
        ".eggs",
        ".mypy_cache",
        ".pytest_cache",
        ".ruff_cache",
        "__pycache__",
        "build",
        "dist",
        "node_modules",
        "site-packages",
    }

    project_root = Path(__file__).parent.parent
    script_path = Path(__file__).resolve()

    # Find all Python files; sorted so the report order is stable across runs.
    python_files = sorted(
        f
        for f in project_root.glob("**/*.py")
        # Remove this script itself from the check if it exists
        if f.resolve() != script_path
        and excluded_dirs.isdisjoint(f.relative_to(project_root).parts[:-1])
    )

    total_files = len(python_files)
    print(f"Checking SPDX license identifiers in {total_files} Python files...")

    missing_spdx: list[tuple[Path, str]] = []
    for py_file in python_files:
        has_spdx, error_msg = check_spdx_header(py_file)
        if not has_spdx:
            missing_spdx.append((py_file, error_msg))

    # Report results
    if missing_spdx:
        print(f"\n❌ SPDX Check FAILED: {len(missing_spdx)} file(s) missing or have invalid SPDX headers:")
        for file_path, error in missing_spdx:
            rel_path = file_path.relative_to(project_root)
            print(f" - {rel_path}: {error}")

        print("\nTo fix these issues, add the following line at the top of each file")
        print("(after any shebang line):")
        print(" # SPDX-License-Identifier: MIT")

        return 1  # Exit with error code

    print(f"✅ SPDX Check PASSED: All {total_files} Python files have valid SPDX headers")
    return 0
77+
78+
79+
if __name__ == "__main__":
80+
sys.exit(main())

scripts/run-quality-checks.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ run_check "Pytest with coverage" pytest --cov=src --cov-report=term-missing test
5252
# 5. Logging consistency check
5353
run_check "Logging consistency" python scripts/check-logging.py || true
5454

55+
# 6. SPDX license identifier check
56+
run_check "SPDX license identifiers" python scripts/check-spdx.py || true
57+
5558
# Final summary
5659
echo -e "${BLUE}========================================${NC}"
5760
if [ $FAILED -eq 0 ]; then

src/aletheia_probe/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: MIT
12
"""Journal Assessment Tool - Automated predatory journal detection."""
23

34
from importlib.metadata import PackageNotFoundError, version

src/aletheia_probe/article_retraction_checker.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: MIT
12
"""Article-level retraction checking using multiple data sources."""
23

34
import asyncio

src/aletheia_probe/backends/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: MIT
12
"""Backend modules for journal assessment."""
23

34
# Import backends to register them

src/aletheia_probe/backends/algerian_ministry.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: MIT
12
"""Algerian Ministry backend for predatory journal verification."""
23

34
from .base import CachedBackend, get_backend_registry

0 commit comments

Comments
 (0)