From 3049fe312f62adde86902d030d0893ecb0d81767 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Tue, 18 Mar 2025 19:00:32 +0000 Subject: [PATCH 1/6] Initial commit: Create Python project starter template --- .github/workflows/lint.yml | 21 ++++++++ .github/workflows/test.yml | 22 ++++++++ .gitignore | 37 ++++++++++++++ .pre-commit-config.yaml | 12 +++++ README.md | 95 ++++++++++++++++++++++++++++++++++- data/sample.csv | 12 +++++ pyproject.toml | 40 +++++++++++++++ src/starter_repo/plot_data.py | 62 +++++++++++++++++++++++ tests/test_plot_data.py | 33 ++++++++++++ 9 files changed, 332 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/test.yml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 data/sample.csv create mode 100644 pyproject.toml create mode 100644 src/starter_repo/plot_data.py create mode 100644 tests/test_plot_data.py diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..a687adb7 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,21 @@ +name: Lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ".[dev]" + - name: Run ruff + run: ruff check . + - name: Run mypy + run: mypy src tests \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..e88e1b24 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,22 @@ +name: Test + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8', '3.9', '3.10', '3.11'] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ".[dev]" + - name: Run tests + run: pytest \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..316c25f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +venv/ +env/ +ENV/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Project specific +*.png +*.csv +!data/sample.csv \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..c04a62cc --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +repos: +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.1.6 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.7.1 + hooks: + - id: mypy + additional_dependencies: [types-all] \ No newline at end of file diff --git a/README.md b/README.md index 0139dc2e..e9e7cfe3 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,93 @@ -# starter-repo -An example starter repo for Python projects +# Python Project Starter Repository + +This repository serves as a template demonstrating Python best practices for data analysis projects. It includes examples of: +- CSV data processing +- Data visualization with matplotlib +- Command-line argument parsing +- Type annotations +- Testing +- Code quality tools +- Continuous Integration + +## Features + +### 1. Data Processing and Visualization +The main script ([src/starter_repo/plot_data.py](src/starter_repo/plot_data.py)) demonstrates: +- Reading CSV files using pandas +- Creating plots with matplotlib +- Modern Python type annotations +- Command-line argument parsing with argparse + +Example usage: +```bash +# Install the package +pip install . + +# Create a plot from the sample data +python -m starter_repo.plot_data data/sample.csv year population --title "Population Growth" -o population.png +``` + +### 2. Testing +The project uses pytest for testing. Test files are located in the [tests/](tests/) directory. + +To run tests: +```bash +pip install ".[dev]" # Install development dependencies +pytest +``` + +### 3. Code Quality Tools +This project uses several tools to maintain code quality: + +#### Pre-commit Hooks +We use [pre-commit](.pre-commit-config.yaml) with: +- [Ruff](https://github.com/charliermarsh/ruff) for linting and formatting +- [mypy](https://mypy.readthedocs.io/) for static type checking + +To set up pre-commit: +```bash +pip install pre-commit +pre-commit install +``` + +### 4. Continuous Integration +GitHub Actions workflows are set up for: +- [Linting](.github/workflows/lint.yml): Runs Ruff and mypy +- [Testing](.github/workflows/test.yml): Runs pytest on multiple Python versions + +### 5. Project Structure +``` +. +|- src/ +| |- starter_repo/ # Source code +| |- plot_data.py # Main script +|- tests/ # Test files +| |- test_plot_data.py +|- data/ # Sample data +| |- sample.csv +|- .github/workflows/ # CI configuration +|- pyproject.toml # Project metadata and dependencies +|- .pre-commit-config.yaml # Pre-commit hook configuration +|- README.md +``` + +## Installation + +```bash +# For users +pip install . + +# For developers +pip install -e ".[dev]" +``` + +## Contributing +1. Fork the repository +2. Install development dependencies: `pip install -e ".[dev]"` +3. Install pre-commit hooks: `pre-commit install` +4. Make your changes +5. Run tests: `pytest` +6. Submit a pull request + +## Author +Graham Neubig (neubig@gmail.com) \ No newline at end of file diff --git a/data/sample.csv b/data/sample.csv new file mode 100644 index 00000000..c1fe84b9 --- /dev/null +++ b/data/sample.csv @@ -0,0 +1,12 @@ +year,population +2000,6115 +2001,6214 +2002,6312 +2003,6411 +2004,6510 +2005,6609 +2006,6709 +2007,6809 +2008,6909 +2009,7009 +2010,7109 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..2a15676e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,40 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "starter-repo" +version = "0.1.0" +authors = [ + { name="Graham Neubig", email="neubig@gmail.com" }, +] +description = "A starter repository demonstrating Python best practices" +readme = "README.md" +requires-python = ">=3.8" +dependencies = [ + "pandas", + "matplotlib", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "ruff", + "mypy", + "pre-commit", +] + +[tool.ruff] +select = ["E", "F", "I"] +line-length = 100 + +[tool.mypy] +python_version = "3.8" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +check_untyped_defs = true + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] \ No newline at end of file diff --git a/src/starter_repo/plot_data.py b/src/starter_repo/plot_data.py new file mode 100644 index 00000000..70d2eb7e --- /dev/null +++ b/src/starter_repo/plot_data.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +from pathlib import Path +import argparse +import pandas as pd +import matplotlib.pyplot as plt +from typing import list, tuple + +def read_csv_data(file_path: Path, x_col: str, y_col: str) -> tuple[list[float], list[float]]: + """Read data from a CSV file and return specified columns. + + Args: + file_path: Path to the CSV file + x_col: Name of the column to use for x-axis + y_col: Name of the column to use for y-axis + + Returns: + Tuple of x and y data as lists + """ + df = pd.read_csv(file_path) + return df[x_col].tolist(), df[y_col].tolist() + +def create_plot(x_data: list[float], y_data: list[float], + x_label: str, y_label: str, title: str) -> plt.Figure: + """Create a plot from the provided data. + + Args: + x_data: Data for x-axis + y_data: Data for y-axis + x_label: Label for x-axis + y_label: Label for y-axis + title: Plot title + + Returns: + matplotlib Figure object + """ + fig, ax = plt.subplots() + ax.plot(x_data, y_data) + ax.set_xlabel(x_label) + ax.set_ylabel(y_label) + ax.set_title(title) + return fig + +def main() -> None: + parser = argparse.ArgumentParser(description="Create plots from CSV data") + parser.add_argument("file_path", type=Path, help="Path to the CSV file") + parser.add_argument("x_column", type=str, help="Column name for x-axis") + parser.add_argument("y_column", type=str, help="Column name for y-axis") + parser.add_argument("--output", "-o", type=Path, default=Path("plot.png"), + help="Output file path (default: plot.png)") + parser.add_argument("--title", "-t", type=str, default="Data Plot", + help="Plot title (default: Data Plot)") + + args = parser.parse_args() + + x_data, y_data = read_csv_data(args.file_path, args.x_column, args.y_column) + fig = create_plot(x_data, y_data, args.x_column, args.y_column, args.title) + fig.savefig(args.output) + plt.close(fig) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tests/test_plot_data.py b/tests/test_plot_data.py new file mode 100644 index 00000000..ddbcb609 --- /dev/null +++ b/tests/test_plot_data.py @@ -0,0 +1,33 @@ +from pathlib import Path +import pandas as pd +import pytest +from starter_repo.plot_data import read_csv_data, create_plot + +@pytest.fixture +def sample_csv(tmp_path: Path) -> Path: + """Create a sample CSV file for testing.""" + df = pd.DataFrame({ + 'x': [1, 2, 3, 4, 5], + 'y': [2, 4, 6, 8, 10] + }) + file_path = tmp_path / "test.csv" + df.to_csv(file_path, index=False) + return file_path + +def test_read_csv_data(sample_csv: Path) -> None: + """Test reading data from CSV file.""" + x_data, y_data = read_csv_data(sample_csv, 'x', 'y') + assert x_data == [1, 2, 3, 4, 5] + assert y_data == [2, 4, 6, 8, 10] + +def test_create_plot() -> None: + """Test plot creation.""" + x_data = [1, 2, 3] + y_data = [2, 4, 6] + fig = create_plot(x_data, y_data, "X", "Y", "Test Plot") + assert fig is not None + # Basic check that the figure contains the expected elements + ax = fig.axes[0] + assert ax.get_xlabel() == "X" + assert ax.get_ylabel() == "Y" + assert ax.get_title() == "Test Plot" \ No newline at end of file From 800c2f2b3b26ae7155ad0c274768e81acec1a58f Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Tue, 18 Mar 2025 19:03:03 +0000 Subject: [PATCH 2/6] Fix formatting with pre-commit hooks --- .pre-commit-config.yaml | 3 +-- pyproject.toml | 3 +++ src/starter_repo/plot_data.py | 46 ++++++++++++++++++++++------------- tests/test_plot_data.py | 15 ++++++------ 4 files changed, 41 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c04a62cc..89d12d53 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,5 +8,4 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.7.1 hooks: - - id: mypy - additional_dependencies: [types-all] \ No newline at end of file + - id: mypy \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 2a15676e..9fe4c3e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,9 @@ requires = ["hatchling"] build-backend = "hatchling.build" +[tool.hatch.build.targets.wheel] +packages = ["src/starter_repo"] + [project] name = "starter-repo" version = "0.1.0" diff --git a/src/starter_repo/plot_data.py b/src/starter_repo/plot_data.py index 70d2eb7e..e6f1c414 100644 --- a/src/starter_repo/plot_data.py +++ b/src/starter_repo/plot_data.py @@ -1,36 +1,40 @@ #!/usr/bin/env python3 -from pathlib import Path import argparse -import pandas as pd +from pathlib import Path +from typing import List, Tuple + import matplotlib.pyplot as plt -from typing import list, tuple +import pandas as pd + -def read_csv_data(file_path: Path, x_col: str, y_col: str) -> tuple[list[float], list[float]]: +def read_csv_data(file_path: Path, x_col: str, y_col: str) -> Tuple[List[float], List[float]]: """Read data from a CSV file and return specified columns. - + Args: file_path: Path to the CSV file x_col: Name of the column to use for x-axis y_col: Name of the column to use for y-axis - + Returns: Tuple of x and y data as lists """ df = pd.read_csv(file_path) return df[x_col].tolist(), df[y_col].tolist() -def create_plot(x_data: list[float], y_data: list[float], - x_label: str, y_label: str, title: str) -> plt.Figure: + +def create_plot( + x_data: List[float], y_data: List[float], x_label: str, y_label: str, title: str +) -> plt.Figure: """Create a plot from the provided data. - + Args: x_data: Data for x-axis y_data: Data for y-axis x_label: Label for x-axis y_label: Label for y-axis title: Plot title - + Returns: matplotlib Figure object """ @@ -41,22 +45,30 @@ def create_plot(x_data: list[float], y_data: list[float], ax.set_title(title) return fig + def main() -> None: parser = argparse.ArgumentParser(description="Create plots from CSV data") parser.add_argument("file_path", type=Path, help="Path to the CSV file") parser.add_argument("x_column", type=str, help="Column name for x-axis") parser.add_argument("y_column", type=str, help="Column name for y-axis") - parser.add_argument("--output", "-o", type=Path, default=Path("plot.png"), - help="Output file path (default: plot.png)") - parser.add_argument("--title", "-t", type=str, default="Data Plot", - help="Plot title (default: Data Plot)") - + parser.add_argument( + "--output", + "-o", + type=Path, + default=Path("plot.png"), + help="Output file path (default: plot.png)", + ) + parser.add_argument( + "--title", "-t", type=str, default="Data Plot", help="Plot title (default: Data Plot)" + ) + args = parser.parse_args() - + x_data, y_data = read_csv_data(args.file_path, args.x_column, args.y_column) fig = create_plot(x_data, y_data, args.x_column, args.y_column, args.title) fig.savefig(args.output) plt.close(fig) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/tests/test_plot_data.py b/tests/test_plot_data.py index ddbcb609..c1236940 100644 --- a/tests/test_plot_data.py +++ b/tests/test_plot_data.py @@ -1,25 +1,26 @@ from pathlib import Path + import pandas as pd import pytest -from starter_repo.plot_data import read_csv_data, create_plot +from starter_repo.plot_data import create_plot, read_csv_data + @pytest.fixture def sample_csv(tmp_path: Path) -> Path: """Create a sample CSV file for testing.""" - df = pd.DataFrame({ - 'x': [1, 2, 3, 4, 5], - 'y': [2, 4, 6, 8, 10] - }) + df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]}) file_path = tmp_path / "test.csv" df.to_csv(file_path, index=False) return file_path + def test_read_csv_data(sample_csv: Path) -> None: """Test reading data from CSV file.""" - x_data, y_data = read_csv_data(sample_csv, 'x', 'y') + x_data, y_data = read_csv_data(sample_csv, "x", "y") assert x_data == [1, 2, 3, 4, 5] assert y_data == [2, 4, 6, 8, 10] + def test_create_plot() -> None: """Test plot creation.""" x_data = [1, 2, 3] @@ -30,4 +31,4 @@ def test_create_plot() -> None: ax = fig.axes[0] assert ax.get_xlabel() == "X" assert ax.get_ylabel() == "Y" - assert ax.get_title() == "Test Plot" \ No newline at end of file + assert ax.get_title() == "Test Plot" From 79ba8a2bc3016bdd367576fb2edaf1e7fda8834b Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Tue, 18 Mar 2025 19:10:48 +0000 Subject: [PATCH 3/6] Move starter_repo directory to root level --- README.md | 7 +++---- pyproject.toml | 2 +- {src/starter_repo => starter_repo}/plot_data.py | 0 3 files changed, 4 insertions(+), 5 deletions(-) rename {src/starter_repo => starter_repo}/plot_data.py (100%) diff --git a/README.md b/README.md index e9e7cfe3..1cd36c90 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ This repository serves as a template demonstrating Python best practices for dat ## Features ### 1. Data Processing and Visualization -The main script ([src/starter_repo/plot_data.py](src/starter_repo/plot_data.py)) demonstrates: +The main script ([starter_repo/plot_data.py](starter_repo/plot_data.py)) demonstrates: - Reading CSV files using pandas - Creating plots with matplotlib - Modern Python type annotations @@ -58,9 +58,8 @@ GitHub Actions workflows are set up for: ### 5. Project Structure ``` . -|- src/ -| |- starter_repo/ # Source code -| |- plot_data.py # Main script +|- starter_repo/ # Source code +| |- plot_data.py # Main script |- tests/ # Test files | |- test_plot_data.py |- data/ # Sample data diff --git a/pyproject.toml b/pyproject.toml index 9fe4c3e1..63e0f734 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/starter_repo"] +packages = ["starter_repo"] [project] name = "starter-repo" diff --git a/src/starter_repo/plot_data.py b/starter_repo/plot_data.py similarity index 100% rename from src/starter_repo/plot_data.py rename to starter_repo/plot_data.py From 0faeb618c27a9c1f67be0dc2a4521cf96e0eb2f1 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Tue, 18 Mar 2025 19:20:11 +0000 Subject: [PATCH 4/6] Use lower-case type hints and fix test data types --- .github/workflows/lint.yml | 4 ++-- .github/workflows/test.yml | 4 ++-- LICENSE | 21 +++++++++++++++++++++ README.md | 22 +++++++++++++++++++++- mypy.ini | 10 ++++++++++ pyproject.toml | 15 +++++++-------- starter_repo/__init__.py | 3 +++ starter_repo/plot_data.py | 5 ++--- tests/test_plot_data.py | 11 ++++++----- 9 files changed, 74 insertions(+), 21 deletions(-) create mode 100644 LICENSE create mode 100644 mypy.ini create mode 100644 starter_repo/__init__.py diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a687adb7..1083b0a2 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,6 +1,6 @@ name: Lint -on: [push, pull_request] +on: [pull_request] jobs: lint: @@ -10,7 +10,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.8' + python-version: '3.12' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e88e1b24..7ae3c4a6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,13 +1,13 @@ name: Test -on: [push, pull_request] +on: [pull_request] jobs: test: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.11', '3.12'] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..25c11fa0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Graham Neubig + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 1cd36c90..8fae1d57 100644 --- a/README.md +++ b/README.md @@ -88,5 +88,25 @@ pip install -e ".[dev]" 5. Run tests: `pytest` 6. Submit a pull request +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +> **Note**: Without a license, the code is under exclusive copyright by default. This means no one can copy, distribute, or modify your work without facing potential legal consequences. Adding a license (like MIT) explicitly grants these permissions, making it clear how others can use your code. + ## Author -Graham Neubig (neubig@gmail.com) \ No newline at end of file +Graham Neubig (neubig@gmail.com) + +## Citation + +If you use this repository in your research, please cite it using the following BibTeX entry: + +```bibtex +@misc{neubig2025starter, + author = {Graham Neubig}, + title = {Python Project Starter Repository}, + year = {2025}, + publisher = {GitHub}, + journal = {GitHub Repository}, + howpublished = {\url{https://github.com/neubig/starter-repo}} +} \ No newline at end of file diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..95ace867 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,10 @@ +[mypy] +python_version = 3.12 +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +check_untyped_defs = true +namespace_packages = true + +[mypy.plugins.numpy.*] +ignore_errors = true \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 63e0f734..25c05f2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,8 @@ authors = [ ] description = "A starter repository demonstrating Python best practices" readme = "README.md" -requires-python = ">=3.8" +license = "MIT" +requires-python = ">=3.11" dependencies = [ "pandas", "matplotlib", @@ -25,18 +26,16 @@ dev = [ "ruff", "mypy", "pre-commit", + "pandas-stubs", ] [tool.ruff] -select = ["E", "F", "I"] line-length = 100 -[tool.mypy] -python_version = "3.8" -warn_return_any = true -warn_unused_configs = true -disallow_untyped_defs = true -check_untyped_defs = true +[tool.ruff.lint] +select = ["E", "F", "I"] + + [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/starter_repo/__init__.py b/starter_repo/__init__.py new file mode 100644 index 00000000..0c2751dc --- /dev/null +++ b/starter_repo/__init__.py @@ -0,0 +1,3 @@ +"""A starter repository demonstrating Python best practices.""" + +__version__ = "0.1.0" diff --git a/starter_repo/plot_data.py b/starter_repo/plot_data.py index e6f1c414..921ef635 100644 --- a/starter_repo/plot_data.py +++ b/starter_repo/plot_data.py @@ -2,13 +2,12 @@ import argparse from pathlib import Path -from typing import List, Tuple import matplotlib.pyplot as plt import pandas as pd -def read_csv_data(file_path: Path, x_col: str, y_col: str) -> Tuple[List[float], List[float]]: +def read_csv_data(file_path: Path, x_col: str, y_col: str) -> tuple[list[float], list[float]]: """Read data from a CSV file and return specified columns. Args: @@ -24,7 +23,7 @@ def read_csv_data(file_path: Path, x_col: str, y_col: str) -> Tuple[List[float], def create_plot( - x_data: List[float], y_data: List[float], x_label: str, y_label: str, title: str + x_data: list[float], y_data: list[float], x_label: str, y_label: str, title: str ) -> plt.Figure: """Create a plot from the provided data. diff --git a/tests/test_plot_data.py b/tests/test_plot_data.py index c1236940..32bdb060 100644 --- a/tests/test_plot_data.py +++ b/tests/test_plot_data.py @@ -2,13 +2,14 @@ import pandas as pd import pytest + from starter_repo.plot_data import create_plot, read_csv_data @pytest.fixture def sample_csv(tmp_path: Path) -> Path: """Create a sample CSV file for testing.""" - df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]}) + df = pd.DataFrame({"x": [1.0, 2.0, 3.0, 4.0, 5.0], "y": [2.0, 4.0, 6.0, 8.0, 10.0]}) file_path = tmp_path / "test.csv" df.to_csv(file_path, index=False) return file_path @@ -17,14 +18,14 @@ def sample_csv(tmp_path: Path) -> Path: def test_read_csv_data(sample_csv: Path) -> None: """Test reading data from CSV file.""" x_data, y_data = read_csv_data(sample_csv, "x", "y") - assert x_data == [1, 2, 3, 4, 5] - assert y_data == [2, 4, 6, 8, 10] + assert x_data == [1.0, 2.0, 3.0, 4.0, 5.0] + assert y_data == [2.0, 4.0, 6.0, 8.0, 10.0] def test_create_plot() -> None: """Test plot creation.""" - x_data = [1, 2, 3] - y_data = [2, 4, 6] + x_data = [1.0, 2.0, 3.0] + y_data = [2.0, 4.0, 6.0] fig = create_plot(x_data, y_data, "X", "Y", "Test Plot") assert fig is not None # Basic check that the figure contains the expected elements From 8d0481e98effcdd5b37a1ce47b24bbdaa6ffb86b Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Thu, 20 Mar 2025 09:40:33 -0400 Subject: [PATCH 5/6] Fix README --- README.md | 71 ++++++++++++++++++++++--------------------------------- 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 8fae1d57..2f5f5f1b 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,25 @@ # Python Project Starter Repository -This repository serves as a template demonstrating Python best practices for data analysis projects. It includes examples of: -- CSV data processing -- Data visualization with matplotlib -- Command-line argument parsing -- Type annotations -- Testing -- Code quality tools -- Continuous Integration +This repository serves as a template demonstrating Python best practices for data analysis projects. It includes: + +- An example Python program (reading in data and plotting) +- Command-line argument parsing ([argparse](https://docs.python.org/3/library/argparse.html)) +- Code style checking, aka "linting" (with [ruff](https://github.com/astral-sh/ruff)) +- Static type checking (with [mypy](https://mypy.readthedocs.io/)) +- Pre-commit hooks that run these checks automatically (with [pre-commit](https://pre-commit.com/)) +- Testing (with [pytest](https://docs.pytest.org/)) +- Continuous Integration (with [GitHub Actions](https://github.com/features/actions)) +- Package management (with [pip](https://pip.pypa.io/) and [pyproject.toml](https://pip.pypa.io/en/stable/reference/build-system/pyproject-toml/)) +- An open source license ([MIT](https://opensource.org/licenses/MIT)) ## Features ### 1. Data Processing and Visualization -The main script ([starter_repo/plot_data.py](starter_repo/plot_data.py)) demonstrates: -- Reading CSV files using pandas -- Creating plots with matplotlib -- Modern Python type annotations -- Command-line argument parsing with argparse -Example usage: +The main script ([starter_repo/plot_data.py](starter_repo/plot_data.py)) can be replaced with any code that you want to write. + +Installation can be done as follows: + ```bash # Install the package pip install . @@ -28,59 +29,44 @@ python -m starter_repo.plot_data data/sample.csv year population --title "Popula ``` ### 2. Testing -The project uses pytest for testing. Test files are located in the [tests/](tests/) directory. + +Writing unit tests is a good way to ensure that your code behaves as expected, and you can write unit tests before you write the code that you want to test (aka "test-driven development"). Test files are located in the [tests/](tests/) directory. To run tests: + ```bash pip install ".[dev]" # Install development dependencies pytest ``` ### 3. Code Quality Tools + This project uses several tools to maintain code quality: #### Pre-commit Hooks + We use [pre-commit](.pre-commit-config.yaml) with: + - [Ruff](https://github.com/charliermarsh/ruff) for linting and formatting - [mypy](https://mypy.readthedocs.io/) for static type checking To set up pre-commit: + ```bash pip install pre-commit pre-commit install ``` ### 4. Continuous Integration + GitHub Actions workflows are set up for: + - [Linting](.github/workflows/lint.yml): Runs Ruff and mypy - [Testing](.github/workflows/test.yml): Runs pytest on multiple Python versions -### 5. Project Structure -``` -. -|- starter_repo/ # Source code -| |- plot_data.py # Main script -|- tests/ # Test files -| |- test_plot_data.py -|- data/ # Sample data -| |- sample.csv -|- .github/workflows/ # CI configuration -|- pyproject.toml # Project metadata and dependencies -|- .pre-commit-config.yaml # Pre-commit hook configuration -|- README.md -``` - -## Installation - -```bash -# For users -pip install . - -# For developers -pip install -e ".[dev]" -``` ## Contributing + 1. Fork the repository 2. Install development dependencies: `pip install -e ".[dev]"` 3. Install pre-commit hooks: `pre-commit install` @@ -94,11 +80,9 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file > **Note**: Without a license, the code is under exclusive copyright by default. This means no one can copy, distribute, or modify your work without facing potential legal consequences. Adding a license (like MIT) explicitly grants these permissions, making it clear how others can use your code. -## Author -Graham Neubig (neubig@gmail.com) - ## Citation +This was created by [Graham Neubig](https://phontron.com) primarily as an example for student researchers. If you use this repository in your research, please cite it using the following BibTeX entry: ```bibtex @@ -109,4 +93,5 @@ If you use this repository in your research, please cite it using the following publisher = {GitHub}, journal = {GitHub Repository}, howpublished = {\url{https://github.com/neubig/starter-repo}} -} \ No newline at end of file +} +``` From 4e69b7525ea7178e36c7521954d9843f0f11c184 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Thu, 20 Mar 2025 09:42:32 -0400 Subject: [PATCH 6/6] Update linting --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1083b0a2..aa29c8b7 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -18,4 +18,4 @@ jobs: - name: Run ruff run: ruff check . - name: Run mypy - run: mypy src tests \ No newline at end of file + run: mypy starter_repo tests \ No newline at end of file