diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..aa29c8b7 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,21 @@ +name: Lint + +on: [pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ".[dev]" + - name: Run ruff + run: ruff check . + - name: Run mypy + run: mypy starter_repo tests \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..7ae3c4a6 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,22 @@ +name: Test + +on: [pull_request] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.11', '3.12'] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ".[dev]" + - name: Run tests + run: pytest \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..316c25f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +venv/ +env/ +ENV/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Project specific +*.png +*.csv +!data/sample.csv \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..89d12d53 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: 
v0.1.6 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.7.1 + hooks: + - id: mypy \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..25c11fa0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Graham Neubig + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 0139dc2e..2f5f5f1b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,97 @@ -# starter-repo -An example starter repo for Python projects +# Python Project Starter Repository + +This repository serves as a template demonstrating Python best practices for data analysis projects. 
It includes: + +- An example Python program (reading in data and plotting) +- Command-line argument parsing ([argparse](https://docs.python.org/3/library/argparse.html)) +- Code style checking, aka "linting" (with [ruff](https://github.com/astral-sh/ruff)) +- Static type checking (with [mypy](https://mypy.readthedocs.io/)) +- Pre-commit hooks that run these checks automatically (with [pre-commit](https://pre-commit.com/)) +- Testing (with [pytest](https://docs.pytest.org/)) +- Continuous Integration (with [GitHub Actions](https://github.com/features/actions)) +- Package management (with [pip](https://pip.pypa.io/) and [pyproject.toml](https://pip.pypa.io/en/stable/reference/build-system/pyproject-toml/)) +- An open source license ([MIT](https://opensource.org/licenses/MIT)) + +## Features + +### 1. Data Processing and Visualization + +The main script ([starter_repo/plot_data.py](starter_repo/plot_data.py)) can be replaced with any code that you want to write. + +Installation and basic usage are as follows: + +```bash +# Install the package +pip install . + +# Create a plot from the sample data +python -m starter_repo.plot_data data/sample.csv year population --title "Population Growth" -o population.png +``` + +### 2. Testing + +Writing unit tests is a good way to ensure that your code behaves as expected, and you can write unit tests before you write the code that you want to test (aka "test-driven development"). Test files are located in the [tests/](tests/) directory. + +To run tests: + +```bash +pip install ".[dev]" # Install development dependencies +pytest +``` + +### 3. Code Quality Tools + +This project uses several tools to maintain code quality: + +#### Pre-commit Hooks + +We use [pre-commit](.pre-commit-config.yaml) with: + +- [Ruff](https://github.com/astral-sh/ruff) for linting and formatting +- [mypy](https://mypy.readthedocs.io/) for static type checking + +To set up pre-commit: + +```bash +pip install pre-commit +pre-commit install +``` + +### 4.
Continuous Integration + +GitHub Actions workflows are set up for: + +- [Linting](.github/workflows/lint.yml): Runs Ruff and mypy +- [Testing](.github/workflows/test.yml): Runs pytest on multiple Python versions + + +## Contributing + +1. Fork the repository +2. Install development dependencies: `pip install -e ".[dev]"` +3. Install pre-commit hooks: `pre-commit install` +4. Make your changes +5. Run tests: `pytest` +6. Submit a pull request + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +> **Note**: Without a license, the code is under exclusive copyright by default. This means no one can copy, distribute, or modify your work without facing potential legal consequences. Adding a license (like MIT) explicitly grants these permissions, making it clear how others can use your code. + +## Citation + +This was created by [Graham Neubig](https://phontron.com) primarily as an example for student researchers. +If you use this repository in your research, please cite it using the following BibTeX entry: + +```bibtex +@misc{neubig2025starter, + author = {Graham Neubig}, + title = {Python Project Starter Repository}, + year = {2025}, + publisher = {GitHub}, + journal = {GitHub Repository}, + howpublished = {\url{https://github.com/neubig/starter-repo}} +} +``` diff --git a/data/sample.csv b/data/sample.csv new file mode 100644 index 00000000..c1fe84b9 --- /dev/null +++ b/data/sample.csv @@ -0,0 +1,12 @@ +year,population +2000,6115 +2001,6214 +2002,6312 +2003,6411 +2004,6510 +2005,6609 +2006,6709 +2007,6809 +2008,6909 +2009,7009 +2010,7109 \ No newline at end of file diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..95ace867 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,10 @@ +[mypy] +python_version = 3.12 +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +check_untyped_defs = true +namespace_packages = true + +[mypy.plugins.numpy.*] +ignore_errors = true \ No 
def read_csv_data(file_path: Path, x_col: str, y_col: str) -> tuple[list[float], list[float]]:
    """Read two columns from a CSV file and return them as lists.

    Args:
        file_path: Path to the CSV file
        x_col: Name of the column to use for x-axis
        y_col: Name of the column to use for y-axis

    Returns:
        Tuple of x and y data as lists

    Raises:
        KeyError: If ``x_col`` or ``y_col`` is not a column of the CSV file.
    """
    df = pd.read_csv(file_path)
    # Fail early with an actionable message instead of pandas' bare KeyError('<name>').
    # dict.fromkeys de-duplicates while keeping order (x_col may equal y_col).
    missing = [col for col in dict.fromkeys((x_col, y_col)) if col not in df.columns]
    if missing:
        raise KeyError(
            f"Column(s) {missing} not found in {file_path}; "
            f"available columns: {list(df.columns)}"
        )
    return df[x_col].tolist(), df[y_col].tolist()
+ + Args: + file_path: Path to the CSV file + x_col: Name of the column to use for x-axis + y_col: Name of the column to use for y-axis + + Returns: + Tuple of x and y data as lists + """ + df = pd.read_csv(file_path) + return df[x_col].tolist(), df[y_col].tolist() + + +def create_plot( + x_data: list[float], y_data: list[float], x_label: str, y_label: str, title: str +) -> plt.Figure: + """Create a plot from the provided data. + + Args: + x_data: Data for x-axis + y_data: Data for y-axis + x_label: Label for x-axis + y_label: Label for y-axis + title: Plot title + + Returns: + matplotlib Figure object + """ + fig, ax = plt.subplots() + ax.plot(x_data, y_data) + ax.set_xlabel(x_label) + ax.set_ylabel(y_label) + ax.set_title(title) + return fig + + +def main() -> None: + parser = argparse.ArgumentParser(description="Create plots from CSV data") + parser.add_argument("file_path", type=Path, help="Path to the CSV file") + parser.add_argument("x_column", type=str, help="Column name for x-axis") + parser.add_argument("y_column", type=str, help="Column name for y-axis") + parser.add_argument( + "--output", + "-o", + type=Path, + default=Path("plot.png"), + help="Output file path (default: plot.png)", + ) + parser.add_argument( + "--title", "-t", type=str, default="Data Plot", help="Plot title (default: Data Plot)" + ) + + args = parser.parse_args() + + x_data, y_data = read_csv_data(args.file_path, args.x_column, args.y_column) + fig = create_plot(x_data, y_data, args.x_column, args.y_column, args.title) + fig.savefig(args.output) + plt.close(fig) + + +if __name__ == "__main__": + main() diff --git a/tests/test_plot_data.py b/tests/test_plot_data.py new file mode 100644 index 00000000..32bdb060 --- /dev/null +++ b/tests/test_plot_data.py @@ -0,0 +1,35 @@ +from pathlib import Path + +import pandas as pd +import pytest + +from starter_repo.plot_data import create_plot, read_csv_data + + +@pytest.fixture +def sample_csv(tmp_path: Path) -> Path: + """Create a sample CSV file 
@pytest.fixture
def sample_csv(tmp_path: Path) -> Path:
    """Create a two-column sample CSV file in a temporary directory for testing."""
    df = pd.DataFrame({"x": [1.0, 2.0, 3.0, 4.0, 5.0], "y": [2.0, 4.0, 6.0, 8.0, 10.0]})
    file_path = tmp_path / "test.csv"
    df.to_csv(file_path, index=False)
    return file_path


def test_read_csv_data(sample_csv: Path) -> None:
    """Test reading data from CSV file."""
    x_data, y_data = read_csv_data(sample_csv, "x", "y")
    assert x_data == [1.0, 2.0, 3.0, 4.0, 5.0]
    assert y_data == [2.0, 4.0, 6.0, 8.0, 10.0]


def test_read_csv_data_missing_column(sample_csv: Path) -> None:
    """Test that requesting a column that is not in the file raises KeyError."""
    with pytest.raises(KeyError):
        read_csv_data(sample_csv, "x", "does_not_exist")


def test_create_plot() -> None:
    """Test plot creation."""
    # Local import: pyplot is only needed here to release the figure afterwards.
    import matplotlib.pyplot as plt

    x_data = [1.0, 2.0, 3.0]
    y_data = [2.0, 4.0, 6.0]
    fig = create_plot(x_data, y_data, "X", "Y", "Test Plot")
    try:
        assert fig is not None
        # Basic check that the figure contains the expected elements
        ax = fig.axes[0]
        assert ax.get_xlabel() == "X"
        assert ax.get_ylabel() == "Y"
        assert ax.get_title() == "Test Plot"
        # Exactly one line should have been drawn for the single data series.
        assert len(ax.lines) == 1
    finally:
        # Figures created through pyplot stay registered until closed; do not
        # leak them into later tests.
        plt.close(fig)