Skip to content

Commit 534ddaf

Browse files
committed
Use rich click
1 parent 5380bae commit 534ddaf

File tree

13 files changed

+173
-78
lines changed

13 files changed

+173
-78
lines changed

README.md

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,51 +31,54 @@ export RRC_DATA_DIR=/path/to/output
3131
docker run --rm \
3232
-v $RRC_IMAGE_DIR:/data/images \
3333
-v $RRC_DATA_DIR:/data/output \
34-
ghcr.io/reglab/rrc-pipeline:latest ingest
34+
ghcr.io/reglab/rrc-pipeline:latest rrc ingest
3535

3636
# 2. Run OCR
3737
docker run --rm --gpus all \
3838
-v $RRC_IMAGE_DIR:/data/images \
3939
-v $RRC_DATA_DIR:/data/output \
40-
ghcr.io/reglab/rrc-pipeline:latest ocr
40+
ghcr.io/reglab/rrc-pipeline:latest rrc ocr
4141

4242
# 3. Detect covenants
4343
docker run --rm --gpus all \
4444
-v $RRC_IMAGE_DIR:/data/images \
4545
-v $RRC_DATA_DIR:/data/output \
46-
ghcr.io/reglab/rrc-pipeline:latest detect
46+
ghcr.io/reglab/rrc-pipeline:latest rrc detect
4747

4848
# 4. Export results
4949
docker run --rm \
5050
-v $RRC_IMAGE_DIR:/data/images \
5151
-v $RRC_DATA_DIR:/data/output \
52-
ghcr.io/reglab/rrc-pipeline:latest export
52+
ghcr.io/reglab/rrc-pipeline:latest rrc export
5353
```
5454

5555
## Pipeline Stages
5656

57-
### 1. Ingest (`ingest`)
57+
> [!NOTE]
58+
> To see all available commands, run `docker run --rm ghcr.io/reglab/rrc-pipeline:latest rrc --help`.
59+
60+
### 1. Ingest (`rrc ingest`)
5861
- Scans input directory for image files (jpg, jpeg, png, tiff, tif, bmp)
5962
- Validates images can be opened
6063
- Handles multi-page TIFF files
6164
- Creates database records for new images
6265

63-
### 2. OCR (`ocr`)
66+
### 2. OCR (`rrc ocr`)
6467
- Transcribes images using the DocTR OCR library
6568
- Requires GPU acceleration
6669
- Processes only images without existing transcriptions
6770

68-
### 3. Detection (`detect`)
71+
### 3. Detection (`rrc detect`)
6972
- Analyzes transcribed text using our Mistral-based covenant detection model
7073
- Requires GPU acceleration
7174
- Identifies presence of racial covenants and extracts relevant passages
7275
- Processes only transcribed pages without existing predictions
7376

74-
### 4. Export (`export`)
77+
### 4. Export (`rrc export`)
7578
- Exports detection results to CSV format
7679
- Includes confidence scores and extracted covenant text where found
7780

78-
### 5. Pipeline Summary (`summarize`)
81+
### 5. Pipeline Summary (`rrc summarize`)
7982
- Displays current pipeline progress and statistics
8083
- Shows total page counts and processing status
8184
- Reports covenant detection statistics

backend/pyproject.toml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ dependencies = [
2626
"tqdm>=4.67.1",
2727
"click>=8.1.7",
2828
"rich>=13.8.1",
29+
"rich-click>=1.8.9",
2930
]
3031

3132
[tool.uv]
@@ -39,11 +40,7 @@ dev-dependencies = [
3940
homepage = "https://github.com/reglab/rrc-pipeline"
4041

4142
[project.scripts]
42-
ingest = "rrc.ingest.ingest_directory:main"
43-
ocr = "rrc.ocr.transcribe_pending:main"
44-
detect = "rrc.inference.detect_pending:main"
45-
export = "rrc.reporting.export_predictions:main"
46-
summarize = "rrc.reporting.summarize_db:main"
43+
rrc = "rrc.cli:cli"
4744

4845
[build-system]
4946
requires = ["hatchling"]

backend/rrc/cli.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""Main CLI for the RRC (Racial Restrictive Covenants) pipeline."""
2+
3+
import rrc.utils.click as click
4+
from rrc.inference.detect_pending import main as detect_cmd
5+
from rrc.ingest.ingest_directory import main as ingest_cmd
6+
from rrc.ocr.transcribe_pending import main as ocr_cmd
7+
from rrc.reporting.export_predictions import main as export_cmd
8+
from rrc.reporting.summarize_db import main as summarize_cmd
9+
10+
11+
@click.group()
12+
@click.version_option()
13+
def cli():
14+
"""
15+
RRC Pipeline - Racial Restrictive Covenants Detection
16+
17+
A comprehensive pipeline for identifying racial covenants in property deeds.
18+
Process images through OCR, detect covenants with ML models, and export results.
19+
See https://reglab.github.io/racialcovenants/ for more information.
20+
"""
21+
pass
22+
23+
24+
# Register the existing stage commands as subcommands of the CLI group under short names
25+
cli.add_command(ingest_cmd, name="ingest")
26+
cli.add_command(ocr_cmd, name="ocr")
27+
cli.add_command(detect_cmd, name="detect")
28+
cli.add_command(export_cmd, name="export")
29+
cli.add_command(summarize_cmd, name="summarize")
30+
31+
# Provide fallback help text for any command that does not already define its own
32+
ingest_cmd.help = ingest_cmd.help or "Ingest images from a directory into the database"
33+
ocr_cmd.help = ocr_cmd.help or "Transcribe pending pages using OCR"
34+
detect_cmd.help = (
35+
detect_cmd.help or "Detect covenants in transcribed pages using ML models"
36+
)
37+
export_cmd.help = export_cmd.help or "Export covenant predictions to CSV files"
38+
summarize_cmd.help = (
39+
summarize_cmd.help or "Display a summary of the current database state"
40+
)
41+
42+
43+
if __name__ == "__main__":
44+
cli()

backend/rrc/inference/detect_pending.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from pathlib import Path
22

3-
import click
43
import tqdm
54
from rich.console import Console
65
from sqlalchemy import func, select
76
from sqlalchemy.orm import Session, joinedload
87

8+
import rrc.utils.click as click
99
import rrc.utils.io
1010
from rrc.db.models import CovenantPrediction, Page, Provenance
1111
from rrc.db.session import get_session

backend/rrc/inference/service.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ def __init__(self, options: dict[str, Any] = None):
9393
self.model_download_dir = self.options.get("model_download_dir")
9494

9595
def __enter__(self):
96+
if DEFAULT_DEVICE is None:
97+
raise RuntimeError("No CUDA or MPS device available")
9698
self.vllm_model = vllm.LLM(
9799
model=self.model_name_or_path,
98100
device=DEFAULT_DEVICE,

backend/rrc/ingest/ingest_directory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
from pathlib import Path
44
from typing import TypeVar
55

6-
import click
76
import PIL.Image
87
import tqdm
98
from rich.console import Console
109
from sqlalchemy import select
1110

11+
import rrc.utils.click as click
1212
from rrc.db.models import Page
1313
from rrc.db.session import get_session
1414
from rrc.utils.io import get_image_path

backend/rrc/ocr/service.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ def __init__(self, options: dict[str, Any] | None = None):
4545
super().__init__(options)
4646

4747
def __enter__(self):
48+
if DEFAULT_DEVICE is None:
49+
raise RuntimeError("No CUDA or MPS device available")
4850
self.model = ocr_predictor(
4951
det_arch=_DOCTR_DET_ARCH,
5052
reco_arch=_DOCTR_RECO_ARCH,

backend/rrc/ocr/transcribe_pending.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import click
21
import tqdm
32
from rich.console import Console
43
from sqlalchemy import select
54
from sqlalchemy.orm import Session
65

6+
import rrc.utils.click as click
77
from rrc.db.models import Page, Provenance, Transcription
88
from rrc.db.session import get_session
99
from rrc.ocr.service import DoctrOCRService

backend/rrc/reporting/export_predictions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
from datetime import datetime
33
from pathlib import Path
44

5-
import click
65
import tqdm
76
from rich.console import Console
87
from sqlalchemy import select
98
from sqlalchemy.orm import joinedload
109

10+
import rrc.utils.click as click
1111
import rrc.utils.io
1212
from rrc.db.models import CovenantPrediction
1313
from rrc.db.session import get_session

backend/rrc/reporting/summarize_db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import click
21
from rich.console import Console
32
from rich.progress_bar import ProgressBar
43
from rich.table import Table
54
from sqlalchemy import func, select
65

6+
import rrc.utils.click as click
77
from rrc.db.models import CovenantPrediction, Page, Transcription
88
from rrc.db.session import get_session
99

0 commit comments

Comments
 (0)