Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.0
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.14.0
hooks:
- id: mypy
additional_dependencies: [pydantic>=2.13.0]
20 changes: 19 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,25 @@
IMAGE_NAME = extractor-agent
PORT = 3000

.PHONY: build run stop clean test start shell logs
.PHONY: build run stop clean test start shell logs lint format lint-fix typecheck install-hooks

# Pre-commit hooks
install-hooks:
uv run pre-commit install

# Linting and Formatting
lint:
uv run ruff check .

format:
uv run ruff format .

lint-fix:
uv run ruff check --fix .
uv run ruff format .

typecheck:
uv run mypy .

# Build and run the container
start: build run
Expand Down
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,35 @@ docker run -p 3000:3000 -t govuk-ai-graph-tools-app
---


---

## Development and Code Quality

### 1. Manual Checks
You can run the full suite of checks using the `Makefile`:

```bash
# Run all checks (note: make format rewrites files in place rather than only reporting problems)
make lint && make format && make typecheck

# Run individual checks
make lint
make format
make typecheck
```

### 2. Pre-commit Hooks
The project is configured with `pre-commit` to automatically run these checks before every `git commit`.

To install the hooks in your local repository:
```bash
make install-hooks
```

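Once installed, your code will be automatically linted (with safe auto-fixes applied), formatted, and type-checked whenever you commit. If a hook modifies any files, the commit is aborted; re-stage the changes and commit again. If you need to skip the hooks (e.g., for an urgent WIP commit), you can use `git commit --no-verify`.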

---

## Tests

```bash
Expand Down
106 changes: 61 additions & 45 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,119 +1,130 @@
import asyncio
import fsspec
import json
import logging
import os
import re
import time
import uuid

from asgiref.wsgi import WsgiToAsgi
from dotenv import load_dotenv
from flask import Flask, request, jsonify, render_template
from src.visualiser_graph_generator import generate_graph, generate_output_path
from src.visualiser_graph_loader import load_json_file, extract_path_parts, visualiser_graph_file_path
from flask import Flask, jsonify, render_template, request
from werkzeug.exceptions import BadRequest

from src.utils import (
update_job_status,
read_job_status,
get_job_id_for_path,
get_active_job_status,
background_run_extraction,
resume_interrupted_jobs
get_active_job_status,
get_job_id_for_path,
read_job_status,
resume_interrupted_jobs,
update_job_status,
)
from src.visualiser_graph_generator import generate_output_path
from src.visualiser_graph_loader import (
extract_path_parts,
load_json_file,
visualiser_graph_file_path,
)
from werkzeug.exceptions import BadRequest


load_dotenv()

# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler()
]
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
handlers=[logging.StreamHandler()],
)
logger = logging.getLogger(__name__)


def create_app():
app = Flask(__name__)

@app.route('/graph', methods=['GET'])
@app.route("/graph", methods=["GET"])
def graph_page():
"""Serve the Cytoscape graph viewer page."""
source_path_param = request.args.get('source_path')
source_path_param = request.args.get("source_path")

# Validate the source_path format
if source_path_param:
extract_path_parts(source_path_param)

return render_template('graph.html', source_path=source_path_param or '')
return render_template("graph.html", source_path=source_path_param or "")

@app.route('/graph-viewmodel', methods=['GET'])
@app.route("/graph-viewmodel", methods=["GET"])
async def graph_viewmodel():
"""Serve the graph data as JSON for the frontend."""
try:
source_path_param = request.args.get('source_path')
source_path_param = request.args.get("source_path")

graph_filepath = visualiser_graph_file_path(source_path_param)

graph_data = load_json_file(graph_filepath)

logger.info('Graph data loaded successfully.')
logger.info("Graph data loaded successfully.")
return jsonify(graph_data), 200
except Exception as e:
app.logger.error(f"Error loading graph data: {str(e)}")
return jsonify({"error": "Error loading graph data."}), 500

@app.route('/healthcheck/ready', methods=['GET'])
@app.route("/healthcheck/ready", methods=["GET"])
def health_check():
"""Simple health check endpoint."""
return "Application OK", 200

@app.route('/extract', methods=['GET'])
@app.route("/extract", methods=["GET"])
async def extract_quotes():
"""
Endpoint that runs the Cytoscape graph generation logic based on graph.json.
"""
try:
source_path = request.args.get('source_path')
source_path = request.args.get("source_path")
if not source_path:
return jsonify({"error": "Missing 'source_path' query parameter"}), 400

input_path, output_path = generate_output_path(source_path)
job_id = get_job_id_for_path(source_path)

active_status = get_active_job_status(job_id)
if active_status:
logger.info(f"Duplicate request for {source_path}. Job {job_id} is already in progress.")
return jsonify({
'job_id': job_id,
'status': 'already_running',
'message': f'A graph generation job is already in progress for {source_path}',
'output_path': output_path
}), 202
logger.info(
f"Duplicate request for {source_path}. Job {job_id} is already in progress."
)
return jsonify(
{
"job_id": job_id,
"status": "already_running",
"message": (
f"A graph generation job is already in progress for {source_path}"
),
"output_path": output_path,
}
), 202

initial_status = {
"job_id": job_id,
"status": "pending",
"source_path": source_path,
"created_at": time.time()
"created_at": time.time(),
}
update_job_status(job_id, initial_status)

asyncio.create_task(background_run_extraction(job_id, input_path, output_path, initial_status))
asyncio.create_task(
background_run_extraction(job_id, input_path, output_path, initial_status)
)

return jsonify({
'job_id': job_id,
'status': 'accepted',
'message': f'Graph generation started in background for {source_path}',
'output_path': output_path
}), 202
return jsonify(
{
"job_id": job_id,
"status": "accepted",
"message": f"Graph generation started in background for {source_path}",
"output_path": output_path,
}
), 202

except Exception as e:
app.logger.error(f"Error starting background task: {str(e)}")
return jsonify({"error": str(e)}), 500

@app.route('/status/<job_id>', methods=['GET'])
@app.route("/status/<job_id>", methods=["GET"])
def get_status(job_id):
"""Check the status of a background job from S3."""
status_info = read_job_status(job_id)
Expand All @@ -127,8 +138,10 @@ def handle_bad_request(e):

return app


class LifespanMiddleware:
"""ASGI middleware to handle startup and shutdown events."""

def __init__(self, app):
self.app = app

Expand All @@ -146,14 +159,17 @@ async def __call__(self, scope, receive, send):
return
return await self.app(scope, receive, send)


def create_asgi_app():
flask_app = create_app()
asgi_app = WsgiToAsgi(flask_app)
return LifespanMiddleware(asgi_app)


if __name__ == "__main__":
asgi_app = create_asgi_app()
import uvicorn

port = int(os.getenv("PORT", 3000))
logger.info(f"Starting Uvicorn server on port {port}...")
uvicorn.run(asgi_app, host='0.0.0.0', port=port)
uvicorn.run(asgi_app, host="0.0.0.0", port=port)
30 changes: 30 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,34 @@ dev = [
"pytest>=9.0.3",
"pytest-asyncio>=1.3.0",
"pytest-mock>=3.15.1",
"ruff>=0.9.0",
"mypy>=1.14.0",
"pre-commit>=4.0.0",
]

[tool.mypy]
plugins = ["pydantic.mypy"]
follow_imports = "silent"
warn_redundant_casts = true
warn_unused_ignores = true
disallow_any_generics = false
check_untyped_defs = true
no_implicit_reexport = true
ignore_missing_imports = true

[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true

[tool.ruff]
line-length = 100
target-version = "py312"

[tool.ruff.lint]
select = ["E", "F", "I", "W"]
ignore = []

[tool.ruff.lint.isort]
combine-as-imports = true
lines-after-imports = 2
Loading
Loading