Skip to content

Commit 704847c

Browse files
authored
Merge pull request #132 from UNDO-project/hotspot_redesign_backend
Hotspot redesign backend
2 parents abb6d4c + 2889466 commit 704847c

35 files changed

Lines changed: 3988 additions & 217 deletions

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,19 @@ The pipeline consists of three main agents:
2020
- **Comprehensive visualizations**: Heatmaps, hotspots, route maps, and statistical charts
2121
- **Spatial optimization**: Efficient GeoDataFrame indexing for large camera datasets
2222

23+
## Hotspot methodology
24+
25+
`v2.4.0` ships a four-layer hotspot analysis. Each layer answers a different question — they complement rather than replace each other.
26+
27+
1. **HDBSCAN clusters** (`<city>_hotspots.geojson` + `<city>_hotspot_polygons.geojson`)
28+
Density-based clustering with locally-adaptive `ε`, computed in UTM metres so a "20-metre cluster" means the same thing at any latitude. Replaces the prior single-`ε` DBSCAN, which fused downtown into one blob and missed sparser suburban clusters (cf. GraphTrace 2025 urban-analytics benchmarks).
29+
2. **Planar KDE density surface** (`<city>_heatmap.html` + `<city>_density.geojson`)
30+
FFT-based kernel density on a metric grid; the folium heatmap is *derived* from the surface rather than from folium's opaque built-in interpolation, and the same surface contours into a GeoJSON layer at the 50/75/90/95 percentiles for researcher-grade work.
31+
3. **Getis-Ord Gi\* hex grid** (`<city>_gi_star.geojson` + `<city>_gi_star.png`)
32+
The spatial statistic researchers and journalists know from ArcGIS/QGIS "Hot Spot Analysis": per-hex z-score, FDR-adjusted p-value, and a five-class hot/cold classification — the defensible statistical layer per Amnesty's [*Decode Surveillance NYC*](https://decoders.amnesty.org/projects/decode-surveillance) methodology.
33+
4. **Cameras per road-km** (`<city>_density_metrics.json`)
34+
The single citable headline number that Stanford's [*Surveilling Surveillance*](https://reglab.stanford.edu/projects/surveilling-surveillance/) (2021) made canonical for cross-city comparison. Normalises by the pedestrian network humans actually use, neutralising the park/water/industrial-zone bias of cameras/km². Reuses the routing agent's cached OSMnx graph.
35+
2336
# Installation
2437

2538
## Prerequisites

cli_local_test_pipeline.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,31 @@ pytest tests/tools/test_mapping_tools.py
7070
echo "Done..."
7171
echo "==============================================="
7272

73+
echo "Running geo projection tests"
74+
pytest tests/tools/test_geo_projection.py
75+
echo "Done..."
76+
echo "==============================================="
77+
78+
echo "Running hotspot clustering tests"
79+
pytest tests/tools/test_hotspot_clustering.py
80+
echo "Done..."
81+
echo "==============================================="
82+
83+
echo "Running KDE density tests"
84+
pytest tests/tools/test_density_kde.py
85+
echo "Done..."
86+
echo "==============================================="
87+
88+
echo "Running Gi* spatial stats tests"
89+
pytest tests/tools/test_spatial_stats.py
90+
echo "Done..."
91+
echo "==============================================="
92+
93+
echo "Running density-metrics tests (cameras-per-road-km)"
94+
pytest tests/tools/test_density_metrics.py
95+
echo "Done..."
96+
echo "==============================================="
97+
7398
echo "Running chart tools tests"
7499
pytest tests/tools/test_chart_tools.py
75100
echo "Done..."

main.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,17 @@ def parse_args():
6161
default=None,
6262
help=(
6363
"Override the preset for hotspot generation. Toggles both the "
64-
"DBSCAN clustering output and the matplotlib plot together."
64+
"HDBSCAN clustering output and the matplotlib plot together."
65+
),
66+
)
67+
parser.add_argument(
68+
"--gi-star",
69+
action=argparse.BooleanOptionalAction,
70+
default=None,
71+
dest="gi_star",
72+
help=(
73+
"Override the preset for the Getis-Ord Gi* hex-grid layer. "
74+
"Toggles both the GeoJSON artifact and the choropleth chart."
6575
),
6676
)
6777
parser.add_argument(
@@ -84,6 +94,18 @@ def parse_args():
8494
"(--report / --no-report)."
8595
),
8696
)
97+
parser.add_argument(
98+
"--density-metrics",
99+
action=argparse.BooleanOptionalAction,
100+
default=None,
101+
dest="density_metrics",
102+
help=(
103+
"Override the preset for the cameras-per-road-km headline "
104+
"metric (--density-metrics / --no-density-metrics). Defaults "
105+
"on for both BASIC and FULL presets — opt out when you don't "
106+
"want to touch the OSMnx graph cache."
107+
),
108+
)
87109
parser.add_argument(
88110
"--output-dir",
89111
help="Output directory for results",
@@ -260,6 +282,12 @@ def display_results(results: dict):
260282
elif analyze.get("success"):
261283
elements = analyze.get("element_count", 0)
262284
table.add_row("Elements Analyzed", f"[green]{elements}[/green]")
285+
metrics = analyze.get("density_metrics") or {}
286+
if metrics.get("cameras_per_road_km") is not None:
287+
table.add_row(
288+
"Cameras / road-km",
289+
f"[green]{metrics['cameras_per_road_km']:.3f}[/green]",
290+
)
263291
else:
264292
table.add_row(
265293
"Analysis", f"[red]Failed: {analyze.get('error', 'Unknown')}[/red]"
@@ -306,6 +334,12 @@ def display_results(results: dict):
306334
files_table.add_row("Hotspots Data", str(analyze["hotspots_path"]))
307335
if analyze.get("plot_hotspots"):
308336
files_table.add_row("Hotspots Plot", str(analyze["plot_hotspots"]))
337+
if analyze.get("gi_star_path"):
338+
files_table.add_row("Gi* GeoJSON", str(analyze["gi_star_path"]))
339+
if analyze.get("gi_star_chart"):
340+
files_table.add_row("Gi* Plot", str(analyze["gi_star_chart"]))
341+
if analyze.get("density_metrics_path"):
342+
files_table.add_row("Density Metrics", str(analyze["density_metrics_path"]))
309343
if analyze.get("chart_path"):
310344
files_table.add_row("Statistics Chart", str(analyze["chart_path"]))
311345

@@ -372,6 +406,9 @@ def main():
372406
if args.hotspots is not None:
373407
config_kwargs["generate_hotspots"] = args.hotspots
374408
config_kwargs["plot_hotspots"] = args.hotspots
409+
if args.gi_star is not None:
410+
config_kwargs["generate_gi_star"] = args.gi_star
411+
config_kwargs["plot_gi_star"] = args.gi_star
375412
if args.charts is not None:
376413
config_kwargs["generate_chart"] = args.charts
377414
config_kwargs["plot_zone_sensitivity"] = args.charts
@@ -381,6 +418,8 @@ def main():
381418
config_kwargs["plot_install_timeline"] = args.charts
382419
if args.report is not None:
383420
config_kwargs["generate_report"] = args.report
421+
if args.density_metrics is not None:
422+
config_kwargs["compute_density_metrics"] = args.density_metrics
384423

385424
# Routing configuration
386425
if args.enable_routing:

pyproject.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "agentic-counter-surveillance"
3-
version = "2.3.0"
3+
version = "2.4.0"
44
description = "Multi-agent pipeline that scrapes OpenStreetMap surveillance-camera data, enriches it with a local LLM, renders maps and charts, generates an LLM-written city report, and computes low-surveillance walking routes."
55
readme = "README.md"
66
requires-python = ">=3.11"
@@ -9,6 +9,9 @@ dependencies = [
99
"fastapi>=0.123.5",
1010
"folium>=0.19.6",
1111
"geopandas>=1.0.1",
12+
"h3>=4.4.2",
13+
"hdbscan>=0.8.42",
14+
"kdepy>=1.1.12",
1215
"langchain>=0.3.27",
1316
"langchain-community>=0.3.30",
1417
"langchain-core>=0.3.77",
@@ -18,6 +21,8 @@ dependencies = [
1821
"osmnx>=2.0.6",
1922
"pre-commit>=4.2.0",
2023
"pydantic-settings>=2.9.1",
24+
"pyproj>=3.7.1",
25+
"pysal>=26.1",
2126
"pytest>=8.3.5",
2227
"python-multipart>=0.0.20",
2328
"requests>=2.32.3",

src/agents/langchain_analyzer.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,15 @@ def analyze(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
9090
"generate_geojson": input_data.get("generate_geojson", True),
9191
"generate_heatmap": input_data.get("generate_heatmap", False),
9292
"generate_hotspots": input_data.get("generate_hotspots", False),
93+
"generate_gi_star": input_data.get("generate_gi_star", False),
9394
"compute_stats": input_data.get("compute_stats", True),
9495
"generate_chart": input_data.get("generate_chart", False),
9596
"plot_zone_sensitivity": input_data.get("plot_zone_sensitivity", False),
9697
"plot_sensitivity_reasons": input_data.get(
9798
"plot_sensitivity_reasons", False
9899
),
99100
"plot_hotspots": input_data.get("plot_hotspots", False),
101+
"plot_gi_star": input_data.get("plot_gi_star", False),
100102
"plot_operator_distribution": input_data.get(
101103
"plot_operator_distribution", False
102104
),
@@ -105,7 +107,12 @@ def analyze(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
105107
),
106108
"plot_install_timeline": input_data.get("plot_install_timeline", False),
107109
"generate_report": input_data.get("generate_report", False),
110+
"compute_density_metrics": input_data.get("compute_density_metrics", False),
108111
"force_rerender": input_data.get("force_rerender", False),
112+
# Non-bool passthrough used by the density-metrics step to
113+
# reach the routing agent's graph cache. ``None`` is fine —
114+
# OSMnx will geocode the city name alone.
115+
"country": input_data.get("country"),
109116
}
110117

111118
try:
@@ -133,6 +140,12 @@ def analyze(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
133140
response["heatmap_path"] = result["heatmap_path"]
134141
if "hotspots_path" in result:
135142
response["hotspots_path"] = result["hotspots_path"]
143+
if "hotspot_polygons_path" in result:
144+
response["hotspot_polygons_path"] = result["hotspot_polygons_path"]
145+
if "density_path" in result:
146+
response["density_path"] = result["density_path"]
147+
if "gi_star_path" in result:
148+
response["gi_star_path"] = result["gi_star_path"]
136149
if "stats" in result:
137150
response["stats"] = result["stats"]
138151
if "pie_chart_path" in result:
@@ -145,6 +158,8 @@ def analyze(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
145158
]
146159
if "hotspots_chart" in result:
147160
response["hotspots_chart"] = result["hotspots_chart"]
161+
if "gi_star_chart" in result:
162+
response["gi_star_chart"] = result["gi_star_chart"]
148163
if "operator_chart_path" in result:
149164
response["operator_chart_path"] = result["operator_chart_path"]
150165
if "manufacturer_chart_path" in result:
@@ -155,6 +170,10 @@ def analyze(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
155170
]
156171
if "report_path" in result:
157172
response["report_path"] = result["report_path"]
173+
if "density_metrics_path" in result:
174+
response["density_metrics_path"] = result["density_metrics_path"]
175+
if "density_metrics" in result:
176+
response["density_metrics"] = result["density_metrics"]
158177

159178
# Add visualization errors if any
160179
if "visualization_errors" in result:

src/api/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ async def lifespan(app: FastAPI):
6060
2. Monitor progress via GET /api/v1/pipeline/{task_id}
6161
3. Retrieve generated files via /api/v1/outputs/...
6262
""",
63-
version="2.3.0",
63+
version="2.4.0",
6464
docs_url="/docs",
6565
redoc_url="/redoc",
6666
lifespan=lifespan,

src/api/models/requests.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,15 @@ class OutputOverrides(BaseModel):
3434
default=None, description="Render the folium heatmap (HTML)"
3535
)
3636
generate_hotspots: Optional[bool] = Field(
37-
default=None, description="Compute DBSCAN hotspots GeoJSON"
37+
default=None,
38+
description="Compute HDBSCAN hotspots (centroids + convex-hull polygons)",
39+
)
40+
generate_gi_star: Optional[bool] = Field(
41+
default=None,
42+
description=(
43+
"Compute the Getis-Ord Gi* hex-grid statistical layer "
44+
"(<city>_gi_star.geojson)."
45+
),
3846
)
3947
plot_zone_sensitivity: Optional[bool] = Field(
4048
default=None, description="Render the zone-sensitivity stacked bar chart"
@@ -45,6 +53,10 @@ class OutputOverrides(BaseModel):
4553
plot_hotspots: Optional[bool] = Field(
4654
default=None, description="Render the hotspots scatter plot (PNG)"
4755
)
56+
plot_gi_star: Optional[bool] = Field(
57+
default=None,
58+
description="Render the Gi* choropleth chart (<city>_gi_star.png)",
59+
)
4860
plot_operator_distribution: Optional[bool] = Field(
4961
default=None,
5062
description="Render the top-N operator distribution bar chart",
@@ -61,6 +73,13 @@ class OutputOverrides(BaseModel):
6173
default=None,
6274
description="Generate an LLM-written markdown city report (<city>_report.md)",
6375
)
76+
compute_density_metrics: Optional[bool] = Field(
77+
default=None,
78+
description=(
79+
"Compute the cameras-per-road-km headline metric "
80+
"(<city>_density_metrics.json)."
81+
),
82+
)
6483

6584

6685
class ScrapeRequest(BaseModel):

src/api/models/responses.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ class VersionResponse(BaseModel):
192192
json_schema_extra={
193193
"examples": [
194194
{
195-
"version": "2.3.0",
195+
"version": "2.4.0",
196196
"api_version": "v1",
197197
"description": "Agentic Surveillance Research API",
198198
}

src/api/routes/health.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ async def version_info():
3838
:return: Version details
3939
"""
4040
return VersionResponse(
41-
version="2.3.0",
41+
version="2.4.0",
4242
api_version="v1",
4343
description="Agentic Surveillance Research API",
4444
)

src/api/routes/outputs.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,79 @@ async def get_city_charts(city: str, chart: str):
209209
)
210210

211211

212+
#: Hotspot-redesign artifacts addressable by name.
213+
#:
214+
#: Each entry maps the URL leaf the frontend uses to the on-disk filename
215+
#: the analyzer chain writes. Two reasons for keeping this as a single
216+
#: declarative dict rather than five hand-rolled handlers:
217+
#: 1. The dashboard's layer-toggle UI iterates the names server-side via
218+
#: ``/list``; the names here must match those filenames byte-for-byte
219+
#: or the toggle goes dead.
220+
#: 2. The set grows together; keeping the registry literal avoids the next layer
221+
#: being silently added to ``/list`` but missing a dedicated route.
222+
_HOTSPOT_ARTIFACTS: dict = {
223+
"density.geojson": "_density.geojson",
224+
"density_metrics.json": "_density_metrics.json",
225+
"gi_star.geojson": "_gi_star.geojson",
226+
"hotspots.geojson": "_hotspots.geojson",
227+
"hotspot_polygons.geojson": "_hotspot_polygons.geojson",
228+
}
229+
230+
231+
def _serve_named_artifact(city: str, suffix: str) -> FileResponse:
232+
"""
233+
Resolve ``<city><suffix>`` inside the per-city directory and serve it.
234+
235+
Centralises the validate-then-stream dance so each artifact handler
236+
is one line. ``suffix`` is the literal trailing chunk after the
237+
city stem (e.g. ``"_density.geojson"``), matching the analyzer
238+
chain's filename construction at the call site.
239+
"""
240+
base = resolve_city_base(city)
241+
file_path = base / f"{city}{suffix}"
242+
validate_path(file_path)
243+
return FileResponse(
244+
path=file_path,
245+
media_type=get_mime_type(file_path),
246+
filename=file_path.name,
247+
)
248+
249+
250+
@router.get("/{city}/density.geojson")
251+
async def get_city_density_geojson(city: str):
252+
"""Serve the KDE density contours: ``<city>_density.geojson``."""
253+
return _serve_named_artifact(city, _HOTSPOT_ARTIFACTS["density.geojson"])
254+
255+
256+
@router.get("/{city}/density_metrics.json")
257+
async def get_city_density_metrics(city: str):
258+
"""Serve the headline density metric: ``<city>_density_metrics.json``."""
259+
return _serve_named_artifact(city, _HOTSPOT_ARTIFACTS["density_metrics.json"])
260+
261+
262+
@router.get("/{city}/gi_star.geojson")
263+
async def get_city_gi_star_geojson(city: str):
264+
"""Serve the Getis-Ord Gi* hex grid: ``<city>_gi_star.geojson``."""
265+
return _serve_named_artifact(city, _HOTSPOT_ARTIFACTS["gi_star.geojson"])
266+
267+
268+
@router.get("/{city}/hotspots.geojson")
269+
async def get_city_hotspots_geojson(city: str):
270+
"""
271+
Serve the HDBSCAN cluster centroids:
272+
``<city>_hotspots.geojson``. Same filename as the prior DBSCAN
273+
artifact — the schema underneath is the new one
274+
(``cluster_id``/``count``/``persistence``).
275+
"""
276+
return _serve_named_artifact(city, _HOTSPOT_ARTIFACTS["hotspots.geojson"])
277+
278+
279+
@router.get("/{city}/hotspot_polygons.geojson")
280+
async def get_city_hotspot_polygons_geojson(city: str):
281+
"""Serve the HDBSCAN convex hulls: ``<city>_hotspot_polygons.geojson``."""
282+
return _serve_named_artifact(city, _HOTSPOT_ARTIFACTS["hotspot_polygons.geojson"])
283+
284+
212285
@router.get("/{city}/report")
213286
async def get_city_report(city: str):
214287
"""

0 commit comments

Comments
 (0)