From 663c463bf66aea2937b2909b9eee73203cd16e0e Mon Sep 17 00:00:00 2001
From: nullrouten0
Date: Tue, 10 Feb 2026 22:17:30 -0800
Subject: [PATCH] Add Python top-repeaters script for get-samples

---
 README.md                                    |  26 +++
 server/scripts/top-repeaters-from-samples.js | 187 +++++++++++++++++
 server/scripts/top-repeaters-from-samples.py | 208 +++++++++++++++++++
 3 files changed, 421 insertions(+)
 create mode 100755 server/scripts/top-repeaters-from-samples.js
 create mode 100755 server/scripts/top-repeaters-from-samples.py

diff --git a/README.md b/README.md
index d6f7628..d48c3f6 100644
--- a/README.md
+++ b/README.md
@@ -236,6 +236,32 @@ node scripts/migrate-samples.js --source https://source.domain.com/get-samples -
 
 # Add delay between requests (useful for rate limiting)
 node scripts/migrate-samples.js --delay 100
+
+## top repeaters query logic (matches map "Top Repeaters")
+
+The map's **Top Repeaters** panel counts how many **6-character geohash coverage tiles** each repeater appears in (not raw sample count).
+
+Logic:
+1. Read samples from `GET /get-samples?p=<prefix>`
+2. Group samples by 6-char geohash prefix (`sample.name.substring(0, 6)`)
+3. For each tile, collect unique repeater IDs from `metadata.path`
+4. Count how many tiles each repeater appears in
+5. Sort descending by tile count
+
+Use the helper script:
+
+```bash
+cd server
+node scripts/top-repeaters-from-samples.js --url http://localhost:3000/get-samples
+python3 scripts/top-repeaters-from-samples.py --url http://localhost:3000/get-samples
+
+# Optional filters
+node scripts/top-repeaters-from-samples.js --url https://your.domain/get-samples --prefix c23n --limit 25
+python3 scripts/top-repeaters-from-samples.py --url https://your.domain/get-samples --prefix c23n --limit 25
+node scripts/top-repeaters-from-samples.js --url https://your.domain/get-samples --json
+python3 scripts/top-repeaters-from-samples.py --url https://your.domain/get-samples --json
+```
+
 ## Troubleshooting
 
 **Database connection issues:**
diff --git a/server/scripts/top-repeaters-from-samples.js b/server/scripts/top-repeaters-from-samples.js
new file mode 100755
index 0000000..1d5c46d
--- /dev/null
+++ b/server/scripts/top-repeaters-from-samples.js
@@ -0,0 +1,187 @@
+#!/usr/bin/env node
+
+/**
+ * Build "Top Repeaters" stats from the /get-samples API.
+ *
+ * This mirrors the map's Top Repeaters logic:
+ * - group samples by 6-char geohash prefix (coverage tile)
+ * - for each tile, collect unique repeater IDs seen in sample path
+ * - count how many tiles each repeater appears in
+ * - sort descending by tile count
+ *
+ * Usage:
+ *   node scripts/top-repeaters-from-samples.js --url http://localhost:3000/get-samples
+ *   node scripts/top-repeaters-from-samples.js --url https://example.com/get-samples --limit 25
+ *   node scripts/top-repeaters-from-samples.js --url https://example.com/get-samples --prefix c23n
+ */
+
+const DEFAULT_URL = 'http://localhost:3000/get-samples';
+const DEFAULT_LIMIT = 50;
+
+function parseArgs(argv) {
+  const config = {
+    url: DEFAULT_URL,
+    limit: DEFAULT_LIMIT,
+    prefix: null,
+    json: false,
+  };
+
+  for (let i = 0; i < argv.length; i++) {
+    const arg = argv[i];
+
+    if (arg === '--url' && argv[i + 1]) {
+      config.url = argv[++i];
+    } else if (arg === '--limit' && argv[i + 1]) {
+      config.limit = Math.max(1, parseInt(argv[++i], 10) || DEFAULT_LIMIT);
+    } else if (arg === '--prefix' && argv[i + 1]) {
+      config.prefix = String(argv[++i]);
+    } else if (arg === '--json') {
+      config.json = true;
+    } else if (arg === '--help' || arg === '-h') {
+      printHelp();
+      process.exit(0);
+    }
+  }
+
+  return config;
+}
+
+function printHelp() {
+  console.log(`Top Repeaters from /get-samples\n
+Options:
+  --url <url>      Full /get-samples endpoint URL (default: ${DEFAULT_URL})
+  --limit <n>      Max rows to print (default: ${DEFAULT_LIMIT})
+  --prefix <p>     Optional geohash prefix filter (sent as ?p=<p>)
+  --json           Emit JSON instead of a table
+  -h, --help       Show this help\n`);
+}
+
+async function fetchSamples(config) {
+  const url = new URL(config.url);
+  if (config.prefix) {
+    url.searchParams.set('p', config.prefix);
+  }
+
+  const response = await fetch(url, {
+    headers: { Accept: 'application/json' },
+  });
+
+  if (!response.ok) {
+    throw new Error(`HTTP ${response.status} ${response.statusText}`);
+  }
+
+  const data = await response.json();
+  if (!data || !Array.isArray(data.keys)) {
+    throw new Error('Invalid response payload (expected { keys: [...] })');
+  }
+
+  return { samples: data.keys, url: url.toString() };
+}
+
+function getSampleGeohash(sample) {
+  if (typeof sample?.name === 'string' && sample.name.length > 0) return sample.name;
+  if (typeof sample?.hash === 'string' && sample.hash.length > 0) return sample.hash;
+  return null;
+}
+
+function getSamplePath(sample) {
+  const metadataPath = sample?.metadata?.path;
+  if (Array.isArray(metadataPath)) return metadataPath;
+
+  const flatPath = sample?.path;
+  if (Array.isArray(flatPath)) return flatPath;
+
+  const legacyPath = sample?.metadata?.rptr ?? sample?.rptr;
+  if (Array.isArray(legacyPath)) return legacyPath;
+
+  return [];
+}
+
+function computeTopRepeaters(samples) {
+  // geohashPrefix -> Set<repeaterId>
+  const tileToRepeaters = new Map();
+
+  for (const sample of samples) {
+    const geohash = getSampleGeohash(sample);
+    if (!geohash || geohash.length < 6) continue;
+
+    const tile = geohash.substring(0, 6);
+    const path = getSamplePath(sample);
+    if (!tileToRepeaters.has(tile)) {
+      tileToRepeaters.set(tile, new Set());
+    }
+
+    const bucket = tileToRepeaters.get(tile);
+    for (const rawId of path) {
+      if (rawId === null || rawId === undefined) continue;
+      const id = String(rawId).toLowerCase();
+      if (id) bucket.add(id);
+    }
+  }
+
+  // repeaterId -> tileCount
+  const repeaterTileCounts = new Map();
+
+  for (const repeaterSet of tileToRepeaters.values()) {
+    for (const repeaterId of repeaterSet) {
+      repeaterTileCounts.set(repeaterId, (repeaterTileCounts.get(repeaterId) || 0) + 1);
+    }
+  }
+
+  const rows = Array.from(repeaterTileCounts.entries())
+    .map(([id, geohashCount]) => ({ id, geohashCount }))
+    .sort((a, b) => {
+      if (b.geohashCount !== a.geohashCount) return b.geohashCount - a.geohashCount;
+      return a.id.localeCompare(b.id);
+    });
+
+  return {
+    rows,
+    stats: {
+      sampleCount: samples.length,
+      coverageTileCount: tileToRepeaters.size,
+      repeaterCount: rows.length,
+    },
+  };
+}
+
+function printTable(topRows, stats, sourceUrl) {
+  console.log(`Source: ${sourceUrl}`);
+  console.log(`Samples: ${stats.sampleCount}`);
+  console.log(`Coverage tiles (6-char geohash): ${stats.coverageTileCount}`);
+  console.log(`Repeaters with coverage: ${stats.repeaterCount}`);
+  console.log('');
+
+  if (topRows.length === 0) {
+    console.log('No repeater coverage found in samples.');
+    return;
+  }
+
+  const header = `${'#'.padStart(4)} ${'Repeater'.padEnd(12)} CoverageTiles`;
+  console.log(header);
+  console.log('-'.repeat(header.length));
+
+  topRows.forEach((row, index) => {
+    console.log(`${String(index + 1).padStart(4)} ${row.id.padEnd(12)} ${row.geohashCount}`);
+  });
+}
+
+async function main() {
+  const config = parseArgs(process.argv.slice(2));
+
+  const { samples, url } = await fetchSamples(config);
+  const { rows, stats } = computeTopRepeaters(samples);
+  const topRows = rows.slice(0, config.limit);
+
+  if (config.json) {
+    console.log(JSON.stringify({ source: url, stats, topRepeaters: topRows }, null, 2));
+    return;
+  }
+
+  printTable(topRows, stats, url);
+}
+
+main().catch((error) => {
+  console.error(`Error: ${error.message}`);
+  process.exit(1);
+});
diff --git a/server/scripts/top-repeaters-from-samples.py b/server/scripts/top-repeaters-from-samples.py
new file mode 100755
index 0000000..b782fe9
--- /dev/null
+++ b/server/scripts/top-repeaters-from-samples.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python3
+"""
+Build "Top Repeaters" stats from the /get-samples API.
+
+This mirrors the map's Top Repeaters logic:
+- group samples by 6-char geohash prefix (coverage tile)
+- for each tile, collect unique repeater IDs seen in sample path
+- count how many tiles each repeater appears in
+- sort descending by tile count
+
+Usage:
+  python3 scripts/top-repeaters-from-samples.py --url http://localhost:3000/get-samples
+  python3 scripts/top-repeaters-from-samples.py --url https://example.com/get-samples --limit 25
+  python3 scripts/top-repeaters-from-samples.py --url https://example.com/get-samples --prefix c23n
+"""
+
+import argparse
+import json
+import sys
+from typing import Any, Dict, List, Optional
+from urllib.parse import urlencode, urlparse, parse_qsl, urlunparse
+from urllib.request import Request, urlopen
+
+DEFAULT_URL = "http://localhost:3000/get-samples"
+DEFAULT_LIMIT = 50
+
+
+def build_url(base_url: str, prefix: Optional[str]) -> str:
+    parsed = urlparse(base_url)
+    query = dict(parse_qsl(parsed.query, keep_blank_values=True))
+    if prefix:
+        query["p"] = prefix
+
+    return urlunparse(
+        (
+            parsed.scheme,
+            parsed.netloc,
+            parsed.path,
+            parsed.params,
+            urlencode(query),
+            parsed.fragment,
+        )
+    )
+
+
+def fetch_samples(url: str) -> Dict[str, Any]:
+    request = Request(url, headers={"Accept": "application/json"})
+
+    with urlopen(request) as response:  # nosec B310 - endpoint is user-provided by design
+        status = getattr(response, "status", None)
+        if status is not None and status >= 400:
+            raise RuntimeError(f"HTTP {status}")
+
+        payload = response.read().decode("utf-8")
+
+    data = json.loads(payload)
+    if not isinstance(data, dict) or not isinstance(data.get("keys"), list):
+        raise RuntimeError("Invalid response payload (expected { keys: [...] })")
+
+    return data
+
+
+def get_sample_geohash(sample: Dict[str, Any]) -> Optional[str]:
+    name = sample.get("name")
+    if isinstance(name, str) and name:
+        return name
+
+    flat_hash = sample.get("hash")
+    if isinstance(flat_hash, str) and flat_hash:
+        return flat_hash
+
+    return None
+
+
+def get_sample_path(sample: Dict[str, Any]) -> List[Any]:
+    metadata = sample.get("metadata")
+    if isinstance(metadata, dict):
+        metadata_path = metadata.get("path")
+        if isinstance(metadata_path, list):
+            return metadata_path
+
+    flat_path = sample.get("path")
+    if isinstance(flat_path, list):
+        return flat_path
+
+    if isinstance(metadata, dict):
+        legacy_metadata_path = metadata.get("rptr")
+        if isinstance(legacy_metadata_path, list):
+            return legacy_metadata_path
+
+    legacy_flat_path = sample.get("rptr")
+    if isinstance(legacy_flat_path, list):
+        return legacy_flat_path
+
+    return []
+
+
+def compute_top_repeaters(samples: List[Dict[str, Any]]) -> Dict[str, Any]:
+    # geohashPrefix -> Set[repeaterId]
+    tile_to_repeaters: Dict[str, set] = {}
+
+    for sample in samples:
+        if not isinstance(sample, dict):
+            continue
+
+        geohash = get_sample_geohash(sample)
+        if not geohash or len(geohash) < 6:
+            continue
+
+        tile = geohash[:6]
+        path = get_sample_path(sample)
+
+        if tile not in tile_to_repeaters:
+            tile_to_repeaters[tile] = set()
+
+        bucket = tile_to_repeaters[tile]
+        for raw_id in path:
+            if raw_id is None:
+                continue
+            repeater_id = str(raw_id).lower()
+            if repeater_id:
+                bucket.add(repeater_id)
+
+    # repeaterId -> tileCount
+    repeater_tile_counts: Dict[str, int] = {}
+    for repeater_set in tile_to_repeaters.values():
+        for repeater_id in repeater_set:
+            repeater_tile_counts[repeater_id] = repeater_tile_counts.get(repeater_id, 0) + 1
+
+    rows = [
+        {"id": repeater_id, "geohashCount": count}
+        for repeater_id, count in repeater_tile_counts.items()
+    ]
+    rows.sort(key=lambda r: (-r["geohashCount"], r["id"]))
+
+    return {
+        "rows": rows,
+        "stats": {
+            "sampleCount": len(samples),
+            "coverageTileCount": len(tile_to_repeaters),
+            "repeaterCount": len(rows),
+        },
+    }
+
+
+def print_table(rows: List[Dict[str, Any]], stats: Dict[str, int], source_url: str) -> None:
+    print(f"Source: {source_url}")
+    print(f"Samples: {stats['sampleCount']}")
+    print(f"Coverage tiles (6-char geohash): {stats['coverageTileCount']}")
+    print(f"Repeaters with coverage: {stats['repeaterCount']}")
+    print("")
+
+    if not rows:
+        print("No repeater coverage found in samples.")
+        return
+
+    header = f"{'#':>4} {'Repeater':<12} CoverageTiles"
+    print(header)
+    print("-" * len(header))
+
+    for idx, row in enumerate(rows, start=1):
+        repeater_id = str(row.get("id", ""))
+        geohash_count = row.get("geohashCount", 0)
+        print(f"{idx:>4} {repeater_id:<12} {geohash_count}")
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Compute Top Repeaters from /get-samples")
+    parser.add_argument("--url", default=DEFAULT_URL, help="Full /get-samples endpoint URL")
+    parser.add_argument("--limit", type=int, default=DEFAULT_LIMIT, help="Max rows to print")
+    parser.add_argument("--prefix", default=None, help="Optional geohash prefix filter (?p=<prefix>)")
+    parser.add_argument("--json", action="store_true", help="Emit JSON instead of a table")
+    args = parser.parse_args()
+
+    if args.limit < 1:
+        args.limit = DEFAULT_LIMIT
+
+    return args
+
+
+def main() -> int:
+    args = parse_args()
+    query_url = build_url(args.url, args.prefix)
+
+    data = fetch_samples(query_url)
+    samples = data["keys"]
+    result = compute_top_repeaters(samples)
+
+    top_rows = result["rows"][: args.limit]
+    if args.json:
+        output = {
+            "source": query_url,
+            "stats": result["stats"],
+            "topRepeaters": top_rows,
+        }
+        print(json.dumps(output, indent=2))
+    else:
+        print_table(top_rows, result["stats"], query_url)
+
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        raise SystemExit(main())
+    except Exception as exc:  # pragma: no cover
+        print(f"Error: {exc}", file=sys.stderr)
+        raise SystemExit(1)