4 changes: 4 additions & 0 deletions py-modules/tileserver-stats/.gitignore
@@ -0,0 +1,4 @@
.env
.venv
__pycache__
.idea
5 changes: 5 additions & 0 deletions py-modules/tileserver-stats/README.md
@@ -0,0 +1,5 @@
# Tileserver stats

Macrostrat library that harvests and aggregates tileserver request statistics.

**Note**: This library has been archived and subsumed within the [Macrostrat](https://github.com/UW-Macrostrat/macrostrat) library.
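
A minimal usage sketch (not part of the original README), assuming the `tileserver-stats` console script defined in `pyproject.toml` below and a `TILESERVER_STATS_DATABASE` environment variable pointing at the tileserver database:

```python
# Sketch only: exercising the CLI programmatically via Typer's test runner.
# Assumes TILESERVER_STATS_DATABASE is set (e.g. in a .env file).
from typer.testing import CliRunner

from tileserver_stats import app

runner = CliRunner()
result = runner.invoke(app, ["run"])  # aggregate any pending request rows
print(result.output)
```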
727 changes: 727 additions & 0 deletions py-modules/tileserver-stats/poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions py-modules/tileserver-stats/poetry.toml
@@ -0,0 +1,2 @@
[virtualenvs]
in-project = true
26 changes: 26 additions & 0 deletions py-modules/tileserver-stats/pyproject.toml
@@ -0,0 +1,26 @@
[tool.poetry]
authors = ["Daven Quinn <[email protected]>"]
description = "Harvest tileserver statistics"
license = "MIT"
name = "tileserver_stats"
packages = [{include = "tileserver_stats"}]
readme = "README.md"
version = "0.1.0"

# Command-line scripts
[tool.poetry.scripts]
tileserver-stats = "tileserver_stats:app"

[tool.poetry.dependencies]
"macrostrat.database" = "^3.1.1"
python = "^3.8"
python-dotenv = "^0.21.0"
rich = "^12.6.0"
typer = "^0.12.5"

[tool.poetry.group.dev.dependencies]
black = "^24.8.0"

[build-system]
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core"]
79 changes: 79 additions & 0 deletions py-modules/tileserver-stats/tileserver_stats/__init__.py
@@ -0,0 +1,79 @@
from datetime import datetime
from os import environ
from pathlib import Path

from dotenv import load_dotenv
from psycopg2.sql import Literal
from rich import print
from sqlalchemy.sql import text
from typer import Typer

from macrostrat.database import Database
from macrostrat.utils import cmd, relative_path

load_dotenv()

app = Typer(no_args_is_help=True)



@app.command()
def run(truncate: bool = False):
"""Run the update procedure."""
tileserver_db = environ.get("TILESERVER_STATS_DATABASE")
db = Database(tileserver_db)

# Run update
fn = Path(relative_path(__file__, "procedures")) / "run-update.sql"
    # Escape colons so SQLAlchemy's text() does not treat them as bind parameters
    sql = text(fn.read_text().replace(":", r"\:"))

    # Run the update in batches until no request rows remain,
    # committing after each batch and logging per-batch timing
conn = db.engine.connect()
n_results = 10000
start = datetime.now()
step = start
while n_results > 0:
res = conn.execute(sql, execution_options=dict(no_parameters=True)).first()
n_results = res.n_rows
conn.execute(text("COMMIT"))
next_step = datetime.now()
dt = (next_step - step).total_seconds()
print(f"{res.last_row_id} ({dt*1000:.0f} ms)")
step = next_step

if truncate and n_results == 0:
conn.execute(text("TRUNCATE TABLE requests"))

print(f"Total time: {datetime.now() - start}")


@app.command()
def reset(drop: bool = False):
"""Empty the stats schema and re-run the schema creation scripts."""
tileserver_db = environ.get("TILESERVER_STATS_DATABASE")
db = Database(tileserver_db)

if drop:
db.engine.execute("DROP SCHEMA IF EXISTS stats CASCADE")

files = Path(relative_path(__file__, "schema")).glob("*.sql")
files = list(files)
files.sort()

for file in files:
list(db.run_sql(file))


@app.command()
def truncate():
"""Create the stats schema."""
tileserver_db = environ.get("TILESERVER_STATS_DATABASE")
db = Database(tileserver_db)

files = Path(relative_path(__file__, "schema")).glob("*.sql")
files = list(files)
files.sort()

for file in files:
list(db.run_sql(file))
69 changes: 69 additions & 0 deletions py-modules/tileserver-stats/tileserver_stats/procedures/run-update.sql
@@ -0,0 +1,69 @@
WITH a AS (
SELECT
req_id,
layer,
ext,
x,
y,
z,
substring(nullif(referrer, '') from '(?:.*://)?(?:www\.)?([^(/:)?]*)') referrer,
app,
app_version,
date_trunc('day', time) date
FROM requests
WHERE req_id > (SELECT last_row_id FROM stats.processing_status ORDER BY last_row_time DESC LIMIT 1)
ORDER BY req_id
LIMIT 100000
),
b AS (
INSERT INTO stats.day_index (layer, ext, referrer, app, app_version, date, num_requests)
SELECT
layer,
ext,
coalesce(referrer, 'none'),
coalesce(app, 'none'),
coalesce(app_version, 'none'),
date,
count(*)
FROM a
GROUP BY layer, ext, app, referrer, app_version, date
ON CONFLICT (layer, ext, referrer, app, app_version, date)
DO UPDATE SET
num_requests = stats.day_index.num_requests + EXCLUDED.num_requests
RETURNING *
),
reduced_complexity_locations AS (
SELECT
layer,
ext,
CASE WHEN z > 8 THEN
x >> (z - 8) -- Bit shift right by the difference between the current zoom level and 8
ELSE x END x,
CASE WHEN z > 8 THEN
y >> (z - 8)
ELSE y END y,
CASE WHEN z > 8 THEN 8 ELSE z END z,
z orig_z
FROM a
),
c AS (
INSERT INTO stats.location_index (layer, ext, x, y, z, orig_z, num_requests)
SELECT
layer,
ext,
x,
y,
z,
orig_z,
count(*) num_requests
FROM reduced_complexity_locations
GROUP BY layer, ext, x, y, z, orig_z
ON CONFLICT (layer, ext, x, y, z, orig_z)
DO UPDATE SET
num_requests = stats.location_index.num_requests + EXCLUDED.num_requests
),
d AS (
INSERT INTO stats.processing_status (last_row_id, last_row_time)
SELECT req_id, time FROM requests WHERE req_id = (SELECT max(req_id) FROM a)
)
SELECT max(req_id) last_row_id, count(*) n_rows FROM a;
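
The zoom-reduction step in `reduced_complexity_locations` above is easier to follow outside SQL; a hypothetical Python equivalent of the same arithmetic:

```python
# Hypothetical helper mirroring the CTE: tiles deeper than zoom 8 are
# re-indexed to their zoom-8 ancestor by right-shifting x and y by (z - 8).
def reduce_tile(x: int, y: int, z: int, max_z: int = 8):
    if z <= max_z:
        return x, y, z
    shift = z - max_z
    return x >> shift, y >> shift, max_z

# Example: a zoom-12 tile aggregates into its zoom-8 parent.
assert reduce_tile(2742, 1638, 12) == (171, 102, 8)
```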
30 changes: 30 additions & 0 deletions py-modules/tileserver-stats/tileserver_stats/schema/schema.sql
@@ -0,0 +1,30 @@
CREATE SCHEMA IF NOT EXISTS stats;

CREATE TABLE IF NOT EXISTS stats.processing_status (
last_row_id integer NOT NULL,
last_row_time timestamp without time zone DEFAULT now()
);

CREATE TABLE IF NOT EXISTS stats.day_index (
layer text NOT NULL,
ext text NOT NULL,
referrer text NOT NULL,
app text NOT NULL,
app_version text NOT NULL,
date timestamp without time zone NOT NULL,
num_requests integer NOT NULL,
UNIQUE (layer, ext, referrer, app, app_version, date)
);

CREATE TABLE IF NOT EXISTS stats.location_index (
layer text NOT NULL,
ext text NOT NULL,
x integer NOT NULL,
y integer NOT NULL,
z integer NOT NULL,
orig_z integer NOT NULL,
num_requests integer NOT NULL,
UNIQUE (layer, ext, x, y, z, orig_z)
);

INSERT INTO stats.processing_status (last_row_id) VALUES (0);
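
A sketch of how the aggregates might be read back (the query is illustrative, not from the repository), using plain SQLAlchemy against the same `TILESERVER_STATS_DATABASE` URL the CLI reads:

```python
# Illustrative read of stats.day_index: total requests per layer per day.
from os import environ

from sqlalchemy import create_engine, text

engine = create_engine(environ["TILESERVER_STATS_DATABASE"])
with engine.connect() as conn:
    rows = conn.execute(text(
        "SELECT date, layer, sum(num_requests) AS n "
        "FROM stats.day_index "
        "GROUP BY date, layer "
        "ORDER BY date DESC, n DESC LIMIT 20"
    ))
    for date, layer, n in rows:
        print(date, layer, n)
```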