Skip to content

Commit 624753c

Browse files
committed
Merge branch 'tilserver-stats'
* tilserver-stats: Update tileserver stats calculation Added archival notice Add 'truncate' command Updated requests processing code Updated gitignore Updated procedures to have less nulls Relax python version constraint Does everything and takes two minutes for steno Basic module structure
2 parents 9a46190 + 71728d8 commit 624753c

File tree

11 files changed

+959
-3
lines changed

11 files changed

+959
-3
lines changed

.idea/copilot.data.migration.agent.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/copilot.data.migration.edit.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/macrostrat/core/config.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@
33

44
from dotenv import load_dotenv
55
from dynaconf import Dynaconf, Validator
6+
from macrostrat.app_frame.control_command import BackendType
7+
from macrostrat.utils import get_logger
68
from sqlalchemy.engine import make_url
79
from sqlalchemy.engine.url import URL
810
from toml import load as load_toml
911

10-
from macrostrat.app_frame.control_command import BackendType
11-
from macrostrat.utils import get_logger
12-
1312
from .resolvers import cast_sources, setup_source_roots_environment
1413
from .utils import convert_to_string, find_macrostrat_config, path_list_resolver
1514

@@ -82,6 +81,8 @@ def get(self, key, default=None):
8281
# Backend information. We could potentially infer this from other environment variables
8382
Validator("backend", default="kubernetes", cast=BackendType),
8483
Validator("sources", cast=cast_sources, default=None),
84+
# Settings to control the location of arbitrary named databases
85+
Validator("databases", default={}),
8586
)
8687

8788
macrostrat_env = getattr(settings, "env", "default")
@@ -120,6 +121,9 @@ def get(self, key, default=None):
120121
docker_localhost = getattr(settings, "docker_localhost", "localhost")
121122
PG_DATABASE_DOCKER = PG_DATABASE.replace("localhost", docker_localhost)
122123

124+
# add this to the settings.databases mapping
125+
settings.databases["macrostrat"] = PG_DATABASE
126+
123127
# Set environment variables
124128
url = make_url(PG_DATABASE)
125129

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.env
2+
.venv
3+
__pycache__
4+
.idea
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Tileserver stats
2+
3+
Macrostrat library that subsets tileserver statistics.
4+
5+
**Note**: This library has been archived and subsumed within the [Macrostrat](https://github.com/UW-Macrostrat/macrostrat) library.

py-modules/tileserver-stats/poetry.lock

Lines changed: 727 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[virtualenvs]
2+
in-project = true
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[tool.poetry]
2+
authors = ["Daven Quinn <[email protected]>"]
3+
description = "Harvest tileserver statistics"
4+
license = "MIT"
5+
name = "tileserver_stats"
6+
packages = [{include = "tileserver_stats"}]
7+
readme = "README.md"
8+
version = "0.1.0"
9+
10+
# Command-line scripts
11+
[tool.poetry.scripts]
12+
tileserver-stats = "tileserver_stats:app"
13+
14+
[tool.poetry.dependencies]
15+
"macrostrat.database" = "^3.1.1"
16+
python = "^3.8"
17+
python-dotenv = "^0.21.0"
18+
rich = "^12.6.0"
19+
typer = "^0.12.5"
20+
21+
[tool.poetry.group.dev.dependencies]
22+
black = "^24.8.0"
23+
24+
[build-system]
25+
build-backend = "poetry.core.masonry.api"
26+
requires = ["poetry-core"]
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from datetime import datetime
2+
from os import environ
3+
from pathlib import Path
4+
5+
from dotenv import load_dotenv
6+
from macrostrat.database import Database
7+
from macrostrat.utils import relative_path
8+
from rich import print
9+
from sqlalchemy.sql import text
10+
from typer import Typer
11+
12+
# Load settings (e.g. TILESERVER_STATS_DATABASE) from a local .env file, if any.
load_dotenv()

# Command-line application; subcommands are registered below with @app.command().
# (A bare `app.command()` call used to follow here. It only built a decorator
# and discarded it, so it registered nothing and has been removed.)
app = Typer(no_args_is_help=True)
17+
18+
19+
@app.command()
def run(truncate: bool = False):
    """Run the update procedure."""
    # Repeatedly executes procedures/run-update.sql against the database named
    # by TILESERVER_STATS_DATABASE until a pass reports zero processed rows,
    # printing the last processed row id and the elapsed time of each pass.
    # With --truncate, the `requests` table is emptied once everything has
    # been processed.
    tileserver_db = environ.get("TILESERVER_STATS_DATABASE")
    db = Database(tileserver_db)

    # Run update
    fn = Path(relative_path(__file__, "procedures")) / "run-update.sql"
    # The script contains literal colons (e.g. in a regex); escape them so
    # text() does not parse them as bind parameters. "\\:" is the same string
    # the old "\:" produced, without the invalid-escape-sequence warning.
    sql = text(fn.read_text().replace(":", "\\:"))

    # check query timing
    n_results = 10000
    start = datetime.now()
    step = start
    # Use the connection as a context manager so it is always released.
    with db.engine.connect() as conn:
        while n_results > 0:
            res = conn.execute(sql, execution_options=dict(no_parameters=True)).first()
            n_results = res.n_rows
            # Persist each batch before starting the next one.
            conn.execute(text("COMMIT"))
            next_step = datetime.now()
            dt = (next_step - step).total_seconds()
            print(f"{res.last_row_id} ({dt*1000:.0f} ms)")
            step = next_step

        if truncate and n_results == 0:
            conn.execute(text("TRUNCATE TABLE requests"))
            # Without an explicit commit the truncate is rolled back when the
            # connection is released.
            conn.execute(text("COMMIT"))

    print(f"Total time: {datetime.now() - start}")
47+
48+
49+
@app.command()
def reset(drop: bool = False):
    """Empty the stats schema and re-run the schema creation scripts."""
    # The `stats` schema is only dropped when --drop is given; the scripts in
    # the schema/ directory are always re-run, in filename order.
    tileserver_db = environ.get("TILESERVER_STATS_DATABASE")
    db = Database(tileserver_db)

    if drop:
        # Engine.execute() no longer exists in SQLAlchemy 2.0; run the
        # statement on an explicit connection whose transaction is committed
        # when the block exits.
        with db.engine.begin() as conn:
            conn.execute(text("DROP SCHEMA IF EXISTS stats CASCADE"))

    schema_dir = Path(relative_path(__file__, "schema"))
    for file in sorted(schema_dir.glob("*.sql")):
        # run_sql's result is wrapped in list() to consume it fully, as before.
        list(db.run_sql(file))
64+
65+
66+
@app.command()
def truncate():
    """Create the stats schema."""
    # NOTE(review): despite its name, this command truncates nothing. It runs
    # every *.sql script in the schema/ directory in filename order, exactly
    # like `reset` without --drop. Confirm whether it should be renamed or
    # given real truncation logic.
    db = Database(environ.get("TILESERVER_STATS_DATABASE"))

    schema_dir = Path(relative_path(__file__, "schema"))
    for script in sorted(schema_dir.glob("*.sql")):
        # Wrap in list() to consume whatever run_sql returns.
        list(db.run_sql(script))
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
-- Fold one batch of raw tile requests into the aggregated stats tables and
-- record how far processing has advanced. Returns a single row holding the
-- highest request id in the batch (last_row_id) and the batch size (n_rows).
-- A result with n_rows = 0 means there was nothing left to process.
WITH a AS (
  -- Next batch of unprocessed requests, oldest first
  SELECT
    req_id,
    layer,
    ext,
    x,
    y,
    z,
    -- Reduce the referrer to its bare host name (scheme and www prefix removed)
    substring(nullif(referrer, '') from '(?:.*://)?(?:www\.)?([^(/:)?]*)') referrer,
    app,
    app_version,
    date_trunc('day', time) date
  FROM requests
  -- Fall back to 0 when no checkpoint exists yet. Otherwise the comparison is
  -- against NULL and no row ever qualifies. Assumes req_id is a positive integer.
  WHERE req_id > coalesce(
    (SELECT last_row_id FROM stats.processing_status ORDER BY last_row_time DESC LIMIT 1),
    0
  )
  ORDER BY req_id
  LIMIT 100000
),
b AS (
  -- Per-day request counts by layer, format, referrer and client application
  INSERT INTO stats.day_index (layer, ext, referrer, app, app_version, date, num_requests)
  SELECT
    layer,
    ext,
    coalesce(referrer, 'none'),
    coalesce(app, 'none'),
    coalesce(app_version, 'none'),
    date,
    count(*)
  FROM a
  -- Group by the same coalesced values that are inserted, so a NULL and a
  -- literal 'none' cannot produce two rows with one conflict key, which would
  -- make the upsert fail.
  GROUP BY
    layer,
    ext,
    coalesce(referrer, 'none'),
    coalesce(app, 'none'),
    coalesce(app_version, 'none'),
    date
  ON CONFLICT (layer, ext, referrer, app, app_version, date)
  DO UPDATE SET
    num_requests = stats.day_index.num_requests + EXCLUDED.num_requests
  RETURNING *
),
reduced_complexity_locations AS (
  -- Collapse tiles deeper than zoom 8 onto their zoom-8 ancestor tile
  SELECT
    layer,
    ext,
    CASE WHEN z > 8 THEN
      x >> (z - 8) -- Bit shift right by the difference between the current zoom level and 8
    ELSE x END x,
    CASE WHEN z > 8 THEN
      y >> (z - 8)
    ELSE y END y,
    CASE WHEN z > 8 THEN 8 ELSE z END z,
    z orig_z
  FROM a
),
c AS (
  -- Request counts per (reduced) tile location and original zoom level
  INSERT INTO stats.location_index (layer, ext, x, y, z, orig_z, num_requests)
  SELECT
    layer,
    ext,
    x,
    y,
    z,
    orig_z,
    count(*) num_requests
  FROM reduced_complexity_locations
  GROUP BY layer, ext, x, y, z, orig_z
  ON CONFLICT (layer, ext, x, y, z, orig_z)
  DO UPDATE SET
    num_requests = stats.location_index.num_requests + EXCLUDED.num_requests
),
d AS (
  -- Checkpoint the last request of this batch. Inserts nothing for an empty batch.
  INSERT INTO stats.processing_status (last_row_id, last_row_time)
  SELECT req_id, time FROM requests WHERE req_id = (SELECT max(req_id) FROM a)
)
SELECT max(req_id) last_row_id, count(*) n_rows FROM a;

0 commit comments

Comments
 (0)