Skip to content
Merged
Show file tree
Hide file tree
Changes from 52 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
30a47a7
Update autocomplete script for PostgreSQL
davenquinn Dec 17, 2025
863c144
Start working on lookup_strat_names
davenquinn Dec 17, 2025
6deba42
Updated lookup_strat_names script, not quite finished
davenquinn Dec 17, 2025
a596910
Update lookup-strat-names script
davenquinn Dec 17, 2025
39b3a38
Partially working lookup-strat-names script
davenquinn Dec 17, 2025
cf5d25c
Format code and sort imports
davenquinn Dec 17, 2025
e7232ed
Reverted control flow error
davenquinn Dec 17, 2025
478e917
Merge remote-tracking branch 'origin/modernize-rebuild-scripts' into …
davenquinn Dec 17, 2025
83fecc3
Small control flow improvements
davenquinn Dec 17, 2025
1756b84
Format code and sort imports
davenquinn Dec 18, 2025
d8eae43
Partially working lookup-strat-names script
davenquinn Dec 20, 2025
2f0bb33
Insert strat names
davenquinn Dec 20, 2025
69de255
Greatly speed up strat name insertion
davenquinn Dec 20, 2025
3910dd1
Mark foreign key scripts as alpha
davenquinn Dec 21, 2025
6b60924
Improved linking scripts
davenquinn Dec 21, 2025
34038f6
Updated lookup_strat_names script; still quite slow
davenquinn Dec 22, 2025
0e22d72
Added more queries to notes
davenquinn Dec 23, 2025
ed1c4fc
Merge remote-tracking branch 'origin/modernize-rebuild-scripts' into …
davenquinn Dec 24, 2025
4c62ac3
Merge remote-tracking branch 'origin/main' into modernize-rebuild-scr…
davenquinn Dec 24, 2025
f013b7e
Merge branch 'stratigraphy-ingestion' into modernize-rebuild-scripts
davenquinn Feb 19, 2026
8969bad
Moved sql
davenquinn Feb 19, 2026
897ee8e
Got rid of unused, partially migrated rebuild script
davenquinn Feb 19, 2026
e0d0551
Starting point for lookup_units migration
davenquinn Feb 19, 2026
ba8c768
Lookup units script sort of works
davenquinn Feb 19, 2026
a275c07
Convert to a cached approach to unit age updates
davenquinn Feb 19, 2026
f3e55a8
Streamline unit boundaries
davenquinn Feb 19, 2026
0f54779
Switch to bulk updating of timescales
davenquinn Feb 19, 2026
9457ee1
Fixed lookup_units script and made it efficient
davenquinn Feb 20, 2026
8441328
Added a sequence fixer utility
davenquinn Feb 20, 2026
eeb5b29
Updated column creation utils
davenquinn Feb 20, 2026
4855513
Column shows up
davenquinn Feb 20, 2026
a3a860f
Basic intervals insertion
davenquinn Feb 20, 2026
f46a768
Updated lookup_unit_intervals script
davenquinn Feb 20, 2026
8ac0a3e
Merge branch 'update-units-new' into modernize-rebuild-scripts
davenquinn Feb 20, 2026
b89fc41
Start migrating unit attrs api script
davenquinn Feb 20, 2026
3c3d318
Lookup unit attrs api
davenquinn Feb 20, 2026
0211664
Accumulate JSON for liths entirely in SQL
davenquinn Feb 20, 2026
730807f
Added environments handling
davenquinn Feb 20, 2026
f7c6387
Finished porting over environments
davenquinn Feb 20, 2026
45b3326
Modernized lookup_unit_attrs
davenquinn Feb 20, 2026
e44693a
Unwrap dom/sub lith updates somewhat
davenquinn Feb 20, 2026
9d4e389
Fix and unify dom/sub lith calculations
davenquinn Feb 20, 2026
c630760
Fix submodule reference
davenquinn Feb 20, 2026
4baf4ae
Merge branch 'main' into modernize-rebuild-scripts
davenquinn Feb 20, 2026
4c20d79
Format code and sort imports
davenquinn Feb 20, 2026
0cc9f02
Fix #256 with where clause update
davenquinn Feb 21, 2026
5f9bcc1
Merge branch 'modernize-rebuild-scripts' of https://github.com/UW-Mac…
davenquinn Feb 21, 2026
35ad677
updated mariadb queries to postgresql
amyfromandi Feb 25, 2026
b7d8b59
Format code and sort imports
amyfromandi Feb 26, 2026
277adb8
added to macrostrat cli
amyfromandi Feb 26, 2026
5345e1e
Format code and sort imports
amyfromandi Feb 26, 2026
c1f4bd9
updated autocomplete table to include strat_name_orphans
amyfromandi Feb 27, 2026
1a274cd
fixed concept_id issue. data variance issue now is duplicates after t…
amyfromandi Feb 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions docs/notes/helpful-queries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/**
Extended attributes for the strat names tree.

Lists every 'parent' relationship recorded for a single child strat name,
together with the name and rank of both the parent and the child.
Parents with an empty rank are left out.
Bind :child to the strat name id of the child before running.
*/
SELECT
    st.parent, -- this_name has been renamed to parent
    st.child,
    parent_name.strat_name AS parent_strat_name,
    parent_name.rank AS parent_rank,
    child_name.strat_name AS child_strat_name,
    child_name.rank AS child_rank
FROM macrostrat.strat_tree AS st
INNER JOIN macrostrat.strat_names AS parent_name
    ON st.parent = parent_name.id
INNER JOIN macrostrat.strat_names AS child_name
    ON st.child = child_name.id
WHERE st.child = :child
    AND st.rel = 'parent'
    AND parent_name.rank <> '';

/*
Children that have more than one qualifying parent.

One row per child, with the parent ids and the parent "name rank" labels
collapsed into comma-separated strings. Only 'parent' relationships whose
parent has a non-empty rank are considered.
*/
SELECT
    st.child AS child_id,
    array_to_string(array_agg(st.parent), ', ') AS parent_id, -- this_name has been renamed to parent
    array_to_string(
        array_agg(parent_name.strat_name || ' ' || parent_name.rank::text),
        ', '
    ) AS parent_strat_name,
    child_name.strat_name || ' ' || child_name.rank AS child_strat_name
FROM macrostrat.strat_tree AS st
INNER JOIN macrostrat.strat_names AS parent_name
    ON st.parent = parent_name.id
INNER JOIN macrostrat.strat_names AS child_name
    ON st.child = child_name.id
WHERE st.rel = 'parent'
    AND parent_name.rank <> ''
GROUP BY
    st.child,
    child_name.strat_name,
    child_name.rank
HAVING count(*) > 1;

/* For every parent in the strat tree: the ids of its children and how many there are */
SELECT
    st.parent AS parent_id,
    array_agg(st.child) AS children,
    count(st.child) AS n_children
FROM macrostrat.strat_tree AS st
GROUP BY st.parent;

/* Children that are listed under more than one parent, with those parent ids */
SELECT
    st.child AS id,
    array_agg(st.parent) AS parents,
    count(st.parent) AS n_parents
FROM macrostrat.strat_tree AS st
GROUP BY st.child
HAVING count(st.parent) > 1;

/*
Unified parent and child counts per strat name.

Every row of strat_names is returned. Because both aggregates are
LEFT JOINed, a name that never appears as a parent (or as a child) in
strat_tree gets NULL in the corresponding array and count columns.
*/
WITH child_counts AS (
    -- Children grouped under each parent id
    SELECT
        st.parent AS id,
        array_agg(st.child) AS children,
        count(st.child) AS n_children
    FROM macrostrat.strat_tree AS st
    GROUP BY st.parent
),

parent_counts AS (
    -- Parents grouped under each child id
    SELECT
        st.child AS id,
        array_agg(st.parent) AS parents,
        count(st.parent) AS n_parents
    FROM macrostrat.strat_tree AS st
    GROUP BY st.child
)

SELECT
    id AS strat_name_id,
    strat_name,
    parents,
    children,
    n_parents,
    n_children
FROM macrostrat.strat_names
LEFT JOIN child_counts
    USING (id)
LEFT JOIN parent_counts
    USING (id);

/** Seven strat names have more than one parent
60687, 64337, 66418, 4092, 5012, 1712, 80846
*/


/* For each concept_id: how many strat names carry it, and which ones */
SELECT
    sn.concept_id,
    count(*) AS n_names,
    array_agg(sn.strat_name) AS strat_names
FROM macrostrat.strat_names AS sn
GROUP BY sn.concept_id;

/* ref_id of every strat name whose concept_id is NULL */
SELECT sn.ref_id
FROM macrostrat.strat_names AS sn
WHERE sn.concept_id IS NULL;

/**
Number of strat_names_meta rows per URL host name ("data domain").

Rows whose url is NULL or does not start with http:// or https:// are
counted together under a NULL domain.
*/
WITH concept_domains AS (
    SELECT
        concept_id,
        -- Capture the host part of the URL. '-' is part of the character
        -- class so that hyphenated host names are not cut off at the
        -- first hyphen and counted under the wrong domain.
        (regexp_match(url, '^https?://([A-Za-z0-9.-]+)'))[1] AS domain
    FROM macrostrat.strat_names_meta
)

SELECT
    domain,
    count(*) AS n_names
FROM concept_domains
GROUP BY domain;


/* Number of strat_tree rows for each value of rel */
SELECT
    st.rel,
    count(*)
FROM macrostrat.strat_tree AS st
GROUP BY st.rel;
2 changes: 1 addition & 1 deletion py-modules/cli/macrostrat/cli/commands/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from psycopg2.extras import NamedTupleCursor

from ..database import mariaConnection, pgConnection
from ..database._legacy import pgConnection


class Base(object):
Expand Down
26 changes: 12 additions & 14 deletions py-modules/cli/macrostrat/cli/commands/process_scripts/tesselate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,12 @@
import numpy as np
import pyproj
import shapely.ops as ops
from psycopg2.extensions import AsIs
from psycopg2.extras import NamedTupleCursor
from scipy.spatial import Voronoi
from shapely.geometry import MultiPoint, Point, Polygon, mapping, shape
from shapely.ops import cascaded_union
from shapely.wkt import loads

from .. import schlep
# TODO: don't need this any longer
# from .. import schlep
from ..base import Base


Expand Down Expand Up @@ -660,16 +658,16 @@ def run(self, args):
self.mariadb["connection"].close()

# Update postgres
schlep_instance = schlep(
{
"pg": self.pg["raw_connection"],
"mariadb": self.mariadb["raw_connection"],
},
[None, ""],
)

schlep_instance.move_table("col_areas")
schlep_instance.move_table("cols")
# schlep_instance = schlep(
# {
# "pg": self.pg["raw_connection"],
# "mariadb": self.mariadb["raw_connection"],
# },
# [None, ""],
# )
#
# schlep_instance.move_table("col_areas")
# schlep_instance.move_table("cols")

urllib.request.urlopen(
"http://127.0.0.1:5000/api/v2/columns/refresh-cache?cacheRefreshKey=%s"
Expand Down
176 changes: 10 additions & 166 deletions py-modules/cli/macrostrat/cli/commands/rebuild_scripts/autocomplete.py
Original file line number Diff line number Diff line change
@@ -1,174 +1,18 @@
from pathlib import Path

from ...database import get_db
from ..base import Base

here = Path(__file__).parent

# NOTE: this was successfully migrated from MariaDB to PostgreSQL on 2025-12-16


class Autocomplete(Base):
def __init__(self, *args):
Base.__init__(self, {}, *args)

def run(self):
self.mariadb["cursor"].execute(
"""
DROP TABLE IF EXISTS autocomplete_new;
"""
)
self.mariadb["connection"].commit()
self.mariadb["cursor"].execute(
"""
DROP TABLE IF EXISTS autocomplete_old;
"""
)
self.mariadb["connection"].commit()

# Build the new table
self.mariadb["cursor"].execute(
"""
CREATE TABLE autocomplete_new AS
SELECT * FROM (
select id, econ as name, 'econs' as type, 'econ' as category from econs
union
select 0 AS id, econ_type AS name, 'econ_types' AS type, 'econ' as category
FROM econs
WHERE econ != econ_type
GROUP BY econ_type
union
SELECT 0 AS id, econ_class AS name, 'econ_classes' AS type, 'econ' as category FROM econs GROUP BY econ_class
union
select id, environ as name, 'environments' as type, 'environ' as category
FROM environs
WHERE environ != environ_class
union
select 0 AS id, environ_type AS name, 'environment_types' AS type, 'environ' as category
FROM environs
GROUP BY environ_type
union
select 0 AS id, environ_class AS name, 'environment_classes' AS type, 'environ' as category
FROM environs
GROUP BY environ_class
union
select id, concat(lith_att, ' (', att_type, ')') as name, 'lithology_attributes' as type, 'lith_att' as category from lith_atts
union
select id, project as name, 'projects' as type, 'project' as category from projects
union
SELECT DISTINCT strat_names_meta.concept_id AS id, name, 'strat_name_concepts' AS type, 'strat_name' as category
FROM strat_names_meta
JOIN strat_names ON strat_names_meta.concept_id = strat_names.concept_id
union
(select id, CONCAT(strat_name, ' ', rank) AS name, 'strat_name_orphans' as type, 'strat_name' as category from strat_names WHERE concept_id = 0)
union
select id, col_name as name, 'columns' as type, 'column' as category from cols
union
select id, col_group_long as name, 'groups' as type, 'group' as category from col_groups
union
SELECT id, lith AS name, 'lithologies' AS type, 'lithology' as category
FROM liths
WHERE lith != lith_type AND lith != lith_class
union
SELECT 0 AS id, lith_type AS name, 'lithology_types' AS type, 'lithology' AS category
FROM liths
WHERE lith_type != lith_class
GROUP BY lith_type
union
SELECT 0 AS id, lith_class AS name, 'lithology_classes' AS type, 'lithology' as category
FROM liths
GROUP BY lith_class
union
select id, interval_name as name, 'intervals' as type, 'interval' as category from intervals
union
SELECT id, mineral AS name, 'minerals' AS type, 'mineral' as category from minerals
union
SELECT id, structure as name, 'structures' as type, 'structure' as category from structures
) i;
"""
)
self.mariadb["cursor"].close()
self.mariadb["cursor"] = self.mariadb["connection"].cursor()

self.mariadb["cursor"].execute(
"""
UPDATE autocomplete_new AS a
INNER JOIN (
SELECT concept_id, CONCAT(name, COALESCE(CONCAT(' (', interval_name, ')'), '')) AS name
FROM strat_names_meta
LEFT JOIN intervals ON intervals.id = strat_names_meta.interval_id
) sub ON a.id = sub.concept_id
SET a.name = sub.name
WHERE a.id IN (
SELECT id FROM (
SELECT id
FROM autocomplete_new
WHERE name IN (
SELECT name
FROM (
SELECT name, type, count(*)
FROM autocomplete_new
WHERE type = 'strat_name_concepts'
GROUP BY name, type
HAVING count(*) > 1
ORDER BY count(*) desc
) a
)
) b
) AND type = 'strat_name_concepts';
"""
)
self.mariadb["cursor"].close()
self.mariadb["cursor"] = self.mariadb["connection"].cursor()

self.mariadb["cursor"].execute(
"""
UPDATE autocomplete_new AS a
INNER JOIN (
SELECT DISTINCT strat_names.id, CONCAT(strat_name, ' (', FO_period, ')') AS name
FROM strat_names
JOIN unit_strat_names ON strat_names.id = unit_strat_names.strat_name_id
JOIN lookup_unit_intervals ON lookup_unit_intervals.unit_id = unit_strat_names.unit_id
) sub ON a.id = sub.id
SET a.name = sub.name
WHERE a.id IN (
SELECT id FROM (
SELECT id
FROM autocomplete_new
WHERE name IN (
SELECT name
FROM (
SELECT name, type, count(*)
FROM autocomplete_new
WHERE type = 'strat_name_orphans'
GROUP BY name, type
HAVING count(*) > 1
ORDER BY count(*) desc
) a
)
) b
) AND type = 'strat_name_orphans';
"""
)
self.mariadb["cursor"].close()
self.mariadb["cursor"] = self.mariadb["connection"].cursor()

self.mariadb["cursor"].execute(
"""
ALTER TABLE autocomplete rename to autocomplete_old;
"""
)
self.mariadb["cursor"].close()
self.mariadb["cursor"] = self.mariadb["connection"].cursor()

self.mariadb["cursor"].execute(
"""
ALTER TABLE autocomplete_new rename to autocomplete;
"""
)
self.mariadb["cursor"].close()
self.mariadb["cursor"] = self.mariadb["connection"].cursor()

self.mariadb["cursor"].execute(
"""
DROP TABLE IF EXISTS autocomplete_old;
"""
)
self.mariadb["cursor"].close()
self.mariadb["cursor"] = self.mariadb["connection"].cursor()

self.mariadb["cursor"].close()
self.mariadb["connection"].close()
db = get_db()
db.run_sql(here / "sql" / "autocomplete.sql")
# TODO: synchronize macrostrat_api.autocomplete view
Loading