UW-Macrostrat
diff --git a/‎.gitmodules‎
Lines changed: 3 additions & 0 deletions b/‎.gitmodules‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎py-modules/cli/macrostrat/cli/entrypoint.py‎
Lines changed: 12 additions & 0 deletions b/‎py-modules/cli/macrostrat/cli/entrypoint.py‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎py-modules/cli/pyproject.toml‎
Lines changed: 3 additions & 1 deletion b/‎py-modules/cli/pyproject.toml‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎py-modules/column-ingestion/README.md‎
Lines changed: 3 additions & 0 deletions b/‎py-modules/column-ingestion/README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎py-modules/column-ingestion/macrostrat/column_ingestion/__init__.py‎
Lines changed: 18 additions & 0 deletions b/‎py-modules/column-ingestion/macrostrat/column_ingestion/__init__.py‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎py-modules/column-ingestion/macrostrat/column_ingestion/columns.py‎
Lines changed: 166 additions & 0 deletions b/‎py-modules/column-ingestion/macrostrat/column_ingestion/columns.py‎
Lines changed: 166 additions & 0 deletions
diff --git a/‎py-modules/column-ingestion/macrostrat/column_ingestion/database.py‎
Lines changed: 105 additions & 0 deletions b/‎py-modules/column-ingestion/macrostrat/column_ingestion/database.py‎
Lines changed: 105 additions & 0 deletions
@@ -7,3 +7,6 @@
 [submodule "submodules/storage-admin"]
 	path = submodules/storage-admin
 	url = https://github.com/UW-Macrostrat/radosgw_admin_client
+[submodule "submodules/column-ingestion"]
+	path = submodules/column-ingestion
+	url = https://github.com/Macrostrat/column-ingestion.git
@@ -273,6 +273,18 @@ def update_weaver(db):
 except ImportError as err:
     pass
 
+# Register the column ingestion commands as the `columns` command group.
+# If `macrostrat.column_ingestion` cannot be imported, the ImportError is
+# swallowed and the group is simply not added to the CLI.
+try:
+    from macrostrat.column_ingestion import app as column_app
+
+    main.add_typer(
+        column_app,
+        name="columns",
+        rich_help_panel="Subsystems",
+        short_help="Column data ingestion subsystem",
+    )
+except ImportError as err:
+    pass
+
 
 # Get subsystems config
 subsystems = getattr(settings, "subsystems", {})
 
@@ -27,6 +27,7 @@ dependencies = [
     "macrostrat.match-utils",
     "criticalmaas.ta1-geopackage>=0.2.0,<0.3",
     "mapboard.topology-manager",
+    "macrostrat.column-ingestion",
     "htpheno.radosgw-admin-client",
     "numpy>=1.23.4,<2",
     "psycopg2-binary>=2.9.4,<3",
@@ -39,7 +40,7 @@ dependencies = [
     "spectra>=0.0.11,<0.0.12",
     "tiletanic>=1.1.0,<2",
     "tqdm>=4.65.0,<5",
-    "typer>=0.12,<0.13",
+    "typer>=0.12,<0.24.0",
     "click>=8.1.0,<8.2.0",
     "docker>=7,<8",
     "toml>=0.10.2,<0.11",
@@ -77,6 +78,7 @@ default-groups = [
 "mapboard.topology-manager" = { path = "../../submodules/topology-manager", editable = true }
 "htpheno.radosgw-admin-client" = { path = "../../submodules/storage-admin", editable = true }
 "macrostrat.match-utils" = { path = "../match-utils", editable = true }
+"macrostrat.column-ingestion" = { path = "../../py-modules/column-ingestion", editable = true }
 
 [tool.hatch.build.targets.sdist]
 include = ["macrostrat"]
 
@@ -0,0 +1,3 @@
+# Column ingestion
+
+Macrostrat utilities for stratigraphic column ingestion.
@@ -0,0 +1,18 @@
+from pathlib import Path
+
+from typer import Argument, Typer
+
+# Typer application that groups the column ingestion commands; with
+# `no_args_is_help=True` it shows help when invoked without arguments.
+app = Typer(
+    no_args_is_help=True,
+    help="Column ingestion subsystem for Macrostrat",
+)
+
+
+@app.command(name="ingest")
+def ingest_columns(
+    data_file: Path = Argument(..., help="Path to the data file to ingest")
+):
+    """Ingest columns tabular data."""
+    # Imported inside the command rather than at module import time.
+    # NOTE(review): presumably to keep CLI startup light — confirm.
+    from .ingest import ingest_columns_from_file
+
+    ingest_columns_from_file(data_file)
@@ -0,0 +1,166 @@
+from dataclasses import dataclass, field
+from datetime import datetime
+
+import polars as pl
+from sqlalchemy.dialects.postgresql import insert
+
+from macrostrat.database import Database
+
+from .database import get_macrostrat_table
+from .units import Unit
+
+
+@dataclass
+class Column:
+    """In-memory record for one column being ingested, with its units."""
+
+    # Database ID of the column.
+    # NOTE(review): -1 looks like a "not yet assigned" placeholder — confirm.
+    id: int = -1
+    # ID of the column group; written to `col_group_id` by get_or_create_column.
+    # NOTE(review): -1 looks like a "not yet assigned" placeholder — confirm.
+    group_id: int = -1
+    # Identifier taken from the input sheet's `id`/`col_id` column.
+    local_id: str | None = None
+    name: str | None = None
+    description: str | None = None
+    project_id: int | None = None
+    # Defaults match the fallbacks used when reading the input sheet.
+    status_code: str = "in process"
+    col_type: str = "column"
+    # Geometry values copied as-is from the input sheet.
+    # NOTE(review): format (WKT? GeoJSON?) is not visible here — confirm.
+    geom: str | None = None
+    rgeom: str | None = None
+    # Units belonging to this column; a fresh list per instance.
+    units: list[Unit] = field(default_factory=list)
+
+
+def get_or_create_column_group(
+    db: Database, project_id: int, name: str = "Default"
+) -> int:
+    """Get or create the column group called `name` for a given project ID.
+
+    Args:
+        db: Database whose session is used for the lookup and the insert.
+        project_id: ID of the project that owns the column group.
+        name: Short name of the group (`col_group`); defaults to "Default".
+
+    Returns:
+        The ID of the existing or newly inserted `col_groups` row.
+    """
+    col_groups_tbl = get_macrostrat_table(db, "col_groups")
+
+    # TODO: need to add an index on project_id to the col_groups table for this to work properly
+
+    # Find a pre-existing column group for the project, if it exists
+    existing_group_id = (
+        db.session.query(col_groups_tbl.c.id)
+        .filter(col_groups_tbl.c.project_id == project_id)
+        .filter(col_groups_tbl.c.col_group == name)
+        .scalar()
+    )
+    if existing_group_id is not None:
+        return existing_group_id
+
+    # Insert under the requested name so that the lookup above finds this row
+    # on the next call; for the default name this yields the same values as
+    # before ("Default" / "Default column group").
+    insert_stmt = (
+        insert(col_groups_tbl)
+        .values(
+            project_id=project_id,
+            col_group=name,
+            col_group_long=f"{name} column group",
+        )
+        .returning(col_groups_tbl.c.id)
+    )
+    return db.session.execute(insert_stmt).scalar()
+
+
+def get_or_create_column(db: Database, col: Column) -> int:
+    """Insert or update the `cols` row for a column and return its ID.
+
+    The row is looked up by column name and project ID. An existing row has
+    its status code, column type and column group overwritten; otherwise a new
+    row is inserted with placeholder values for the remaining fields.
+    """
+    cols_tbl = get_macrostrat_table(db, "cols")
+
+    # TODO: Use insert-on-conflict to get or create the column
+    # Requires an index on (col_name, project_id) to work properly
+
+    # Fields written on both the insert and the update path
+    shared_fields = {
+        "status_code": col.status_code,
+        "col_type": col.col_type,
+        "col_group_id": col.group_id,
+    }
+
+    # TODO: figure out how to handle these fields
+    placeholder_fields = {
+        "col_position": "",
+        "col_area": 0,
+        "created": datetime.now(),
+        "col": 0,
+        "lat": 0,
+        "lng": 0,
+    }
+
+    # Look for an existing column with the same name in the same project
+    lookup = db.session.query(cols_tbl.c.id).filter(
+        cols_tbl.c.col_name == col.name,
+        cols_tbl.c.project_id == col.project_id,
+    )
+    existing_id = lookup.scalar()
+
+    if existing_id is None:
+        stmt = insert(cols_tbl).values(
+            col_name=col.name,
+            project_id=col.project_id,
+            **placeholder_fields,
+            **shared_fields,
+        )
+    else:
+        # Update the existing column with any new values
+        print("Updating existing column with ID", existing_id)
+        stmt = (
+            cols_tbl.update()
+            .where(cols_tbl.c.id == existing_id)
+            .values(**shared_fields)
+        )
+
+    return db.session.execute(stmt.returning(cols_tbl.c.id)).scalar()
+
+
+def get_or_create_section(db: Database, col_id: int) -> int:
+    """Return the ID of the section belonging to a column, inserting one if absent.
+    Note: multiple sections are not supported as yet.
+    """
+    sections_tbl = get_macrostrat_table(db, "sections")
+
+    # Reuse the section already attached to this column when there is one
+    lookup = db.session.query(sections_tbl.c.id).filter(
+        sections_tbl.c.col_id == col_id
+    )
+    found_id = lookup.scalar()
+    if found_id is not None:
+        return found_id
+
+    # No section yet: insert one with -1 for fo, fo_h, lo and lo_h
+    new_section = insert(sections_tbl).values(
+        col_id=col_id, fo=-1, fo_h=-1, lo=-1, lo_h=-1
+    )
+    result = db.session.execute(new_section.returning(sections_tbl.c.id))
+    return result.scalar()
+
+
+def get_column_data(data_file, meta) -> list[Column]:
+    """Read the "columns" sheet of a workbook into `Column` records.
+
+    The sheet columns `name`, `id` and `type` are renamed to `col_name`,
+    `col_id` and `col_type` when present. For `status_code`, `col_type` and
+    `rgeom`, an empty or missing cell falls back to the attribute of the same
+    name on `meta`, and then to the `Column` default.
+
+    Args:
+        data_file: Workbook passed to `polars.read_excel`.
+        meta: Object that may carry `status_code`, `col_type` and `rgeom`
+            attributes used as sheet-wide defaults.
+
+    Returns:
+        One `Column` per row of the sheet, in sheet order.
+    """
+
+    def first_set(*candidates):
+        # First candidate that is not None, or None when all are unset
+        return next((c for c in candidates if c is not None), None)
+
+    df = pl.read_excel(data_file, sheet_name="columns")
+
+    df = df.rename(
+        {
+            "name": "col_name",
+            "id": "col_id",
+            "type": "col_type",
+        },
+        strict=False,
+    )
+
+    print(df.head())
+
+    columns = []
+    for row in df.iter_rows(named=True):
+        # Named rows contain every sheet column, with None for empty cells,
+        # so fallbacks must test the value rather than key presence.
+        raw_id = row.get("col_id")
+
+        col = Column(
+            # TODO: implement ID upgrading to handle existing columns
+            # Keep a missing ID as None rather than the string "None"
+            local_id=None if raw_id is None else str(raw_id),
+            name=row.get("col_name"),
+            description=row.get("description"),
+            status_code=first_set(
+                row.get("status_code"),
+                getattr(meta, "status_code", None),
+                "in process",
+            ),
+            col_type=first_set(
+                row.get("col_type"),
+                getattr(meta, "col_type", None),
+                "column",
+            ),
+            geom=row.get("geom"),
+            rgeom=first_set(row.get("rgeom"), getattr(meta, "rgeom", None)),
+        )
+        columns.append(col)
+    return columns
@@ -0,0 +1,105 @@
+import re
+from typing import Optional
+
+from pydantic import BaseModel
+
+from macrostrat.core.database import get_database
+
+
+class ProjectIdentifier(BaseModel):
+    # Partial reference to a project: any of its ID, slug, or name.
+    id: Optional[int] = None
+    slug: Optional[str] = None
+    name: Optional[str] = None
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        # At least one of id, slug, or name must be provided. The check is on
+        # truthiness, so 0 and "" count as not provided.
+        if not any((self.id, self.slug, self.name)):
+            raise ValueError("At least one of id, slug, or name must be provided")
+
+
+class ProjectData(ProjectIdentifier):
+    # Fully resolved project: all three identifying fields are required here,
+    # overriding the optional declarations on ProjectIdentifier.
+    id: int
+    slug: str
+    name: str
+
+
+def get_macrostrat_model(db, table_name: str):
+    """Return the mapped model for a table in the `macrostrat` schema.
+
+    The schema is automapped first if the model is not yet available on `db.model`.
+    """
+    attr = f"macrostrat_{table_name}"
+    needs_mapping = not hasattr(db.model, attr)
+    if needs_mapping:
+        db.automap(schemas=["macrostrat"])
+    return getattr(db.model, attr)
+
+
+def get_macrostrat_table(db, table_name: str):
+    """Return the table object for a table in the `macrostrat` schema.
+
+    The schema is automapped first if the table is not yet available on `db.table`.
+    """
+    attr = f"macrostrat_{table_name}"
+    needs_mapping = not hasattr(db.table, attr)
+    if needs_mapping:
+        db.automap(schemas=["macrostrat"])
+    return getattr(db.table, attr)
+
+
+def get_or_create_project(
+    db, project: ProjectIdentifier, create_if_not_exists: bool = True
+) -> Optional[ProjectData]:
+    """Get or create a project in the database.
+
+    The project is looked up by the first of `id`, `slug`, or `name` that is
+    set on `project`.
+
+    Args:
+        db: Database whose session is used for the lookup and the insert.
+        project: Identifier with at least one of id, slug, or name.
+        create_if_not_exists: Insert a new project when none matches.
+
+    Returns:
+        The matching or newly created project, or None when nothing matches
+        and `create_if_not_exists` is False.
+
+    Raises:
+        ValueError: If none of id, slug, or name is set on `project`.
+    """
+    # map the project table
+    Project = get_macrostrat_model(db, table_name="projects")
+
+    # Try to find the project by id, slug, or name
+    query = db.session.query(Project)
+    if project.id is not None:
+        query = query.filter(Project.id == project.id)
+    elif project.slug is not None:
+        query = query.filter(Project.slug == project.slug)
+    elif project.name is not None:
+        query = query.filter(Project.project == project.name)
+    else:
+        raise ValueError("At least one of id, slug, or name must be provided")
+
+    existing_project = query.first()
+    if existing_project is not None:
+        return ProjectData(
+            id=existing_project.id,
+            slug=existing_project.slug,
+            name=existing_project.project,
+        )
+
+    if not create_if_not_exists:
+        return None
+
+    # Create a new project. Keep a slug supplied by the caller; otherwise
+    # derive one from the name with parentheticals removed.
+    slug = project.slug
+    if slug is None and project.name is not None:
+        simple_name = re.sub(r"\s*\(.*?\)\s*", "", project.name)
+        simple_name = re.sub(r"\s+", " ", simple_name).strip()
+        slug = simple_name.lower().replace(" ", "-")
+
+    new_project = Project(
+        id=project.id,
+        slug=slug,
+        project=project.name,
+        descrip="A random description",
+        timescale_id=1,  # TODO: this should be set to a valid timescale ID
+    )
+    db.session.add(new_project)
+    # Flush so that a database-assigned ID is loaded onto the new row before
+    # it is read back below; without this `new_project.id` is still None
+    # whenever the caller did not supply an ID.
+    db.session.flush()
+    return ProjectData(
+        id=new_project.id,
+        slug=new_project.slug,
+        name=new_project.project,
+    )
+
+
+def get_all_liths():
+    """Fetch every row of `macrostrat.liths` as (id, name)."""
+    query = "SELECT id, lith name FROM macrostrat.liths"
+    result = get_database().run_query(query)
+    return result.fetchall()
+
+
+def get_all_lith_attributes():
+    """Fetch every row of `macrostrat.lith_atts` as (id, name)."""
+    query = "SELECT id, lith_att name FROM macrostrat.lith_atts"
+    result = get_database().run_query(query)
+    return result.fetchall()
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Column ingestion`
	`2`	`+`
	`3`	`+Macrostrat utilities for stratigraphic column ingestion.`