Skip to content

Commit 95d962f

Browse files
committed
Merge branch 'file_management' of github.com:UW-Macrostrat/macrostrat into merge_backup
2 parents eea9e8a + 115fe2f commit 95d962f

File tree

26 files changed

+729
-3376
lines changed

26 files changed

+729
-3376
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ __pycache__
33
macrostrat.toml
44
*.egg-info
55
*.pyc
6+
.DS_Store

py-modules/core/macrostrat/core/schemas.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ class IngestProcess(Base):
193193
access_group_id: Mapped[int] = mapped_column(
194194
ForeignKey("macrostrat_auth.group.id"), nullable=True
195195
)
196+
# TODO: remove all object_group_id associations
196197
object_group_id: Mapped[ObjectGroup] = mapped_column(
197198
ForeignKey("storage.object_group.id")
198199
)

py-modules/map-integration/macrostrat/__init__.py

Whitespace-only changes.

py-modules/map-integration/macrostrat/map_integration/__init__.py

Lines changed: 161 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from macrostrat.core import app
1616
from macrostrat.database import Database
1717
from macrostrat.map_integration.commands.prepare_fields import _prepare_fields
18-
from macrostrat.map_integration.pipeline import ingest_map
18+
#from macrostrat.map_integration.pipeline import ingest_map
1919
from macrostrat.map_integration.process.geometry import create_rgeom, create_webgeom
2020
from macrostrat.map_integration.utils.ingestion_utils import (
2121
find_gis_files,
@@ -94,9 +94,6 @@ def delete_sources(
9494
help="BULK delete = filename.txt [every line lists the slug_name to delete. no whitespaces.]\n "
9595
+ "SINGLE delete = 'slug_name' [list the slug_name in quotes]",
9696
),
97-
file_name: str = Option(
98-
None, help="deletes a specified file within the slug's directory."
99-
),
10097
dry_run: bool = Option(False, "--dry-run"),
10198
all_data: bool = Option(False, "--all-data"),
10299
):
@@ -116,13 +113,11 @@ def delete_sources(
116113
print("\nDry run; not actually deleting anything")
117114
return
118115

119-
for slug in slug:
120-
cmd_delete_dir(slug, file_name)
121-
print(f"Deleting map {slug}")
122-
print(slug)
116+
for s in slug:
117+
print(f"Deleting map {s}")
123118
tables = db.run_query(
124119
"SELECT primary_table, primary_line_table FROM maps.sources WHERE slug = :slug",
125-
dict(slug=slug),
120+
dict(slug=s),
126121
).fetchone()
127122

128123
line_table = None
@@ -132,17 +127,18 @@ def delete_sources(
132127
poly_table = tables.primary_table
133128

134129
if line_table is None:
135-
line_table = f"{slug}_lines"
130+
line_table = f"{s}_lines"
136131
if poly_table is None:
137-
poly_table = f"{slug}_polygons"
138-
points_table = f"{slug}_points"
132+
poly_table = f"{s}_polygons"
133+
points_table = f"{s}_points"
139134

140135
for table in [line_table, poly_table, points_table]:
141136
db.run_sql(
142137
"DROP TABLE IF EXISTS {table}",
143138
dict(table=Identifier("sources", table)),
144139
)
145140

141+
<<<<<<< HEAD:py-modules/map-integration/macrostrat/map_integration/__init__.py
146142
ingest_process = db.run_query(
147143
"""
148144
SELECT id FROM maps_metadata.ingest_process
@@ -183,16 +179,40 @@ def delete_sources(
183179
"DELETE FROM maps_metadata.ingest_process WHERE id = :ingest_process_id",
184180
dict(ingest_process_id=ingest_process_id),
185181
)
182+
=======
183+
staging_delete_dir(s, db)
184+
>>>>>>> 115fe2f6d4d422b4c341a2938707dc116834a325:map-integration/macrostrat/map_integration/__init__.py
186185

187186
source_id = db.run_query(
188187
"SELECT source_id FROM maps.sources WHERE slug = :slug",
189-
dict(slug=slug),
188+
dict(slug=s),
190189
).scalar()
191190

191+
192+
# Delete ALL ingest-related rows for this source
193+
db.run_sql(
194+
"""
195+
DELETE FROM maps_metadata.ingest_process_tag
196+
WHERE ingest_process_id IN (
197+
SELECT id FROM maps_metadata.ingest_process
198+
WHERE source_id = :source_id
199+
)
200+
""",
201+
dict(source_id=source_id),
202+
)
203+
204+
db.run_sql(
205+
"""
206+
DELETE FROM maps_metadata.ingest_process
207+
WHERE source_id = :source_id
208+
""",
209+
dict(source_id=source_id),
210+
)
211+
192212
if all_data:
193213
_delete_map_data(source_id)
194214

195-
db.run_sql("DELETE FROM maps.sources WHERE slug = :slug", dict(slug=slug))
215+
db.run_sql("DELETE FROM maps.sources WHERE slug = :slug", dict(slug=s))
196216

197217

198218
@cli.command(name="change-slug")
@@ -307,8 +327,6 @@ def staging(
307327

308328
slug, name, ext = normalize_slug(prefix, Path(data_path))
309329
# we need to add database insert here.
310-
object_ids = cmd_upload_dir(slug=slug, data_path=Path(data_path), ext=ext)
311-
312330
print(f"Ingesting {slug} from {data_path}")
313331

314332
gis_files, excluded_files = find_gis_files(Path(data_path), filter=filter)
@@ -411,11 +429,14 @@ def staging(
411429
),
412430
)
413431

432+
cmd_upload_dir(slug=slug, data_path=Path(data_path), ext=ext)
433+
414434
map_info = get_map_info(db, slug)
415435
_prepare_fields(map_info)
416436
create_rgeom(map_info)
417437
create_webgeom(map_info)
418438

439+
<<<<<<< HEAD:py-modules/map-integration/macrostrat/map_integration/__init__.py
419440
# Ingest process assertions
420441
if len(object_ids) > 0:
421442
ingest_id = db.run_query(
@@ -438,6 +459,8 @@ def staging(
438459
dict(ingest_process_id=ingest_id, object_id=object),
439460
)
440461

462+
=======
463+
>>>>>>> 115fe2f6d4d422b4c341a2938707dc116834a325:map-integration/macrostrat/map_integration/__init__.py
441464
console.print(
442465
f"[green] \n Finished staging setup for {slug}. "
443466
f"View map here: https://dev.macrostrat.org/maps/ingestion/{source_id}/ [/green] \n"
@@ -451,25 +474,40 @@ def staging(
451474
# commands nested under 'macrostrat maps staging...'
452475

453476

454-
@staging_cli.command("s3-upload")
def cmd_upload_dir(
    slug: str = ...,
    data_path: Path = ...,
    ext: str = Option(".gdb", help="extension of the data path"),
    ingest_process_id: int = Option(None),
):
    """Upload a local directory to the staging bucket under SLUG/.

    Args:
        slug: Slug of the map source; used to look up ``maps.sources``.
        data_path: Local path handed to ``staging_upload_dir``.
        ext: Extension of the data path, forwarded to ``staging_upload_dir``.
        ingest_process_id: Ingest process to pass to ``staging_upload_dir``.
            When omitted, the most recent (highest ``id``) row of
            ``maps_metadata.ingest_process`` for the slug's source is used.

    Returns:
        None. The value returned by ``staging_upload_dir`` is printed as JSON.
    """
    db = get_database()

    # NOTE(review): when this command is called directly from Python (as the
    # staging commands do) an omitted option is presumably still the
    # ``Option(...)`` placeholder rather than None, so test for a real integer
    # instead of comparing against None.
    if isinstance(ingest_process_id, int):
        ingest_id = ingest_process_id
    else:
        source_id = db.run_query(
            "SELECT source_id FROM maps.sources WHERE slug = :slug",
            dict(slug=slug),
        ).scalar()
        # Fall back to the newest ingest process registered for this source.
        ingest_id = db.run_query(
            """
            SELECT id
            FROM maps_metadata.ingest_process
            WHERE source_id = :source_id
            ORDER BY id DESC
            LIMIT 1
            """,
            dict(source_id=source_id),
        ).scalar()

    res = staging_upload_dir(slug, data_path, ext, db, ingest_id)
    pretty_res = json.dumps(res, indent=2)
    console.print(f"[green] Processed files \n {pretty_res} [/green]")
462499

463500

464-
@staging_cli.command("s3-delete")
def cmd_delete_dir(
    slug: str = ...,
    file_name: str = Option(
        None, help="deletes a specified file within the slug directory."
    ),
):
    """Delete all objects under SLUG/ in the staging bucket.

    Args:
        slug: Slug whose staged objects are removed via ``staging_delete_dir``.
        file_name: Accepted for CLI compatibility.
            NOTE(review): this value is not forwarded to ``staging_delete_dir``
            here, so it currently has no effect -- confirm whether per-file
            deletion is still meant to be supported.
    """
    staging_delete_dir(slug, get_database())
    success_message = (
        f"[green] Successfully deleted objects within the s3 bucket under slug: {slug} [/green]"
    )
    console.print(success_message)
@@ -506,7 +544,7 @@ def cmd_list_dir(
506544
token = page["next_page_token"]
507545

508546

509-
@staging_cli.command("s3-download-dir")
547+
@staging_cli.command("s3-download")
510548
def cmd_download_dir(
511549
slug: str = ...,
512550
dest_path: pathlib.Path = Option(
@@ -519,6 +557,99 @@ def cmd_download_dir(
519557
console.print(json.dumps(res, indent=2))
520558

521559

560+
@staging_cli.command("convert-e00")
def convert_e00_to_gpkg(
    data_path: str = Option(..., help="Directory containing .e00 files"),
    slug: str = Option(..., help="Output basename (no .gpkg needed)"),
):
    """Convert every .e00 file in DATA_PATH into layers of one GeoPackage.

    For each ``*.e00`` file the available layers are discovered with
    ``ogrinfo`` and the recognised ones are copied with ``ogr2ogr`` into
    ``<data_path>/<slug>.gpkg`` as ``<file stem>_lines``, ``<file stem>_points``
    and ``<file stem>_polygons``.

    Args:
        data_path: Directory that is searched (non-recursively) for .e00 files.
        slug: Basename of the GeoPackage written into ``data_path``.

    Raises:
        ValueError: If ``data_path`` contains no ``.e00`` files.
    """
    import re  # local import: only needed to parse the ogrinfo listing

    data_dir = Path(data_path).expanduser().resolve()
    out_gpkg = data_dir / f"{slug}.gpkg"
    e00_files = sorted(data_dir.glob("*.e00"))

    if not e00_files:
        raise ValueError(f"No .e00 files found in {data_dir}")

    # ogrinfo summary lines look like: "1: ARC (Line String)".
    # Requiring the leading layer index keeps warnings/errors that merely
    # contain ":" and "(" from being mistaken for layer names.
    layer_line = re.compile(r"^\d+:\s*(.+?)\s*\(")

    # (source layer names, output-name suffix, forced geometry type),
    # processed in this order for every input file.
    layer_groups = (
        (("ARC",), "lines", "LINESTRING"),
        (("CNT", "LAB", "POINT"), "points", "POINT"),
        (("PAL", "AREA"), "polygons", "POLYGON"),
    )

    def list_layers(e00_path: Path) -> set[str]:
        """Return the layer names ogrinfo reports for one .e00 file."""
        proc = subprocess.run(
            ["ogrinfo", "-ro", "-so", str(e00_path)],
            capture_output=True,
            text=True,
        )
        listing = (proc.stdout or "") + "\n" + (proc.stderr or "")
        found = set()
        for raw in listing.splitlines():
            match = layer_line.match(raw.strip())
            if match:
                found.add(match.group(1))
        return found

    def run(cmd):
        """Run a command and return (returncode, stdout, stderr)."""
        proc = subprocess.run(cmd, capture_output=True, text=True)
        return proc.returncode, proc.stdout, proc.stderr

    # Becomes True after the first successful write; from then on layers are
    # appended to the existing GeoPackage instead of overwriting.
    created = False
    for f in e00_files:
        base = f.stem
        layers = list_layers(f)

        for candidates, suffix, geom_type in layer_groups:
            for lyr in candidates:
                if lyr not in layers:
                    continue
                mode = ["-update", "-append"] if created else ["-overwrite"]
                cmd = [
                    "ogr2ogr", "-f", "GPKG", *mode,
                    str(out_gpkg), str(f), lyr,
                    "-nln", f"{base}_{suffix}",
                    "-nlt", geom_type,
                ]
                rc, _, err = run(cmd)
                if rc == 0:
                    created = True
                else:
                    # Keep going (best effort) but do not hide the failure.
                    print(
                        f"ogr2ogr failed for {f.name} layer {lyr} "
                        f"(exit code {rc}): {(err or '').strip()}"
                    )

        print(f"{f.name}: layers={sorted(layers)}")

    if created:
        print(f"Done: {out_gpkg}")
    else:
        print(f"No layers were converted into {out_gpkg}")
652+
522653
# ----------------------------------------------------------------------------------------------------------------------
523654

524655

@@ -550,9 +681,6 @@ def staging_bulk(
550681
for region_path in region_dirs:
551682
slug, name, ext = normalize_slug(prefix, Path(region_path))
552683

553-
# upload to the s3 bucket!
554-
object_ids = cmd_upload_dir(slug=slug, data_path=region_path, ext=ext)
555-
556684
print(f"Ingesting {slug} from {region_path}")
557685
gis_files, excluded_files = find_gis_files(Path(region_path), filter=filter)
558686
if not gis_files:
@@ -653,11 +781,16 @@ def staging_bulk(
653781
),
654782
)
655783

784+
785+
cmd_upload_dir(slug=slug, data_path=region_path, ext=ext)
786+
787+
656788
map_info = get_map_info(db, slug)
657789
_prepare_fields(map_info)
658790
create_rgeom(map_info)
659791
create_webgeom(map_info)
660792

793+
<<<<<<< HEAD:py-modules/map-integration/macrostrat/map_integration/__init__.py
661794
# Ingest process assertions
662795
if len(object_ids) > 0:
663796
ingest_id = db.run_query(
@@ -680,6 +813,8 @@ def staging_bulk(
680813
dict(ingest_process_id=ingest_id, object_id=object),
681814
)
682815

816+
=======
817+
>>>>>>> 115fe2f6d4d422b4c341a2938707dc116834a325:map-integration/macrostrat/map_integration/__init__.py
683818
print(
684819
f"\nFinished staging setup for {slug}. View map here: https://dev.macrostrat.org/maps/ingestion/{source_id}/ \n"
685820
)

0 commit comments

Comments
 (0)