 from macrostrat.core import app
 from macrostrat.database import Database
 from macrostrat.map_integration.commands.prepare_fields import _prepare_fields
-from macrostrat.map_integration.pipeline import ingest_map
+# from macrostrat.map_integration.pipeline import ingest_map
 from macrostrat.map_integration.process.geometry import create_rgeom, create_webgeom
 from macrostrat.map_integration.utils.ingestion_utils import (
     find_gis_files,
@@ -94,9 +94,6 @@ def delete_sources(
         help="BULK delete = filename.txt [every line lists the slug_name to delete. no whitespaces.]\n"
         + "SINGLE delete = 'slug_name' [list the slug_name in quotes]",
     ),
-    file_name: str = Option(
-        None, help="deletes a specified file within the slug's directory."
-    ),
     dry_run: bool = Option(False, "--dry-run"),
     all_data: bool = Option(False, "--all-data"),
 ):
@@ -116,13 +113,11 @@ def delete_sources(
         print("\nDry run; not actually deleting anything")
         return
 
-    for slug in slug:
-        cmd_delete_dir(slug, file_name)
-        print(f"Deleting map {slug}")
-        print(slug)
+    for s in slug:
+        print(f"Deleting map {s}")
         tables = db.run_query(
             "SELECT primary_table, primary_line_table FROM maps.sources WHERE slug = :slug",
-            dict(slug=slug),
+            dict(slug=s),
         ).fetchone()
 
         line_table = None
@@ -132,17 +127,18 @@ def delete_sources(
             poly_table = tables.primary_table
 
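+        # Fall back to conventional table names when maps.sources has no record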
         if line_table is None:
-            line_table = f"{slug}_lines"
+            line_table = f"{s}_lines"
         if poly_table is None:
-            poly_table = f"{slug}_polygons"
-        points_table = f"{slug}_points"
+            poly_table = f"{s}_polygons"
+        points_table = f"{s}_points"
 
         for table in [line_table, poly_table, points_table]:
             db.run_sql(
                 "DROP TABLE IF EXISTS {table}",
                 dict(table=Identifier("sources", table)),
             )
 
+<<<<<<< HEAD:py-modules/map-integration/macrostrat/map_integration/__init__.py
         ingest_process = db.run_query(
             """
             SELECT id FROM maps_metadata.ingest_process
@@ -183,16 +179,40 @@ def delete_sources(
183179 "DELETE FROM maps_metadata.ingest_process WHERE id = :ingest_process_id" ,
184180 dict (ingest_process_id = ingest_process_id ),
185181 )
+=======
+        staging_delete_dir(s, db)
+>>>>>>> 115fe2f6d4d422b4c341a2938707dc116834a325:map-integration/macrostrat/map_integration/__init__.py
 
         source_id = db.run_query(
             "SELECT source_id FROM maps.sources WHERE slug = :slug",
-            dict(slug=slug),
+            dict(slug=s),
         ).scalar()
 
+
+        # Delete ALL ingest-related rows for this source
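+        # (ingest_process_tag rows reference ingest_process, so they must be cleared first)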
+        db.run_sql(
+            """
+            DELETE FROM maps_metadata.ingest_process_tag
+            WHERE ingest_process_id IN (
+                SELECT id FROM maps_metadata.ingest_process
+                WHERE source_id = :source_id
+            )
+            """,
+            dict(source_id=source_id),
+        )
+
+        db.run_sql(
+            """
+            DELETE FROM maps_metadata.ingest_process
+            WHERE source_id = :source_id
+            """,
+            dict(source_id=source_id),
+        )
+
         if all_data:
             _delete_map_data(source_id)
 
-        db.run_sql("DELETE FROM maps.sources WHERE slug = :slug", dict(slug=slug))
+        db.run_sql("DELETE FROM maps.sources WHERE slug = :slug", dict(slug=s))
 
 
 @cli.command(name="change-slug")
@@ -307,8 +327,6 @@ def staging(
 
     slug, name, ext = normalize_slug(prefix, Path(data_path))
     # we need to add database insert here.
-    object_ids = cmd_upload_dir(slug=slug, data_path=Path(data_path), ext=ext)
-
     print(f"Ingesting {slug} from {data_path}")
 
     gis_files, excluded_files = find_gis_files(Path(data_path), filter=filter)
@@ -411,11 +429,14 @@ def staging(
         ),
     )
 
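+    # Upload to S3 after the source is registered, so the files can be linked to its ingest process.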
+    cmd_upload_dir(slug=slug, data_path=Path(data_path), ext=ext)
+
     map_info = get_map_info(db, slug)
     _prepare_fields(map_info)
     create_rgeom(map_info)
     create_webgeom(map_info)
 
+<<<<<<< HEAD:py-modules/map-integration/macrostrat/map_integration/__init__.py
     # Ingest process assertions
     if len(object_ids) > 0:
         ingest_id = db.run_query(
@@ -438,6 +459,8 @@ def staging(
                 dict(ingest_process_id=ingest_id, object_id=object),
             )
 
+=======
+>>>>>>> 115fe2f6d4d422b4c341a2938707dc116834a325:map-integration/macrostrat/map_integration/__init__.py
     console.print(
         f"[green]\nFinished staging setup for {slug}. "
         f"View map here: https://dev.macrostrat.org/maps/ingestion/{source_id}/ [/green]\n"
@@ -451,25 +474,40 @@ def staging(
 # commands nested under 'macrostrat maps staging...'
 
 
-@staging_cli.command("s3-upload-dir")
-def cmd_upload_dir(slug: str = ..., data_path: Path = ..., ext: str = Option("")):
+@staging_cli.command("s3-upload")
+def cmd_upload_dir(
+    slug: str = ...,
+    data_path: Path = ...,
+    ext: str = Option(".gdb", help="file extension of the data to upload"),
+    ingest_process_id: int = Option(None, help="link uploads to this ingest process"),
+):
456479 """Upload a local directory to the staging bucket under SLUG/."""
457480 db = get_database ()
458- res , object_ids = staging_upload_dir (slug , data_path , ext , db )
481+ source_id = db .run_query (
482+ "SELECT source_id FROM maps.sources WHERE slug = :slug" ,
483+ dict (slug = slug ),
484+ ).scalar ()
485+ ingest_id = db .run_query (
486+ """
487+ SELECT id
488+ FROM maps_metadata.ingest_process
489+ WHERE source_id = :source_id
490+ ORDER BY id DESC
491+ LIMIT 1
492+ """ ,
493+ dict (source_id = source_id ),
494+ ).scalar ()
495+ res = staging_upload_dir (slug , data_path , ext , db , ingest_id )
459496 pretty_res = json .dumps (res , indent = 2 )
460497 console .print (f"[green] Processed files \n { pretty_res } [/green]" )
461- return object_ids
498+ return
462499
463500
-@staging_cli.command("s3-delete-dir")
+@staging_cli.command("s3-delete")
 def cmd_delete_dir(
     slug: str = ...,
     file_name: str = Option(
         None, help="deletes a specified file within the slug directory."
     ),
 ):
     """Delete all objects under SLUG/ in the staging bucket."""
-    staging_delete_dir(slug, file_name)
+    db = get_database()
+    staging_delete_dir(slug, db)
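+    # NOTE: file_name is currently unused here; staging_delete_dir removes the whole SLUG/ prefix.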
     console.print(
         f"[green]Successfully deleted objects within the s3 bucket under slug: {slug}[/green]"
     )
@@ -506,7 +544,7 @@ def cmd_list_dir(
         token = page["next_page_token"]
 
 
-@staging_cli.command("s3-download-dir")
+@staging_cli.command("s3-download")
 def cmd_download_dir(
     slug: str = ...,
     dest_path: pathlib.Path = Option(
@@ -519,6 +557,99 @@ def cmd_download_dir(
     console.print(json.dumps(res, indent=2))
 
 
+@staging_cli.command("convert-e00")
+def convert_e00_to_gpkg(
+    data_path: str = Option(..., help="Directory containing .e00 files"),
+    slug: str = Option(..., help="Output basename (no .gpkg needed)"),
+):
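+    """Merge the ArcInfo .e00 coverages in DATA_PATH into a single <slug>.gpkg."""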
+    data_dir = Path(data_path).expanduser().resolve()
+    out_gpkg = data_dir / f"{slug}.gpkg"
+    e00_files = sorted(data_dir.glob("*.e00"))
+
+    if not e00_files:
+        raise ValueError(f"No .e00 files found in {data_dir}")
+
+    def list_layers(e00_path: Path) -> set[str]:
+        # ogrinfo output includes lines like: "1: ARC (Line String)"
+        p = subprocess.run(
+            ["ogrinfo", "-ro", "-so", str(e00_path)],
+            capture_output=True,
+            text=True,
+        )
+        text_out = (p.stdout or "") + "\n" + (p.stderr or "")
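+        # Combine both streams: GDAL tools can emit warnings and driver notices on stderr.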
+        layers = set()
+        for line in text_out.splitlines():
+            line = line.strip()
+            # matches: "1: ARC (Line String)"
+            if ":" in line and "(" in line:
+                left = line.split(":", 1)[1].strip()
+                name = left.split("(", 1)[0].strip()
+                if name:
+                    layers.add(name)
+        return layers
+
+    def run(cmd):
+        p = subprocess.run(cmd, capture_output=True, text=True)
+        return p.returncode, p.stdout, p.stderr
+
+    created = False
+    for f in e00_files:
+        base = f.stem
+        layers = list_layers(f)
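+        # Classify using ArcInfo coverage conventions: ARC = arcs (lines),
+        # CNT/LAB = centroids and label points, PAL/AREA = polygon topology.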
+        line_layers = [lyr for lyr in ("ARC",) if lyr in layers]
+        point_layers = [lyr for lyr in ("CNT", "LAB", "POINT") if lyr in layers]
+        poly_layers = [lyr for lyr in ("PAL", "AREA") if lyr in layers]
+
+        # Lines
+        for lyr in line_layers:
+            cmd = ["ogr2ogr", "-f", "GPKG"]
+            if created:
+                cmd += ["-update", "-append"]
+            else:
+                # create/overwrite the GeoPackage on the first write
+                cmd += ["-overwrite"]
+            cmd += [
+                str(out_gpkg), str(f), lyr,
+                "-nln", f"{base}_lines",
+                "-nlt", "LINESTRING",
+            ]
+            rc, _, _ = run(cmd)
+            if rc == 0:
+                created = True
+
+        # Points
+        for lyr in point_layers:
+            if not created:
+                cmd = ["ogr2ogr", "-f", "GPKG", "-overwrite"]
+            else:
+                cmd = ["ogr2ogr", "-f", "GPKG", "-update", "-append"]
+            cmd += [
+                str(out_gpkg), str(f), lyr,
+                "-nln", f"{base}_points",
+                "-nlt", "POINT",
+            ]
+            rc, _, _ = run(cmd)
+            if rc == 0:
+                created = True
+
+        # Polygons
+        for lyr in poly_layers:
+            if not created:
+                cmd = ["ogr2ogr", "-f", "GPKG", "-overwrite"]
+            else:
+                cmd = ["ogr2ogr", "-f", "GPKG", "-update", "-append"]
+            cmd += [
+                str(out_gpkg), str(f), lyr,
+                "-nln", f"{base}_polygons",
+                "-nlt", "POLYGON",
+            ]
+            rc, _, _ = run(cmd)
+            if rc == 0:
+                created = True
+
+        print(f"{f.name}: layers={sorted(layers)}")
+
+    print(f"Done: {out_gpkg}")
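+    # Hypothetical invocation (this CLI is nested under 'macrostrat maps staging';
+    # the paths and slug below are placeholders):
+    #   macrostrat maps staging convert-e00 --data-path ./coverages --slug my_map
+    # would write ./coverages/my_map.gpkg with <coverage>_lines/_points/_polygons tables.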
+
 # ----------------------------------------------------------------------------------------------------------------------
 
 
@@ -550,9 +681,6 @@ def staging_bulk(
     for region_path in region_dirs:
         slug, name, ext = normalize_slug(prefix, Path(region_path))
 
-        # upload to the s3 bucket!
-        object_ids = cmd_upload_dir(slug=slug, data_path=region_path, ext=ext)
-
         print(f"Ingesting {slug} from {region_path}")
         gis_files, excluded_files = find_gis_files(Path(region_path), filter=filter)
         if not gis_files:
@@ -653,11 +781,16 @@ def staging_bulk(
             ),
         )
 
+
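+        # Upload to S3 after the source is registered, so the files can be linked to its ingest process.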
+        cmd_upload_dir(slug=slug, data_path=region_path, ext=ext)
+
         map_info = get_map_info(db, slug)
         _prepare_fields(map_info)
         create_rgeom(map_info)
         create_webgeom(map_info)
 
+<<<<<<< HEAD:py-modules/map-integration/macrostrat/map_integration/__init__.py
         # Ingest process assertions
         if len(object_ids) > 0:
             ingest_id = db.run_query(
@@ -680,6 +813,8 @@ def staging_bulk(
                     dict(ingest_process_id=ingest_id, object_id=object),
                 )
 
+=======
+>>>>>>> 115fe2f6d4d422b4c341a2938707dc116834a325:map-integration/macrostrat/map_integration/__init__.py
         print(
             f"\nFinished staging setup for {slug}. View map here: https://dev.macrostrat.org/maps/ingestion/{source_id}/\n"
         )