@@ -19,7 +19,7 @@
 from macrostrat.map_integration.process.geometry import create_rgeom, create_webgeom
 from macrostrat.map_integration.utils.file_discovery import find_gis_files
 from macrostrat.map_integration.utils.map_info import get_map_info
-from macrostrat.map_integration.utils.staging_upload_dir import *
+from macrostrat.map_integration.utils.s3_file_management import *

 from . import pipeline
 from .commands.copy_sources import copy_macrostrat_sources
@@ -85,27 +85,35 @@ def set_active_map(map: MapInfo = None):

 @sources.command(name="delete")
 def delete_sources(
-    slugs: list[str],
+    slug: list[str] = Option(
+        ...,
+        help="BULK delete = filename.txt [every line lists the slug_name to delete. no whitespaces.]\n"
+        + "SINGLE delete = 'slug_name' [list the slug_name in quotes]",
+    ),
+    file_name: str = Option(
+        None, help="deletes a specified file within the slug's directory."
+    ),
     dry_run: bool = Option(False, "--dry-run"),
     all_data: bool = Option(False, "--all-data"),
 ):
     """Delete sources from the map ingestion database."""
     db = get_database()

-    if not stdin.isatty() and len(slugs) == 1 and slugs[0] == "-":
-        slugs = [line.strip() for line in stdin]
-    elif len(slugs) == 1 and os.path.isfile(slugs[0]):
-        with open(slugs[0]) as file:
-            slugs = [line.strip() for line in file if line.strip()]
+    if not stdin.isatty() and len(slug) == 1 and slug[0] == "-":
+        slug = [line.strip() for line in stdin]
+    elif len(slug) == 1 and os.path.isfile(slug[0]):
+        with open(slug[0]) as file:
+            slug = [line.strip() for line in file if line.strip()]

     if dry_run:
         print("Deleting maps:")
-        print(" " + "\n".join(slugs))
+        print(" " + "\n".join(slug))

         print("\nDry run; not actually deleting anything")
         return

-    for slug in slugs:
+    for slug in slug:
+        cmd_delete_dir(slug, file_name)
         print(f"Deleting map {slug}")
         print(slug)
         tables = db.run_query(
@@ -260,11 +268,8 @@ def staging(
     slug: str,
     data_path: str,
     name: str,
-    meta_path: str = Option(
-        None, help="metadata URL to merge into the sources polygons/lines/points table"
-    ),
     merge_key: str = Option(
-        None,
+        "mapunit",
         help="primary key to left join the metadata into the sources polygons/lines/points table",
     ),
     meta_table: str = Option(
@@ -277,9 +282,13 @@ def staging(
     Ingest a map, update metadata, prepare fields, and build geometries.
     """
     db = get_database()
+    data_path_ext = Path(data_path)
+    ext = data_path_ext.suffix.lower()
+    # upload to the s3 bucket!
+    cmd_upload_dir(slug, data_path_ext, ext)
     print(f"Ingesting {slug} from {data_path}")

-    gis_files, excluded_files = find_gis_files(Path(data_path), filter=filter)
+    gis_files, excluded_files = find_gis_files(data_path_ext, filter=filter)
     if not gis_files:
         raise ValueError(f"No GIS files found in {data_path}")

@@ -296,7 +305,7 @@ def staging(
         slug,
         gis_files,
         if_exists="replace",
-        meta_path=meta_path,
+        meta_path=data_path,
         merge_key=merge_key,
         meta_table=meta_table,
     )
@@ -322,8 +331,8 @@ def staging(
     # add map_url later
     db.run_sql(
         """
-        INSERT INTO maps_metadata.ingest_process (state, source_id, object_group_id, ingested_by, ingest_pipeline, comments)
-        VALUES (:state, :source_id, :object_group_id, :ingested_by, :ingest_pipeline, :comments);
+        INSERT INTO maps_metadata.ingest_process (state, source_id, object_group_id, ingested_by, ingest_pipeline, comments, slug)
+        VALUES (:state, :source_id, :object_group_id, :ingested_by, :ingest_pipeline, :comments, :slug);
         """,
         dict(
             state=state,
@@ -332,6 +341,7 @@ def staging(
             ingested_by="macrostrat-admin",
             ingest_pipeline=ingest_pipeline,
             comments=comments,
+            slug=slug,
         ),
     )

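Note: the helpers used by the new code in this diff (staging_upload_dir, staging_delete_dir, staging_list_dir, staging_download_dir) come from the star import of macrostrat.map_integration.utils.s3_file_management at the top of the file, so their bodies are not shown here. As orientation only, here is a minimal sketch of what a staging_upload_dir(slug, data_path, ext) helper could look like on top of boto3; the MACROSTRAT_STAGING_BUCKET variable, the "<slug>/" key prefix, and the return shape are illustrative assumptions, not the actual implementation.

    import os
    from pathlib import Path

    import boto3


    def staging_upload_dir_sketch(slug: str, data_path: Path, ext: str = "") -> dict:
        """Upload a single file or a whole directory tree to s3://<bucket>/<slug>/."""
        bucket = os.environ["MACROSTRAT_STAGING_BUCKET"]  # assumed env var, not from this diff
        s3 = boto3.client("s3")
        data_path = Path(data_path)
        files = [data_path] if data_path.is_file() else [p for p in data_path.rglob("*") if p.is_file()]
        uploaded = []
        for path in files:
            rel = path.name if data_path.is_file() else path.relative_to(data_path).as_posix()
            key = f"{slug}/{rel}"
            s3.upload_file(str(path), bucket, key)  # managed upload; handles multipart for large files
            uploaded.append(key)
        # JSON-serializable result, since cmd_upload_dir pretty-prints it with json.dumps
        return {"bucket": bucket, "slug": slug, "ext": ext, "uploaded": uploaded}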
@@ -378,29 +388,30 @@ def staging(
     if any(val is None for val in [row, ingest_process, rgeom, web_geom]):
         raise RuntimeError("Staging failed: Some expected records were not inserted.")

-    print(
-        f"\nFinished staging setup for {slug}. View map here: https://dev.macrostrat.org/maps/ingestion/{source_id}/\n"
+    console.print(
+        f"[green]\nFinished staging setup for {slug}. "
+        f"View map here: https://dev.macrostrat.org/maps/ingestion/{source_id}/[/green]\n"
     )


 staging_cli.add_command(staging, name="ingest")
+staging_cli.command("delete")(delete_sources)

 # ------------------------------------------
 # commands nested under 'macrostrat maps staging...'


-@staging_cli.command("upload-dir")
-def cmd_upload_dir(
-    slug: str = ...,
-    data_path: Path = ...,
-):
+@staging_cli.command("s3-upload-dir")
+def cmd_upload_dir(slug: str = ..., data_path: Path = ..., ext: str = Option("")):
     """Upload a local directory to the staging bucket under SLUG/."""
-    res = staging_upload_dir(slug, data_path)
+    res = staging_upload_dir(slug, data_path, ext)
     pretty_res = json.dumps(res, indent=2)
-    console.print(f"[green] Upload successful! \n{pretty_res}[/green]")
+    console.print(
+        f"[green] Upload to s3 bucket was successful! \n{pretty_res}[/green]"
+    )


-@staging_cli.command("delete-dir")
+@staging_cli.command("s3-delete-dir")
 def cmd_delete_dir(
     slug: str = ...,
     file_name: str = Option(
@@ -410,63 +421,72 @@ def cmd_delete_dir(
     """Delete all objects under SLUG/ in the staging bucket."""
     staging_delete_dir(slug, file_name)
     console.print(
-        f"[green] Delete successful! \nDeleted objects under slug: {slug}[/green]"
+        f"[green] Successfully deleted objects within the s3 bucket under slug: {slug}[/green]"
     )


-@staging_cli.command("list")
+@staging_cli.command("s3-list")
 def cmd_list_dir(
-    slug: str = ...,
-    page_token: int = Option(0, "--page-token", "-t", help="Offset to start from"),
-    page_size: int = Option(10, "--page-size", "-s", help="Items per page"),
-    more: bool = Option(
-        False, "--more", "-m", help="Interactively page through results"
+    slug: str = Option(
+        ...,
+        help="lists all files within a slug directory. Input 'all' to list all the slug directories.",
     ),
+    page_token: int = Option(0, "--page-token", "-t", help="Offset to start from"),
+    page_size: int = Option(20, "--page-size", "-s", help="Items per page"),
 ):
     """List paginated files under SLUG."""
-    if not more:
-        page = staging_list_dir(slug, page_token=page_token, page_size=page_size)
-        files = json.dumps(page, indent=2)
-        console.print(f"[green] {files}[/green]")
-        return

     token = page_token
+    count = 0
     while True:
         page = staging_list_dir(slug, page_token=token, page_size=page_size)
         for f in page["files"]:
-            console.print(f"[green]{f}[/green]")
+            console.print(f"[blue]{f}[/blue]")
+            count += 1
+
         if page["next_page_token"] is None:
+            console.print(f"[green]Total files: {count}[/green]")
             print("\n-- End of list --")
             break
-        resp = (
-            input("\nPress Enter for next page, or type 'exit' to stop: ")
-            .strip()
-            .lower()
-        )
+        console.print(f"[green]Scrolled through: {count} files[/green]")
+
+        resp = input("\nPress 'enter' for next page, or 'q' to exit: ").strip().lower()
         if resp in ("exit", "quit", "q"):
             break
         token = page["next_page_token"]


+@staging_cli.command("s3-download-dir")
+def cmd_download_dir(
+    slug: str = ...,
+    dest_path: pathlib.Path = Option(
+        ..., help="Local destination path to save slug directory to."
+    ),
+):
+    """Download a staging prefix to a local directory."""
+    res = staging_download_dir(slug=slug, dest_path=dest_path)
+    console.print(f"[green] Download successful![/green]")
+    console.print(json.dumps(res, indent=2))
+
+
 # ----------------------------------------------------------------------------------------------------------------------


-@cli.command(name="bulk-staging")
+@staging_cli.command("bulk-ingest")
 def staging_bulk(
     # required
     meta_parent_path: str = Option(
         ..., help="Parent directory containing region subfolders"
     ),
     # required
     prefix: str = Option(..., help="Slug prefix to avoid collisions"),
-    # required
     merge_key: str = Option(
-        ...,
+        "mapunit",
         help="primary key to left join the metadata into the sources polygons/lines/points table",
     ),
     meta_table: str = Option(
         "polygons",
-        help="Options: polygons, lines, or points. specifies the table in which the legend metadata is merged into. It defaults to sources polygons",
+        help="Options: polygons, lines, or points. specifies the table in which the metadata is merged into. It defaults to sources polygons",
     ),
     filter: str = Option(None, help="Filter applied to GIS file selection"),
 ):
@@ -488,9 +508,13 @@ def staging_bulk(
         clean_stem = re.sub(r"[^a-z0-9_]", "", clean_stem)
         slug = f"{prefix}_{clean_stem}"
         name = region_path.stem
+        ext = region_path.suffix.lower()
         meta_path = region_path
         staged_slugs.append(slug)

+        # upload to the s3 bucket!
+        cmd_upload_dir(slug, region_path, ext)
+
         print(f"Ingesting {slug} from {meta_path}")
         gis_files, excluded_files = find_gis_files(Path(meta_path), filter=filter)
         if not gis_files:
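The cmd_list_dir command above relies on staging_list_dir returning a dict with a "files" list and an integer-offset "next_page_token" that becomes None on the last page. Below is a sketch of a helper matching that contract, under the same illustrative assumptions as the upload sketch (the bucket name and "<slug>/" prefix layout are not taken from this diff).

    import os

    import boto3


    def staging_list_dir_sketch(slug: str, page_token: int = 0, page_size: int = 20) -> dict:
        """Return one page of object keys under <slug>/ (whole bucket when slug == "all")."""
        bucket = os.environ["MACROSTRAT_STAGING_BUCKET"]  # assumed env var, not from this diff
        s3 = boto3.client("s3")
        kwargs = {"Bucket": bucket}
        if slug != "all":
            kwargs["Prefix"] = f"{slug}/"
        keys = []
        for page in s3.get_paginator("list_objects_v2").paginate(**kwargs):
            keys.extend(obj["Key"] for obj in page.get("Contents", []))
        chunk = keys[page_token : page_token + page_size]
        end = page_token + page_size
        # None marks the last page, which is what the CLI loop checks before breaking
        return {"files": chunk, "next_page_token": end if end < len(keys) else None}

Offset paging over a fresh full listing keeps the token a plain integer, as the CLI expects, at the cost of re-listing the prefix on every page; a ContinuationToken-based variant would scale better for very large prefixes.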