 
 from . import parallel
 from .hpss import hpss_get
-from .settings import (
-    BLOCK_SIZE,
-    DEFAULT_CACHE,
-    TIME_TOL,
-    FilesRow,
-    TupleFilesRow,
-    logger,
-)
+from .settings import BLOCK_SIZE, TIME_TOL, FilesRow, TupleFilesRow, logger
 from .utils import CommandInfo, HPSSType, tars_table_exists
 
 
@@ -60,7 +53,7 @@ def extract(keep_files: bool = True):
     )
 
 
-def setup_extract(command_info: CommandInfo) -> Tuple[argparse.Namespace, str]:
+def setup_extract(command_info: CommandInfo) -> argparse.Namespace:
     parser: argparse.ArgumentParser = argparse.ArgumentParser(
         usage="zstash extract [<args>] [files]",
         description="Extract files from existing archive",
@@ -155,16 +148,16 @@ def parse_tars_option(tars: str, first_tar: str, last_tar: str) -> List[str]:
     return tar_list
 
 
-def extract_database(command_info: CommandInfo,
-    args: argparse.Namespace, keep_files: bool
+def extract_database(
+    command_info: CommandInfo, args: argparse.Namespace, keep_files: bool
 ) -> List[FilesRow]:
 
     # Open database
     logger.debug("Opening index database")
     if not os.path.exists(command_info.get_db_name()):
         # Will need to retrieve from HPSS
         if command_info.hpss_type != HPSSType.UNDEFINED:
-            hpss_get(command_info.config.hpss, command_info.get_db_name())
+            hpss_get(command_info, command_info.get_db_name())
         else:
             error_str: str = (
                 "--hpss argument is required when local copy of database is unavailable"
@@ -261,10 +254,15 @@ def extract_database(command_info: CommandInfo,
     if args.workers > 1:
         logger.debug("Running zstash {} with multiprocessing".format(cmd))
         failures = multiprocess_extract(
-            args.workers, matches, keep_files, command_info.keep, command_info.cache_dir, cur, args
+            args.workers,
+            command_info,
+            matches,
+            keep_files,
+            cur,
+            args,
         )
     else:
-        failures = extractFiles(matches, keep_files, command_info.keep, command_info.cache_dir, cur, args)
+        failures = extractFiles(command_info, matches, keep_files, cur, args)
 
     # Close database
     logger.debug("Closing index database")
@@ -275,10 +273,9 @@ def extract_database(command_info: CommandInfo,
 
 def multiprocess_extract(
     num_workers: int,
+    command_info: CommandInfo,
     matches: List[FilesRow],
     keep_files: bool,
-    keep_tars: Optional[bool],
-    cache: str,
     cur: sqlite3.Cursor,
     args: argparse.Namespace,
 ) -> List[FilesRow]:
@@ -353,7 +350,7 @@ def multiprocess_extract(
         )
         process: multiprocessing.Process = multiprocessing.Process(
             target=extractFiles,
-            args=(matches, keep_files, keep_tars, cache, cur, args, worker),
+            args=(command_info, matches, keep_files, cur, args, worker),
             daemon=True,
         )
         process.start()
@@ -397,10 +394,9 @@ def check_sizes_match(cur, tfname):
 
 # FIXME: C901 'extractFiles' is too complex (33)
 def extractFiles(  # noqa: C901
+    command_info: CommandInfo,
     files: List[FilesRow],
     keep_files: bool,
-    keep_tars: Optional[bool],
-    cache: str,
     cur: sqlite3.Cursor,
     args: argparse.Namespace,
     multiprocess_worker: Optional[parallel.ExtractWorker] = None,
@@ -413,14 +409,15 @@ def extractFiles( # noqa: C901
     This is used for when checking if the files in an HPSS
     repository are valid.
 
-    If keep_tars is True, the tar archives that are downloaded are kept,
+    If command_info.keep is True, the tar archives that are downloaded are kept,
     even after the program has terminated. Otherwise, they are deleted.
 
     If running in parallel, then multiprocess_worker is the Worker
     that called this function.
     We need a reference to it so we can signal it to print
     the contents of what's in its print queue.
     """
+
     failures: List[FilesRow] = []
     tfname: str
     newtar: bool = True
@@ -442,17 +439,13 @@ def extractFiles( # noqa: C901
         # Open new tar archive
         if newtar:
             newtar = False
-            tfname = os.path.join(cache, files_row.tar)
+            tfname = os.path.join(command_info.cache_dir, files_row.tar)
             # Everytime we're extracting a new tar, if running in parallel,
             # let the process know.
             # This is to synchronize the print statements.
             if multiprocess_worker:
                 multiprocess_worker.set_curr_tar(files_row.tar)
 
-            if config.hpss is not None:
-                hpss: str = config.hpss
-            else:
-                raise TypeError("Invalid config.hpss={}".format(config.hpss))
             tries: int = args.retries + 1
             # Set to True to test the `--retries` option with a forced failure.
             # Then run `python -m unittest tests.test_extract.TestExtract.testExtractRetries`
@@ -471,7 +464,7 @@ def extractFiles( # noqa: C901
                         test_retry = False
                         raise RuntimeError
                     if do_retrieve:
-                        hpss_get(hpss, tfname, cache)
+                        hpss_get(command_info, tfname)
                         if not check_sizes_match(cur, tfname):
                             raise RuntimeError(
                                 f"{tfname} size does not match expected size."
@@ -612,7 +605,7 @@ def extractFiles( # noqa: C901
             newtar = True
 
             # Delete this tar if the corresponding command-line arg was used.
-            if not keep_tars:
+            if not command_info.keep:
                 if tfname is not None:
                     os.remove(tfname)
                 else:
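
For orientation, a minimal sketch of the calling convention this commit moves to: a single CommandInfo object is threaded through instead of separate hpss/cache/keep_tars arguments. The signatures below are inferred only from the call sites in the hunks above; the stub bodies and any CommandInfo fields beyond config.hpss, cache_dir, and keep are assumptions, not the project's actual implementation.

# Sketch only: signatures reconstructed from the diff above; bodies are stubs.
import argparse
import sqlite3
from typing import List, Optional


def hpss_get(command_info: "CommandInfo", name: str) -> None:
    # Previously hpss_get(hpss, name, cache); the HPSS location and the local
    # cache directory are now read from command_info (assumed to be
    # command_info.config.hpss and command_info.cache_dir).
    ...


def extractFiles(
    command_info: "CommandInfo",
    files: List["FilesRow"],
    keep_files: bool,
    cur: sqlite3.Cursor,
    args: argparse.Namespace,
    multiprocess_worker: Optional["parallel.ExtractWorker"] = None,
) -> List["FilesRow"]:
    # The keep_tars and cache parameters are gone; callers rely on
    # command_info.keep and command_info.cache_dir instead.
    ...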