From e0b79e9dc2b2066b284b179bcff0d42252522924 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Fri, 2 Jan 2026 18:32:36 -0600 Subject: [PATCH 01/34] Initial refactor to delete tars properly --- zstash/create.py | 48 ++++--- zstash/globus.py | 262 ++++++++++++++++++++++-------------- zstash/hpss.py | 73 +++++----- zstash/hpss_utils.py | 11 +- zstash/transfer_tracking.py | 58 ++++++++ zstash/update.py | 57 ++++---- 6 files changed, 321 insertions(+), 188 deletions(-) create mode 100644 zstash/transfer_tracking.py diff --git a/zstash/create.py b/zstash/create.py index b502f1e6..3eb9e602 100644 --- a/zstash/create.py +++ b/zstash/create.py @@ -14,6 +14,7 @@ from .hpss import hpss_put from .hpss_utils import add_files from .settings import DEFAULT_CACHE, config, get_db_filename, logger +from .transfer_tracking import TransferManager from .utils import ( create_tars_table, get_files_to_archive, @@ -52,12 +53,13 @@ def create(): logger.error(input_path_error_str) raise NotADirectoryError(input_path_error_str) + transfer_manager: TransferManager = TransferManager() if hpss != "none": url = urlparse(hpss) if url.scheme == "globus": # identify globus endpoints logger.debug(f"{ts_utc()}:Calling globus_activate(hpss)") - globus_activate(hpss) + transfer_manager.globus_config = globus_activate(hpss) else: # config.hpss is not "none", so we need to # create target HPSS directory @@ -88,14 +90,21 @@ def create(): # Create and set up the database logger.debug(f"{ts_utc()}: Calling create_database()") - failures: List[str] = create_database(cache, args) + failures: List[str] = create_database(cache, args, transfer_manager) # Transfer to HPSS. Always keep a local copy. 
logger.debug(f"{ts_utc()}: calling hpss_put() for {get_db_filename(cache)}") - hpss_put(hpss, get_db_filename(cache), cache, keep=args.keep, is_index=True) + hpss_put( + hpss, + get_db_filename(cache), + cache, + keep=args.keep, + is_index=True, + transfer_manager=transfer_manager, + ) logger.debug(f"{ts_utc()}: calling globus_finalize()") - globus_finalize(non_blocking=args.non_blocking) + globus_finalize(transfer_manager, non_blocking=args.non_blocking) if len(failures) > 0: # List the failures @@ -204,7 +213,9 @@ def setup_create() -> Tuple[str, argparse.Namespace]: return cache, args -def create_database(cache: str, args: argparse.Namespace) -> List[str]: +def create_database( + cache: str, args: argparse.Namespace, transfer_manager: TransferManager +) -> List[str]: # Create new database logger.debug(f"{ts_utc()}:Creating index database") if os.path.exists(get_db_filename(cache)): @@ -263,26 +274,7 @@ def create_database(cache: str, args: argparse.Namespace) -> List[str]: files: List[str] = get_files_to_archive(cache, args.include, args.exclude) failures: List[str] - if args.follow_symlinks: - try: - # Add files to archive - failures = add_files( - cur, - con, - -1, - files, - cache, - args.keep, - args.follow_symlinks, - skip_tars_md5=args.no_tars_md5, - non_blocking=args.non_blocking, - error_on_duplicate_tar=args.error_on_duplicate_tar, - overwrite_duplicate_tars=args.overwrite_duplicate_tars, - force_database_corruption=args.for_developers_force_database_corruption, - ) - except FileNotFoundError: - raise Exception("Archive creation failed due to broken symlink.") - else: + try: # Add files to archive failures = add_files( cur, @@ -297,7 +289,13 @@ def create_database(cache: str, args: argparse.Namespace) -> List[str]: error_on_duplicate_tar=args.error_on_duplicate_tar, overwrite_duplicate_tars=args.overwrite_duplicate_tars, force_database_corruption=args.for_developers_force_database_corruption, + transfer_manager=transfer_manager, ) + except 
FileNotFoundError as e: + if args.follow_symlinks: + raise Exception("Archive creation failed due to broken symlink.") + else: + raise e # Close database con.commit() diff --git a/zstash/globus.py b/zstash/globus.py index 2cacad5f..0ff56731 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -3,9 +3,8 @@ import sys from typing import List, Optional -from globus_sdk import TransferAPIError, TransferClient, TransferData +from globus_sdk import TransferAPIError, TransferClient from globus_sdk.response import GlobusHTTPResponse -from globus_sdk.services.transfer.response.iterable import IterableTransferResponse from six.moves.urllib.parse import urlparse from .globus_utils import ( @@ -17,78 +16,85 @@ submit_transfer_with_checks, ) from .settings import logger +from .transfer_tracking import GlobusConfig, TransferBatch, TransferManager from .utils import ts_utc -remote_endpoint = None -local_endpoint = None -transfer_client: TransferClient = None -transfer_data: TransferData = None -task_id = None -archive_directory_listing: IterableTransferResponse = None - -def globus_activate(hpss: str): +def globus_activate( + hpss: str, globus_config: Optional[GlobusConfig] = None +) -> Optional[GlobusConfig]: """ Read the local globus endpoint UUID from ~/.zstash.ini. 
If the ini file does not exist, create an ini file with empty values, and try to find the local endpoint UUID based on the FQDN """ - global transfer_client - global local_endpoint - global remote_endpoint url = urlparse(hpss) if url.scheme != "globus": - return + return None + if globus_config is None: + globus_config = GlobusConfig() check_state_files() - remote_endpoint = url.netloc - local_endpoint = get_local_endpoint_id(local_endpoint) - if remote_endpoint.upper() in HPSS_ENDPOINT_MAP.keys(): - remote_endpoint = HPSS_ENDPOINT_MAP.get(remote_endpoint.upper()) - both_endpoints: List[Optional[str]] = [local_endpoint, remote_endpoint] - transfer_client = get_transfer_client_with_auth(both_endpoints) + globus_config.remote_endpoint = url.netloc + globus_config.local_endpoint = get_local_endpoint_id(globus_config.local_endpoint) + upper_remote_ep = globus_config.remote_endpoint.upper() + if upper_remote_ep in HPSS_ENDPOINT_MAP.keys(): + globus_config.remote_endpoint = HPSS_ENDPOINT_MAP[upper_remote_ep] + both_endpoints: List[Optional[str]] = [ + globus_config.local_endpoint, + globus_config.remote_endpoint, + ] + globus_config.transfer_client = get_transfer_client_with_auth(both_endpoints) for ep_id in both_endpoints: - r = transfer_client.endpoint_autoactivate(ep_id, if_expires_in=600) + r = globus_config.transfer_client.endpoint_autoactivate( + ep_id, if_expires_in=600 + ) if r.get("code") == "AutoActivationFailed": logger.error( f"The {ep_id} endpoint is not activated or the current activation expires soon. Please go to https://app.globus.org/file-manager/collections/{ep_id} and (re)activate the endpoint." 
) sys.exit(1) + return globus_config -def file_exists(name: str) -> bool: - +def file_exists(archive_directory_listing, name: str) -> bool: for entry in archive_directory_listing: if entry.get("name") == name: return True return False -global_variable_tarfiles_pushed = 0 - - # C901 'globus_transfer' is too complex (20) def globus_transfer( # noqa: C901 - remote_ep: str, remote_path: str, name: str, transfer_type: str, non_blocking: bool + transfer_manager: TransferManager, + remote_ep: str, + remote_path: str, + name: str, + transfer_type: str, + non_blocking: bool, ): - global transfer_data - global task_id - global archive_directory_listing - global global_variable_tarfiles_pushed logger.info(f"{ts_utc()}: Entered globus_transfer() for name = {name}") logger.debug(f"{ts_utc()}: non_blocking = {non_blocking}") - if not transfer_client: - globus_activate("globus://" + remote_ep) - if not transfer_client: + if (not transfer_manager.globus_config) or ( + not transfer_manager.globus_config.transfer_client + ): + transfer_manager.globus_config = globus_activate("globus://" + remote_ep) + if (not transfer_manager.globus_config) or ( + not transfer_manager.globus_config.transfer_client + ): sys.exit(1) if transfer_type == "get": - if not archive_directory_listing: - archive_directory_listing = transfer_client.operation_ls( - remote_endpoint, remote_path + if not transfer_manager.globus_config.archive_directory_listing: + transfer_manager.globus_config.archive_directory_listing = ( + transfer_manager.globus_config.transfer_client.operation_ls( + transfer_manager.globus_config.remote_endpoint, remote_path + ) ) - if not file_exists(name): + if not file_exists( + transfer_manager.globus_config.archive_directory_listing, name + ): logger.error( "Remote file globus://{}{}/{} does not exist".format( remote_ep, remote_path, name @@ -96,45 +102,45 @@ def globus_transfer( # noqa: C901 ) sys.exit(1) + mrt: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() 
transfer_data = set_up_TransferData( transfer_type, - local_endpoint, # Global - remote_endpoint, # Global + transfer_manager.globus_config.local_endpoint, + transfer_manager.globus_config.remote_endpoint, remote_path, name, - transfer_client, # Global - transfer_data, # Global + transfer_manager.globus_config.transfer_client, + mrt.transfer_data if mrt else None, ) task: GlobusHTTPResponse try: - if task_id: - task = transfer_client.get_task(task_id) - prev_task_status = task["status"] + if mrt and mrt.task_id: + task = transfer_manager.globus_config.transfer_client.get_task(mrt.task_id) + mrt.task_status = task["status"] # one of {ACTIVE, SUCCEEDED, FAILED, CANCELED, PENDING, INACTIVE} # NOTE: How we behave here depends upon whether we want to support mutliple active transfers. # Presently, we do not, except inadvertantly (if status == PENDING) - if prev_task_status == "ACTIVE": + if mrt.task_status == "ACTIVE": logger.info( - f"{ts_utc()}: Previous task_id {task_id} Still Active. Returning ACTIVE." + f"{ts_utc()}: Previous task_id {mrt.task_id} Still Active. Returning ACTIVE." ) - return "ACTIVE" - elif prev_task_status == "SUCCEEDED": + # Don't return early - continue to submit the new transfer + # The previous transfer will complete asynchronously + elif mrt.task_status == "SUCCEEDED": logger.info( - f"{ts_utc()}: Previous task_id {task_id} status = SUCCEEDED." + f"{ts_utc()}: Previous task_id {mrt.task_id} status = SUCCEEDED." ) src_ep = task["source_endpoint_id"] dst_ep = task["destination_endpoint_id"] label = task["label"] ts = ts_utc() logger.info( - "{}:Globus transfer {}, from {} to {}: {} succeeded".format( - ts, task_id, src_ep, dst_ep, label - ) + f"{ts}:Globus transfer {mrt.task_id}, from {src_ep} to {dst_ep}: {label} succeeded" ) else: logger.error( - f"{ts_utc()}: Previous task_id {task_id} status = {prev_task_status}." + f"{ts_utc()}: Previous task_id {mrt.task_id} status = {mrt.task_status}." 
) # DEBUG: review accumulated items in TransferData @@ -142,15 +148,17 @@ def globus_transfer( # noqa: C901 attribs = transfer_data.__dict__ for item in attribs["data"]["DATA"]: if item["DATA_TYPE"] == "transfer_item": - global_variable_tarfiles_pushed += 1 + transfer_manager.cumulative_tarfiles_pushed += 1 print( - f" (routine) PUSHING (#{global_variable_tarfiles_pushed}) STORED source item: {item['source_path']}", + f" (routine) PUSHING (#{transfer_manager.cumulative_tarfiles_pushed}) STORED source item: {item['source_path']}", flush=True, ) - # SUBMIT new transfer here + # Submit the current transfer_data logger.info(f"{ts_utc()}: DIVING: Submit Transfer for {transfer_data['label']}") - task = submit_transfer_with_checks(transfer_client, transfer_data) + task = submit_transfer_with_checks( + transfer_manager.globus_config.transfer_client, transfer_data + ) task_id = task.get("task_id") # NOTE: This log message is misleading. If we have accumulated multiple tar files for transfer, # the "lable" given here refers only to the LAST tarfile in the TransferData list. @@ -158,7 +166,19 @@ def globus_transfer( # noqa: C901 f"{ts_utc()}: SURFACE Submit Transfer returned new task_id = {task_id} for label {transfer_data['label']}" ) + # Create new transfer record with the task info + new_transfer = TransferBatch() + new_transfer.file_paths = [] # TODO: is this right? + new_transfer.task_id = task_id + new_transfer.task_status = "UNKNOWN" + new_transfer.is_globus = True + new_transfer.transfer_data = ( + None # This batch was submitted # TODO: is this right? + ) + transfer_manager.batches.append(new_transfer) + # Nullify the submitted transfer data structure so that a new one will be created on next call. 
+ # Reset for building the next batch transfer_data = None except TransferAPIError as e: if e.code == "NoCredException": @@ -174,26 +194,36 @@ def globus_transfer( # noqa: C901 logger.error("Exception: {}".format(e)) sys.exit(1) + new_mrt: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() # test for blocking on new task_id task_status = "UNKNOWN" - if not non_blocking: - task_status = globus_block_wait( - task_id=task_id, wait_timeout=7200, polling_interval=10, max_retries=5 - ) - else: - logger.info(f"{ts_utc()}: NO BLOCKING (task_wait) for task_id {task_id}") + if new_mrt and new_mrt.task_id: + if not non_blocking: + new_mrt.task_status = globus_block_wait( + transfer_manager.globus_config.transfer_client, + task_id=new_mrt.task_id, + wait_timeout=7200, + max_retries=5, + ) + else: + logger.info( + f"{ts_utc()}: NO BLOCKING (task_wait) for task_id {new_mrt.task_id}" + ) if transfer_type == "put": return task_status if transfer_type == "get" and task_id: - globus_wait(task_id) + globus_wait(transfer_manager.globus_config.transfer_client, task_id) return task_status def globus_block_wait( - task_id: str, wait_timeout: int, polling_interval: int, max_retries: int + transfer_client: TransferClient, + task_id: str, + wait_timeout: int, + max_retries: int, ): # poll every "polling_interval" seconds to speed up small transfers. 
Report every 2 hours, stop waiting aftert 5*2 = 10 hours @@ -238,8 +268,7 @@ def globus_block_wait( return task_status -def globus_wait(task_id: str): - +def globus_wait(transfer_client: TransferClient, task_id: str): try: """ A Globus transfer job (task) can be in one of the three states: @@ -281,44 +310,73 @@ def globus_wait(task_id: str): sys.exit(1) -def globus_finalize(non_blocking: bool = False): - global global_variable_tarfiles_pushed +def globus_finalize(transfer_manager: TransferManager, non_blocking: bool = False): last_task_id = None - if transfer_data: - # DEBUG: review accumulated items in TransferData - logger.info(f"{ts_utc()}: FINAL TransferData: accumulated items:") - attribs = transfer_data.__dict__ - for item in attribs["data"]["DATA"]: - if item["DATA_TYPE"] == "transfer_item": - global_variable_tarfiles_pushed += 1 - print( - f" (finalize) PUSHING ({global_variable_tarfiles_pushed}) source item: {item['source_path']}", - flush=True, - ) + if transfer_manager.globus_config is None: + logger.warning("No GlobusConfig object provided for finalization") + return - # SUBMIT new transfer here - logger.info(f"{ts_utc()}: DIVING: Submit Transfer for {transfer_data['label']}") - try: - last_task = submit_transfer_with_checks(transfer_client, transfer_data) - last_task_id = last_task.get("task_id") - except TransferAPIError as e: - if e.code == "NoCredException": - logger.error( - "{}. 
Please go to https://app.globus.org/endpoints and activate the endpoint.".format( - e.message + transfer: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() + if transfer: + # Check if there's any pending transfer data that hasn't been submitted yet + if transfer.transfer_data: + # DEBUG: review accumulated items in TransferData + logger.info(f"{ts_utc()}: FINAL TransferData: accumulated items:") + attribs = transfer.transfer_data.__dict__ + for item in attribs["data"]["DATA"]: + if item["DATA_TYPE"] == "transfer_item": + transfer_manager.cumulative_tarfiles_pushed += 1 + print( + f" (finalize) PUSHING ({transfer_manager.cumulative_tarfiles_pushed}) source item: {item['source_path']}", + flush=True, ) + + # SUBMIT new transfer here + logger.info( + f"{ts_utc()}: DIVING: Submit Transfer for {transfer.transfer_data['label']}" + ) + try: + last_task = submit_transfer_with_checks( + transfer_manager.globus_config.transfer_client, + transfer.transfer_data, ) - else: - logger.error(e) - sys.exit(1) - except Exception as e: - logger.error("Exception: {}".format(e)) - sys.exit(1) + last_task_id = last_task.get("task_id") + except TransferAPIError as e: + if e.code == "NoCredException": + logger.error( + "{}. 
Please go to https://app.globus.org/endpoints and activate the endpoint.".format( + e.message + ) + ) + else: + logger.error(e) + sys.exit(1) + except Exception as e: + logger.error("Exception: {}".format(e)) + sys.exit(1) + + # Wait for any submitted transfers to complete + # In non-blocking mode, this ensures index.db and any accumulated tar files complete + # In blocking mode, this is redundant but harmless + skip_last_wait: bool = False + if transfer and transfer.task_id: + if transfer.task_id == last_task_id: + skip_last_wait = ( + True # No reason to call globus_wait twice on the same task_id + ) + logger.info( + f"{ts_utc()}: Waiting for transfer task_id={transfer.task_id} to complete" + ) + globus_wait( + transfer_manager.globus_config.transfer_client, transfer.task_id + ) + if last_task_id and (not skip_last_wait): + logger.info( + f"{ts_utc()}: Waiting for last transfer task_id={last_task_id} to complete" + ) + globus_wait(transfer_manager.globus_config.transfer_client, last_task_id) - if not non_blocking: - if task_id: - globus_wait(task_id) - if last_task_id: - globus_wait(last_task_id) + # Clean up tar files that were queued for deletion + transfer_manager.delete_successfully_transferred_files() diff --git a/zstash/hpss.py b/zstash/hpss.py index 24603388..ef41210b 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -2,12 +2,13 @@ import os.path import subprocess -from typing import List +from typing import List, Optional from six.moves.urllib.parse import urlparse from .globus import globus_transfer from .settings import get_db_filename, logger +from .transfer_tracking import GlobusConfig, TransferBatch, TransferManager from .utils import run_command, ts_utc prev_transfers: List[str] = list() @@ -22,16 +23,10 @@ def hpss_transfer( keep: bool = False, non_blocking: bool = False, is_index: bool = False, + transfer_manager: Optional[TransferManager] = None, ): - global prev_transfers - global curr_transfers - - logger.info( - f"{ts_utc()}: in 
hpss_transfer, prev_transfers is starting as {prev_transfers}" - ) - # logger.debug( - # f"{ts_utc()}: in hpss_transfer, curr_transfers is starting as {curr_transfers}" - # ) + if not transfer_manager: + transfer_manager = TransferManager() if hpss == "none": logger.info("{}: HPSS is unavailable".format(transfer_type)) @@ -85,7 +80,8 @@ def hpss_transfer( endpoint = url.netloc url_path = url.path - curr_transfers.append(file_path) + # Append to curr_transfers: + transfer_manager.batches[-1].file_paths.append(file_path) # logger.debug( # f"{ts_utc()}: curr_transfers has been appended to, is now {curr_transfers}" # ) @@ -104,16 +100,21 @@ def hpss_transfer( # For `get`, this directory is where the file we get from HPSS will go. os.chdir(path) + globus_status: str = "UNKNOWN" if scheme == "globus": - globus_status = "UNKNOWN" + if not transfer_manager.globus_config: + raise RuntimeError("Scheme is 'globus' but no GlobusConfig provided") # Transfer file using the Globus Transfer Service logger.info(f"{ts_utc()}: DIVING: hpss calls globus_transfer(name={name})") - globus_status = globus_transfer( - endpoint, url_path, name, transfer_type, non_blocking - ) - logger.info( - f"{ts_utc()}: SURFACE hpss globus_transfer(name={name}) returns {globus_status}" + globus_transfer( + transfer_manager, endpoint, url_path, name, transfer_type, non_blocking ) + mrt: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() + if mrt and mrt.task_status: + globus_status = mrt.task_status + logger.info( + f"{ts_utc()}: SURFACE hpss globus_transfer(name={name}) returns {globus_status}" + ) # NOTE: Here, the status could be "EXHAUSTED_TIMEOUT_RETRIES", meaning a very long transfer # or perhaps transfer is hanging. We should decide whether to ignore it, or cancel it, but # we'd need the task_id to issue a cancellation. 
Perhaps we should have globus_transfer @@ -129,20 +130,10 @@ def hpss_transfer( os.chdir(cwd) if transfer_type == "put": - if not keep: - if (scheme != "globus") or (globus_status == "SUCCEEDED"): - # Note: This is intended to fulfill the default removal of successfully-transfered - # tar files when keep=False, irrespective of non-blocking status - logger.debug( - f"{ts_utc()}: deleting transfered files {prev_transfers}" - ) - for src_path in prev_transfers: - os.remove(src_path) - prev_transfers = curr_transfers - curr_transfers = list() - logger.info( - f"{ts_utc()}: prev_transfers has been set to {prev_transfers}" - ) + if (not keep) and (not is_index): + # We never delete if `--keep` is set. + # We also never delete `index.db`. + transfer_manager.delete_successfully_transferred_files() def hpss_put( @@ -152,18 +143,34 @@ def hpss_put( keep: bool = True, non_blocking: bool = False, is_index=False, + transfer_manager: Optional[TransferManager] = None, ): """ Put a file to the HPSS archive. """ - hpss_transfer(hpss, file_path, "put", cache, keep, non_blocking, is_index) + hpss_transfer( + hpss, + file_path, + "put", + cache, + keep, + non_blocking, + is_index, + transfer_manager, + ) def hpss_get(hpss: str, file_path: str, cache: str): """ Get a file from the HPSS archive. 
""" - hpss_transfer(hpss, file_path, "get", cache, False) + url = urlparse(hpss) + transfer_manager: TransferManager = TransferManager() + if url.scheme == "globus": + transfer_manager.globus_config = GlobusConfig() + hpss_transfer( + hpss, file_path, "get", cache, False, transfer_manager=transfer_manager + ) def hpss_chgrp(hpss: str, group: str, recurse: bool = False): diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 2f1158bc..76326d9b 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -13,6 +13,7 @@ from .hpss import hpss_put from .settings import TupleFilesRowNoId, TupleTarsRowNoId, config, logger +from .transfer_tracking import TransferManager from .utils import create_tars_table, tars_table_exists, ts_utc @@ -66,6 +67,7 @@ def add_files( error_on_duplicate_tar: bool = False, overwrite_duplicate_tars: bool = False, force_database_corruption: str = "", + transfer_manager: Optional[TransferManager] = None, ) -> List[str]: # Now, perform the actual archiving @@ -160,7 +162,14 @@ def add_files( logger.info( f"{ts_utc()}: DIVING: (add_files): Calling hpss_put to dispatch archive file {tfname} [keep, non_blocking] = [{keep}, {non_blocking}]" ) - hpss_put(hpss, os.path.join(cache, tfname), cache, keep, non_blocking) + hpss_put( + hpss, + os.path.join(cache, tfname), + cache, + keep, + non_blocking, + transfer_manager, + ) logger.info( f"{ts_utc()}: SURFACE (add_files): Called hpss_put to dispatch archive file {tfname}" ) diff --git a/zstash/transfer_tracking.py b/zstash/transfer_tracking.py new file mode 100644 index 00000000..f7d6b5c5 --- /dev/null +++ b/zstash/transfer_tracking.py @@ -0,0 +1,58 @@ +import os +from typing import List, Optional + +from globus_sdk import TransferClient, TransferData +from globus_sdk.services.transfer.response.iterable import IterableTransferResponse + + +class GlobusConfig: + """Globus connection configuration""" + + def __init__(self): + self.remote_endpoint: str + self.local_endpoint: str + 
self.transfer_client: TransferClient + self.archive_directory_listing: Optional[IterableTransferResponse] = None + + +class TransferBatch: + """Represents one batch of files being transferred""" + + def __init__(self): + self.file_paths: List[str] = [] + self.task_id: Optional[str] = None + self.task_status: Optional[str] = None + self.is_globus: bool = False + self.transfer_data: Optional[TransferData] = None # Only for Globus + + def delete_files(self): + for src_path in self.file_paths: + if os.path.exists(src_path): + os.remove(src_path) + + +class TransferManager: + def __init__(self): + # All transfer batches (Globus or HPSS) + self.batches: List[TransferBatch] = [] + self.cumulative_tarfiles_pushed: int = 0 + + # Connection state (Globus-specific, None if not using Globus) + self.globus_config: Optional[GlobusConfig] = None + + def get_most_recent_transfer(self) -> Optional[TransferBatch]: + return self.batches[-1] if self.batches else None + + def delete_successfully_transferred_files(self): + new_batch_list: List[TransferBatch] = [] + for batch in self.batches: + if (not batch.is_globus) or (batch.task_status == "SUCCEEDED"): + # The files were transferred successfully. + # So, we can delete them. + batch.delete_files() + else: + new_batch_list.append(batch) + # We don't need to keep tracking batches that have been both: + # 1. transferred successfully + # 2. 
been deleted + self.batches = new_batch_list diff --git a/zstash/update.py b/zstash/update.py index ebc4f84c..a20a912c 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -11,7 +11,16 @@ from .globus import globus_activate, globus_finalize from .hpss import hpss_get, hpss_put from .hpss_utils import add_files -from .settings import DEFAULT_CACHE, TIME_TOL, config, get_db_filename, logger +from .settings import ( + DEFAULT_CACHE, + TIME_TOL, + FilesRow, + TupleFilesRow, + config, + get_db_filename, + logger, +) +from .transfer_tracking import TransferManager from .utils import get_files_to_archive_with_stats, update_config @@ -21,7 +30,8 @@ def update(): cache: str args, cache = setup_update() - result: Optional[List[str]] = update_database(args, cache) + transfer_manager = TransferManager() + result: Optional[List[str]] = update_database(args, cache, transfer_manager) if result is None: # There was either nothing to update or `--dry-run` was set. @@ -34,9 +44,16 @@ def update(): hpss = config.hpss else: raise TypeError("Invalid config.hpss={}".format(config.hpss)) - hpss_put(hpss, get_db_filename(cache), cache, keep=args.keep, is_index=True) + hpss_put( + hpss, + get_db_filename(cache), + cache, + keep=args.keep, + is_index=True, + transfer_manager=transfer_manager, + ) - globus_finalize(non_blocking=args.non_blocking) + globus_finalize(transfer_manager, non_blocking=args.non_blocking) # List failures if len(failures) > 0: @@ -138,7 +155,7 @@ def setup_update() -> Tuple[argparse.Namespace, str]: # C901 'update_database' is too complex (20) def update_database( # noqa: C901 - args: argparse.Namespace, cache: str + args: argparse.Namespace, cache: str, transfer_manager: TransferManager ) -> Optional[List[str]]: # Open database logger.debug("Opening index database") @@ -151,7 +168,7 @@ def update_database( # noqa: C901 hpss: str = config.hpss else: raise TypeError("Invalid config.hpss={}".format(config.hpss)) - globus_activate(hpss) + 
transfer_manager.globus_config = globus_activate(hpss) hpss_get(hpss, get_db_filename(cache), cache) else: error_str: str = ( @@ -266,27 +283,7 @@ def update_database( # noqa: C901 for tfile in tfiles: tfile_string: str = tfile[0] itar = max(itar, int(tfile_string[0:6], 16)) - - # Add files - failures: List[str] - if args.follow_symlinks: - try: - # Add files - failures = add_files( - cur, - con, - itar, - newfiles, - cache, - keep, - args.follow_symlinks, - non_blocking=args.non_blocking, - error_on_duplicate_tar=args.error_on_duplicate_tar, - overwrite_duplicate_tars=args.overwrite_duplicate_tars, - ) - except FileNotFoundError: - raise Exception("Archive update failed due to broken symlink.") - else: + try: # Add files failures = add_files( cur, @@ -299,7 +296,13 @@ def update_database( # noqa: C901 non_blocking=args.non_blocking, error_on_duplicate_tar=args.error_on_duplicate_tar, overwrite_duplicate_tars=args.overwrite_duplicate_tars, + transfer_manager=transfer_manager, ) + except FileNotFoundError as e: + if args.follow_symlinks: + raise Exception("Archive update failed due to broken symlink.") + else: + raise e # Close database con.commit() From 8a4e74e65e23c1aa30c4d216b18920b4a085a2d6 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Fri, 2 Jan 2026 18:47:19 -0600 Subject: [PATCH 02/34] Fixes from Claude --- zstash/globus.py | 7 +++---- zstash/hpss.py | 33 +++++++++++++++++++++------------ zstash/transfer_tracking.py | 22 ++++++++++++++++++++++ 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/zstash/globus.py b/zstash/globus.py index 0ff56731..cec7a5c5 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -168,13 +168,12 @@ def globus_transfer( # noqa: C901 # Create new transfer record with the task info new_transfer = TransferBatch() - new_transfer.file_paths = [] # TODO: is this right? 
+ new_transfer.file_paths = ( + mrt.file_paths if mrt else [] + ) # Copy from the batch being submitted new_transfer.task_id = task_id new_transfer.task_status = "UNKNOWN" new_transfer.is_globus = True - new_transfer.transfer_data = ( - None # This batch was submitted # TODO: is this right? - ) transfer_manager.batches.append(new_transfer) # Nullify the submitted transfer data structure so that a new one will be created on next call. diff --git a/zstash/hpss.py b/zstash/hpss.py index ef41210b..dc47c5e3 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -28,6 +28,19 @@ def hpss_transfer( if not transfer_manager: transfer_manager = TransferManager() + url = urlparse(hpss) + scheme = url.scheme + + # Create a new batch if needed (before we start adding files) + if not transfer_manager.batches or transfer_manager.batches[-1].task_id: + # Either no batches exist, or the last batch was already submitted + new_batch = TransferBatch() + new_batch.is_globus = scheme == "globus" + transfer_manager.batches.append(new_batch) + logger.debug( + f"{ts_utc()}: Created new TransferBatch, total batches: {len(transfer_manager.batches)}" + ) + if hpss == "none": logger.info("{}: HPSS is unavailable".format(transfer_type)) if transfer_type == "put" and file_path != get_db_filename(cache): @@ -70,22 +83,18 @@ def hpss_transfer( else: raise ValueError("Invalid transfer_type={}".format(transfer_type)) logger.info("Transferring file {} HPSS: {}".format(transfer_word, file_path)) - scheme: str - endpoint: str - path: str - name: str - url = urlparse(hpss) - scheme = url.scheme - endpoint = url.netloc + endpoint: str = url.netloc url_path = url.path + path: str + name: str + path, name = os.path.split(file_path) - # Append to curr_transfers: + # Add this file to the current batch transfer_manager.batches[-1].file_paths.append(file_path) - # logger.debug( - # f"{ts_utc()}: curr_transfers has been appended to, is now {curr_transfers}" - # ) - path, name = os.path.split(file_path) + 
logger.debug( + f"{ts_utc()}: Added {file_path} to current batch, batch now has {len(transfer_manager.batches[-1].file_paths)} files" + ) # Need to be in local directory for `hsi` to work cwd = os.getcwd() diff --git a/zstash/transfer_tracking.py b/zstash/transfer_tracking.py index f7d6b5c5..5434039d 100644 --- a/zstash/transfer_tracking.py +++ b/zstash/transfer_tracking.py @@ -4,6 +4,9 @@ from globus_sdk import TransferClient, TransferData from globus_sdk.services.transfer.response.iterable import IterableTransferResponse +from .settings import logger +from .utils import ts_utc + class GlobusConfig: """Globus connection configuration""" @@ -44,13 +47,32 @@ def get_most_recent_transfer(self) -> Optional[TransferBatch]: return self.batches[-1] if self.batches else None def delete_successfully_transferred_files(self): + """Check transfer status and delete files from successful transfers""" new_batch_list: List[TransferBatch] = [] for batch in self.batches: + # Check if this is a Globus transfer that needs status update + if batch.is_globus and batch.task_id and (batch.task_status != "SUCCEEDED"): + if self.globus_config and self.globus_config.transfer_client: + # Non-blocking status check + logger.debug( + f"{ts_utc()}: Checking status of task_id={batch.task_id}" + ) + task = self.globus_config.transfer_client.get_task(batch.task_id) + batch.task_status = task["status"] + logger.debug( + f"{ts_utc()}: task_id={batch.task_id} status={batch.task_status}" + ) + + # Now delete if successful if (not batch.is_globus) or (batch.task_status == "SUCCEEDED"): # The files were transferred successfully. # So, we can delete them. + logger.info( + f"{ts_utc()}: Deleting {len(batch.file_paths)} files from successful transfer" + ) batch.delete_files() else: + # Keep tracking this batch - not yet successful new_batch_list.append(batch) # We don't need to keep tracking batches that have been both: # 1. 
transferred successfully From 43963ee0113651d9406da99b72c5f4593845f4eb Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 5 Jan 2026 19:00:24 -0600 Subject: [PATCH 03/34] Further fixes --- .../test_globus_tar_deletion.bash | 3 +- zstash/globus.py | 17 ++++------ zstash/hpss.py | 14 ++++---- zstash/hpss_utils.py | 3 +- zstash/transfer_tracking.py | 32 +++++++++++-------- 5 files changed, 37 insertions(+), 32 deletions(-) diff --git a/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash b/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash index 4b76f4c5..ed75a69d 100755 --- a/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash +++ b/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash @@ -137,7 +137,8 @@ test_globus_tar_deletion() keep_flag="" fi - zstash create ${blocking_flag} ${keep_flag} --hpss=${globus_path}/${case_name} --maxsize 128 zstash_demo 2>&1 | tee ${case_name}.log + # Use -v so debug logs show up. + zstash create ${blocking_flag} ${keep_flag} --hpss=${globus_path}/${case_name} --maxsize 128 -v zstash_demo 2>&1 | tee ${case_name}.log if [ $? != 0 ]; then echo "${case_name} failed. Check ${case_name}_create.log for details. Cannot continue." 
return 1 diff --git a/zstash/globus.py b/zstash/globus.py index cec7a5c5..d74965c5 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -166,18 +166,15 @@ def globus_transfer( # noqa: C901 f"{ts_utc()}: SURFACE Submit Transfer returned new task_id = {task_id} for label {transfer_data['label']}" ) - # Create new transfer record with the task info - new_transfer = TransferBatch() - new_transfer.file_paths = ( - mrt.file_paths if mrt else [] - ) # Copy from the batch being submitted - new_transfer.task_id = task_id - new_transfer.task_status = "UNKNOWN" - new_transfer.is_globus = True - transfer_manager.batches.append(new_transfer) + # Update the current batch with the task info + # The batch was already created in hpss_transfer with files added to it + # We just need to mark it as submitted + if transfer_manager.batches: + transfer_manager.batches[-1].task_id = task_id + transfer_manager.batches[-1].task_status = "UNKNOWN" + transfer_manager.batches[-1].transfer_data = None # Was just submitted # Nullify the submitted transfer data structure so that a new one will be created on next call. 
- # Reset for building the next batch transfer_data = None except TransferAPIError as e: if e.code == "NoCredException": diff --git a/zstash/hpss.py b/zstash/hpss.py index dc47c5e3..683a01b4 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -91,10 +91,11 @@ def hpss_transfer( path, name = os.path.split(file_path) # Add this file to the current batch - transfer_manager.batches[-1].file_paths.append(file_path) - logger.debug( - f"{ts_utc()}: Added {file_path} to current batch, batch now has {len(transfer_manager.batches[-1].file_paths)} files" - ) + if (not keep) and (not is_index): # Never track index.db for deletion + transfer_manager.batches[-1].file_paths.append(file_path) + logger.debug( + f"{ts_utc()}: Added {file_path} to current batch, batch now has {len(transfer_manager.batches[-1].file_paths)} files" + ) # Need to be in local directory for `hsi` to work cwd = os.getcwd() @@ -112,7 +113,7 @@ def hpss_transfer( globus_status: str = "UNKNOWN" if scheme == "globus": if not transfer_manager.globus_config: - raise RuntimeError("Scheme is 'globus' but no GlobusConfig provided") + transfer_manager.globus_config = GlobusConfig() # Transfer file using the Globus Transfer Service logger.info(f"{ts_utc()}: DIVING: hpss calls globus_transfer(name={name})") globus_transfer( @@ -139,9 +140,8 @@ def hpss_transfer( os.chdir(cwd) if transfer_type == "put": - if (not keep) and (not is_index): + if not keep: # We never delete if `--keep` is set. - # We also never delete `index.db`. 
transfer_manager.delete_successfully_transferred_files() diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 76326d9b..573640c9 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -168,7 +168,8 @@ def add_files( cache, keep, non_blocking, - transfer_manager, + is_index=False, + transfer_manager=transfer_manager, ) logger.info( f"{ts_utc()}: SURFACE (add_files): Called hpss_put to dispatch archive file {tfname}" diff --git a/zstash/transfer_tracking.py b/zstash/transfer_tracking.py index 5434039d..47e1ee61 100644 --- a/zstash/transfer_tracking.py +++ b/zstash/transfer_tracking.py @@ -12,9 +12,9 @@ class GlobusConfig: """Globus connection configuration""" def __init__(self): - self.remote_endpoint: str - self.local_endpoint: str - self.transfer_client: TransferClient + self.remote_endpoint: Optional[str] = None + self.local_endpoint: Optional[str] = None + self.transfer_client: Optional[TransferClient] = None self.archive_directory_listing: Optional[IterableTransferResponse] = None @@ -48,10 +48,18 @@ def get_most_recent_transfer(self) -> Optional[TransferBatch]: def delete_successfully_transferred_files(self): """Check transfer status and delete files from successful transfers""" - new_batch_list: List[TransferBatch] = [] + logger.info( + f"{ts_utc()}: Checking for successfully transferred files to delete" + ) for batch in self.batches: + # Skip if already processed + if not batch.file_paths: + logger.debug(f"{ts_utc()}: batch was already processed, skipping") + continue + # Check if this is a Globus transfer that needs status update if batch.is_globus and batch.task_id and (batch.task_status != "SUCCEEDED"): + logger.debug(f"{ts_utc()}: batch is globus AND is not yet successful") if self.globus_config and self.globus_config.transfer_client: # Non-blocking status check logger.debug( @@ -62,19 +70,17 @@ def delete_successfully_transferred_files(self): logger.debug( f"{ts_utc()}: task_id={batch.task_id} status={batch.task_status}" ) + else: + 
logger.debug( + f"{ts_utc()}: globus_config is not set up with a transfer client" + ) # Now delete if successful if (not batch.is_globus) or (batch.task_status == "SUCCEEDED"): - # The files were transferred successfully. - # So, we can delete them. + # The files were transferred successfully, so delete them logger.info( f"{ts_utc()}: Deleting {len(batch.file_paths)} files from successful transfer" ) batch.delete_files() - else: - # Keep tracking this batch - not yet successful - new_batch_list.append(batch) - # We don't need to keep tracking batches that have been both: - # 1. transferred successfully - # 2. been deleted - self.batches = new_batch_list + logger.debug("Deletion completed") + batch.file_paths = [] # Mark as processed From 25b606bf0f3b1854cf75a11904c180261341204d Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Tue, 6 Jan 2026 13:16:36 -0600 Subject: [PATCH 04/34] Add progressive deletion tests --- .../test_globus_tar_deletion.bash | 121 +++++++++++++++++- 1 file changed, 117 insertions(+), 4 deletions(-) diff --git a/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash b/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash index ed75a69d..8de806e6 100755 --- a/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash +++ b/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash @@ -147,7 +147,7 @@ test_globus_tar_deletion() check_log_has "Creating new tar archive 000000.tar" ${case_name}.log || return 2 echo "" - echo "Checking directory status after 'zstash create' has completed. src should only have index.db. dst should have tar and index.db." + echo "Checking directory status after 'zstash create' has completed." 
echo "Checking logs in current directory: ${PWD}" echo "" @@ -182,6 +182,98 @@ test_globus_tar_deletion() return 0 # Success } +test_globus_progressive_deletion() +{ + local path_to_repo=$1 + local dst_endpoint=$2 + local dst_dir=$3 + local blocking_str=$4 + + src_dir=${path_to_repo}/tests/utils/globus_tar_deletion + rm -rf ${src_dir} + mkdir -p ${src_dir} + dst_endpoint_uuid=$(get_endpoint ${dst_endpoint}) + globus_path=globus://${dst_endpoint_uuid}/${dst_dir} + + case_name=${blocking_str}_progressive_deletion + echo "Running test_globus_progressive_deletion on case=${case_name}" + echo "Exit codes: 0 -- success, 1 -- zstash failed, 2 -- grep check failed" + + setup ${case_name} "${src_dir}" + + # Create files totaling >2 GB to trigger multiple tars with maxsize=1 GB + # Each file is ~700 MB, so we'll get 3 tars + echo "Creating large test files (this may take a minute)..." + dd if=/dev/zero of=zstash_demo/file1.dat bs=1M count=700 2>/dev/null # 700 MB + dd if=/dev/zero of=zstash_demo/file2.dat bs=1M count=700 2>/dev/null # 700 MB + dd if=/dev/zero of=zstash_demo/file3.dat bs=1M count=700 2>/dev/null # 700 MB + echo "✓ Test files created" + + if [ "$blocking_str" == "non-blocking" ]; then + blocking_flag="--non-blocking" + else + blocking_flag="" + fi + + # Run with maxsize=1 GB to create multiple tars + echo "Running zstash create (this may take several minutes due to file size and transfers)..." + zstash create ${blocking_flag} --hpss=${globus_path}/${case_name} --maxsize 1 -v zstash_demo 2>&1 | tee ${case_name}.log + if [ $? != 0 ]; then + echo "${case_name} failed." 
+ return 1 + fi + + # Check that multiple tar files were created + tar_count=$(grep -c "Creating new tar archive" ${case_name}.log) + if [ ${tar_count} -lt 2 ]; then + echo "Expected at least 2 tar archives to be created, found ${tar_count}" + return 2 + fi + echo "✓ Created ${tar_count} tar archives" + + # Check that files were deleted progressively + deletion_count=$(grep -c "Deleting .* files from successful transfer" ${case_name}.log) + + if [ "$blocking_str" == "blocking" ]; then + # In blocking mode, we should see deletion after each tar transfer + if [ ${deletion_count} -lt $((tar_count - 1)) ]; then + echo "Expected at least $((tar_count - 1)) deletion events in blocking mode, found ${deletion_count}" + return 2 + fi + echo "✓ Files deleted progressively (${deletion_count} deletion events)" + else + # In non-blocking mode, deletions happen when we check status + if [ ${deletion_count} -lt 1 ]; then + echo "Expected at least 1 deletion event in non-blocking mode, found ${deletion_count}" + return 2 + fi + echo "✓ Files deleted (${deletion_count} deletion events in non-blocking mode)" + fi + + # Verify that NO tar files remain in source after completion + echo "Checking that no tar files remain in source" + ls ${src_dir}/${case_name}/zstash_demo/zstash/*.tar 2>&1 | tee ls_tar_check.log + if grep -q "\.tar" ls_tar_check.log && ! grep -q "No such file" ls_tar_check.log; then + echo "Found tar files that should have been deleted!" 
+ return 2 + fi + echo "✓ All tar files successfully deleted from source" + + # Verify tar files exist in destination + if [ "$blocking_str" == "non-blocking" ]; then + wait_for_directory "${dst_dir}/${case_name}" || return 1 + fi + + dst_tar_count=$(ls ${dst_dir}/${case_name}/*.tar 2>/dev/null | wc -l) + if [ ${dst_tar_count} -ne ${tar_count} ]; then + echo "Expected ${tar_count} tar files in destination, found ${dst_tar_count}" + return 2 + fi + echo "✓ All ${tar_count} tar files present in destination" + + return 0 +} + # Follow these directions ##################################################### # Example usage: @@ -233,7 +325,14 @@ run_test_with_tracking() { echo "Running: ${test_name}" echo "==========================================" - if test_globus_tar_deletion "${args[@]}"; then + # Determine which test function to call based on test name + if [[ "${test_name}" == *"progressive"* ]]; then + test_func=test_globus_progressive_deletion + else + test_func=test_globus_tar_deletion + fi + + if ${test_func} "${args[@]}"; then # Print test result in the output block AND at the end echo "✓ ${test_name} PASSED" test_results+=("✓ ${test_name} PASSED") # Uses Global variable @@ -253,15 +352,29 @@ tests_passed=0 tests_failed=0 test_results=() # Global variable to hold test results -echo "Primary tests: single authentication code tests for each endpoint" +echo "Primary tests: basic functionality tests" echo "If a test hangs, check if https://app.globus.org/activity reports any errors on your transfers." -# Run all tests independently +# Run basic tests +# These check that AT THE END of the run, +# we either still have the files (keep) or the files are deleted (non-keep). 
run_test_with_tracking "blocking_non-keep" ${path_to_repo} ${endpoint_str} ${machine_dst_dir} "blocking" "non-keep" || true run_test_with_tracking "non-blocking_non-keep" ${path_to_repo} ${endpoint_str} ${machine_dst_dir} "non-blocking" "non-keep" || true run_test_with_tracking "blocking_keep" ${path_to_repo} ${endpoint_str} ${machine_dst_dir} "blocking" "keep" || true run_test_with_tracking "non-blocking_keep" ${path_to_repo} ${endpoint_str} ${machine_dst_dir} "non-blocking" "keep" || true +echo "" +echo "Progressive deletion tests: verify files are deleted as transfers complete" +echo "WARNING: These tests create ~2GB of data and will take several minutes" + +# Run progressive deletion tests +# Thes check that DURING the run, +# files are deleted after successful transfers (non-keep only). +# Blocking -- get files, transfer files, delete at src, start next transfer. +# Non-blocking -- get files, transfer files, get next set of files, transfer those files, check if previous transfer is done (and if so, delete at src). 
+run_test_with_tracking "blocking_progressive_deletion" ${path_to_repo} ${endpoint_str} ${machine_dst_dir} "blocking" || true +run_test_with_tracking "non-blocking_progressive_deletion" ${path_to_repo} ${endpoint_str} ${machine_dst_dir} "non-blocking" || true + # Print summary echo "" echo "==========================================" From ed3248d17a6209dcd1a3467d6853478d4312e6b8 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Wed, 4 Mar 2026 15:29:59 -0600 Subject: [PATCH 05/34] Pre-commit fixes --- zstash/update.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/zstash/update.py b/zstash/update.py index a20a912c..a151a0bc 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -11,15 +11,7 @@ from .globus import globus_activate, globus_finalize from .hpss import hpss_get, hpss_put from .hpss_utils import add_files -from .settings import ( - DEFAULT_CACHE, - TIME_TOL, - FilesRow, - TupleFilesRow, - config, - get_db_filename, - logger, -) +from .settings import DEFAULT_CACHE, TIME_TOL, config, get_db_filename, logger from .transfer_tracking import TransferManager from .utils import get_files_to_archive_with_stats, update_config From 864abe249817475a80856b65bc14afb668c28a9b Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Wed, 4 Mar 2026 19:27:38 -0600 Subject: [PATCH 06/34] Address code review comments --- zstash/create.py | 6 +++--- zstash/globus.py | 5 +++-- zstash/hpss.py | 3 --- zstash/update.py | 6 +++--- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/zstash/create.py b/zstash/create.py index 3eb9e602..4671a2b9 100644 --- a/zstash/create.py +++ b/zstash/create.py @@ -104,7 +104,7 @@ def create(): ) logger.debug(f"{ts_utc()}: calling globus_finalize()") - globus_finalize(transfer_manager, non_blocking=args.non_blocking) + globus_finalize(transfer_manager) if len(failures) > 0: # List the failures @@ -291,11 +291,11 @@ def create_database( force_database_corruption=args.for_developers_force_database_corruption, 
transfer_manager=transfer_manager, ) - except FileNotFoundError as e: + except FileNotFoundError: if args.follow_symlinks: raise Exception("Archive creation failed due to broken symlink.") else: - raise e + raise # Close database con.commit() diff --git a/zstash/globus.py b/zstash/globus.py index d74965c5..da87369d 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -72,7 +72,7 @@ def globus_transfer( # noqa: C901 name: str, transfer_type: str, non_blocking: bool, -): +) -> str: logger.info(f"{ts_utc()}: Entered globus_transfer() for name = {name}") logger.debug(f"{ts_utc()}: non_blocking = {non_blocking}") @@ -201,6 +201,7 @@ def globus_transfer( # noqa: C901 wait_timeout=7200, max_retries=5, ) + task_status = new_mrt.task_status else: logger.info( f"{ts_utc()}: NO BLOCKING (task_wait) for task_id {new_mrt.task_id}" @@ -306,7 +307,7 @@ def globus_wait(transfer_client: TransferClient, task_id: str): sys.exit(1) -def globus_finalize(transfer_manager: TransferManager, non_blocking: bool = False): +def globus_finalize(transfer_manager: TransferManager): last_task_id = None diff --git a/zstash/hpss.py b/zstash/hpss.py index 683a01b4..3806ed08 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -11,9 +11,6 @@ from .transfer_tracking import GlobusConfig, TransferBatch, TransferManager from .utils import run_command, ts_utc -prev_transfers: List[str] = list() -curr_transfers: List[str] = list() - def hpss_transfer( hpss: str, diff --git a/zstash/update.py b/zstash/update.py index a151a0bc..270f3536 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -45,7 +45,7 @@ def update(): transfer_manager=transfer_manager, ) - globus_finalize(transfer_manager, non_blocking=args.non_blocking) + globus_finalize(transfer_manager) # List failures if len(failures) > 0: @@ -290,11 +290,11 @@ def update_database( # noqa: C901 overwrite_duplicate_tars=args.overwrite_duplicate_tars, transfer_manager=transfer_manager, ) - except FileNotFoundError as e: + except FileNotFoundError: if 
args.follow_symlinks: raise Exception("Archive update failed due to broken symlink.") else: - raise e + raise # Close database con.commit() From 67e8a000d00f5beb9f2933a6832603ccc045bd15 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Thu, 5 Mar 2026 15:41:18 -0600 Subject: [PATCH 07/34] Minor performance improvements --- zstash/transfer_tracking.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/zstash/transfer_tracking.py b/zstash/transfer_tracking.py index 47e1ee61..74bab2bb 100644 --- a/zstash/transfer_tracking.py +++ b/zstash/transfer_tracking.py @@ -30,8 +30,10 @@ def __init__(self): def delete_files(self): for src_path in self.file_paths: - if os.path.exists(src_path): + try: os.remove(src_path) + except FileNotFoundError: + logger.warning(f"File already deleted: {src_path}") class TransferManager: @@ -51,12 +53,10 @@ def delete_successfully_transferred_files(self): logger.info( f"{ts_utc()}: Checking for successfully transferred files to delete" ) + self.batches = [ + batch for batch in self.batches if batch.file_paths + ] # Clean up empty batches for batch in self.batches: - # Skip if already processed - if not batch.file_paths: - logger.debug(f"{ts_utc()}: batch was already processed, skipping") - continue - # Check if this is a Globus transfer that needs status update if batch.is_globus and batch.task_id and (batch.task_status != "SUCCEEDED"): logger.debug(f"{ts_utc()}: batch is globus AND is not yet successful") From e6f07e66d32480f70baede6cfebb47a4e5a02fb5 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Thu, 5 Mar 2026 16:48:30 -0600 Subject: [PATCH 08/34] Add type annotation --- zstash/globus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zstash/globus.py b/zstash/globus.py index da87369d..dd0027a5 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -5,6 +5,7 @@ from globus_sdk import TransferAPIError, TransferClient from globus_sdk.response import GlobusHTTPResponse +from 
globus_sdk.services.transfer.response.iterable import IterableTransferResponse from six.moves.urllib.parse import urlparse from .globus_utils import ( @@ -57,7 +58,7 @@ def globus_activate( return globus_config -def file_exists(archive_directory_listing, name: str) -> bool: +def file_exists(archive_directory_listing: IterableTransferResponse, name: str) -> bool: for entry in archive_directory_listing: if entry.get("name") == name: return True From d8cf91954ed683440846636b457ae2abe7f40547 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Thu, 5 Mar 2026 17:09:40 -0600 Subject: [PATCH 09/34] Fix typo --- .../bash_tests/run_from_any/test_globus_tar_deletion.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash b/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash index 8de806e6..8649dacb 100755 --- a/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash +++ b/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash @@ -368,7 +368,7 @@ echo "Progressive deletion tests: verify files are deleted as transfers complete echo "WARNING: These tests create ~2GB of data and will take several minutes" # Run progressive deletion tests -# Thes check that DURING the run, +# These check that DURING the run, # files are deleted after successful transfers (non-keep only). # Blocking -- get files, transfer files, delete at src, start next transfer. # Non-blocking -- get files, transfer files, get next set of files, transfer those files, check if previous transfer is done (and if so, delete at src). 
From cf05c5350fee3edef1b0b872a8f344ffd156f78f Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Fri, 6 Mar 2026 17:59:50 -0600 Subject: [PATCH 10/34] Refactor add_files --- zstash/create.py | 4 +- zstash/hpss_utils.py | 510 +++++++++++++++++++++++-------------------- zstash/update.py | 4 +- 3 files changed, 277 insertions(+), 241 deletions(-) diff --git a/zstash/create.py b/zstash/create.py index 4671a2b9..74c7e583 100644 --- a/zstash/create.py +++ b/zstash/create.py @@ -12,7 +12,7 @@ from .globus import globus_activate, globus_finalize from .hpss import hpss_put -from .hpss_utils import add_files +from .hpss_utils import construct_tars from .settings import DEFAULT_CACHE, config, get_db_filename, logger from .transfer_tracking import TransferManager from .utils import ( @@ -276,7 +276,7 @@ def create_database( failures: List[str] try: # Add files to archive - failures = add_files( + failures = construct_tars( cur, con, -1, diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 573640c9..c45a0285 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -17,6 +17,193 @@ from .utils import create_tars_table, tars_table_exists, ts_utc +class TarWrapper(object): + def __init__(self, tar_num: int, cache: str, do_hash: bool, follow_symlinks: bool): + # Create a hex value at least 6 digits long + tname: str = "{0:0{1}x}".format(tar_num, 6) + # Create the tar file name by adding ".tar" + self.tfname: str = f"{tname}.tar" + logger.info(f"{ts_utc()}: Creating new tar archive {self.tfname}") + # Open that tar file in the cache + self.tarFileObject = HashIO(os.path.join(cache, self.tfname), "wb", do_hash) + # FIXME: error: Argument "fileobj" to "open" has incompatible type "HashIO"; expected "Optional[IO[bytes]]" + self.tar = tarfile.open(mode="w", fileobj=self.tarFileObject, dereference=follow_symlinks) # type: ignore + + def process_file( + self, current_file: str, archived: List[TupleFilesRowNoId], failures: List[str] + ) -> int: + logger.info(f"Archiving 
{current_file}") + tar_size: int = 0 + try: + offset: int + size: int + mtime: datetime + md5: Optional[str] + offset, size, mtime, md5 = add_file_to_tar_archive(self.tar, current_file) + t: TupleFilesRowNoId = ( + current_file, + size, + mtime, + md5, + self.tfname, + offset, + ) + archived.append(t) + # Increase tarsize by the size of the current file. + # Use `tell()` to also include the tar's metadata in the size. + tar_size = self.tarFileObject.tell() + except Exception: + # Catch all exceptions here. + traceback.print_exc() + logger.error(f"Archiving {current_file}") + failures.append(current_file) + return tar_size + + def process_tar( + self, + cache: str, + keep: bool, + non_blocking: bool, + transfer_manager: Optional[TransferManager], + skip_tars_md5: bool, + cur: sqlite3.Cursor, + con: sqlite3.Connection, + force_database_corruption: str, + error_on_duplicate_tar: bool, + overwrite_duplicate_tars: bool, + archived: List[TupleFilesRowNoId], + ): + logger.debug(f"{ts_utc()}: Closing tar archive {self.tfname}") + self.tar.close() + + tarsize = self.tarFileObject.tell() + tar_md5: Optional[str] = self.tarFileObject.md5() + self.tarFileObject.close() + logger.info(f"{ts_utc()}: (add_files): Completed archive file {self.tfname}") + + # Transfer tar to HPSS + if config.hpss is not None: + hpss: str = config.hpss + else: + raise TypeError("Invalid config.hpss={}".format(config.hpss)) + + logger.info(f"Contents of the cache prior to `hpss_put`: {os.listdir(cache)}") + + logger.info( + f"{ts_utc()}: DIVING: (add_files): Calling hpss_put to dispatch archive file {self.tfname} [keep, non_blocking] = [{keep}, {non_blocking}]" + ) + hpss_put( + hpss, + os.path.join(cache, self.tfname), + cache, + keep, + non_blocking, + is_index=False, + transfer_manager=transfer_manager, + ) + logger.info( + f"{ts_utc()}: SURFACE (add_files): Called hpss_put to dispatch archive file {self.tfname}" + ) + + if not skip_tars_md5: + tar_tuple: TupleTarsRowNoId = (self.tfname, tarsize, 
tar_md5) + logger.info("tar name={}, tar size={}, tar md5={}".format(*tar_tuple)) + if not tars_table_exists(cur): + # Need to create tars table + create_tars_table(cur, con) + + # For developers only! For debugging purposes only! + if force_database_corruption == "simulate_row_existing": + # Tested by database_corruption.bash Cases 3, 5 + logger.info( + f"TESTING/DEBUGGING ONLY: Simulating row existing for {self.tfname}." + ) + cur.execute("INSERT INTO tars VALUES (NULL,?,?,?)", tar_tuple) + elif force_database_corruption == "simulate_row_existing_bad_size": + # Tested by database_corruption.bash CaseS 4, 7 + logger.info( + f"TESTING/DEBUGGING ONLY: Simulating row existing with bad size for {self.tfname}." + ) + cur.execute( + "INSERT INTO tars VALUES (NULL,?,?,?)", + (self.tfname, tarsize + 1000, tar_md5), + ) + + # We're done adding files to the tar. + # And we've transferred it to HPSS. + # Now we can insert the tar into the database. + cur.execute("SELECT COUNT(*) FROM tars WHERE name = ?", (self.tfname,)) + tar_count: int = cur.fetchone()[0] + if tar_count != 0: + error_str: str = ( + f"Database corruption detected! {self.tfname} is already in the database." + ) + if error_on_duplicate_tar: + # Tested by database_corruption.bash Case 3 + # Exists - error out + logger.error(error_str) + raise RuntimeError(error_str) + elif overwrite_duplicate_tars: + # Tested by database_corruption.bash Case 4 + # Exists - update with new size and md5 + logger.warning(error_str) + logger.warning(f"Updating existing tar {self.tfname} to proceed.") + cur.execute( + "UPDATE tars SET size = ?, md5 = ? 
WHERE name = ?", + (tarsize, tar_md5, self.tfname), + ) + else: + # Tested by database_corruption.bash Cases 5,7 + # Proceed as if we're in the typical case -- insert new + logger.warning(error_str) + logger.warning(f"Adding a new entry for {self.tfname}.") + cur.execute("INSERT INTO tars VALUES (NULL,?,?,?)", tar_tuple) + elif force_database_corruption == "simulate_no_correct_size": + # Tested by database_corruption.bash Case 6 + # For developers only! For debugging purposes only! + # Add this tar twice, with different sizes. + logger.info( + f"TESTING/DEBUGGING ONLY: Simulating no correct size for {self.tfname}." + ) + cur.execute( + "INSERT INTO tars VALUES (NULL,?,?,?)", + (self.tfname, tarsize + 1000, tar_md5), + ) + cur.execute( + "INSERT INTO tars VALUES (NULL,?,?,?)", + (self.tfname, tarsize + 2000, tar_md5), + ) + elif force_database_corruption == "simulate_bad_size_for_most_recent": + # Tested by database_corruption.bash Case 8 + # For developers only! For debugging purposes only! + # Add this tar twice, second time with bad size. + logger.info( + f"TESTING/DEBUGGING ONLY: Simulating bad size for most recent entry for {self.tfname}." 
+ ) + cur.execute( + "INSERT INTO tars VALUES (NULL,?,?,?)", + (self.tfname, tarsize, tar_md5), + ) + cur.execute( + "INSERT INTO tars VALUES (NULL,?,?,?)", + (self.tfname, tarsize + 2000, tar_md5), + ) + else: + # Tested by database_corruption.bash Cases 1,2 + # Typical case + # Doesn't exist - insert new + logger.info(f"Adding {self.tfname} to the database.") + cur.execute("INSERT INTO tars VALUES (NULL,?,?,?)", tar_tuple) + + con.commit() + + # Update database with the individual files that have been archived + # Add a row to the "files" table, + # the last 6 columns matching the values of `archived` + cur.executemany("insert into files values (NULL,?,?,?,?,?,?)", archived) + con.commit() + + # Minimum output file object class HashIO(object): def __init__(self, name: str, mode: str, do_hash: bool): @@ -54,7 +241,42 @@ def close(self): self.closed = True -def add_files( +# Add file to tar archive while computing its hash +# Return file offset (in tar archive), size and md5 hash +def add_file_to_tar_archive( + tar: tarfile.TarFile, file_name: str +) -> Tuple[int, int, datetime, Optional[str]]: + offset = tar.offset + tarinfo = tar.gettarinfo(file_name) + + if tarinfo.islnk(): + tarinfo.size = os.path.getsize(file_name) + + md5 = None + + # For files/hardlinks + if tarinfo.isfile() or tarinfo.islnk(): + if tarinfo.size > 0: + # Non-empty files: stream with hash computation + hash_md5 = hashlib.md5() + with open(file_name, "rb") as f: + wrapper = HashingFileWrapper(f, hash_md5) + tar.addfile(tarinfo, wrapper) + md5 = hash_md5.hexdigest() + else: + # Empty files: just add to tar, compute hash of empty data + tar.addfile(tarinfo) + md5 = hashlib.md5(b"").hexdigest() # MD5 of empty bytes + else: + # Directories, symlinks, etc. 
+ tar.addfile(tarinfo) + + size = tarinfo.size + mtime = datetime.utcfromtimestamp(tarinfo.mtime) + return offset, size, mtime, md5 + + +def construct_tars( cur: sqlite3.Cursor, con: sqlite3.Connection, itar: int, @@ -70,211 +292,60 @@ def add_files( transfer_manager: Optional[TransferManager] = None, ) -> List[str]: - # Now, perform the actual archiving failures: List[str] = [] - create_new_tar: bool = True nfiles: int = len(files) - archived: List[TupleFilesRowNoId] - tarsize: int - tname: str - tfname: str - tarFileObject: HashIO - tar: tarfile.TarFile - for i in range(nfiles): - - # New tar in the local cache - if create_new_tar: - create_new_tar = False - archived = [] - tarsize = 0 - itar += 1 - # Create a hex value at least 6 digits long - tname = "{0:0{1}x}".format(itar, 6) - # Create the tar file name by adding ".tar" - tfname = "{}.tar".format(tname) - logger.info(f"{ts_utc()}: Creating new tar archive {tfname}") - # Open that tar file in the cache - do_hash: bool - if not skip_tars_md5: - # If we're not skipping tars, we want to calculate the hash of the tars. - do_hash = True - else: - do_hash = False - tarFileObject = HashIO(os.path.join(cache, tfname), "wb", do_hash) - # FIXME: error: Argument "fileobj" to "open" has incompatible type "HashIO"; expected "Optional[IO[bytes]]" - tar = tarfile.open(mode="w", fileobj=tarFileObject, dereference=follow_symlinks) # type: ignore - - # Add current file to tar archive - current_file: str = files[i] - logger.info("Archiving {}".format(current_file)) - try: - offset: int - size: int - mtime: datetime - md5: Optional[str] - offset, size, mtime, md5 = add_file(tar, current_file, follow_symlinks) - t: TupleFilesRowNoId = ( - current_file, - size, - mtime, - md5, - tfname, - offset, - ) - archived.append(t) - # Increase tarsize by the size of the current file. - # Use `tell()` to also include the tar's metadata in the size. - tarsize = tarFileObject.tell() - except Exception: - # Catch all exceptions here. 
- traceback.print_exc() - logger.error("Archiving {}".format(current_file)) - failures.append(current_file) - - # Close tar archive if current file is the last one or - # if adding one more would push us over the limit. - next_file_size: int = tar.gettarinfo(current_file).size - if config.maxsize is not None: - maxsize: int = config.maxsize - else: - raise TypeError("Invalid config.maxsize={}".format(config.maxsize)) - if i == nfiles - 1 or tarsize + next_file_size > maxsize: - - # Close current temporary file - logger.debug(f"{ts_utc()}: Closing tar archive {tfname}") - tar.close() - tarsize = tarFileObject.tell() - tar_md5: Optional[str] = tarFileObject.md5() - tarFileObject.close() - logger.info(f"{ts_utc()}: (add_files): Completed archive file {tfname}") - - # Transfer tar to HPSS - if config.hpss is not None: - hpss: str = config.hpss + if config.maxsize is not None: + max_size: int = config.maxsize + else: + raise TypeError(f"Invalid config.maxsize={config.maxsize}") + + i_file: int = 0 + while i_file < nfiles: + # Each iteration of this loop constructs one tar + + # `create` passes in itar=-1, so the first tar will be 000001.tar + # `update` passes in itar=max existing tar number, so the first tar will be max+1 + itar += 1 + tar_size: int = 0 + archived: List[TupleFilesRowNoId] = [] + + # Open a new tar + # Note: if we're not skipping tars, we want to calculate the hash of the tars. + tar_wrapper = TarWrapper( + tar_num=itar, + cache=cache, + do_hash=not skip_tars_md5, + follow_symlinks=follow_symlinks, + ) + + # Add files to the tar until we reach the max size + while i_file < nfiles: + current_file: str = files[i_file] + current_file_size: int = os.path.getsize(current_file) + # Check if adding this file would send us over the max size. 
+ if tar_size + current_file_size < max_size: + # Add current file to tar archive + tar_size += tar_wrapper.process_file(current_file, archived, failures) + i_file += 1 else: - raise TypeError("Invalid config.hpss={}".format(config.hpss)) - - logger.info( - f"Contents of the cache prior to `hpss_put`: {os.listdir(cache)}" - ) - - logger.info( - f"{ts_utc()}: DIVING: (add_files): Calling hpss_put to dispatch archive file {tfname} [keep, non_blocking] = [{keep}, {non_blocking}]" - ) - hpss_put( - hpss, - os.path.join(cache, tfname), - cache, - keep, - non_blocking, - is_index=False, - transfer_manager=transfer_manager, - ) - logger.info( - f"{ts_utc()}: SURFACE (add_files): Called hpss_put to dispatch archive file {tfname}" - ) - - if not skip_tars_md5: - tar_tuple: TupleTarsRowNoId = (tfname, tarsize, tar_md5) - logger.info("tar name={}, tar size={}, tar md5={}".format(*tar_tuple)) - if not tars_table_exists(cur): - # Need to create tars table - create_tars_table(cur, con) - - # For developers only! For debugging purposes only! - if force_database_corruption == "simulate_row_existing": - # Tested by database_corruption.bash Cases 3, 5 - logger.info( - f"TESTING/DEBUGGING ONLY: Simulating row existing for {tfname}." - ) - cur.execute("INSERT INTO tars VALUES (NULL,?,?,?)", tar_tuple) - elif force_database_corruption == "simulate_row_existing_bad_size": - # Tested by database_corruption.bash CaseS 4, 7 - logger.info( - f"TESTING/DEBUGGING ONLY: Simulating row existing with bad size for {tfname}." - ) - cur.execute( - "INSERT INTO tars VALUES (NULL,?,?,?)", - (tfname, tarsize + 1000, tar_md5), - ) - - # We're done adding files to the tar. - # And we've transferred it to HPSS. - # Now we can insert the tar into the database. - cur.execute("SELECT COUNT(*) FROM tars WHERE name = ?", (tfname,)) - tar_count: int = cur.fetchone()[0] - if tar_count != 0: - error_str: str = ( - f"Database corruption detected! {tfname} is already in the database." 
- ) - if error_on_duplicate_tar: - # Tested by database_corruption.bash Case 3 - # Exists - error out - logger.error(error_str) - raise RuntimeError(error_str) - elif overwrite_duplicate_tars: - # Tested by database_corruption.bash Case 4 - # Exists - update with new size and md5 - logger.warning(error_str) - logger.warning(f"Updating existing tar {tfname} to proceed.") - cur.execute( - "UPDATE tars SET size = ?, md5 = ? WHERE name = ?", - (tarsize, tar_md5, tfname), - ) - else: - # Tested by database_corruption.bash Cases 5,7 - # Proceed as if we're in the typical case -- insert new - logger.warning(error_str) - logger.warning(f"Adding a new entry for {tfname}.") - cur.execute("INSERT INTO tars VALUES (NULL,?,?,?)", tar_tuple) - elif force_database_corruption == "simulate_no_correct_size": - # Tested by database_corruption.bash Case 6 - # For developers only! For debugging purposes only! - # Add this tar twice, with different sizes. - logger.info( - f"TESTING/DEBUGGING ONLY: Simulating no correct size for {tfname}." - ) - cur.execute( - "INSERT INTO tars VALUES (NULL,?,?,?)", - (tfname, tarsize + 1000, tar_md5), - ) - cur.execute( - "INSERT INTO tars VALUES (NULL,?,?,?)", - (tfname, tarsize + 2000, tar_md5), - ) - elif force_database_corruption == "simulate_bad_size_for_most_recent": - # Tested by database_corruption.bash Case 8 - # For developers only! For debugging purposes only! - # Add this tar twice, second time with bad size. - logger.info( - f"TESTING/DEBUGGING ONLY: Simulating bad size for most recent entry for {tfname}." 
- ) - cur.execute( - "INSERT INTO tars VALUES (NULL,?,?,?)", - (tfname, tarsize, tar_md5), - ) - cur.execute( - "INSERT INTO tars VALUES (NULL,?,?,?)", - (tfname, tarsize + 2000, tar_md5), - ) - else: - # Tested by database_corruption.bash Cases 1,2 - # Typical case - # Doesn't exist - insert new - logger.info(f"Adding {tfname} to the database.") - cur.execute("INSERT INTO tars VALUES (NULL,?,?,?)", tar_tuple) - - con.commit() - - # Update database with the individual files that have been archived - # Add a row to the "files" table, - # the last 6 columns matching the values of `archived` - cur.executemany("insert into files values (NULL,?,?,?,?,?,?)", archived) - con.commit() - - # Open new tar next time - create_new_tar = True + # Time to close and transfer this tar archive. + break + + # Close and transfer this tar archive, and update the database with the archived files. + tar_wrapper.process_tar( + cache, + keep, + non_blocking, + transfer_manager, + skip_tars_md5, + cur, + con, + force_database_corruption, + error_on_duplicate_tar, + overwrite_duplicate_tars, + archived, + ) return failures @@ -290,38 +361,3 @@ def read(self, size=-1): if data: self.hasher.update(data) return data - - -# Add file to tar archive while computing its hash -# Return file offset (in tar archive), size and md5 hash -def add_file( - tar: tarfile.TarFile, file_name: str, follow_symlinks: bool -) -> Tuple[int, int, datetime, Optional[str]]: - offset = tar.offset - tarinfo = tar.gettarinfo(file_name) - - if tarinfo.islnk(): - tarinfo.size = os.path.getsize(file_name) - - md5 = None - - # For files/hardlinks - if tarinfo.isfile() or tarinfo.islnk(): - if tarinfo.size > 0: - # Non-empty files: stream with hash computation - hash_md5 = hashlib.md5() - with open(file_name, "rb") as f: - wrapper = HashingFileWrapper(f, hash_md5) - tar.addfile(tarinfo, wrapper) - md5 = hash_md5.hexdigest() - else: - # Empty files: just add to tar, compute hash of empty data - tar.addfile(tarinfo) - md5 = 
hashlib.md5(b"").hexdigest() # MD5 of empty bytes - else: - # Directories, symlinks, etc. - tar.addfile(tarinfo) - - size = tarinfo.size - mtime = datetime.utcfromtimestamp(tarinfo.mtime) - return offset, size, mtime, md5 diff --git a/zstash/update.py b/zstash/update.py index 270f3536..38516a4d 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -10,7 +10,7 @@ from .globus import globus_activate, globus_finalize from .hpss import hpss_get, hpss_put -from .hpss_utils import add_files +from .hpss_utils import construct_tars from .settings import DEFAULT_CACHE, TIME_TOL, config, get_db_filename, logger from .transfer_tracking import TransferManager from .utils import get_files_to_archive_with_stats, update_config @@ -277,7 +277,7 @@ def update_database( # noqa: C901 itar = max(itar, int(tfile_string[0:6], 16)) try: # Add files - failures = add_files( + failures = construct_tars( cur, con, itar, From 42a8ea28b930c7cb56d9e2940bd819c347a1a00c Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Fri, 6 Mar 2026 18:52:06 -0600 Subject: [PATCH 11/34] Refactor process_tar --- zstash/create.py | 11 +++-- zstash/hpss_utils.py | 106 +++++++++++++++++++++++++++---------------- zstash/update.py | 10 ++-- 3 files changed, 81 insertions(+), 46 deletions(-) diff --git a/zstash/create.py b/zstash/create.py index 74c7e583..5916d82a 100644 --- a/zstash/create.py +++ b/zstash/create.py @@ -12,7 +12,7 @@ from .globus import globus_activate, globus_finalize from .hpss import hpss_put -from .hpss_utils import construct_tars +from .hpss_utils import DevOptions, construct_tars from .settings import DEFAULT_CACHE, config, get_db_filename, logger from .transfer_tracking import TransferManager from .utils import ( @@ -274,6 +274,11 @@ def create_database( files: List[str] = get_files_to_archive(cache, args.include, args.exclude) failures: List[str] + dev_options: DevOptions = DevOptions( + error_on_duplicate_tar=args.error_on_duplicate_tar, + 
overwrite_duplicate_tars=args.overwrite_duplicate_tars, + force_database_corruption=args.for_developers_force_database_corruption, + ) try: # Add files to archive failures = construct_tars( @@ -284,11 +289,9 @@ def create_database( cache, args.keep, args.follow_symlinks, + dev_options, skip_tars_md5=args.no_tars_md5, non_blocking=args.non_blocking, - error_on_duplicate_tar=args.error_on_duplicate_tar, - overwrite_duplicate_tars=args.overwrite_duplicate_tars, - force_database_corruption=args.for_developers_force_database_corruption, transfer_manager=transfer_manager, ) except FileNotFoundError: diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index c45a0285..95a39913 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -17,6 +17,44 @@ from .utils import create_tars_table, tars_table_exists, ts_utc +# This class holds parameters for developer options. +# I.e., these parameters should only ever be activated by developers during debugging and/or testing. +class DevOptions(object): + def __init__( + self, + error_on_duplicate_tar: bool, + overwrite_duplicate_tars: bool, + force_database_corruption: str, + ): + self.error_on_duplicate_tar: bool = error_on_duplicate_tar + self.overwrite_duplicate_tars: bool = overwrite_duplicate_tars + self.force_database_corruption: str = force_database_corruption + + def simulate_row_existing( + self, + tfname: str, + cur: sqlite3.Cursor, + tar_tuple: TupleTarsRowNoId, + tar_size: int, + tar_md5: Optional[str], + ): + if self.force_database_corruption == "simulate_row_existing": + # Tested by database_corruption.bash Cases 3, 5 + logger.info( + f"TESTING/DEBUGGING ONLY: Simulating row existing for {tfname}." + ) + cur.execute("INSERT INTO tars VALUES (NULL,?,?,?)", tar_tuple) + elif self.force_database_corruption == "simulate_row_existing_bad_size": + # Tested by database_corruption.bash Cases 4, 7 + logger.info( + f"TESTING/DEBUGGING ONLY: Simulating row existing with bad size for {tfname}." 
+ ) + cur.execute( + "INSERT INTO tars VALUES (NULL,?,?,?)", + (tfname, tar_size + 1000, tar_md5), + ) + + class TarWrapper(object): def __init__(self, tar_num: int, cache: str, do_hash: bool, follow_symlinks: bool): # Create a hex value at least 6 digits long @@ -49,7 +87,7 @@ def process_file( offset, ) archived.append(t) - # Increase tarsize by the size of the current file. + # Increase tar_size by the size of the current file. # Use `tell()` to also include the tar's metadata in the size. tar_size = self.tarFileObject.tell() except Exception: @@ -68,20 +106,19 @@ def process_tar( skip_tars_md5: bool, cur: sqlite3.Cursor, con: sqlite3.Connection, - force_database_corruption: str, - error_on_duplicate_tar: bool, - overwrite_duplicate_tars: bool, + dev_options: DevOptions, archived: List[TupleFilesRowNoId], ): + # 1. Close the tar #################################################### logger.debug(f"{ts_utc()}: Closing tar archive {self.tfname}") self.tar.close() - tarsize = self.tarFileObject.tell() + tar_size = self.tarFileObject.tell() tar_md5: Optional[str] = self.tarFileObject.md5() self.tarFileObject.close() logger.info(f"{ts_utc()}: (add_files): Completed archive file {self.tfname}") - # Transfer tar to HPSS + # 2. Transfer the tar to HPSS ######################################### if config.hpss is not None: hpss: str = config.hpss else: @@ -92,6 +129,7 @@ def process_tar( logger.info( f"{ts_utc()}: DIVING: (add_files): Calling hpss_put to dispatch archive file {self.tfname} [keep, non_blocking] = [{keep}, {non_blocking}]" ) + # Actually transfer the tar file hpss_put( hpss, os.path.join(cache, self.tfname), @@ -105,29 +143,18 @@ def process_tar( f"{ts_utc()}: SURFACE (add_files): Called hpss_put to dispatch archive file {self.tfname}" ) + # 3. 
Add the tar itself to the tars table ############################# if not skip_tars_md5: - tar_tuple: TupleTarsRowNoId = (self.tfname, tarsize, tar_md5) + tar_tuple: TupleTarsRowNoId = (self.tfname, tar_size, tar_md5) logger.info("tar name={}, tar size={}, tar md5={}".format(*tar_tuple)) if not tars_table_exists(cur): # Need to create tars table create_tars_table(cur, con) - # For developers only! For debugging purposes only! - if force_database_corruption == "simulate_row_existing": - # Tested by database_corruption.bash Cases 3, 5 - logger.info( - f"TESTING/DEBUGGING ONLY: Simulating row existing for {self.tfname}." - ) - cur.execute("INSERT INTO tars VALUES (NULL,?,?,?)", tar_tuple) - elif force_database_corruption == "simulate_row_existing_bad_size": - # Tested by database_corruption.bash CaseS 4, 7 - logger.info( - f"TESTING/DEBUGGING ONLY: Simulating row existing with bad size for {self.tfname}." - ) - cur.execute( - "INSERT INTO tars VALUES (NULL,?,?,?)", - (self.tfname, tarsize + 1000, tar_md5), - ) + # For developers only! For debugging/testing purposes only! + dev_options.simulate_row_existing( + self.tfname, cur, tar_tuple, tar_size, tar_md5 + ) # We're done adding files to the tar. # And we've transferred it to HPSS. @@ -138,19 +165,19 @@ def process_tar( error_str: str = ( f"Database corruption detected! {self.tfname} is already in the database." ) - if error_on_duplicate_tar: + if dev_options.error_on_duplicate_tar: # Tested by database_corruption.bash Case 3 # Exists - error out logger.error(error_str) raise RuntimeError(error_str) - elif overwrite_duplicate_tars: + elif dev_options.overwrite_duplicate_tars: # Tested by database_corruption.bash Case 4 # Exists - update with new size and md5 logger.warning(error_str) logger.warning(f"Updating existing tar {self.tfname} to proceed.") cur.execute( "UPDATE tars SET size = ?, md5 = ? 
WHERE name = ?", - (tarsize, tar_md5, self.tfname), + (tar_size, tar_md5, self.tfname), ) else: # Tested by database_corruption.bash Cases 5,7 @@ -158,7 +185,7 @@ def process_tar( logger.warning(error_str) logger.warning(f"Adding a new entry for {self.tfname}.") cur.execute("INSERT INTO tars VALUES (NULL,?,?,?)", tar_tuple) - elif force_database_corruption == "simulate_no_correct_size": + elif dev_options.force_database_corruption == "simulate_no_correct_size": # Tested by database_corruption.bash Case 6 # For developers only! For debugging purposes only! # Add this tar twice, with different sizes. @@ -167,13 +194,16 @@ def process_tar( ) cur.execute( "INSERT INTO tars VALUES (NULL,?,?,?)", - (self.tfname, tarsize + 1000, tar_md5), + (self.tfname, tar_size + 1000, tar_md5), ) cur.execute( "INSERT INTO tars VALUES (NULL,?,?,?)", - (self.tfname, tarsize + 2000, tar_md5), + (self.tfname, tar_size + 2000, tar_md5), ) - elif force_database_corruption == "simulate_bad_size_for_most_recent": + elif ( + dev_options.force_database_corruption + == "simulate_bad_size_for_most_recent" + ): # Tested by database_corruption.bash Case 8 # For developers only! For debugging purposes only! # Add this tar twice, second time with bad size. @@ -182,11 +212,11 @@ def process_tar( ) cur.execute( "INSERT INTO tars VALUES (NULL,?,?,?)", - (self.tfname, tarsize, tar_md5), + (self.tfname, tar_size, tar_md5), ) cur.execute( "INSERT INTO tars VALUES (NULL,?,?,?)", - (self.tfname, tarsize + 2000, tar_md5), + (self.tfname, tar_size + 2000, tar_md5), ) else: # Tested by database_corruption.bash Cases 1,2 @@ -197,6 +227,7 @@ def process_tar( con.commit() + # 4. 
Add the files included in this tar to the files table ############ # Update database with the individual files that have been archived # Add a row to the "files" table, # the last 6 columns matching the values of `archived` @@ -252,7 +283,7 @@ def add_file_to_tar_archive( if tarinfo.islnk(): tarinfo.size = os.path.getsize(file_name) - md5 = None + md5: Optional[str] = None # For files/hardlinks if tarinfo.isfile() or tarinfo.islnk(): @@ -269,6 +300,7 @@ def add_file_to_tar_archive( md5 = hashlib.md5(b"").hexdigest() # MD5 of empty bytes else: # Directories, symlinks, etc. + # md5 will be None in these cases. tar.addfile(tarinfo) size = tarinfo.size @@ -284,11 +316,9 @@ def construct_tars( cache: str, keep: bool, follow_symlinks: bool, + dev_options: DevOptions, skip_tars_md5: bool = False, non_blocking: bool = False, - error_on_duplicate_tar: bool = False, - overwrite_duplicate_tars: bool = False, - force_database_corruption: str = "", transfer_manager: Optional[TransferManager] = None, ) -> List[str]: @@ -341,9 +371,7 @@ def construct_tars( skip_tars_md5, cur, con, - force_database_corruption, - error_on_duplicate_tar, - overwrite_duplicate_tars, + dev_options, archived, ) diff --git a/zstash/update.py b/zstash/update.py index 38516a4d..ae96696a 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -10,7 +10,7 @@ from .globus import globus_activate, globus_finalize from .hpss import hpss_get, hpss_put -from .hpss_utils import construct_tars +from .hpss_utils import DevOptions, construct_tars from .settings import DEFAULT_CACHE, TIME_TOL, config, get_db_filename, logger from .transfer_tracking import TransferManager from .utils import get_files_to_archive_with_stats, update_config @@ -275,6 +275,11 @@ def update_database( # noqa: C901 for tfile in tfiles: tfile_string: str = tfile[0] itar = max(itar, int(tfile_string[0:6], 16)) + dev_options: DevOptions = DevOptions( + error_on_duplicate_tar=args.error_on_duplicate_tar, + 
overwrite_duplicate_tars=args.overwrite_duplicate_tars, + force_database_corruption="", + ) try: # Add files failures = construct_tars( @@ -285,9 +290,8 @@ def update_database( # noqa: C901 cache, keep, args.follow_symlinks, + dev_options, non_blocking=args.non_blocking, - error_on_duplicate_tar=args.error_on_duplicate_tar, - overwrite_duplicate_tars=args.overwrite_duplicate_tars, transfer_manager=transfer_manager, ) except FileNotFoundError: From f6fcb3f6b9be8f89ee29040c535f14c0a9df516e Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Fri, 6 Mar 2026 19:00:10 -0600 Subject: [PATCH 12/34] Better structure file size logic --- zstash/hpss_utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 95a39913..072fdc79 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -352,15 +352,16 @@ def construct_tars( # Add files to the tar until we reach the max size while i_file < nfiles: current_file: str = files[i_file] + # TODO: check if the extra stat call (via getsize) impacts performance current_file_size: int = os.path.getsize(current_file) # Check if adding this file would send us over the max size. - if tar_size + current_file_size < max_size: - # Add current file to tar archive + if tar_size + current_file_size > max_size: + # If so, time to close and transfer this tar archive. + break + else: + # If not, add current file to tar archive. tar_size += tar_wrapper.process_file(current_file, archived, failures) i_file += 1 - else: - # Time to close and transfer this tar archive. - break # Close and transfer this tar archive, and update the database with the archived files. 
tar_wrapper.process_tar( From 0c3cd7c73ca9db061bca0661d4a94120e1778a73 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Fri, 6 Mar 2026 20:28:36 -0600 Subject: [PATCH 13/34] Clarify and improve Globus logic --- zstash/globus.py | 55 ++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/zstash/globus.py b/zstash/globus.py index dd0027a5..bf1f03ef 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -120,14 +120,13 @@ def globus_transfer( # noqa: C901 task = transfer_manager.globus_config.transfer_client.get_task(mrt.task_id) mrt.task_status = task["status"] # one of {ACTIVE, SUCCEEDED, FAILED, CANCELED, PENDING, INACTIVE} - # NOTE: How we behave here depends upon whether we want to support mutliple active transfers. - # Presently, we do not, except inadvertantly (if status == PENDING) if mrt.task_status == "ACTIVE": logger.info( f"{ts_utc()}: Previous task_id {mrt.task_id} Still Active. Returning ACTIVE." ) - # Don't return early - continue to submit the new transfer - # The previous transfer will complete asynchronously + # Don't return early -- continue to submit the new transfer. + # The previous transfer will complete asynchronously. + # That is, we will permit multiple active transfers. elif mrt.task_status == "SUCCEEDED": logger.info( f"{ts_utc()}: Previous task_id {mrt.task_id} status = SUCCEEDED." @@ -155,8 +154,9 @@ def globus_transfer( # noqa: C901 flush=True, ) - # Submit the current transfer_data logger.info(f"{ts_utc()}: DIVING: Submit Transfer for {transfer_data['label']}") + # Submit the current transfer_data + # ALWAYS submit. If we've gotten to this point, we're ready to submit. 
task = submit_transfer_with_checks( transfer_manager.globus_config.transfer_client, transfer_data ) @@ -196,11 +196,11 @@ def globus_transfer( # noqa: C901 task_status = "UNKNOWN" if new_mrt and new_mrt.task_id: if not non_blocking: + # If blocking, wait for the task to complete and get the final status, + # before we proceed with any more transfers. new_mrt.task_status = globus_block_wait( transfer_manager.globus_config.transfer_client, task_id=new_mrt.task_id, - wait_timeout=7200, - max_retries=5, ) task_status = new_mrt.task_status else: @@ -220,33 +220,48 @@ def globus_transfer( # noqa: C901 def globus_block_wait( transfer_client: TransferClient, task_id: str, - wait_timeout: int, - max_retries: int, + wait_timeout: int = 7200, # 7200/3600 = 2 hours + max_retries: int = 5, ): - - # poll every "polling_interval" seconds to speed up small transfers. Report every 2 hours, stop waiting aftert 5*2 = 10 hours + # Poll every "polling_interval" seconds to speed up small transfers. + # Report every "wait_timeout" seconds, and stop waiting after "max_retries" reports. + # By default: report every 2 hours, stop waiting after 5*2 = 10 hours logger.info( f"{ts_utc()}: BLOCKING START: invoking task_wait for task_id = {task_id}" ) - task_status = "UNKNOWN" - retry_count = 0 + task_status: str = "UNKNOWN" + retry_count: int = 0 while retry_count < max_retries: try: - # Wait for the task to complete logger.info( f"{ts_utc()}: on task_wait try {retry_count + 1} out of {max_retries}" ) - transfer_client.task_wait( + # Wait for the task to complete. This is what makes this function BLOCKING. + # From https://globus-sdk-python.readthedocs.io/en/stable/services/transfer.html#globus_sdk.TransferClient.task_wait: Wait until a Task is complete or fails, with a time limit. If the task is “ACTIVE” after time runs out, returns False. Otherwise returns True. 
+ task_is_not_active: bool = transfer_client.task_wait( task_id, timeout=wait_timeout, polling_interval=10 ) + if task_is_not_active: + curr_task = transfer_client.get_task(task_id) + task_status = curr_task["status"] + if task_status == "SUCCEEDED": + break # Break out of the while-loop. The transfer already succeeded, so no need to retry. + elif task_status in ["FAILED", "CANCELED"]: + error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} for task_id {task_id}. No reason to keep retrying now." + logger.warning(error_str) + # We still need to break, because no matter how long we wait now, nothing will change with the transfer status. + break + elif task_status in ["PENDING", "INACTIVE"]: + error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} for task_id {task_id}. Will retry waiting until max_retries is reached." + logger.warning(error_str) + # Don't break -- continue retries + else: + error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} is unexpected for task_id {task_id}. Will retry waiting until max_retries is reached." 
+ logger.warning(error_str) + # Don't break -- continue retries logger.info(f"{ts_utc()}: done with wait") except Exception as e: logger.error(f"Unexpected Exception: {e}") - else: - curr_task = transfer_client.get_task(task_id) - task_status = curr_task["status"] - if task_status == "SUCCEEDED": - break finally: retry_count += 1 logger.info( From 5f32decd5715236bca7875323a1a7ffc9143dfc6 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 9 Mar 2026 13:51:49 -0700 Subject: [PATCH 14/34] Fix Python tests --- zstash/hpss_utils.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 072fdc79..096d989f 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -352,16 +352,25 @@ def construct_tars( # Add files to the tar until we reach the max size while i_file < nfiles: current_file: str = files[i_file] - # TODO: check if the extra stat call (via getsize) impacts performance - current_file_size: int = os.path.getsize(current_file) + # It's ok that current_file isn't in the tar yet. + tarinfo = tar_wrapper.tar.gettarinfo(current_file) + current_file_size = tarinfo.size + # Check if adding this file would send us over the max size. - if tar_size + current_file_size > max_size: - # If so, time to close and transfer this tar archive. - break - else: - # If not, add current file to tar archive. - tar_size += tar_wrapper.process_file(current_file, archived, failures) + if (tar_size == 0) or (tar_size + current_file_size <= max_size): + # Case 1: if the tar is empty, always add the file, even if it's over the max size. + # Case 2: if the tar is nonempty, only add the file if it won't put us over the max size. + new_tar_size = tar_wrapper.process_file( + current_file, archived, failures + ) + if new_tar_size != 0: + tar_size = new_tar_size + # Else: process_file failed, so we should keep the original tar_size i_file += 1 + else: + # Over the size limit. 
+ # Time to close and transfer this tar archive. + break # Close and transfer this tar archive, and update the database with the archived files. tar_wrapper.process_tar( From f03160cac59561fda69c65180812825b9e1a7243 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 9 Mar 2026 16:46:05 -0700 Subject: [PATCH 15/34] Perlmutter tests passing --- zstash/globus.py | 21 ++++++++++++--- zstash/hpss.py | 5 +++- zstash/hpss_utils.py | 63 +++++++++++++++++++++++++++++++------------- 3 files changed, 66 insertions(+), 23 deletions(-) diff --git a/zstash/globus.py b/zstash/globus.py index bf1f03ef..c9acffba 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -124,9 +124,22 @@ def globus_transfer( # noqa: C901 logger.info( f"{ts_utc()}: Previous task_id {mrt.task_id} Still Active. Returning ACTIVE." ) - # Don't return early -- continue to submit the new transfer. - # The previous transfer will complete asynchronously. - # That is, we will permit multiple active transfers. + # There is currently an active Globus transfer. + # Globus allows 3 simultaneous transfers, but right now, we only do one at a time. + # So, return for now, skipping the rest of `globus_transfer`. + # What happens next? + # We return to constructing tars. + # We won't need to create a new batch because the current batch exists. + # Once the transfer status is finally SUCCEEDED, + # we'll submit the current batch for Globus transfer. + if non_blocking: + return "ACTIVE" + else: + error_str: str = ( + "task_status='ACTIVE', but in blocking mode, the previous transfer should have waited through globus_block_wait" + ) + logger.error(error_str) + raise RuntimeError(error_str) elif mrt.task_status == "SUCCEEDED": logger.info( f"{ts_utc()}: Previous task_id {mrt.task_id} status = SUCCEEDED." @@ -138,6 +151,8 @@ def globus_transfer( # noqa: C901 logger.info( f"{ts}:Globus transfer {mrt.task_id}, from {src_ep} to {dst_ep}: {label} succeeded" ) + # The previous transfer succeeded. 
+ # That means we can transfer the current batch now. else: logger.error( f"{ts_utc()}: Previous task_id {mrt.task_id} status = {mrt.task_status}." diff --git a/zstash/hpss.py b/zstash/hpss.py index 3806ed08..00b2c8f0 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -113,9 +113,12 @@ def hpss_transfer( transfer_manager.globus_config = GlobusConfig() # Transfer file using the Globus Transfer Service logger.info(f"{ts_utc()}: DIVING: hpss calls globus_transfer(name={name})") - globus_transfer( + task_status: str = globus_transfer( transfer_manager, endpoint, url_path, name, transfer_type, non_blocking ) + logger.info( + f"{ts_utc()}: globus_transfer(name={name}) returned task_status={task_status}" + ) mrt: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() if mrt and mrt.task_status: globus_status = mrt.task_status diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 096d989f..0967d87e 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -68,7 +68,11 @@ def __init__(self, tar_num: int, cache: str, do_hash: bool, follow_symlinks: boo self.tar = tarfile.open(mode="w", fileobj=self.tarFileObject, dereference=follow_symlinks) # type: ignore def process_file( - self, current_file: str, archived: List[TupleFilesRowNoId], failures: List[str] + self, + current_file: str, + tar_info: tarfile.TarInfo, + archived: List[TupleFilesRowNoId], + failures: List[str], ) -> int: logger.info(f"Archiving {current_file}") tar_size: int = 0 @@ -77,7 +81,9 @@ def process_file( size: int mtime: datetime md5: Optional[str] - offset, size, mtime, md5 = add_file_to_tar_archive(self.tar, current_file) + offset, size, mtime, md5 = add_file_to_tar_archive( + self.tar, current_file, tar_info + ) t: TupleFilesRowNoId = ( current_file, size, @@ -275,36 +281,32 @@ def close(self): # Add file to tar archive while computing its hash # Return file offset (in tar archive), size and md5 hash def add_file_to_tar_archive( - tar: tarfile.TarFile, file_name: str + 
tar: tarfile.TarFile, file_name: str, tar_info: tarfile.TarInfo ) -> Tuple[int, int, datetime, Optional[str]]: offset = tar.offset - tarinfo = tar.gettarinfo(file_name) - - if tarinfo.islnk(): - tarinfo.size = os.path.getsize(file_name) md5: Optional[str] = None # For files/hardlinks - if tarinfo.isfile() or tarinfo.islnk(): - if tarinfo.size > 0: + if tar_info.isfile() or tar_info.islnk(): + if tar_info.size > 0: # Non-empty files: stream with hash computation hash_md5 = hashlib.md5() with open(file_name, "rb") as f: wrapper = HashingFileWrapper(f, hash_md5) - tar.addfile(tarinfo, wrapper) + tar.addfile(tar_info, wrapper) md5 = hash_md5.hexdigest() else: # Empty files: just add to tar, compute hash of empty data - tar.addfile(tarinfo) + tar.addfile(tar_info) md5 = hashlib.md5(b"").hexdigest() # MD5 of empty bytes else: # Directories, symlinks, etc. # md5 will be None in these cases. - tar.addfile(tarinfo) + tar.addfile(tar_info) - size = tarinfo.size - mtime = datetime.utcfromtimestamp(tarinfo.mtime) + size = tar_info.size + mtime = datetime.utcfromtimestamp(tar_info.mtime) return offset, size, mtime, md5 @@ -331,6 +333,7 @@ def construct_tars( raise TypeError(f"Invalid config.maxsize={config.maxsize}") i_file: int = 0 + carried_over_tar_info: Optional[tarfile.TarInfo] = None while i_file < nfiles: # Each iteration of this loop constructs one tar @@ -352,24 +355,46 @@ def construct_tars( # Add files to the tar until we reach the max size while i_file < nfiles: current_file: str = files[i_file] - # It's ok that current_file isn't in the tar yet. - tarinfo = tar_wrapper.tar.gettarinfo(current_file) - current_file_size = tarinfo.size + if carried_over_tar_info: + # If current_file wasn't added to the last tar, + # then we can immediately add it now. + # No need to repeat the size calculations. + tar_info = carried_over_tar_info + current_file_size = tar_info.size + else: + try: + # It's ok that current_file isn't in the tar yet. 
+ tar_info = tar_wrapper.tar.gettarinfo(current_file) + if tar_info.islnk(): + tar_info.size = os.path.getsize(current_file) + current_file_size = tar_info.size + except Exception: + # We couldn't even get the size of this file. + # So let's continue to the next file. + # (Likely cause: broken symlink) + traceback.print_exc() + logger.error(f"Archiving {current_file}") + # failures.append(current_file) # Mark this file as an error + # i_file += 1 # Go to the next file + raise # Check if adding this file would send us over the max size. if (tar_size == 0) or (tar_size + current_file_size <= max_size): # Case 1: if the tar is empty, always add the file, even if it's over the max size. # Case 2: if the tar is nonempty, only add the file if it won't put us over the max size. new_tar_size = tar_wrapper.process_file( - current_file, archived, failures + current_file, tar_info, archived, failures ) if new_tar_size != 0: tar_size = new_tar_size # Else: process_file failed, so we should keep the original tar_size i_file += 1 else: - # Over the size limit. + # Over the size limit: # Time to close and transfer this tar archive. + carried_over_tar_info = tar_info # Carry over this info to the next tar + # Break out of the inner while-loop: + # Done adding files to this particular tar. break # Close and transfer this tar archive, and update the database with the archived files. 
From 221a1b6ee52fe53e395b493cd42ab75e2ae60e3d Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Tue, 10 Mar 2026 11:15:21 -0500 Subject: [PATCH 16/34] Address simple code review comments --- zstash/create.py | 34 ++++++++++++++-------------------- zstash/hpss_utils.py | 24 +++++++++++++++--------- zstash/update.py | 32 +++++++++++++------------------- 3 files changed, 42 insertions(+), 48 deletions(-) diff --git a/zstash/create.py b/zstash/create.py index 5916d82a..7cd76231 100644 --- a/zstash/create.py +++ b/zstash/create.py @@ -279,26 +279,20 @@ def create_database( overwrite_duplicate_tars=args.overwrite_duplicate_tars, force_database_corruption=args.for_developers_force_database_corruption, ) - try: - # Add files to archive - failures = construct_tars( - cur, - con, - -1, - files, - cache, - args.keep, - args.follow_symlinks, - dev_options, - skip_tars_md5=args.no_tars_md5, - non_blocking=args.non_blocking, - transfer_manager=transfer_manager, - ) - except FileNotFoundError: - if args.follow_symlinks: - raise Exception("Archive creation failed due to broken symlink.") - else: - raise + # Add files to archive + failures = construct_tars( + cur, + con, + -1, + files, + cache, + args.keep, + args.follow_symlinks, + dev_options, + skip_tars_md5=args.no_tars_md5, + non_blocking=args.non_blocking, + transfer_manager=transfer_manager, + ) # Close database con.commit() diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 0967d87e..0cd8ba81 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -332,12 +332,18 @@ def construct_tars( else: raise TypeError(f"Invalid config.maxsize={config.maxsize}") + operation: str + if itar == -1: + operation = "create" + else: + operation = "update" + i_file: int = 0 carried_over_tar_info: Optional[tarfile.TarInfo] = None while i_file < nfiles: # Each iteration of this loop constructs one tar - # `create` passes in itar=-1, so the first tar will be 000001.tar + # `create` passes in itar=-1, so the first tar will be 
000000.tar # `update` passes in itar=max existing tar number, so the first tar will be max+1 itar += 1 tar_size: int = 0 @@ -361,6 +367,7 @@ def construct_tars( # No need to repeat the size calculations. tar_info = carried_over_tar_info current_file_size = tar_info.size + carried_over_tar_info = None # Reset for next iteration else: try: # It's ok that current_file isn't in the tar yet. @@ -368,15 +375,14 @@ def construct_tars( if tar_info.islnk(): tar_info.size = os.path.getsize(current_file) current_file_size = tar_info.size - except Exception: - # We couldn't even get the size of this file. - # So let's continue to the next file. - # (Likely cause: broken symlink) - traceback.print_exc() + except FileNotFoundError: logger.error(f"Archiving {current_file}") - # failures.append(current_file) # Mark this file as an error - # i_file += 1 # Go to the next file - raise + if follow_symlinks: + raise Exception( + f"Archive {operation} failed due to broken symlink." + ) + else: + raise # Check if adding this file would send us over the max size. 
if (tar_size == 0) or (tar_size + current_file_size <= max_size): diff --git a/zstash/update.py b/zstash/update.py index ae96696a..5c1db6f5 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -280,25 +280,19 @@ def update_database( # noqa: C901 overwrite_duplicate_tars=args.overwrite_duplicate_tars, force_database_corruption="", ) - try: - # Add files - failures = construct_tars( - cur, - con, - itar, - newfiles, - cache, - keep, - args.follow_symlinks, - dev_options, - non_blocking=args.non_blocking, - transfer_manager=transfer_manager, - ) - except FileNotFoundError: - if args.follow_symlinks: - raise Exception("Archive update failed due to broken symlink.") - else: - raise + # Add files + failures = construct_tars( + cur, + con, + itar, + newfiles, + cache, + keep, + args.follow_symlinks, + dev_options, + non_blocking=args.non_blocking, + transfer_manager=transfer_manager, + ) # Close database con.commit() From f7e02750f0bc19d0aad4a28883497a31d148fc62 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Tue, 10 Mar 2026 11:44:50 -0500 Subject: [PATCH 17/34] Address globus_finalize comment --- zstash/globus.py | 235 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 172 insertions(+), 63 deletions(-) diff --git a/zstash/globus.py b/zstash/globus.py index c9acffba..4c1f10b0 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -1,7 +1,7 @@ from __future__ import absolute_import, print_function import sys -from typing import List, Optional +from typing import Dict, List, Optional, Set, Tuple from globus_sdk import TransferAPIError, TransferClient from globus_sdk.response import GlobusHTTPResponse @@ -338,73 +338,182 @@ def globus_wait(transfer_client: TransferClient, task_id: str): sys.exit(1) -def globus_finalize(transfer_manager: TransferManager): +def _submit_pending_transfer_data( + transfer_client: TransferClient, + transfer_manager: TransferManager, +) -> Optional[str]: + """ + If the most recent batch has unsubmitted TransferData, submit it 
and return task_id. + Otherwise return None. + """ + transfer: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() + if not transfer or not transfer.transfer_data: + return None + + logger.info(f"{ts_utc()}: FINAL TransferData: accumulated items:") + attribs = transfer.transfer_data.__dict__ + for item in attribs["data"]["DATA"]: + if item["DATA_TYPE"] == "transfer_item": + transfer_manager.cumulative_tarfiles_pushed += 1 + print( + f" (finalize) PUSHING ({transfer_manager.cumulative_tarfiles_pushed}) source item: {item['source_path']}", + flush=True, + ) + + logger.info( + f"{ts_utc()}: DIVING: Submit Transfer for {transfer.transfer_data['label']}" + ) + try: + last_task = submit_transfer_with_checks(transfer_client, transfer.transfer_data) + task_id = last_task.get("task_id") + + # Best-effort: if this batch represents the submission, store the task_id. + if task_id and transfer.is_globus and not transfer.task_id: + transfer.task_id = task_id + + return task_id + + except TransferAPIError as e: + if e.code == "NoCredException": + logger.error( + "{}. Please go to https://app.globus.org/endpoints and activate the endpoint.".format( + e.message + ) + ) + else: + logger.error(e) + sys.exit(1) + except Exception as e: + logger.error("Exception: {}".format(e)) + sys.exit(1) + + +def _collect_globus_task_ids( + transfer_manager: TransferManager, + extra_task_id: Optional[str], +) -> Tuple[List[str], Dict[str, TransferBatch]]: + """ + Return (ordered unique task_ids, task_id->batch mapping for first occurrence). 
- last_task_id = None + Includes: + - All Globus batch.task_id values where batch.file_paths is non-empty + (i.e., still pending deletion) + - extra_task_id if provided + Skips: + - Non-Globus batches + - Batches without task_id + - Batches whose file_paths is empty (already processed/deleted) + """ + task_ids: List[str] = [] + seen: Set[str] = set() + task_to_batch: Dict[str, TransferBatch] = {} + + for batch in transfer_manager.batches: + already_deleted: bool = not batch.file_paths + + if (not batch.is_globus) or (not batch.task_id) or (already_deleted): + continue + + # By this point, we know batch.task_id is not None + tid: str = batch.task_id + if tid in seen: + continue + + seen.add(tid) + task_ids.append(tid) + task_to_batch[tid] = batch + + # Always include extra_task_id (e.g., just-submitted transfer), + # even if not yet reflected in batches. + if extra_task_id and (extra_task_id not in seen): + task_ids.append(extra_task_id) + + return task_ids, task_to_batch + + +def _refresh_batch_status( + transfer_client: TransferClient, + task_id: str, + task_to_batch: Dict[str, TransferBatch], +) -> Optional[str]: + """ + Fetch Globus task status and update corresponding batch.task_status if present. + Returns status, or None if fetch fails. + """ + try: + task: GlobusHTTPResponse = transfer_client.get_task(task_id) + status = task["status"] + batch: Optional[TransferBatch] = task_to_batch.get(task_id) + if batch: + batch.task_status = status + return status + except Exception as e: + logger.warning( + f"{ts_utc()}: Could not fetch status for task_id={task_id}; will wait anyway. ({e})" + ) + return None + + +def _wait_for_all_tasks( + transfer_client: TransferClient, + task_ids: List[str], + task_to_batch: Dict[str, TransferBatch], +) -> None: + """ + For each task_id, refresh status; if not SUCCEEDED, block via globus_wait; + then refresh status again for deletion logic. 
+ """ + for tid in task_ids: + status = _refresh_batch_status(transfer_client, tid, task_to_batch) + if status == "SUCCEEDED": + logger.info(f"{ts_utc()}: task_id={tid} already SUCCEEDED; skipping wait") + continue + + logger.info( + f"{ts_utc()}: Waiting for transfer task_id={tid} to complete (status={status})" + ) + globus_wait(transfer_client, tid) + + # After wait returns, task is terminal; refresh once more. + _refresh_batch_status(transfer_client, tid, task_to_batch) + + +def _prune_empty_batches(transfer_manager: TransferManager) -> None: + """ + Remove batches which have no remaining files to manage. + + Note: we only prune batches whose file_paths is empty, regardless of Globus/HPSS. + That matches current semantics where file_paths=[] means "processed". + """ + before = len(transfer_manager.batches) + transfer_manager.batches = [b for b in transfer_manager.batches if b.file_paths] + after = len(transfer_manager.batches) + if after != before: + logger.debug(f"{ts_utc()}: Pruned {before - after} empty transfer batches") + + +def globus_finalize(transfer_manager: TransferManager) -> None: if transfer_manager.globus_config is None: logger.warning("No GlobusConfig object provided for finalization") return + if transfer_manager.globus_config.transfer_client is None: + logger.warning("GlobusConfig provided but transfer_client is None") + return - transfer: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() - if transfer: - # Check if there's any pending transfer data that hasn't been submitted yet - if transfer.transfer_data: - # DEBUG: review accumulated items in TransferData - logger.info(f"{ts_utc()}: FINAL TransferData: accumulated items:") - attribs = transfer.transfer_data.__dict__ - for item in attribs["data"]["DATA"]: - if item["DATA_TYPE"] == "transfer_item": - transfer_manager.cumulative_tarfiles_pushed += 1 - print( - f" (finalize) PUSHING ({transfer_manager.cumulative_tarfiles_pushed}) source item: {item['source_path']}", - 
flush=True, - ) + # By this point, we know transfer_client is not None + transfer_client: TransferClient = transfer_manager.globus_config.transfer_client - # SUBMIT new transfer here - logger.info( - f"{ts_utc()}: DIVING: Submit Transfer for {transfer.transfer_data['label']}" - ) - try: - last_task = submit_transfer_with_checks( - transfer_manager.globus_config.transfer_client, - transfer.transfer_data, - ) - last_task_id = last_task.get("task_id") - except TransferAPIError as e: - if e.code == "NoCredException": - logger.error( - "{}. Please go to https://app.globus.org/endpoints and activate the endpoint.".format( - e.message - ) - ) - else: - logger.error(e) - sys.exit(1) - except Exception as e: - logger.error("Exception: {}".format(e)) - sys.exit(1) - - # Wait for any submitted transfers to complete - # In non-blocking mode, this ensures index.db and any accumulated tar files complete - # In blocking mode, this is redundant but harmless - skip_last_wait: bool = False - if transfer and transfer.task_id: - if transfer.task_id == last_task_id: - skip_last_wait = ( - True # No reason to call globus_wait twice on the same task_id - ) - logger.info( - f"{ts_utc()}: Waiting for transfer task_id={transfer.task_id} to complete" - ) - globus_wait( - transfer_manager.globus_config.transfer_client, transfer.task_id - ) - if last_task_id and (not skip_last_wait): - logger.info( - f"{ts_utc()}: Waiting for last transfer task_id={last_task_id} to complete" - ) - globus_wait(transfer_manager.globus_config.transfer_client, last_task_id) + last_task_id: Optional[str] = _submit_pending_transfer_data( + transfer_client, transfer_manager + ) + + task_ids: List[str] + task_to_batch: Dict[str, TransferBatch] + task_ids, task_to_batch = _collect_globus_task_ids(transfer_manager, last_task_id) + + _wait_for_all_tasks(transfer_client, task_ids, task_to_batch) + + transfer_manager.delete_successfully_transferred_files() - # Clean up tar files that were queued for deletion - 
transfer_manager.delete_successfully_transferred_files() + _prune_empty_batches(transfer_manager) From c64d0593d5490ef70edb9a1e9e1fd3c6713aabee Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Tue, 10 Mar 2026 10:20:50 -0700 Subject: [PATCH 18/34] Make fix to pass follow_symlinks.sh --- zstash/hpss_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 0cd8ba81..64154821 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -334,7 +334,7 @@ def construct_tars( operation: str if itar == -1: - operation = "create" + operation = "creation" else: operation = "update" From 5a7274247bb5439f429a14986a57bc4b74c7d240 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Tue, 10 Mar 2026 14:02:52 -0500 Subject: [PATCH 19/34] Address simple code review comments --- .../bash_tests/run_from_any/test_globus_tar_deletion.bash | 2 +- zstash/globus.py | 4 ++-- zstash/hpss_utils.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash b/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash index 8649dacb..cd5ac041 100755 --- a/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash +++ b/tests/integration/bash_tests/run_from_any/test_globus_tar_deletion.bash @@ -140,7 +140,7 @@ test_globus_tar_deletion() # Use -v so debug logs show up. zstash create ${blocking_flag} ${keep_flag} --hpss=${globus_path}/${case_name} --maxsize 128 -v zstash_demo 2>&1 | tee ${case_name}.log if [ $? != 0 ]; then - echo "${case_name} failed. Check ${case_name}_create.log for details. Cannot continue." + echo "${case_name} failed. Check ${case_name}.log for details. Cannot continue." return 1 fi echo "${case_name} completed successfully. Checking ${case_name}.log now." 
diff --git a/zstash/globus.py b/zstash/globus.py index 4c1f10b0..6fb27bf6 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -495,10 +495,10 @@ def _prune_empty_batches(transfer_manager: TransferManager) -> None: def globus_finalize(transfer_manager: TransferManager) -> None: if transfer_manager.globus_config is None: - logger.warning("No GlobusConfig object provided for finalization") + logger.debug("No GlobusConfig object provided for finalization") return if transfer_manager.globus_config.transfer_client is None: - logger.warning("GlobusConfig provided but transfer_client is None") + logger.debug("GlobusConfig provided but transfer_client is None") return # By this point, we know transfer_client is not None diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 64154821..c7b01e41 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -130,7 +130,7 @@ def process_tar( else: raise TypeError("Invalid config.hpss={}".format(config.hpss)) - logger.info(f"Contents of the cache prior to `hpss_put`: {os.listdir(cache)}") + logger.debug(f"Contents of the cache prior to `hpss_put`: {os.listdir(cache)}") logger.info( f"{ts_utc()}: DIVING: (add_files): Calling hpss_put to dispatch archive file {self.tfname} [keep, non_blocking] = [{keep}, {non_blocking}]" From 35a5a43abb1f175e89e5b78dfcc0906f51d514c7 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Tue, 10 Mar 2026 14:20:07 -0500 Subject: [PATCH 20/34] Address remaining comments --- zstash/globus.py | 14 ++++------ zstash/hpss.py | 12 ++++++-- zstash/transfer_tracking.py | 56 ++++++++++++++++++++++--------------- zstash/update.py | 2 +- 4 files changed, 50 insertions(+), 34 deletions(-) diff --git a/zstash/globus.py b/zstash/globus.py index 6fb27bf6..fe37b4aa 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -124,14 +124,12 @@ def globus_transfer( # noqa: C901 logger.info( f"{ts_utc()}: Previous task_id {mrt.task_id} Still Active. Returning ACTIVE." 
) - # There is currently an active Globus transfer. - # Globus allows 3 simultaneous transfers, but right now, we only do one at a time. - # So, return for now, skipping the rest of `globus_transfer`. - # What happens next? - # We return to constructing tars. - # We won't need to create a new batch because the current batch exists. - # Once the transfer status is finally SUCCEEDED, - # we'll submit the current batch for Globus transfer. + # There is currently an active Globus transfer associated with this + # managed transfer record. For this call, we skip submitting a new + # transfer for this record and return early. + # In non-blocking mode, callers may have multiple Globus transfers + # in flight overall; this branch only ensures we do not resubmit + # the same task while it is still ACTIVE. if non_blocking: return "ACTIVE" else: diff --git a/zstash/hpss.py b/zstash/hpss.py index 00b2c8f0..6163b33e 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -169,13 +169,19 @@ def hpss_put( ) -def hpss_get(hpss: str, file_path: str, cache: str): +def hpss_get( + hpss: str, + file_path: str, + cache: str, + transfer_manager: Optional[TransferManager] = None, +): """ Get a file from the HPSS archive. 
""" url = urlparse(hpss) - transfer_manager: TransferManager = TransferManager() - if url.scheme == "globus": + if not transfer_manager: + transfer_manager = TransferManager() + if (url.scheme == "globus") and not (transfer_manager.globus_config): transfer_manager.globus_config = GlobusConfig() hpss_transfer( hpss, file_path, "get", cache, False, transfer_manager=transfer_manager diff --git a/zstash/transfer_tracking.py b/zstash/transfer_tracking.py index 74bab2bb..2ab8c6c5 100644 --- a/zstash/transfer_tracking.py +++ b/zstash/transfer_tracking.py @@ -53,29 +53,41 @@ def delete_successfully_transferred_files(self): logger.info( f"{ts_utc()}: Checking for successfully transferred files to delete" ) - self.batches = [ - batch for batch in self.batches if batch.file_paths - ] # Clean up empty batches + # Clean up empty batches first + self.batches = [batch for batch in self.batches if batch.file_paths] + # Identify pending Globus batches (not yet succeeded) + pending_globus_batches = [ + batch + for batch in self.batches + if batch.is_globus and batch.task_id and batch.task_status != "SUCCEEDED" + ] + # To avoid excessive Globus API calls, only poll a small subset of + # pending batches on each invocation (here: at most one). 
+ if pending_globus_batches: + batch_to_poll = pending_globus_batches[0] + logger.debug( + f"{ts_utc()}: batch is globus AND is not yet successful " + f"(task_id={batch_to_poll.task_id})" + ) + if self.globus_config and self.globus_config.transfer_client: + # Non-blocking status check + logger.debug( + f"{ts_utc()}: Checking status of task_id={batch_to_poll.task_id}" + ) + task = self.globus_config.transfer_client.get_task( + batch_to_poll.task_id + ) + batch_to_poll.task_status = task["status"] + logger.debug( + f"{ts_utc()}: task_id={batch_to_poll.task_id} " + f"status={batch_to_poll.task_status}" + ) + else: + logger.debug( + f"{ts_utc()}: globus_config is not set up with a transfer client" + ) + # Now delete files for successful transfers for batch in self.batches: - # Check if this is a Globus transfer that needs status update - if batch.is_globus and batch.task_id and (batch.task_status != "SUCCEEDED"): - logger.debug(f"{ts_utc()}: batch is globus AND is not yet successful") - if self.globus_config and self.globus_config.transfer_client: - # Non-blocking status check - logger.debug( - f"{ts_utc()}: Checking status of task_id={batch.task_id}" - ) - task = self.globus_config.transfer_client.get_task(batch.task_id) - batch.task_status = task["status"] - logger.debug( - f"{ts_utc()}: task_id={batch.task_id} status={batch.task_status}" - ) - else: - logger.debug( - f"{ts_utc()}: globus_config is not set up with a transfer client" - ) - - # Now delete if successful if (not batch.is_globus) or (batch.task_status == "SUCCEEDED"): # The files were transferred successfully, so delete them logger.info( diff --git a/zstash/update.py b/zstash/update.py index 5c1db6f5..dcb4c790 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -161,7 +161,7 @@ def update_database( # noqa: C901 else: raise TypeError("Invalid config.hpss={}".format(config.hpss)) transfer_manager.globus_config = globus_activate(hpss) - hpss_get(hpss, get_db_filename(cache), cache) + hpss_get(hpss, 
get_db_filename(cache), cache, transfer_manager) else: error_str: str = ( "--hpss argument is required when local copy of database is unavailable" From c408e5bf6e81a3dcbdd951ac00256279f141c4c9 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Tue, 10 Mar 2026 15:30:43 -0500 Subject: [PATCH 21/34] Improve batch status handling and comments --- zstash/globus.py | 32 +++++++++++++++++++++++--------- zstash/hpss.py | 5 +++-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/zstash/globus.py b/zstash/globus.py index fe37b4aa..242b4c68 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -117,22 +117,29 @@ def globus_transfer( # noqa: C901 task: GlobusHTTPResponse try: if mrt and mrt.task_id: + # This is the current transfer task associated with the most recent batch. task = transfer_manager.globus_config.transfer_client.get_task(mrt.task_id) + # Update the most recent batch's task_status based on the current status from Globus API. mrt.task_status = task["status"] - # one of {ACTIVE, SUCCEEDED, FAILED, CANCELED, PENDING, INACTIVE} + # According to https://docs.globus.org/api/transfer/task/#task_fields, + # this will be one of {ACTIVE, INACTIVE, SUCCEEDED, FAILED} if mrt.task_status == "ACTIVE": + # The most recent transfer (mrt) is still active. logger.info( f"{ts_utc()}: Previous task_id {mrt.task_id} Still Active. Returning ACTIVE." ) - # There is currently an active Globus transfer associated with this - # managed transfer record. For this call, we skip submitting a new - # transfer for this record and return early. - # In non-blocking mode, callers may have multiple Globus transfers - # in flight overall; this branch only ensures we do not resubmit - # the same task while it is still ACTIVE. if non_blocking: + # Globus allows up to 3 simultaneous transfers, + # but zstash is currently configured to only ever allow 1. + # If we're in this block, then we're already at 1 active transfer.
+ # We will therefore wait to submit a new transfer until it's done. + # So, we'll simply return and the next run of globus_transfer + # (i.e., on the next tar) will evaluate if the active transfer has finished. return "ACTIVE" else: + # If we're in this block, then the blocking wait + # for the previous transfer to finish was unsuccessful. + # This is an unexpected state and so we raise an error. error_str: str = ( "task_status='ACTIVE', but in blocking mode, the previous transfer should have waited through globus_block_wait" ) @@ -259,16 +266,23 @@ def globus_block_wait( task_status = curr_task["status"] if task_status == "SUCCEEDED": break # Break out of the while-loop. The transfer already succeeded, so no need to retry. - elif task_status in ["FAILED", "CANCELED"]: + elif task_status == "FAILED": error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} for task_id {task_id}. No reason to keep retrying now." logger.warning(error_str) # We still need to break, because no matter how long we wait now, nothing will change with the transfer status. break - elif task_status in ["PENDING", "INACTIVE"]: + elif task_status in [ + "INACTIVE", + "UNKNOWN", + "EXHAUSTED_TIMEOUT_RETRIES", + ]: + # The latter two options here are ones we assign manually and aren't included on + # https://docs.globus.org/api/transfer/task/#task_fields error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} for task_id {task_id}. Will retry waiting until max_retries is reached." logger.warning(error_str) # Don't break -- continue retries else: + # If we're in this block, then somehow an unexpected task_status was returned. error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} is unexpected for task_id {task_id}. Will retry waiting until max_retries is reached." 
logger.warning(error_str) # Don't break -- continue retries diff --git a/zstash/hpss.py b/zstash/hpss.py index 6163b33e..19a961b7 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -87,8 +87,9 @@ def hpss_transfer( name: str path, name = os.path.split(file_path) - # Add this file to the current batch - if (not keep) and (not is_index): # Never track index.db for deletion + # Never track index.db for deletion, only the tar files + if (not keep) and (not is_index): + # Add this tar file to the current batch transfer_manager.batches[-1].file_paths.append(file_path) logger.debug( f"{ts_utc()}: Added {file_path} to current batch, batch now has {len(transfer_manager.batches[-1].file_paths)} files" From b2fe7c695a4362ad6dd231464f6d5bf680cbd51a Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Tue, 10 Mar 2026 15:44:00 -0500 Subject: [PATCH 22/34] Remove pending_globus_batches check --- zstash/transfer_tracking.py | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/zstash/transfer_tracking.py b/zstash/transfer_tracking.py index 2ab8c6c5..72ee5a4c 100644 --- a/zstash/transfer_tracking.py +++ b/zstash/transfer_tracking.py @@ -55,37 +55,6 @@ def delete_successfully_transferred_files(self): ) # Clean up empty batches first self.batches = [batch for batch in self.batches if batch.file_paths] - # Identify pending Globus batches (not yet succeeded) - pending_globus_batches = [ - batch - for batch in self.batches - if batch.is_globus and batch.task_id and batch.task_status != "SUCCEEDED" - ] - # To avoid excessive Globus API calls, only poll a small subset of - # pending batches on each invocation (here: at most one). 
- if pending_globus_batches: - batch_to_poll = pending_globus_batches[0] - logger.debug( - f"{ts_utc()}: batch is globus AND is not yet successful " - f"(task_id={batch_to_poll.task_id})" - ) - if self.globus_config and self.globus_config.transfer_client: - # Non-blocking status check - logger.debug( - f"{ts_utc()}: Checking status of task_id={batch_to_poll.task_id}" - ) - task = self.globus_config.transfer_client.get_task( - batch_to_poll.task_id - ) - batch_to_poll.task_status = task["status"] - logger.debug( - f"{ts_utc()}: task_id={batch_to_poll.task_id} " - f"status={batch_to_poll.task_status}" - ) - else: - logger.debug( - f"{ts_utc()}: globus_config is not set up with a transfer client" - ) # Now delete files for successful transfers for batch in self.batches: if (not batch.is_globus) or (batch.task_status == "SUCCEEDED"): From ee135f68e9fd7db79e4b3b2802356aa7ee8cca6b Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Tue, 10 Mar 2026 16:33:13 -0500 Subject: [PATCH 23/34] Address comment on keep --- zstash/create.py | 2 +- zstash/globus.py | 31 ++++++++++++++----------------- zstash/update.py | 2 +- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/zstash/create.py b/zstash/create.py index 7cd76231..ad48bd4f 100644 --- a/zstash/create.py +++ b/zstash/create.py @@ -104,7 +104,7 @@ def create(): ) logger.debug(f"{ts_utc()}: calling globus_finalize()") - globus_finalize(transfer_manager) + globus_finalize(transfer_manager, args.keep) if len(failures) > 0: # List the failures diff --git a/zstash/globus.py b/zstash/globus.py index 242b4c68..0e2a3f71 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -401,30 +401,25 @@ def _submit_pending_transfer_data( def _collect_globus_task_ids( - transfer_manager: TransferManager, - extra_task_id: Optional[str], + transfer_manager: TransferManager, extra_task_id: Optional[str], keep: bool ) -> Tuple[List[str], Dict[str, TransferBatch]]: """ Return (ordered unique task_ids, task_id->batch mapping for first 
occurrence). - - Includes: - - All Globus batch.task_id values where batch.file_paths is non-empty - (i.e., still pending deletion) - - extra_task_id if provided - - Skips: - - Non-Globus batches - - Batches without task_id - - Batches whose file_paths is empty (already processed/deleted) """ task_ids: List[str] = [] seen: Set[str] = set() task_to_batch: Dict[str, TransferBatch] = {} for batch in transfer_manager.batches: - already_deleted: bool = not batch.file_paths - - if (not batch.is_globus) or (not batch.task_id) or (already_deleted): + if not keep: + # NOTE: This is always true if `keep` is set, + # since we never track files for deletion if `keep` is set. + already_deleted: bool = not batch.file_paths + if already_deleted: + # This batch has already been processed and files deleted, so we can skip it. + continue + + if (not batch.is_globus) or (not batch.task_id): continue # By this point, we know batch.task_id is not None @@ -505,7 +500,7 @@ def _prune_empty_batches(transfer_manager: TransferManager) -> None: logger.debug(f"{ts_utc()}: Pruned {before - after} empty transfer batches") -def globus_finalize(transfer_manager: TransferManager) -> None: +def globus_finalize(transfer_manager: TransferManager, keep: bool) -> None: if transfer_manager.globus_config is None: logger.debug("No GlobusConfig object provided for finalization") return @@ -522,7 +517,9 @@ def globus_finalize(transfer_manager: TransferManager) -> None: task_ids: List[str] task_to_batch: Dict[str, TransferBatch] - task_ids, task_to_batch = _collect_globus_task_ids(transfer_manager, last_task_id) + task_ids, task_to_batch = _collect_globus_task_ids( + transfer_manager, last_task_id, keep + ) _wait_for_all_tasks(transfer_client, task_ids, task_to_batch) diff --git a/zstash/update.py b/zstash/update.py index dcb4c790..1d27d799 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -45,7 +45,7 @@ def update(): transfer_manager=transfer_manager, ) - globus_finalize(transfer_manager) + 
globus_finalize(transfer_manager, args.keep) # List failures if len(failures) > 0: From 75efad0379ec54b4c99f5764f866b55a97f76992 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 23 Mar 2026 13:10:06 -0500 Subject: [PATCH 24/34] Update logger calls Reviewed globus.py, hpss.py, hpss_utils.py, transfer_tracking.py --- zstash/hpss.py | 4 ++-- zstash/hpss_utils.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/zstash/hpss.py b/zstash/hpss.py index 19a961b7..10f449a4 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -118,13 +118,13 @@ def hpss_transfer( transfer_manager, endpoint, url_path, name, transfer_type, non_blocking ) logger.info( - f"{ts_utc()}: globus_transfer(name={name}) returned task_status={task_status}" + f"{ts_utc()}: SURFACE: hpss globus_transfer(name={name}) returned task_status={task_status}" ) mrt: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() if mrt and mrt.task_status: globus_status = mrt.task_status logger.info( - f"{ts_utc()}: SURFACE hpss globus_transfer(name={name}) returns {globus_status}" + f"{ts_utc()}: Most recent globus_transfer returned task_status={globus_status}" ) # NOTE: Here, the status could be "EXHAUSTED_TIMEOUT_RETRIES", meaning a very long transfer # or perhaps transfer is hanging. We should decide whether to ignore it, or cancel it, but diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index c7b01e41..35125a9c 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -122,7 +122,7 @@ def process_tar( tar_size = self.tarFileObject.tell() tar_md5: Optional[str] = self.tarFileObject.md5() self.tarFileObject.close() - logger.info(f"{ts_utc()}: (add_files): Completed archive file {self.tfname}") + logger.info(f"{ts_utc()}: (process_tar): Completed archive file {self.tfname}") # 2. 
Transfer the tar to HPSS ######################################### if config.hpss is not None: @@ -133,7 +133,7 @@ def process_tar( logger.debug(f"Contents of the cache prior to `hpss_put`: {os.listdir(cache)}") logger.info( - f"{ts_utc()}: DIVING: (add_files): Calling hpss_put to dispatch archive file {self.tfname} [keep, non_blocking] = [{keep}, {non_blocking}]" + f"{ts_utc()}: DIVING: (process_tar): Calling hpss_put to dispatch archive file {self.tfname} [keep, non_blocking] = [{keep}, {non_blocking}]" ) # Actually transfer the tar file hpss_put( @@ -146,7 +146,7 @@ def process_tar( transfer_manager=transfer_manager, ) logger.info( - f"{ts_utc()}: SURFACE (add_files): Called hpss_put to dispatch archive file {self.tfname}" + f"{ts_utc()}: SURFACE (process_tar): Called hpss_put to dispatch archive file {self.tfname}" ) # 3. Add the tar itself to the tars table ############################# From 985f94ee0ecfb337b7cb85ccbeef357f859d9dc3 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 23 Mar 2026 13:19:21 -0500 Subject: [PATCH 25/34] Rename tar table skip parameter --- zstash/create.py | 2 +- zstash/hpss_utils.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/zstash/create.py b/zstash/create.py index ad48bd4f..db851b99 100644 --- a/zstash/create.py +++ b/zstash/create.py @@ -289,7 +289,7 @@ def create_database( args.keep, args.follow_symlinks, dev_options, - skip_tars_md5=args.no_tars_md5, + skip_tars_table=args.no_tars_md5, non_blocking=args.non_blocking, transfer_manager=transfer_manager, ) diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 35125a9c..bc6b88a7 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -109,7 +109,7 @@ def process_tar( keep: bool, non_blocking: bool, transfer_manager: Optional[TransferManager], - skip_tars_md5: bool, + skip_tars_table: bool, cur: sqlite3.Cursor, con: sqlite3.Connection, dev_options: DevOptions, @@ -150,7 +150,7 @@ def process_tar( ) # 3. 
Add the tar itself to the tars table ############################# - if not skip_tars_md5: + if not skip_tars_table: tar_tuple: TupleTarsRowNoId = (self.tfname, tar_size, tar_md5) logger.info("tar name={}, tar size={}, tar md5={}".format(*tar_tuple)) if not tars_table_exists(cur): @@ -319,7 +319,7 @@ def construct_tars( keep: bool, follow_symlinks: bool, dev_options: DevOptions, - skip_tars_md5: bool = False, + skip_tars_table: bool = False, non_blocking: bool = False, transfer_manager: Optional[TransferManager] = None, ) -> List[str]: @@ -354,7 +354,7 @@ def construct_tars( tar_wrapper = TarWrapper( tar_num=itar, cache=cache, - do_hash=not skip_tars_md5, + do_hash=not skip_tars_table, follow_symlinks=follow_symlinks, ) @@ -409,7 +409,7 @@ def construct_tars( keep, non_blocking, transfer_manager, - skip_tars_md5, + skip_tars_table, cur, con, dev_options, From bb11a8d659c3379ae650714e77d3adb1595293c9 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 23 Mar 2026 13:28:43 -0500 Subject: [PATCH 26/34] Add comment explaining write method --- zstash/hpss_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index bc6b88a7..a6c761cf 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -257,6 +257,18 @@ def tell(self) -> int: return self.position def write(self, s): + """ + This is called implicitly. + In TarWrapper.__init__: + + ``` + self.tarFileObject = HashIO(os.path.join(cache, self.tfname), "wb", do_hash) + self.tar = tarfile.open(mode="w", fileobj=self.tarFileObject, dereference=follow_symlinks) + ``` + + tarfile.open requires that the fileobj argument has a write() method. + It calls that method to write data to the tar file. 
+ """ self.f.write(s) if self.hash: self.hash.update(s) From 2931d9e73087f3dd97424fb2243237e4875f84c5 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 23 Mar 2026 13:37:43 -0500 Subject: [PATCH 27/34] Add comment explaining tars table --- zstash/hpss_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index a6c761cf..7310ef8b 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -362,7 +362,12 @@ def construct_tars( archived: List[TupleFilesRowNoId] = [] # Open a new tar - # Note: if we're not skipping tars, we want to calculate the hash of the tars. + # Note: if we're not skipping the tars table, then we DO want to calculate the hash of the tars. + # That is, we DO want to add the tar to the tars table in the database. + # That means we need to calculate the hash of the tar file as well. + # + # We ALWAYS want to calculate the hashes of the individual files, regardless of skip_tars_table, + # because we need to add those to the files table. tar_wrapper = TarWrapper( tar_num=itar, cache=cache, From 357b26499785a9ab5a7f6aa04a66aec130c7553d Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 23 Mar 2026 13:44:10 -0500 Subject: [PATCH 28/34] Update comments on tar submission --- zstash/hpss_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 7310ef8b..3a603f48 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -124,7 +124,7 @@ def process_tar( self.tarFileObject.close() logger.info(f"{ts_utc()}: (process_tar): Completed archive file {self.tfname}") - # 2. Transfer the tar to HPSS ######################################### + # 2. 
Submit the tar to the transfer manager's batch transfer system ### if config.hpss is not None: hpss: str = config.hpss else: @@ -135,7 +135,7 @@ def process_tar( logger.info( f"{ts_utc()}: DIVING: (process_tar): Calling hpss_put to dispatch archive file {self.tfname} [keep, non_blocking] = [{keep}, {non_blocking}]" ) - # Actually transfer the tar file + # Actually submit the tar file hpss_put( hpss, os.path.join(cache, self.tfname), @@ -420,7 +420,7 @@ def construct_tars( # Done adding files to this particular tar. break - # Close and transfer this tar archive, and update the database with the archived files. + # Close the tar, submit it to the batch transfer system, and update the database with the archived files (and optionally the tar as well, depending on skip_tars_table) tar_wrapper.process_tar( cache, keep, From 902df88b6e0d58894ff2dd0b3aa4d3e1ecd81285 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 23 Mar 2026 18:26:47 -0500 Subject: [PATCH 29/34] Reduce complexity of construct_tars --- tests/unit/test_optimized_update.py | 34 ----------- zstash/create.py | 11 ++-- zstash/hpss_utils.py | 92 +++++++++++++++-------------- zstash/update.py | 8 +-- zstash/utils.py | 9 --- 5 files changed, 60 insertions(+), 94 deletions(-) diff --git a/tests/unit/test_optimized_update.py b/tests/unit/test_optimized_update.py index 59b5d78d..5934209b 100644 --- a/tests/unit/test_optimized_update.py +++ b/tests/unit/test_optimized_update.py @@ -563,40 +563,6 @@ def test_time_tolerance_check(self): assert is_within_tolerance == should_match -class TestBackwardCompatibility: - """Tests to ensure backward compatibility with existing code.""" - - def test_get_files_to_archive_still_works(self, tmp_path): - """Test that legacy get_files_to_archive function still works.""" - from zstash.utils import get_files_to_archive - - (tmp_path / "file.txt").write_text("content") - - os.chdir(tmp_path) - result = get_files_to_archive("cache", None, None) - - # Should return list of strings - 
assert isinstance(result, list) - assert len(result) > 0 - assert all(isinstance(item, str) for item in result) - - def test_output_format_matches_original(self, tmp_path): - """Test that file paths are normalized the same way as original.""" - subdir = tmp_path / "subdir" - subdir.mkdir() - (subdir / "file.txt").write_text("content") - - os.chdir(tmp_path) - - from zstash.utils import get_files_to_archive - - legacy_result = get_files_to_archive("cache", None, None) - new_result = list(get_files_to_archive_with_stats("cache", None, None).keys()) - - # Should produce same file list - assert legacy_result == new_result - - @pytest.fixture def mock_database(): """Fixture providing a mock database cursor.""" diff --git a/zstash/create.py b/zstash/create.py index db851b99..1c956a72 100644 --- a/zstash/create.py +++ b/zstash/create.py @@ -6,7 +6,8 @@ import os.path import sqlite3 import sys -from typing import Any, List, Tuple +from datetime import datetime +from typing import Any, Dict, List, Tuple from six.moves.urllib.parse import urlparse @@ -17,7 +18,7 @@ from .transfer_tracking import TransferManager from .utils import ( create_tars_table, - get_files_to_archive, + get_files_to_archive_with_stats, run_command, tars_table_exists, ts_utc, @@ -271,7 +272,9 @@ def create_database( cur.execute("insert into config values (?,?)", (attr, value)) con.commit() - files: List[str] = get_files_to_archive(cache, args.include, args.exclude) + file_stats: Dict[str, Tuple[int, datetime]] = get_files_to_archive_with_stats( + cache, args.include, args.exclude + ) failures: List[str] dev_options: DevOptions = DevOptions( @@ -284,7 +287,7 @@ def create_database( cur, con, -1, - files, + file_stats, cache, args.keep, args.follow_symlinks, diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index 3a603f48..aa25c6af 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -7,7 +7,7 @@ import tarfile import traceback from datetime import datetime -from typing import List, 
Optional, Tuple +from typing import Dict, List, Optional, Tuple import _hashlib @@ -290,6 +290,19 @@ def close(self): self.closed = True +def estimate_tar_entry_size(file_size: int) -> int: + """ + Estimate how much space a file of a given size would take in the tar archive, + including metadata and padding. + """ + TAR_BLOCK_SIZE = 512 + TAR_HEADER_SIZE = 512 # per file header + # This formula computes: ceil(file_size / TAR_BLOCK_SIZE) + # But faster and avoiding floats. + data_blocks = (file_size + TAR_BLOCK_SIZE - 1) // TAR_BLOCK_SIZE + return TAR_HEADER_SIZE + (data_blocks * TAR_BLOCK_SIZE) + + # Add file to tar archive while computing its hash # Return file offset (in tar archive), size and md5 hash def add_file_to_tar_archive( @@ -326,7 +339,7 @@ def construct_tars( cur: sqlite3.Cursor, con: sqlite3.Connection, itar: int, - files: List[str], + file_stats: Dict[str, Tuple[int, datetime]], cache: str, keep: bool, follow_symlinks: bool, @@ -337,6 +350,7 @@ def construct_tars( ) -> List[str]: failures: List[str] = [] + files: List[str] = list(file_stats.keys()) nfiles: int = len(files) if config.maxsize is not None: @@ -351,14 +365,13 @@ def construct_tars( operation = "update" i_file: int = 0 - carried_over_tar_info: Optional[tarfile.TarInfo] = None while i_file < nfiles: # Each iteration of this loop constructs one tar # `create` passes in itar=-1, so the first tar will be 000000.tar # `update` passes in itar=max existing tar number, so the first tar will be max+1 itar += 1 - tar_size: int = 0 + cumulative_tar_size: int = 0 archived: List[TupleFilesRowNoId] = [] # Open a new tar @@ -378,47 +391,40 @@ def construct_tars( # Add files to the tar until we reach the max size while i_file < nfiles: current_file: str = files[i_file] - if carried_over_tar_info: - # If current_file wasn't added to the last tar, - # then we can immediately add it now. - # No need to repeat the size calculations. 
- tar_info = carried_over_tar_info - current_file_size = tar_info.size - carried_over_tar_info = None # Reset for next iteration - else: - try: - # It's ok that current_file isn't in the tar yet. - tar_info = tar_wrapper.tar.gettarinfo(current_file) - if tar_info.islnk(): - tar_info.size = os.path.getsize(current_file) - current_file_size = tar_info.size - except FileNotFoundError: - logger.error(f"Archiving {current_file}") - if follow_symlinks: - raise Exception( - f"Archive {operation} failed due to broken symlink." - ) - else: - raise - - # Check if adding this file would send us over the max size. - if (tar_size == 0) or (tar_size + current_file_size <= max_size): - # Case 1: if the tar is empty, always add the file, even if it's over the max size. - # Case 2: if the tar is nonempty, only add the file if it won't put us over the max size. - new_tar_size = tar_wrapper.process_file( - current_file, tar_info, archived, failures - ) - if new_tar_size != 0: - tar_size = new_tar_size - # Else: process_file failed, so we should keep the original tar_size - i_file += 1 - else: - # Over the size limit: - # Time to close and transfer this tar archive. - carried_over_tar_info = tar_info # Carry over this info to the next tar - # Break out of the inner while-loop: + current_file_size: int + current_file_size, _ = file_stats[current_file] + estimated_entry_size: int = estimate_tar_entry_size(current_file_size) + if (cumulative_tar_size != 0) and ( + cumulative_tar_size + estimated_entry_size > max_size + ): + # Over the size limit: time to close and transfer this tar archive. # Done adding files to this particular tar. + # Break out of the inner while-loop break + # If we make it this far, + # we know we can add the current file without going over the max size. + # (Either that, or the tar is currently empty, + # in which case we add the file even if it's over the max size.) 
+ try: + tar_info = tar_wrapper.tar.gettarinfo(current_file) + if tar_info.islnk(): + tar_info.size = os.path.getsize(current_file) + except FileNotFoundError: + logger.error(f"Archiving {current_file}") + if follow_symlinks: + raise Exception( + f"Archive {operation} failed due to broken symlink." + ) + else: + raise + new_cumulative_tar_size = tar_wrapper.process_file( + current_file, tar_info, archived, failures + ) + if new_cumulative_tar_size != 0: + # Update the cumulative tar size with the new tar size returned by process_file. + cumulative_tar_size = new_cumulative_tar_size + # Else: process_file failed, so we should keep the original cumulative_tar_size + i_file += 1 # Close the tar, submit it to the batch transfer system, and update the database with the archived files (and optionally the tar as well, depending on skip_tars_table) tar_wrapper.process_tar( diff --git a/zstash/update.py b/zstash/update.py index 1d27d799..817da424 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -226,7 +226,7 @@ def update_database( # noqa: C901 else: archived_files[file_path] = (size, mtime) - newfiles: List[str] = [] + newfiles: Dict[str, Tuple[int, datetime]] = {} files_checked = 0 for file_path in files: @@ -236,7 +236,7 @@ def update_database( # noqa: C901 # Check if file exists in database if file_path not in archived_files: # File not in database - it's new - newfiles.append(file_path) + newfiles[file_path] = (size_new, mdtime_new) else: # File exists in database - check if it changed archived_size, archived_mtime = archived_files[file_path] @@ -246,7 +246,7 @@ def update_database( # noqa: C901 and (abs((mdtime_new - archived_mtime).total_seconds()) <= TIME_TOL) ): # File has changed - newfiles.append(file_path) + newfiles[file_path] = (size_new, mdtime_new) files_checked += 1 @@ -261,7 +261,7 @@ def update_database( # noqa: C901 # --dry-run option if args.dry_run: print("List of files to be updated") - for file_path in newfiles: + for file_path in 
newfiles.keys(): print(file_path) # Close database con.commit() diff --git a/zstash/utils.py b/zstash/utils.py index 1bec4f3e..04154136 100644 --- a/zstash/utils.py +++ b/zstash/utils.py @@ -220,15 +220,6 @@ def get_files_to_archive_with_stats( return file_stats -def get_files_to_archive(cache: str, include: str, exclude: str) -> List[str]: - """ - LEGACY VERSION: Still used for `zstash create`. - Uses the optimized version but returns only the file list. - """ - file_stats = get_files_to_archive_with_stats(cache, include, exclude) - return list(file_stats.keys()) - - def update_config(cur: sqlite3.Cursor): # Retrieve some configuration settings from database # Loop through all attributes of config. From 898e95e12c01b51517cea9e71b3b9eeb65f3a82e Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Thu, 26 Mar 2026 15:35:54 -0500 Subject: [PATCH 30/34] Address comments --- zstash/globus.py | 112 +++++++++++++++++++++--------------- zstash/globus_utils.py | 3 + zstash/hpss.py | 6 +- zstash/transfer_tracking.py | 3 +- 4 files changed, 74 insertions(+), 50 deletions(-) diff --git a/zstash/globus.py b/zstash/globus.py index 0e2a3f71..b20d01e5 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -3,7 +3,7 @@ import sys from typing import Dict, List, Optional, Set, Tuple -from globus_sdk import TransferAPIError, TransferClient +from globus_sdk import TransferAPIError, TransferClient, TransferData from globus_sdk.response import GlobusHTTPResponse from globus_sdk.services.transfer.response.iterable import IterableTransferResponse from six.moves.urllib.parse import urlparse @@ -65,6 +65,20 @@ def file_exists(archive_directory_listing: IterableTransferResponse, name: str) return False +def update_cumulative_tarfiles_pushed( + transfer_manager: TransferManager, transfer_data: TransferData +) -> None: + logger.info(f"{ts_utc()}: TransferData: accumulated items:") + attribs = transfer_data.__dict__ + for item in attribs["data"]["DATA"]: + if item["DATA_TYPE"] == "transfer_item": 
+ transfer_manager.cumulative_tarfiles_pushed += 1 + print( + f"PUSHED (#{transfer_manager.cumulative_tarfiles_pushed}) tars, STORED source item: {item['source_path']}", + flush=True, + ) + + # C901 'globus_transfer' is too complex (20) def globus_transfer( # noqa: C901 transfer_manager: TransferManager, @@ -103,7 +117,11 @@ def globus_transfer( # noqa: C901 ) sys.exit(1) - mrt: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() + mrb: Optional[TransferBatch] = transfer_manager.get_most_recent_batch() + if not mrb: + raise RuntimeError( + "The transfer manager should always have at least one batch by the time globus_transfer is called, however, the batch list is empty." + ) transfer_data = set_up_TransferData( transfer_type, transfer_manager.globus_config.local_endpoint, @@ -111,22 +129,22 @@ def globus_transfer( # noqa: C901 remote_path, name, transfer_manager.globus_config.transfer_client, - mrt.transfer_data if mrt else None, + mrb.transfer_data, ) task: GlobusHTTPResponse try: - if mrt and mrt.task_id: + if mrb.task_id: # This the current transfer task associated with the most recent batch. - task = transfer_manager.globus_config.transfer_client.get_task(mrt.task_id) + task = transfer_manager.globus_config.transfer_client.get_task(mrb.task_id) # Update the most recent batch's task_status based on the current status from Globus API. - mrt.task_status = task["status"] + mrb.task_status = task["status"] # According to https://docs.globus.org/api/transfer/task/#task_fields, # this will be one of {ACTIVE, INACTIVE, SUCCEEDED, FAILED} - if mrt.task_status == "ACTIVE": - # The most recent transfer (mrt) is still active. + if mrb.task_status == "ACTIVE": + # The most recent transfer (mrb) is still active. logger.info( - f"{ts_utc()}: Previous task_id {mrt.task_id} Still Active. Returning ACTIVE." + f"{ts_utc()}: Previous task_id {mrb.task_id} Still Active. Returning ACTIVE." 
) if non_blocking: # Globus allows up to 3 simulataneous transfers, @@ -145,34 +163,32 @@ def globus_transfer( # noqa: C901 ) logger.error(error_str) raise RuntimeError(error_str) - elif mrt.task_status == "SUCCEEDED": + elif mrb.task_status == "SUCCEEDED": logger.info( - f"{ts_utc()}: Previous task_id {mrt.task_id} status = SUCCEEDED." + f"{ts_utc()}: Previous task_id {mrb.task_id} status = SUCCEEDED." ) src_ep = task["source_endpoint_id"] dst_ep = task["destination_endpoint_id"] label = task["label"] ts = ts_utc() logger.info( - f"{ts}:Globus transfer {mrt.task_id}, from {src_ep} to {dst_ep}: {label} succeeded" + f"{ts}:Globus transfer {mrb.task_id}, from {src_ep} to {dst_ep}: {label} succeeded" ) # The previous transfer succeeded. # That means we can transfer the current batch now. else: + # The previous transfer is in an unexpected state (i.e., "INACTIVE", "FAILED"). + # Either way, the previous transfer is effectively terminated, + # so we will proceed with the current transfer attempt. + # (I.e., we will not return yet). + # Note: any status we manually set + # (I.e., "UNKNOWN", "SUBMITTED", "EXHAUSTED_TIMEOUT_RETRIES") is NOT possible here, + # because we're using `task["status"]` from the globus_sdk TransferClient. logger.error( - f"{ts_utc()}: Previous task_id {mrt.task_id} status = {mrt.task_status}." + f"{ts_utc()}: Previous task_id {mrb.task_id} status = {mrb.task_status}." 
) - # DEBUG: review accumulated items in TransferData - logger.info(f"{ts_utc()}: TransferData: accumulated items:") - attribs = transfer_data.__dict__ - for item in attribs["data"]["DATA"]: - if item["DATA_TYPE"] == "transfer_item": - transfer_manager.cumulative_tarfiles_pushed += 1 - print( - f" (routine) PUSHING (#{transfer_manager.cumulative_tarfiles_pushed}) STORED source item: {item['source_path']}", - flush=True, - ) + update_cumulative_tarfiles_pushed(transfer_manager, transfer_data) logger.info(f"{ts_utc()}: DIVING: Submit Transfer for {transfer_data['label']}") # Submit the current transfer_data @@ -181,19 +197,26 @@ def globus_transfer( # noqa: C901 transfer_manager.globus_config.transfer_client, transfer_data ) task_id = task.get("task_id") - # NOTE: This log message is misleading. If we have accumulated multiple tar files for transfer, - # the "lable" given here refers only to the LAST tarfile in the TransferData list. logger.info( - f"{ts_utc()}: SURFACE Submit Transfer returned new task_id = {task_id} for label {transfer_data['label']}" + f"{ts_utc()}: SURFACE Submit Transfer returned new task_id = {task_id}, with last tarfile having label: {transfer_data['label']}" ) # Update the current batch with the task info # The batch was already created in hpss_transfer with files added to it # We just need to mark it as submitted if transfer_manager.batches: + # Update these two fields of the most recent batch + # (which is still available in this function as `mrb`). transfer_manager.batches[-1].task_id = task_id - transfer_manager.batches[-1].task_status = "UNKNOWN" - transfer_manager.batches[-1].transfer_data = None # Was just submitted + transfer_manager.batches[-1].task_status = "SUBMITTED" + else: + # This block should be impossible to reach. + # By now, we've ensured that `get_most_recent_batch()` returns a batch, + # and we haven't removed any batches since then, + # so there should always be at least one batch in `batches`. 
+ error_str = "transfer_manager has no batches" + logger.error(error_str) + raise RuntimeError(error_str) # Nullify the submitted transfer data structure so that a new one will be created on next call. transfer_data = None @@ -211,22 +234,26 @@ def globus_transfer( # noqa: C901 logger.error("Exception: {}".format(e)) sys.exit(1) - new_mrt: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() - # test for blocking on new task_id task_status = "UNKNOWN" - if new_mrt and new_mrt.task_id: + if mrb.task_id: if not non_blocking: # If blocking, wait for the task to complete and get the final status, # before we proceed with any more transfers. - new_mrt.task_status = globus_block_wait( + mrb.task_status = globus_block_wait( transfer_manager.globus_config.transfer_client, - task_id=new_mrt.task_id, + task_id=mrb.task_id, ) - task_status = new_mrt.task_status + task_status = mrb.task_status else: logger.info( - f"{ts_utc()}: NO BLOCKING (task_wait) for task_id {new_mrt.task_id}" + f"{ts_utc()}: NO BLOCKING (task_wait) for task_id {mrb.task_id}" ) + else: + # This block should be impossible to reach. + # By now, we've set `transfer_manager.batches[-1].task_id = task_id` or else raised an error, so `mrb.task_id` should always be set. + error_str = "No task_id found for most recent batch after submission" + logger.error(f"{ts_utc()}: {error_str}") + raise RuntimeError(error_str) if transfer_type == "put": return task_status @@ -274,9 +301,10 @@ def globus_block_wait( elif task_status in [ "INACTIVE", "UNKNOWN", + "SUBMITTED", "EXHAUSTED_TIMEOUT_RETRIES", ]: - # The latter two options here are ones we assign manually and aren't included on + # The latter three options here are ones we assign manually and aren't included on # https://docs.globus.org/api/transfer/task/#task_fields error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} for task_id {task_id}. Will retry waiting until max_retries is reached." 
logger.warning(error_str) @@ -358,19 +386,11 @@ def _submit_pending_transfer_data( If the most recent batch has unsubmitted TransferData, submit it and return task_id. Otherwise return None. """ - transfer: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() + transfer: Optional[TransferBatch] = transfer_manager.get_most_recent_batch() if not transfer or not transfer.transfer_data: return None - logger.info(f"{ts_utc()}: FINAL TransferData: accumulated items:") - attribs = transfer.transfer_data.__dict__ - for item in attribs["data"]["DATA"]: - if item["DATA_TYPE"] == "transfer_item": - transfer_manager.cumulative_tarfiles_pushed += 1 - print( - f" (finalize) PUSHING ({transfer_manager.cumulative_tarfiles_pushed}) source item: {item['source_path']}", - flush=True, - ) + update_cumulative_tarfiles_pushed(transfer_manager, transfer.transfer_data) logger.info( f"{ts_utc()}: DIVING: Submit Transfer for {transfer.transfer_data['label']}" diff --git a/zstash/globus_utils.py b/zstash/globus_utils.py index e5346f69..669b6831 100644 --- a/zstash/globus_utils.py +++ b/zstash/globus_utils.py @@ -226,6 +226,9 @@ def set_up_TransferData( transfer_client: TransferClient, transfer_data: Optional[TransferData] = None, ) -> TransferData: + """ + Set up the TransferData object, creating one if not provided. 
+ """ if not local_endpoint: raise ValueError("Local endpoint ID is not set.") if not remote_endpoint: diff --git a/zstash/hpss.py b/zstash/hpss.py index 10f449a4..e8693e4c 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -120,9 +120,9 @@ def hpss_transfer( logger.info( f"{ts_utc()}: SURFACE: hpss globus_transfer(name={name}) returned task_status={task_status}" ) - mrt: Optional[TransferBatch] = transfer_manager.get_most_recent_transfer() - if mrt and mrt.task_status: - globus_status = mrt.task_status + mrb: Optional[TransferBatch] = transfer_manager.get_most_recent_batch() + if mrb and mrb.task_status: + globus_status = mrb.task_status logger.info( f"{ts_utc()}: Most recent globus_transfer returned task_status={globus_status}" ) diff --git a/zstash/transfer_tracking.py b/zstash/transfer_tracking.py index 72ee5a4c..84f74a11 100644 --- a/zstash/transfer_tracking.py +++ b/zstash/transfer_tracking.py @@ -45,7 +45,8 @@ def __init__(self): # Connection state (Globus-specific, None if not using Globus) self.globus_config: Optional[GlobusConfig] = None - def get_most_recent_transfer(self) -> Optional[TransferBatch]: + def get_most_recent_batch(self) -> Optional[TransferBatch]: + """Get the last batch added to the manager, or None if no batches exist""" return self.batches[-1] if self.batches else None def delete_successfully_transferred_files(self): From 5a6c7f0f38c10b82271cbab17e9a05a6b7ce4f5a Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Thu, 26 Mar 2026 17:08:03 -0500 Subject: [PATCH 31/34] Add TaskStatus Enum --- zstash/globus.py | 62 +++++++++++++++---------------------- zstash/hpss.py | 6 ++-- zstash/transfer_tracking.py | 43 +++++++++++++++++++++++-- 3 files changed, 69 insertions(+), 42 deletions(-) diff --git a/zstash/globus.py b/zstash/globus.py index b20d01e5..16f22b80 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -17,7 +17,7 @@ submit_transfer_with_checks, ) from .settings import logger -from .transfer_tracking import GlobusConfig, 
TransferBatch, TransferManager +from .transfer_tracking import GlobusConfig, TaskStatus, TransferBatch, TransferManager from .utils import ts_utc @@ -87,7 +87,7 @@ def globus_transfer( # noqa: C901 name: str, transfer_type: str, non_blocking: bool, -) -> str: +) -> TaskStatus: logger.info(f"{ts_utc()}: Entered globus_transfer() for name = {name}") logger.debug(f"{ts_utc()}: non_blocking = {non_blocking}") @@ -138,10 +138,8 @@ def globus_transfer( # noqa: C901 # This the current transfer task associated with the most recent batch. task = transfer_manager.globus_config.transfer_client.get_task(mrb.task_id) # Update the most recent batch's task_status based on the current status from Globus API. - mrb.task_status = task["status"] - # According to https://docs.globus.org/api/transfer/task/#task_fields, - # this will be one of {ACTIVE, INACTIVE, SUCCEEDED, FAILED} - if mrb.task_status == "ACTIVE": + mrb.task_status = TaskStatus.convert_from_status_from_globus_sdk(task) + if mrb.task_status == TaskStatus.ACTIVE: # The most recent transfer (mrb) is still active. logger.info( f"{ts_utc()}: Previous task_id {mrb.task_id} Still Active. Returning ACTIVE." @@ -153,7 +151,7 @@ def globus_transfer( # noqa: C901 # We will therefore wait to submit a new transfer until it's done. # So, we'll simply return and the next run of globus_transfer # (i.e., on the next tar) will evaluate if the active transfer has finished. - return "ACTIVE" + return TaskStatus.ACTIVE else: # If we're in this block, then the blocking wait # for the previous transfer to finish was unsuccessful. @@ -163,7 +161,7 @@ def globus_transfer( # noqa: C901 ) logger.error(error_str) raise RuntimeError(error_str) - elif mrb.task_status == "SUCCEEDED": + elif mrb.task_status == TaskStatus.SUCCEEDED: logger.info( f"{ts_utc()}: Previous task_id {mrb.task_id} status = SUCCEEDED." 
) @@ -208,7 +206,7 @@ def globus_transfer( # noqa: C901 # Update these two fields of the most recent batch # (which is still available in this function as `mrb`). transfer_manager.batches[-1].task_id = task_id - transfer_manager.batches[-1].task_status = "SUBMITTED" + transfer_manager.batches[-1].task_status = TaskStatus.SUBMITTED else: # This block should be impossible to reach. # By now, we've ensured that `get_most_recent_batch()` returns a batch, @@ -234,7 +232,7 @@ def globus_transfer( # noqa: C901 logger.error("Exception: {}".format(e)) sys.exit(1) - task_status = "UNKNOWN" + task_status: TaskStatus = TaskStatus.UNKNOWN if mrb.task_id: if not non_blocking: # If blocking, wait for the task to complete and get the final status, @@ -269,14 +267,14 @@ def globus_block_wait( task_id: str, wait_timeout: int = 7200, # 7200/3600 = 2 hours max_retries: int = 5, -): +) -> TaskStatus: # Poll every "polling_interval" seconds to speed up small transfers. # Report every "wait_timeout" seconds, and stop waiting after "max_retries" reports. # By default: report every 2 hours, stop waiting after 5*2 = 10 hours logger.info( f"{ts_utc()}: BLOCKING START: invoking task_wait for task_id = {task_id}" ) - task_status: str = "UNKNOWN" + task_status: TaskStatus = TaskStatus.UNKNOWN retry_count: int = 0 while retry_count < max_retries: try: @@ -289,29 +287,17 @@ def globus_block_wait( task_id, timeout=wait_timeout, polling_interval=10 ) if task_is_not_active: - curr_task = transfer_client.get_task(task_id) - task_status = curr_task["status"] - if task_status == "SUCCEEDED": + curr_task: GlobusHTTPResponse = transfer_client.get_task(task_id) + task_status = TaskStatus.convert_from_status_from_globus_sdk(curr_task) + if task_status == TaskStatus.SUCCEEDED: break # Break out of the while-loop. The transfer already succeeded, so no need to retry. 
- elif task_status == "FAILED": + elif task_status == TaskStatus.FAILED: error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} for task_id {task_id}. No reason to keep retrying now." logger.warning(error_str) # We still need to break, because no matter how long we wait now, nothing will change with the transfer status. break - elif task_status in [ - "INACTIVE", - "UNKNOWN", - "SUBMITTED", - "EXHAUSTED_TIMEOUT_RETRIES", - ]: - # The latter three options here are ones we assign manually and aren't included on - # https://docs.globus.org/api/transfer/task/#task_fields - error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} for task_id {task_id}. Will retry waiting until max_retries is reached." - logger.warning(error_str) - # Don't break -- continue retries else: - # If we're in this block, then somehow an unexpected task_status was returned. - error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} is unexpected for task_id {task_id}. Will retry waiting until max_retries is reached." + error_str = f"{ts_utc()}: task_wait returned True, but task_status={task_status} for task_id {task_id}. Will retry waiting until max_retries is reached." logger.warning(error_str) # Don't break -- continue retries logger.info(f"{ts_utc()}: done with wait") @@ -327,7 +313,7 @@ def globus_block_wait( logger.info( f"{ts_utc()}: BLOCKING EXHAUSTED {max_retries} of timeout {wait_timeout} seconds" ) - task_status = "EXHAUSTED_TIMEOUT_RETRIES" + task_status = TaskStatus.EXHAUSTED_TIMEOUT_RETRIES logger.info( f"{ts_utc()}: BLOCKING ENDS: task_id {task_id} returned from task_wait with status {task_status}" @@ -339,8 +325,10 @@ def globus_block_wait( def globus_wait(transfer_client: TransferClient, task_id: str): try: """ - A Globus transfer job (task) can be in one of the three states: - ACTIVE, SUCCEEDED, FAILED. 
The script every 20 seconds polls a + A Globus transfer job (task) can be in one of the four states: + {ACTIVE, SUCCEEDED, FAILED, INACTIVE} + according to https://docs.globus.org/api/transfer/task/#task_fields. + The script every 20 seconds polls a status of the transfer job (task) from the Globus Transfer service, with 20 second timeout limit. If the task is ACTIVE after time runs out 'task_wait' returns False, and True otherwise. @@ -351,8 +339,8 @@ def globus_wait(transfer_client: TransferClient, task_id: str): The Globus transfer job (task) has been finished (SUCCEEDED or FAILED). Check if the transfer SUCCEEDED or FAILED. """ - task = transfer_client.get_task(task_id) - if task["status"] == "SUCCEEDED": + task: GlobusHTTPResponse = transfer_client.get_task(task_id) + if TaskStatus.convert_from_status_from_globus_sdk(task) == TaskStatus.SUCCEEDED: src_ep = task["source_endpoint_id"] dst_ep = task["destination_endpoint_id"] label = task["label"] @@ -463,14 +451,14 @@ def _refresh_batch_status( transfer_client: TransferClient, task_id: str, task_to_batch: Dict[str, TransferBatch], -) -> Optional[str]: +) -> Optional[TaskStatus]: """ Fetch Globus task status and update corresponding batch.task_status if present. Returns status, or None if fetch fails. 
""" try: task: GlobusHTTPResponse = transfer_client.get_task(task_id) - status = task["status"] + status: TaskStatus = TaskStatus.convert_from_status_from_globus_sdk(task) batch: Optional[TransferBatch] = task_to_batch.get(task_id) if batch: batch.task_status = status @@ -493,7 +481,7 @@ def _wait_for_all_tasks( """ for tid in task_ids: status = _refresh_batch_status(transfer_client, tid, task_to_batch) - if status == "SUCCEEDED": + if status == TaskStatus.SUCCEEDED: logger.info(f"{ts_utc()}: task_id={tid} already SUCCEEDED; skipping wait") continue diff --git a/zstash/hpss.py b/zstash/hpss.py index e8693e4c..fac368f8 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -8,7 +8,7 @@ from .globus import globus_transfer from .settings import get_db_filename, logger -from .transfer_tracking import GlobusConfig, TransferBatch, TransferManager +from .transfer_tracking import GlobusConfig, TaskStatus, TransferBatch, TransferManager from .utils import run_command, ts_utc @@ -108,13 +108,13 @@ def hpss_transfer( # For `get`, this directory is where the file we get from HPSS will go. 
os.chdir(path) - globus_status: str = "UNKNOWN" + globus_status: TaskStatus = TaskStatus.UNKNOWN if scheme == "globus": if not transfer_manager.globus_config: transfer_manager.globus_config = GlobusConfig() # Transfer file using the Globus Transfer Service logger.info(f"{ts_utc()}: DIVING: hpss calls globus_transfer(name={name})") - task_status: str = globus_transfer( + task_status: TaskStatus = globus_transfer( transfer_manager, endpoint, url_path, name, transfer_type, non_blocking ) logger.info( diff --git a/zstash/transfer_tracking.py b/zstash/transfer_tracking.py index 84f74a11..1cac18d5 100644 --- a/zstash/transfer_tracking.py +++ b/zstash/transfer_tracking.py @@ -1,7 +1,9 @@ import os +from enum import Enum, auto from typing import List, Optional from globus_sdk import TransferClient, TransferData +from globus_sdk.response import GlobusHTTPResponse from globus_sdk.services.transfer.response.iterable import IterableTransferResponse from .settings import logger @@ -18,13 +20,50 @@ def __init__(self): self.archive_directory_listing: Optional[IterableTransferResponse] = None +class TaskStatus(Enum): + """Enum for Globus transfer task status""" + + # The first 4 values are defined by the Globus API: + # https://docs.globus.org/api/transfer/task/#task_fields + SUCCEEDED = auto() + ACTIVE = auto() + INACTIVE = auto() + FAILED = auto() + # The last 3 values are custom statuses we add on. 
+ UNKNOWN = auto() + SUBMITTED = auto() + EXHAUSTED_TIMEOUT_RETRIES = auto() + + @classmethod + def convert_from_status_from_globus_sdk(cls, globus_task: GlobusHTTPResponse): + """Convert a Globus API status string to a TaskStatus enum value""" + status_from_globus_sdk: str = globus_task["status"] + status_from_globus_sdk = status_from_globus_sdk.upper() + if status_from_globus_sdk == "SUCCEEDED": + return TaskStatus.SUCCEEDED + elif status_from_globus_sdk == "ACTIVE": + return TaskStatus.ACTIVE + elif status_from_globus_sdk == "INACTIVE": + return TaskStatus.INACTIVE + elif status_from_globus_sdk == "FAILED": + return TaskStatus.FAILED + else: + logger.warning( + f"Received unrecognized Globus status: {status_from_globus_sdk}" + ) + return TaskStatus.UNKNOWN + + def __str__(self) -> str: + return self.name + + class TransferBatch: """Represents one batch of files being transferred""" def __init__(self): self.file_paths: List[str] = [] self.task_id: Optional[str] = None - self.task_status: Optional[str] = None + self.task_status: Optional[TaskStatus] = None self.is_globus: bool = False self.transfer_data: Optional[TransferData] = None # Only for Globus @@ -58,7 +97,7 @@ def delete_successfully_transferred_files(self): self.batches = [batch for batch in self.batches if batch.file_paths] # Now delete files for successful transfers for batch in self.batches: - if (not batch.is_globus) or (batch.task_status == "SUCCEEDED"): + if (not batch.is_globus) or (batch.task_status == TaskStatus.SUCCEEDED): # The files were transferred successfully, so delete them logger.info( f"{ts_utc()}: Deleting {len(batch.file_paths)} files from successful transfer" From f7570944af33951275e0b813538ca1fdfc14a563 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Mon, 30 Mar 2026 14:59:56 -0500 Subject: [PATCH 32/34] Improve handling of TransferData --- zstash/globus.py | 40 ++++++++++++++++++++------ zstash/globus_utils.py | 64 ++++++++++++++++++++++++------------------ 2 files changed, 68 
insertions(+), 36 deletions(-) diff --git a/zstash/globus.py b/zstash/globus.py index 16f22b80..dbf46f6e 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -10,10 +10,12 @@ from .globus_utils import ( HPSS_ENDPOINT_MAP, + add_file_to_TransferData, check_state_files, + create_TransferData, + get_label, get_local_endpoint_id, get_transfer_client_with_auth, - set_up_TransferData, submit_transfer_with_checks, ) from .settings import logger @@ -122,14 +124,37 @@ def globus_transfer( # noqa: C901 raise RuntimeError( "The transfer manager should always have at least one batch by the time globus_transfer is called, however, the batch list is empty." ) - transfer_data = set_up_TransferData( + + if transfer_manager.globus_config.local_endpoint: + local_endpoint: str = transfer_manager.globus_config.local_endpoint + else: + raise ValueError("Local endpoint ID is not set.") + if transfer_manager.globus_config.remote_endpoint: + remote_endpoint: str = transfer_manager.globus_config.remote_endpoint + else: + raise ValueError("Remote endpoint ID is not set.") + label: str = get_label(remote_path, name) + transfer_data: TransferData + if mrb.transfer_data: + # We already have a TransferData for this batch. + transfer_data = mrb.transfer_data + else: + # We need to create a new TransferData for this batch. 
+ transfer_data = create_TransferData( + transfer_type, + local_endpoint, + remote_endpoint, + transfer_manager.globus_config.transfer_client, + label, + ) + add_file_to_TransferData( transfer_type, - transfer_manager.globus_config.local_endpoint, - transfer_manager.globus_config.remote_endpoint, + local_endpoint, + remote_endpoint, remote_path, name, - transfer_manager.globus_config.transfer_client, - mrb.transfer_data, + transfer_data, + label, ) task: GlobusHTTPResponse @@ -215,9 +240,6 @@ def globus_transfer( # noqa: C901 error_str = "transfer_manager has no batches" logger.error(error_str) raise RuntimeError(error_str) - - # Nullify the submitted transfer data structure so that a new one will be created on next call. - transfer_data = None except TransferAPIError as e: if e.code == "NoCredException": logger.error( diff --git a/zstash/globus_utils.py b/zstash/globus_utils.py index 669b6831..9c6f4e2a 100644 --- a/zstash/globus_utils.py +++ b/zstash/globus_utils.py @@ -217,51 +217,61 @@ def save_tokens(token_response): # Primarily used by globus_transfer ########################################### -def set_up_TransferData( +def get_label(remote_path: str, name: str) -> str: + subdir = os.path.basename(os.path.normpath(remote_path)) + subdir_label = re.sub("[^A-Za-z0-9_ -]", "", subdir) + filename = name.split(".")[0] + label = subdir_label + " " + filename + return label + + +def create_TransferData( + transfer_type: str, + local_endpoint: str, + remote_endpoint: str, + transfer_client: TransferClient, + label: str, +) -> TransferData: + if transfer_type == "get": + src_ep = remote_endpoint + dst_ep = local_endpoint + else: + src_ep = local_endpoint + dst_ep = remote_endpoint + transfer_data = TransferData( + transfer_client, + src_ep, + dst_ep, + label=label, + verify_checksum=True, + preserve_timestamp=True, + fail_on_quota_errors=True, + ) + return transfer_data + + +def add_file_to_TransferData( transfer_type: str, local_endpoint: Optional[str], 
remote_endpoint: Optional[str], remote_path: str, name: str, - transfer_client: TransferClient, - transfer_data: Optional[TransferData] = None, -) -> TransferData: - """ - Set up the TransferData object, creating one if not provided. - """ + transfer_data: TransferData, + label: str, +): if not local_endpoint: raise ValueError("Local endpoint ID is not set.") if not remote_endpoint: raise ValueError("Remote endpoint ID is not set.") if transfer_type == "get": - src_ep = remote_endpoint src_path = os.path.join(remote_path, name) - dst_ep = local_endpoint dst_path = os.path.join(os.getcwd(), name) else: - src_ep = local_endpoint src_path = os.path.join(os.getcwd(), name) - dst_ep = remote_endpoint dst_path = os.path.join(remote_path, name) - subdir = os.path.basename(os.path.normpath(remote_path)) - subdir_label = re.sub("[^A-Za-z0-9_ -]", "", subdir) - filename = name.split(".")[0] - label = subdir_label + " " + filename - - if not transfer_data: - transfer_data = TransferData( - transfer_client, - src_ep, - dst_ep, - label=label, - verify_checksum=True, - preserve_timestamp=True, - fail_on_quota_errors=True, - ) transfer_data.add_item(src_path, dst_path) transfer_data["label"] = label - return transfer_data def submit_transfer_with_checks(transfer_client, transfer_data) -> GlobusHTTPResponse: From d57c3118e6b7e7a6651b36d91f8e4445d14639d5 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Fri, 3 Apr 2026 13:20:36 -0500 Subject: [PATCH 33/34] Address comments --- zstash/create.py | 4 ++-- zstash/globus.py | 2 +- zstash/hpss.py | 2 +- zstash/hpss_utils.py | 6 +++--- zstash/update.py | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/zstash/create.py b/zstash/create.py index 1c956a72..872b91e4 100644 --- a/zstash/create.py +++ b/zstash/create.py @@ -99,9 +99,9 @@ def create(): hpss, get_db_filename(cache), cache, + transfer_manager, keep=args.keep, is_index=True, - transfer_manager=transfer_manager, ) logger.debug(f"{ts_utc()}: calling 
globus_finalize()") @@ -292,9 +292,9 @@ def create_database( args.keep, args.follow_symlinks, dev_options, + transfer_manager, skip_tars_table=args.no_tars_md5, non_blocking=args.non_blocking, - transfer_manager=transfer_manager, ) # Close database diff --git a/zstash/globus.py b/zstash/globus.py index dbf46f6e..b99a6392 100644 --- a/zstash/globus.py +++ b/zstash/globus.py @@ -207,7 +207,7 @@ def globus_transfer( # noqa: C901 # Note: any status we manually set # (I.e., "UNKNOWN", "SUBMITTED", "EXHAUSTED_TIMEOUT_RETRIES") is NOT possible here, # because we're using `task["status"]` from the globus_sdk TransferClient. - logger.error( + logger.warning( f"{ts_utc()}: Previous task_id {mrb.task_id} status = {mrb.task_status}." ) diff --git a/zstash/hpss.py b/zstash/hpss.py index fac368f8..2c59145a 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -150,10 +150,10 @@ def hpss_put( hpss: str, file_path: str, cache: str, + transfer_manager: TransferManager, keep: bool = True, non_blocking: bool = False, is_index=False, - transfer_manager: Optional[TransferManager] = None, ): """ Put a file to the HPSS archive. 
diff --git a/zstash/hpss_utils.py b/zstash/hpss_utils.py index aa25c6af..0956cd89 100644 --- a/zstash/hpss_utils.py +++ b/zstash/hpss_utils.py @@ -108,7 +108,7 @@ def process_tar( cache: str, keep: bool, non_blocking: bool, - transfer_manager: Optional[TransferManager], + transfer_manager: TransferManager, skip_tars_table: bool, cur: sqlite3.Cursor, con: sqlite3.Connection, @@ -140,10 +140,10 @@ def process_tar( hpss, os.path.join(cache, self.tfname), cache, + transfer_manager, keep, non_blocking, is_index=False, - transfer_manager=transfer_manager, ) logger.info( f"{ts_utc()}: SURFACE (process_tar): Called hpss_put to dispatch archive file {self.tfname}" @@ -344,9 +344,9 @@ def construct_tars( keep: bool, follow_symlinks: bool, dev_options: DevOptions, + transfer_manager: TransferManager, skip_tars_table: bool = False, non_blocking: bool = False, - transfer_manager: Optional[TransferManager] = None, ) -> List[str]: failures: List[str] = [] diff --git a/zstash/update.py b/zstash/update.py index 817da424..69f72e0f 100644 --- a/zstash/update.py +++ b/zstash/update.py @@ -40,9 +40,9 @@ def update(): hpss, get_db_filename(cache), cache, + transfer_manager, keep=args.keep, is_index=True, - transfer_manager=transfer_manager, ) globus_finalize(transfer_manager, args.keep) @@ -290,8 +290,8 @@ def update_database( # noqa: C901 keep, args.follow_symlinks, dev_options, + transfer_manager, non_blocking=args.non_blocking, - transfer_manager=transfer_manager, ) # Close database From 3d8b908c6baa8abc8c9734e17d0ccb16a85dfa7d Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Fri, 3 Apr 2026 14:41:07 -0500 Subject: [PATCH 34/34] Preliminary refactor of hpss_transfer() --- zstash/hpss.py | 148 +++++++++++++++++++++++++++++-------------------- 1 file changed, 87 insertions(+), 61 deletions(-) diff --git a/zstash/hpss.py b/zstash/hpss.py index 2c59145a..19855488 100644 --- a/zstash/hpss.py +++ b/zstash/hpss.py @@ -12,6 +12,85 @@ from .utils import run_command, ts_utc +def 
ensure_transfer_batch(transfer_manager: TransferManager, scheme: str): + # Create a new batch if needed (before we start adding files) + if not transfer_manager.batches or transfer_manager.batches[-1].task_id: + # Either no batches exist, or the last batch was already submitted + new_batch = TransferBatch() + new_batch.is_globus = scheme == "globus" + transfer_manager.batches.append(new_batch) + logger.debug( + f"{ts_utc()}: Created new TransferBatch, total batches: {len(transfer_manager.batches)}" + ) + + +def local_put(file_path: str, cache: str): + if file_path != get_db_filename(cache): + # We are adding a file (that is not the cache) to the local non-HPSS archive + logger.info("put: Keeping tar files locally and removing write permissions") + # https://unix.stackexchange.com/questions/46915/get-the-chmod-numerical-value-for-a-file + display_mode_command: List[str] = "stat --format '%a' {}".format( + file_path + ).split() + display_mode_output: bytes = subprocess.check_output( + display_mode_command + ).strip() + logger.info("{!r} original mode={!r}".format(file_path, display_mode_output)) + # https://www.washington.edu/doit/technology-tips-chmod-overview + # Remove write-permission from user, group, and others, + # without changing read or execute permissions for any. + change_mode_command: List[str] = "chmod ugo-w {}".format(file_path).split() + # An error will be raised if this line fails. 
+ subprocess.check_output(change_mode_command) + new_display_mode_output: bytes = subprocess.check_output( + display_mode_command + ).strip() + logger.info("{!r} new mode={!r}".format(file_path, new_display_mode_output)) + # else: no action needed + + +def run_hpss_put(hpss: str, name: str): + command: str = f'hsi -q "cd {hpss}; put {name}"' + error_str: str = f"Transferring file to HPSS: {name}" + run_command(command, error_str) + + +def run_hpss_get(hpss: str, name: str): + command: str = f'hsi -q "cd {hpss}; get {name}"' + error_str: str = f"Transferring file from HPSS: {name}" + run_command(command, error_str) + + +def globus_transfer_wrapper( + transfer_manager: TransferManager, + endpoint: str, + url_path: str, + name: str, + transfer_type: str, + non_blocking: bool, +): + if not transfer_manager.globus_config: + transfer_manager.globus_config = GlobusConfig() + # Transfer file using the Globus Transfer Service + logger.info(f"{ts_utc()}: DIVING: hpss calls globus_transfer(name={name})") + task_status: TaskStatus = globus_transfer( + transfer_manager, endpoint, url_path, name, transfer_type, non_blocking + ) + logger.info( + f"{ts_utc()}: SURFACE: hpss globus_transfer(name={name}) returned task_status={task_status}" + ) + mrb: Optional[TransferBatch] = transfer_manager.get_most_recent_batch() + if mrb and mrb.task_status: + globus_status: TaskStatus = mrb.task_status + logger.info( + f"{ts_utc()}: Most recent globus_transfer returned task_status={globus_status}" + ) + # NOTE: Here, the status could be "EXHAUSTED_TIMEOUT_RETRIES", meaning a very long transfer + # or perhaps transfer is hanging. We should decide whether to ignore it, or cancel it, but + # we'd need the task_id to issue a cancellation. Perhaps we should have globus_transfer + # return a tuple (task_id, status). 
+ + def hpss_transfer( hpss: str, file_path: str, @@ -28,55 +107,19 @@ def hpss_transfer( url = urlparse(hpss) scheme = url.scheme - # Create a new batch if needed (before we start adding files) - if not transfer_manager.batches or transfer_manager.batches[-1].task_id: - # Either no batches exist, or the last batch was already submitted - new_batch = TransferBatch() - new_batch.is_globus = scheme == "globus" - transfer_manager.batches.append(new_batch) - logger.debug( - f"{ts_utc()}: Created new TransferBatch, total batches: {len(transfer_manager.batches)}" - ) + ensure_transfer_batch(transfer_manager, scheme) if hpss == "none": logger.info("{}: HPSS is unavailable".format(transfer_type)) - if transfer_type == "put" and file_path != get_db_filename(cache): - # We are adding a file (that is not the cache) to the local non-HPSS archive - logger.info( - "{}: Keeping tar files locally and removing write permissions".format( - transfer_type - ) - ) - # https://unix.stackexchange.com/questions/46915/get-the-chmod-numerical-value-for-a-file - display_mode_command: List[str] = "stat --format '%a' {}".format( - file_path - ).split() - display_mode_output: bytes = subprocess.check_output( - display_mode_command - ).strip() - logger.info( - "{!r} original mode={!r}".format(file_path, display_mode_output) - ) - # https://www.washington.edu/doit/technology-tips-chmod-overview - # Remove write-permission from user, group, and others, - # without changing read or execute permissions for any. - change_mode_command: List[str] = "chmod ugo-w {}".format(file_path).split() - # An error will be raised if this line fails. 
- subprocess.check_output(change_mode_command) - new_display_mode_output: bytes = subprocess.check_output( - display_mode_command - ).strip() - logger.info("{!r} new mode={!r}".format(file_path, new_display_mode_output)) + if transfer_type == "put": + local_put(file_path, cache) # else: no action needed else: transfer_word: str - transfer_command: str if transfer_type == "put": transfer_word = "to" - transfer_command = "put" elif transfer_type == "get": transfer_word = "from" - transfer_command = "get" else: raise ValueError("Invalid transfer_type={}".format(transfer_type)) logger.info("Transferring file {} HPSS: {}".format(transfer_word, file_path)) @@ -108,33 +151,16 @@ def hpss_transfer( # For `get`, this directory is where the file we get from HPSS will go. os.chdir(path) - globus_status: TaskStatus = TaskStatus.UNKNOWN if scheme == "globus": - if not transfer_manager.globus_config: - transfer_manager.globus_config = GlobusConfig() - # Transfer file using the Globus Transfer Service - logger.info(f"{ts_utc()}: DIVING: hpss calls globus_transfer(name={name})") - task_status: TaskStatus = globus_transfer( + globus_transfer_wrapper( transfer_manager, endpoint, url_path, name, transfer_type, non_blocking ) - logger.info( - f"{ts_utc()}: SURFACE: hpss globus_transfer(name={name}) returned task_status={task_status}" - ) - mrb: Optional[TransferBatch] = transfer_manager.get_most_recent_batch() - if mrb and mrb.task_status: - globus_status = mrb.task_status - logger.info( - f"{ts_utc()}: Most recent globus_transfer returned task_status={globus_status}" - ) - # NOTE: Here, the status could be "EXHAUSTED_TIMEOUT_RETRIES", meaning a very long transfer - # or perhaps transfer is hanging. We should decide whether to ignore it, or cancel it, but - # we'd need the task_id to issue a cancellation. Perhaps we should have globus_transfer - # return a tuple (task_id, status). 
else: # Transfer file using `hsi` - command: str = 'hsi -q "cd {}; {} {}"'.format(hpss, transfer_command, name) - error_str: str = "Transferring file {} HPSS: {}".format(transfer_word, name) - run_command(command, error_str) + if transfer_type == "put": + run_hpss_put(hpss, name) + else: + run_hpss_get(hpss, name) # Return to original working directory if path != "":