Merged
42 changes: 30 additions & 12 deletions .github/workflows/build_workflow.yml
@@ -19,21 +19,24 @@ jobs:
- name: Checkout Code Repository
uses: actions/checkout@v3

- - name: Set up Python 3.9
+ - name: Set up Python 3.13
uses: actions/setup-python@v4
with:
- python-version: 3.9
+ python-version: "3.13"

# Run all pre-commit hooks on all the files.
# Getting only staged files can be tricky in case a new PR is opened
# since the action is run on a branch in detached head state.
# This is the equivalent of running "pre-commit run --all-files" locally.
# If you commit with the `--no-verify` flag, this check may fail.
- name: Install and Run Pre-commit
uses: pre-commit/[email protected].0
uses: pre-commit/[email protected].1

build:
runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.11", "3.12", "3.13"]
defaults:
run:
shell: bash -l {0}
@@ -44,11 +47,11 @@ jobs:
- name: Cache Conda
uses: actions/cache@v3
env:
- CACHE_NUMBER: 0
+ CACHE_NUMBER: 1 # Increment this to invalidate cache
with:
path: ~/conda_pkgs_dir
key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
- hashFiles('conda/dev.yml') }}
+ hashFiles('conda/dev.yml') }}-python${{ matrix.python-version }}

- name: Build Conda Environment
uses: conda-incubator/setup-miniconda@v3
@@ -57,13 +60,27 @@
miniforge-variant: Miniforge3
miniforge-version: latest
environment-file: conda/dev.yml
- channel-priority: strict
+ channel-priority: flexible # Changed from strict to flexible
auto-update-conda: true
+ python-version: ${{ matrix.python-version }}
+ channels: conda-forge
+ use-only-tar-bz2: true

+ - name: Verify Environment and Fix Dependencies
+ run: |
+ conda info
+ conda list
+ # Ensure we have the right Python version
+ python --version
+ # Fix pip issues for Python 3.12+
+ if [[ "${{ matrix.python-version }}" == "3.12" ]] || [[ "${{ matrix.python-version }}" == "3.13" ]]; then
+ python -m ensurepip --upgrade || true
+ python -m pip install --upgrade --force-reinstall pip setuptools wheel
+ fi

- name: Install `zstash` Package
run: |
python -m pip install --upgrade pip
- pip install .
+ python -m pip install .

- name: Run Tests
run: |
@@ -77,7 +94,7 @@ jobs:
defaults:
run:
shell: bash -l {0}
- timeout-minutes: 5
+ timeout-minutes: 10 # Increased timeout for docs
steps:
- uses: actions/checkout@v3
with:
@@ -87,11 +104,11 @@ jobs:
- name: Cache Conda
uses: actions/cache@v3
env:
- CACHE_NUMBER: 0
+ CACHE_NUMBER: 1 # Match the build job cache number
with:
path: ~/conda_pkgs_dir
key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
- hashFiles('conda/dev.yml') }}
+ hashFiles('conda/dev.yml') }}-docs

- name: Build Conda Environment
uses: conda-incubator/setup-miniconda@v3
@@ -100,8 +117,9 @@ jobs:
miniforge-variant: Miniforge3
miniforge-version: latest
environment-file: conda/dev.yml
- channel-priority: strict
+ channel-priority: flexible # Changed from strict to flexible
auto-update-conda: true
python-version: "3.13" # Use stable Python version for docs

# sphinx-multiversion allows for version docs.
- name: Build Sphinx Docs
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -12,27 +12,27 @@ repos:
exclude: conda/meta.yaml

- repo: https://github.com/psf/black
- rev: 24.10.0
+ rev: 25.1.0
hooks:
- id: black

- repo: https://github.com/PyCQA/isort
- rev: 5.13.2
+ rev: 6.0.1
hooks:
- id: isort

# Need to use flake8 GitHub mirror due to CentOS git issue with GitLab
# https://github.com/pre-commit/pre-commit/issues/1206
- repo: https://github.com/pycqa/flake8
- rev: 7.1.1
+ rev: 7.3.0
hooks:
- id: flake8
args: ["--config=setup.cfg"]
additional_dependencies: [flake8-isort]
exclude: analysis_data_preprocess

- repo: https://github.com/pre-commit/mirrors-mypy
- rev: v1.11.2
+ rev: v1.18.2
hooks:
- id: mypy
args: ["--config=setup.cfg", "--install-types", "--non-interactive"]
33 changes: 16 additions & 17 deletions conda/dev.yml
@@ -1,31 +1,30 @@
name: zstash_dev
channels:
- conda-forge
- - defaults
dependencies:
# Base
# =================
- - pip=22.2.2
- - python=3.9.13
- - six=1.16.0
- - globus-sdk=3.15.0
+ - pip
+ - python >=3.11,<3.14
+ - sqlite
Collaborator Author:

@tomvothecoder How do we determine the constraints (=, >=, etc.)?

Collaborator:

This is based on the conda-forge recipe and compatibility with other packages. If things break with new package versions, that suggests updating the constraints accordingly.

+ - six >=1.16.0
+ - globus-sdk >=3.15.0
# Developer Tools
# =================
# If versions are updated, also update 'rev' in `.pre-commit.config.yaml`
- - black=24.10.0
- - flake8=7.1.1
- - flake8-isort=6.1.1
- - mypy=1.11.2
- - pre-commit=4.0.1
- - tbump=6.9.0
+ - black ==25.1.0
+ - flake8 ==7.3.0
+ - isort ==6.0.1
+ - mypy ==1.18.2
+ - pre-commit ==4.3.0
+ - tbump >=6.9.0
# Documentation
# =================
# If versions are updated, also update in `.github/workflows/workflow.yml`
- - jinja2<3.1
- - sphinx=5.2.3
- - sphinx-multiversion=0.2.4
- - sphinx_rtd_theme=1.0.0
+ - jinja2 <3.1
+ - sphinx >=5.2.0
+ - sphinx-multiversion >=0.2.4
+ - sphinx_rtd_theme >=1.0.0
# Need to pin docutils because 0.17 has a bug with unordered lists
# https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
- - docutils=0.16
- prefix: /opt/miniconda3/envs/zstash_dev
+ - docutils >=0.16,<0.17
35 changes: 0 additions & 35 deletions conda/meta.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion setup.cfg
@@ -46,7 +46,7 @@ exclude =
venv

[mypy]
- python_version = 3.9
+ python_version = 3.13
Collaborator Author:

How do we choose the specific version for this?

Collaborator:

Latest stable version of Python.

check_untyped_defs = True
ignore_missing_imports = True
warn_unused_ignores = True
2 changes: 1 addition & 1 deletion setup.py
@@ -7,6 +7,6 @@
author_email="[email protected], [email protected], [email protected]",
description="Long term HPSS archiving software for E3SM",
packages=find_packages(include=["zstash", "zstash.*"]),
python_requires=">=3.9",
python_requires=">=3.11,<3.14",
entry_points={"console_scripts": ["zstash=zstash.main:main"]},
)
4 changes: 2 additions & 2 deletions zstash/extract.py
@@ -591,7 +591,7 @@ def extractFiles( # noqa: C901
try:
# Seek file position
if tar.fileobj is not None:
- fileobj: _io.BufferedReader = tar.fileobj
+ fileobj = tar.fileobj
else:
raise TypeError("Invalid tar.fileobj={}".format(tar.fileobj))
fileobj.seek(files_row.offset)
@@ -665,7 +665,7 @@ def extractFiles( # noqa: C901
# relying here on 'touch'. This is not the prettiest solution.
# Maybe a better one can be implemented later.
if tarinfo.issym():
- tmp1: int = tarinfo.mtime
+ tmp1 = tarinfo.mtime
tmp2: datetime = datetime.fromtimestamp(tmp1)
tmp3: str = tmp2.strftime("%Y%m%d%H%M.%S")
os.system("touch -h -t %s %s" % (tmp3, tarinfo.name))
11 changes: 1 addition & 10 deletions zstash/globus.py
@@ -57,7 +57,6 @@ def globus_activate(hpss: str):


def file_exists(name: str) -> bool:
- global archive_directory_listing

for entry in archive_directory_listing:
if entry.get("name") == name:
@@ -72,9 +71,6 @@ def file_exists(name: str) -> bool:
def globus_transfer( # noqa: C901
remote_ep: str, remote_path: str, name: str, transfer_type: str, non_blocking: bool
):
- global transfer_client
- global local_endpoint
- global remote_endpoint
global transfer_data
global task_id
global archive_directory_listing
@@ -199,7 +195,6 @@ def globus_transfer( # noqa: C901
def globus_block_wait(
task_id: str, wait_timeout: int, polling_interval: int, max_retries: int
):
- global transfer_client

# poll every "polling_interval" seconds to speed up small transfers. Report every 2 hours, stop waiting after 5*2 = 10 hours
logger.info(
@@ -211,7 +206,7 @@
try:
# Wait for the task to complete
logger.info(
f"{ts_utc()}: on task_wait try {retry_count+1} out of {max_retries}"
f"{ts_utc()}: on task_wait try {retry_count + 1} out of {max_retries}"
)
transfer_client.task_wait(
task_id, timeout=wait_timeout, polling_interval=10
@@ -244,7 +239,6 @@ def globus_block_wait(


def globus_wait(task_id: str):
- global transfer_client

try:
"""
@@ -288,9 +282,6 @@ def globus_wait(task_id: str):


def globus_finalize(non_blocking: bool = False):
- global transfer_client
- global transfer_data
- global task_id
global global_variable_tarfiles_pushed

last_task_id = None
43 changes: 14 additions & 29 deletions zstash/hpss_utils.py
@@ -10,7 +10,6 @@
from typing import List, Optional, Tuple

import _hashlib
- import _io

from .hpss import hpss_put
from .settings import BLOCK_SIZE, TupleFilesRowNoId, TupleTarsRowNoId, config, logger
@@ -20,7 +19,7 @@
# Minimum output file object
class HashIO(object):
def __init__(self, name: str, mode: str, do_hash: bool):
- self.f: _io.BufferedWriter = open(name, mode)
+ self.f = open(name, mode)
self.hash: Optional[_hashlib.HASH]
if do_hash:
self.hash = hashlib.md5()
@@ -281,43 +280,29 @@ def add_file(
# Change the size of any hardlinks from 0 to the size of the actual file
if tarinfo.islnk():
tarinfo.size = os.path.getsize(file_name)

- # Add the file to the tar
- tar.addfile(tarinfo)
+ if (tarinfo.isfile() or tarinfo.islnk()) and tarinfo.size > 0:
+ with open(file_name, "rb") as fileobj:
+ tar.addfile(tarinfo, fileobj)
+ else:
+ tar.addfile(tarinfo)
Collaborator Author:

In the newly supported versions of Python, addfile writes to fileobj.
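For reference, a minimal standalone sketch (not zstash code) of the behavior relied on here: when addfile() is given a fileobj, tarfile reads exactly tarinfo.size bytes from it, pads the data out to a multiple of tarfile.BLOCKSIZE, and advances tar.offset internally, so no manual offset bookkeeping is needed.

import io
import tarfile

payload = b"hello world"
with tarfile.open("demo.tar", "w") as tar:
    tarinfo = tarfile.TarInfo(name="demo.txt")
    tarinfo.size = len(payload)
    # addfile() consumes tarinfo.size bytes from the fileobj and writes
    # the 512-byte header plus NUL-padded data blocks to the archive.
    tar.addfile(tarinfo, io.BytesIO(payload))
    # The internal offset lands on a block boundary automatically.
    assert tar.offset % tarfile.BLOCKSIZE == 0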


md5: Optional[str] = None
# Only add files or hardlinks.
# (So don't add directories or softlinks.)
if tarinfo.isfile() or tarinfo.islnk():
f: _io.TextIOWrapper = open(file_name, "rb")
f = open(file_name, "rb")
hash_md5: _hashlib.HASH = hashlib.md5()
- if tar.fileobj is not None:
- fileobj: _io.BufferedWriter = tar.fileobj
- else:
- raise TypeError("Invalid tar.fileobj={}".format(tar.fileobj))

- while True:
- s: str = f.read(BLOCK_SIZE)
- if len(s) > 0:
- # If the block read in is non-empty, write it to fileobj and update the hash
- fileobj.write(s)
- hash_md5.update(s)
- if len(s) < BLOCK_SIZE:
- # If the block read in is smaller than BLOCK_SIZE,
- # then we have reached the end of the file.
- # blocks = how many blocks of tarfile.BLOCKSIZE fit in tarinfo.size
- # remainder = how much more content is required to reach tarinfo.size
- blocks: int
- remainder: int
- blocks, remainder = divmod(tarinfo.size, tarfile.BLOCKSIZE)
- if remainder > 0:
- null_bytes: bytes = tarfile.NUL
- # Write null_bytes to get the last block to tarfile.BLOCKSIZE
- fileobj.write(null_bytes * (tarfile.BLOCKSIZE - remainder))
- blocks += 1
- # Increase the offset by the amount already saved to the tar
- tar.offset += blocks * tarfile.BLOCKSIZE
Comment on lines -304 to -317
Collaborator Author:

It appears we don't actually need to be tracking/updating tar.offset here.

Collaborator Author (@forsyth2, Oct 2, 2025):

After following addfile back through the git blame functionality, it appears to be from the very beginning of zstash: https://github.com/E3SM-Project/zstash/pull/2/files#diff-51246e53255db77c9edad496f074aa1bdbf8dbdc11f89a02040115c9ab4fa7f0 has the following.

# Add file to tar archive while computing its hash
# Return file offset (in tar archive), size and md5 hash
def addfile(tar, file):
    offset = tar.offset
    tarinfo = tar.gettarinfo(file)
    tar.addfile(tarinfo)
    if tarinfo.isfile():
        f = open(file, "rb")
        hash_md5 = hashlib.md5()
        while True:
            s = f.read(BLOCK_SIZE)
            if len(s) > 0:
                tar.fileobj.write(s)
                hash_md5.update(s)
            if len(s) < BLOCK_SIZE:
                blocks, remainder = divmod(tarinfo.size, tarfile.BLOCKSIZE)
                if remainder > 0:
                    tar.fileobj.write(tarfile.NUL *
                                      (tarfile.BLOCKSIZE - remainder))
                    blocks += 1
                tar.offset += blocks * tarfile.BLOCKSIZE
                break
        f.close()
        md5 = hash_md5.hexdigest()
    else:
        md5 = None
    size = tarinfo.size
    mtime = datetime.utcfromtimestamp(tarinfo.mtime)
    return offset, size, mtime, md5

So, I just want to make sure the new tar.addfile takes care of the extra stuff in this function.

Contributor:

My reading of the documentation is that yes, the tar file is updated already with the tar.add_file() call when a fileobj is passed:
https://docs.python.org/3/library/tarfile.html

The advantage of the original implementation was probably that each file got opened once only and was added to the archive at the same time as computing its md5 hash. The new code reopens the file to create the md5 hash but I think that's fine (and seemingly unavoidable with the new API).

Collaborator:

I'm sorry, but I cannot approve this change. The ability to stream data to tar files and compute hashes at the same time was a key functionality of zstash. Computation of md5 hashes is expensive, removing this functionality will have a significant performance impact.

Collaborator Author:

Claude's feedback:

If the old implementation breaks on Python 3.13 (likely due to changes in how tarfile handles internal file objects), then you're facing a compatibility vs. performance trade-off.

Options to Consider

1. Accept the performance hit (pragmatic choice)

  • The new implementation works on Python 3.13
  • Performance regression may be acceptable depending on:
    • Typical file sizes in your use case
    • Available I/O bandwidth (SSDs make double-reads less painful)
    • Whether this is a bottleneck in practice

2. Implement a hybrid solution (best of both worlds)

def add_file(tar, file_name, follow_symlinks):
    offset = tar.offset
    tarinfo = tar.gettarinfo(file_name)

    if tarinfo.islnk():
        tarinfo.size = os.path.getsize(file_name)

    md5 = None

    # For files/hardlinks, stream data while computing hash
    if (tarinfo.isfile() or tarinfo.islnk()) and tarinfo.size > 0:
        hash_md5 = hashlib.md5()

        # Create a wrapper that computes hash while data passes through
        class HashingFileWrapper:
            def __init__(self, fileobj, hasher):
                self.fileobj = fileobj
                self.hasher = hasher

            def read(self, size=-1):
                data = self.fileobj.read(size)
                if data:
                    self.hasher.update(data)
                return data

        with open(file_name, "rb") as f:
            wrapper = HashingFileWrapper(f, hash_md5)
            tar.addfile(tarinfo, wrapper)

        md5 = hash_md5.hexdigest()
    else:
        tar.addfile(tarinfo)

    size = tarinfo.size
    mtime = datetime.utcfromtimestamp(tarinfo.mtime)
    return offset, size, mtime, md5

This wrapper approach:

  • ✅ Uses the proper Python 3.13-compatible API
  • ✅ Maintains single-pass streaming
  • ✅ Computes hash during the tar write operation
  • ✅ Clean, maintainable code

3. Version-specific implementations

Use the old code for Python <3.13 and new code for 3.13+, but this adds maintenance burden.

My Recommendation

Try the hybrid solution first (Option 2). It should give you Python 3.13 compatibility while preserving the streaming performance that Reviewer 2 correctly identified as important. If tar.addfile() properly uses the file-like object's read() method, this should work perfectly.

If the wrapper approach doesn't work for some reason, then you'll need to accept the performance regression as the cost of Python 3.13 support, but at least you'll have tried to preserve the optimization.

Contributor:

@golaz, I will veto any solution that does not allow us to advance to the latest stable python (python 3.13), since this is something I have been fighting to have happen in Unified for years and it has finally become possible because of dropping CDAT.

It seems like zstash has not been getting the TLC it has needed over the years to remain compatible with modern python and that's pretty disappointing. I think @forsyth2 is spread too thin and no one else has stepped up. Goodness knows I'm spread too thin.

Hopefully, we can get things there for Unified, since testing should start on Monday. Hopefully, we can also find a longer term solution for zstash to have a "champion".

Collaborator (@chengzhuzhang, Oct 2, 2025):

Sorry I haven’t followed this thread too closely. Which Python version is causing the incompatibility with zstash? I think it’s a solid achievement that we could update from 3.10 (now end-of-life) to 3.11 (or higher, if our tools are ready) for Unified with all the work we put in to migrate from CDAT. In this case, it seems zstash isn’t ready yet, and we are time-constrained to update it with thorough testing.

Can we target Python 3.11 or 3.12 (depending on zstash readiness) for this upcoming unified? Both are still widely used, and 3.11 will continue receiving security updates until October 2027. Again, for a tool like E3SM-unified, I believe we should continue prioritizing maximum compatibility.

Contributor:

AAAAAAaargh!!

Okay, I think it would be important to find out if zstash works with python 3.12 without requiring these changes. If we only support python 3.11 in zstash and Unified, that's not good. That's the state we just got out of with dropping CDAT.

I also never want this to happen again with a future Unified. All our software must, must, must support the 3 latest stable versions of python at all times. This is just necessary for our ecosystem to function well. Otherwise, the neglected tools really drag down the tools that are trying to take advantage of more modern capabilities.

Collaborator Author:

I could get behind the concept of option 2.
> Hopefully, we can get things there for Unified, since testing should start on Monday.

I've implemented this in the latest commit: cc37237. It required an additional change to handle empty files. It now passes the GitHub Actions for 3.11, 3.12, and 3.13.

I believe this means this PR should be good to merge. @xylar @golaz Can you please re-confirm based on this latest commit?
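A sketch of the shape of that fix, for readers following along — illustration only, since the actual change lives in commit cc37237 and is not shown in this diff; the empty-file guard here is an assumed shape, combined with the option-2 HashingFileWrapper from above:

import hashlib
import os
import tarfile
from datetime import datetime


class HashingFileWrapper:
    # Pass-through reader: every block tarfile reads also updates the hash.
    def __init__(self, fileobj, hasher):
        self.fileobj = fileobj
        self.hasher = hasher

    def read(self, size=-1):
        data = self.fileobj.read(size)
        if data:
            self.hasher.update(data)
        return data


def add_file(tar: tarfile.TarFile, file_name: str):
    offset = tar.offset
    tarinfo = tar.gettarinfo(file_name)
    if tarinfo.islnk():
        # Hardlinks report size 0; record the real size of the target.
        tarinfo.size = os.path.getsize(file_name)

    md5 = None
    if tarinfo.isfile() or tarinfo.islnk():
        hash_md5 = hashlib.md5()
        if tarinfo.size > 0:
            # Single pass: the archive write and the md5 share one read.
            with open(file_name, "rb") as f:
                tar.addfile(tarinfo, HashingFileWrapper(f, hash_md5))
        else:
            # Empty files have no data blocks; write the header only.
            tar.addfile(tarinfo)
        md5 = hash_md5.hexdigest()
    else:
        tar.addfile(tarinfo)

    mtime = datetime.utcfromtimestamp(tarinfo.mtime)
    return offset, tarinfo.size, mtime, md5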

> zstash has not been getting the TLC it has needed over the years to remain compatible with modern python

> Hopefully, we can also find a longer term solution for zstash to have a "champion".

Yes, I agree with all this. After the Unified testing period, perhaps it would be useful to have a team discussion about this. The problem is certainly not a lack of ideas re: code maintainability (e.g., removing all those global variables or improving the testing).

> Which Python version is causing the incompatibility with zstash?

3.13

> find out if zstash works with python 3.12 and without requiring these changes.

It does. I only ran into errors when I started matrix-testing on 3.13.

> All our software must, must, must support the 3 latest stable versions of python at all times.

This is good to keep in mind, thanks.

Collaborator:

> Can we target Python 3.11 or 3.12 (depending on zstash readiness) for this upcoming unified? Both are still widely used, and 3.11 will continue receiving security updates until October 2027. Again, for a tool like E3SM-unified, I believe we should continue prioritizing maximum compatibility.

> I also never want this to happen again with a future Unified. All our software must, must, must support the 3 latest stable versions of python at all times. This is just necessary for our ecosystem to function well. Otherwise, the neglected tools really drag down the tools that are trying to take advantage of more modern capabilities.

I agree with @chengzhuzhang that maximizing compatibility is essential, and with @xylar that we must support the latest stable Python versions (currently 3.11, 3.12, 3.13). Falling behind with Python support (mainly due to CDAT) means we’re not truly maximizing compatibility and leaves us playing catch-up with the broader ecosystem.

Our sub-dependencies (e.g., numpy) already expect newer Python versions, which forces @xylar into hacks and workarounds to get things working. Major scientific packages generally follow this Python support spec: https://scientific-python.org/specs/spec-0000/. We might consider adopting this more formally as a guideline to ensure our packages are always ready moving forward.

data = f.read(BLOCK_SIZE)
if len(data) > 0:
    hash_md5.update(data)
if len(data) < BLOCK_SIZE:
    break
f.close()
md5 = hash_md5.hexdigest()
f.close()
size: int = tarinfo.size
mtime: datetime = datetime.utcfromtimestamp(tarinfo.mtime)
return offset, size, mtime, md5