Update Python support #384
Changes from 11 commits
**`.github/workflows/workflow.yml`**

```diff
@@ -19,21 +19,24 @@ jobs:
       - name: Checkout Code Repository
         uses: actions/checkout@v3

-      - name: Set up Python 3.9
+      - name: Set up Python 3.13
         uses: actions/setup-python@v4
         with:
-          python-version: 3.9
+          python-version: "3.13"

       # Run all pre-commit hooks on all the files.
       # Getting only staged files can be tricky in case a new PR is opened
       # since the action is run on a branch in detached head state.
       # This is the equivalent of running "pre-commit run --all-files" locally.
       # If you commit with the `--no-verify` flag, this check may fail.
       - name: Install and Run Pre-commit
-        uses: pre-commit/[email protected]
+        uses: pre-commit/[email protected]

   build:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.11", "3.12", "3.13"]
     defaults:
       run:
         shell: bash -l {0}
@@ -44,11 +47,11 @@ jobs:
       - name: Cache Conda
         uses: actions/cache@v3
         env:
-          CACHE_NUMBER: 0
+          CACHE_NUMBER: 1  # Increment this to invalidate cache
         with:
           path: ~/conda_pkgs_dir
           key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
-            hashFiles('conda/dev.yml') }}
+            hashFiles('conda/dev.yml') }}-python${{ matrix.python-version }}

       - name: Build Conda Environment
         uses: conda-incubator/setup-miniconda@v3
@@ -57,13 +60,27 @@ jobs:
           miniforge-variant: Miniforge3
           miniforge-version: latest
           environment-file: conda/dev.yml
-          channel-priority: strict
+          channel-priority: flexible  # Changed from strict to flexible
           auto-update-conda: true
+          python-version: ${{ matrix.python-version }}
+          channels: conda-forge
           use-only-tar-bz2: true

+      - name: Verify Environment and Fix Dependencies
+        run: |
+          conda info
+          conda list
+          # Ensure we have the right Python version
+          python --version
+          # Fix pip issues for Python 3.12+
+          if [[ "${{ matrix.python-version }}" == "3.12" ]] || [[ "${{ matrix.python-version }}" == "3.13" ]]; then
+            python -m ensurepip --upgrade || true
+            python -m pip install --upgrade --force-reinstall pip setuptools wheel
+          fi
+
       - name: Install `zstash` Package
         run: |
           python -m pip install --upgrade pip
-          pip install .
+          python -m pip install .

       - name: Run Tests
         run: |
@@ -77,7 +94,7 @@ jobs:
     defaults:
       run:
         shell: bash -l {0}
-    timeout-minutes: 5
+    timeout-minutes: 10  # Increased timeout for docs
     steps:
       - uses: actions/checkout@v3
         with:
@@ -87,11 +104,11 @@ jobs:
       - name: Cache Conda
         uses: actions/cache@v3
         env:
-          CACHE_NUMBER: 0
+          CACHE_NUMBER: 1  # Match the build job cache number
         with:
           path: ~/conda_pkgs_dir
           key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
-            hashFiles('conda/dev.yml') }}
+            hashFiles('conda/dev.yml') }}-docs

       - name: Build Conda Environment
         uses: conda-incubator/setup-miniconda@v3
@@ -100,8 +117,9 @@ jobs:
           miniforge-variant: Miniforge3
           miniforge-version: latest
           environment-file: conda/dev.yml
-          channel-priority: strict
+          channel-priority: flexible  # Changed from strict to flexible
           auto-update-conda: true
+          python-version: "3.11"  # Use stable Python version for docs

       # sphinx-multiversion allows for version docs.
       - name: Build Sphinx Docs
```
**`conda/dev.yml`**

```diff
@@ -1,31 +1,30 @@
 name: zstash_dev
 channels:
   - conda-forge
   - defaults
 dependencies:
   # Base
   # =================
-  - pip=22.2.2
-  - python=3.9.13
-  - six=1.16.0
-  - globus-sdk=3.15.0
+  - pip
+  - python >=3.11,<3.14
+  - sqlite
```
**Collaborator (Author):** @tomvothecoder How do we determine the constraints (e.g., `>=3.11,<3.14` here)?

**Collaborator:** This is based on the conda-forge recipe and compatibility with other packages. If things break with new package versions, that suggests updating the constraints accordingly.
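As an aside (not part of the PR), constraint strings like `>=3.11,<3.14` can be sanity-checked against candidate interpreter versions with the `packaging` library:

```python
# Quick check of which Python versions satisfy the constraint above.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet(">=3.11,<3.14")
for version in ("3.9", "3.11", "3.13", "3.14"):
    print(version, version in spec)  # each string is parsed as a Version
# 3.9 False, 3.11 True, 3.13 True, 3.14 False
```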
```diff
+  - six >=1.16.0
+  - globus-sdk >=3.15.0
   # Developer Tools
   # =================
   # If versions are updated, also update 'rev' in `.pre-commit.config.yaml`
-  - black=24.10.0
-  - flake8=7.1.1
-  - flake8-isort=6.1.1
-  - mypy=1.11.2
-  - pre-commit=4.0.1
-  - tbump=6.9.0
+  - black >=24.0.0
+  - flake8 >=7.0.0
+  - flake8-isort >=6.0.0
+  - mypy >=1.11.0
+  - pre-commit >=4.0.0
+  - tbump >=6.9.0
   # Documentation
   # =================
   # If versions are updated, also update in `.github/workflows/workflow.yml`
-  - jinja2<3.1
-  - sphinx=5.2.3
-  - sphinx-multiversion=0.2.4
-  - sphinx_rtd_theme=1.0.0
+  - jinja2 <3.1
+  - sphinx >=5.2.0
+  - sphinx-multiversion >=0.2.4
+  - sphinx_rtd_theme >=1.0.0
   # Need to pin docutils because 0.17 has a bug with unordered lists
   # https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
-  - docutils=0.16
-prefix: /opt/miniconda3/envs/zstash_dev
+  - docutils >=0.16,<0.17
```
**conda recipe**

```diff
@@ -16,14 +16,14 @@ build:

 requirements:
   host:
-    - python >=3.9
+    - python >=3.11,<3.14
     - pip

   run:
-    - python >=3.9
-    - fair-research-login >=0.2.6,<0.3.0
-    - globus-sdk >=3.0.0,<4.0.0
-    - six
+    - python >=3.11,<3.14
+    - globus-sdk=3.15.0
+    - six=1.16.0
+    - sqlite

 test:
   imports:
```
**`setup.cfg`**

```diff
@@ -46,7 +46,7 @@ exclude =
     venv

 [mypy]
-python_version = 3.9
+python_version = 3.13
```
**Collaborator (Author):** How do we choose the specific version for this?

**Collaborator:** Latest stable version of Python.
```diff
 check_untyped_defs = True
 ignore_missing_imports = True
 warn_unused_ignores = True
```
**`setup.py`**

```diff
@@ -7,6 +7,6 @@
     author_email="[email protected], [email protected], [email protected]",
     description="Long term HPSS archiving software for E3SM",
     packages=find_packages(include=["zstash", "zstash.*"]),
-    python_requires=">=3.9",
+    python_requires=">=3.11,<3.14",
     entry_points={"console_scripts": ["zstash=zstash.main:main"]},
 )
```
**`zstash/hpss_utils.py`**

```diff
@@ -4,6 +4,7 @@
 import os
 import os.path
 import sqlite3
+import sys
 import tarfile
 import traceback
 from datetime import datetime
```
```diff
@@ -281,43 +282,67 @@ def add_file(
     # Change the size of any hardlinks from 0 to the size of the actual file
     if tarinfo.islnk():
         tarinfo.size = os.path.getsize(file_name)
-    # Add the file to the tar
-    tar.addfile(tarinfo)
+
+    # Add the file to the tar - ONLY change this line for Python 3.13+
+    if (
+        sys.version_info >= (3, 13)
+        and (tarinfo.isfile() or tarinfo.islnk())
+        and tarinfo.size > 0
+    ):
+        # Python 3.13+ requires fileobj for non-empty regular files
+        with open(file_name, "rb") as fileobj:
+            tar.addfile(tarinfo, fileobj)
+    else:
+        # Original code - unchanged
+        tar.addfile(tarinfo)
```
Comment on lines 295 to 313

**Collaborator (Author):** @chengzhuzhang @xylar -- what Python versions do we need to support for the next E3SM Unified? Is anything above 3.10 ok? From Claude: an actual functionality change is needed, not just changes in DevOps. The functionality change in this file is the version-dependent `tar.addfile` call. So, if we don't need to support Python 3.13 right now, I'd be inclined to leave this actual functionality change to after this upcoming Unified release. Alternatively, we could devote some time during the Unified testing period to testing it.

**Collaborator (Author):** @andrewdnolan ^ This is what I mentioned in the EZ meeting re: Python 3.13 support.

**Collaborator:** Unit tests would help verify this here. If the tests pass between versions, even with logical changes, then you're good to go. Otherwise, a quick sanity test is to write a Python script using both tar functions and comparing the results (a sketch of such a script follows). If both tar functions produce the same results, you can remove the old tar functions entirely (if the new ones support Python 3.11-3.12 too). There would be no need to have both around in this case.
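A rough sketch of such a sanity script (an illustration, not from the PR; `input.txt` is a placeholder test file). It writes the same input once with the old manual-streaming approach and once by letting `addfile()` copy the payload, then compares the archives byte for byte. The manual variant only runs on Python <= 3.12, where a header-only `addfile()` call is still allowed for non-empty files:

```python
# Sketch of a sanity check comparing the two tar-writing approaches.
import filecmp
import tarfile


def write_manual(src: str, dst: str) -> None:
    # Old zstash style: addfile() writes only the header, then the
    # payload (plus NUL padding to a full block) is streamed by hand.
    with tarfile.open(dst, "w") as tar:
        tarinfo = tar.gettarinfo(src)
        tar.addfile(tarinfo)  # rejected on Python 3.13+ for non-empty files
        with open(src, "rb") as f:
            tar.fileobj.write(f.read())
        blocks, remainder = divmod(tarinfo.size, tarfile.BLOCKSIZE)
        if remainder > 0:
            tar.fileobj.write(tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
            blocks += 1
        tar.offset += blocks * tarfile.BLOCKSIZE


def write_fileobj(src: str, dst: str) -> None:
    # New style: let addfile() copy the payload itself.
    with tarfile.open(dst, "w") as tar:
        tarinfo = tar.gettarinfo(src)
        with open(src, "rb") as f:
            tar.addfile(tarinfo, f)


write_manual("input.txt", "a.tar")   # hypothetical input file
write_fileobj("input.txt", "b.tar")
print(filecmp.cmp("a.tar", "b.tar", shallow=False))  # expect True
```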
**Contributor:** @forsyth2, we need to support the range of Python versions stated here:

**Contributor:** No, I do not think there is any way that could happen. It is very important that zstash supports all the Python versions that Unified supports. We cannot have any of our core software opting out of some versions and into others.

**Contributor:** @forsyth2, looking at the docs, https://docs.python.org/3/library/tarfile.html, it seems that `addfile()` accepts a `fileobj` on every Python version we support, so the version check should not be needed. Suggested change: pass the file object unconditionally.

**Contributor:** Similarly, presumably different logic should not be needed below for Python >=3.13 and Python <3.13.
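For reference, a minimal in-memory repro of the behavior difference under discussion (a sketch, not from the PR): on Python 3.13+, a header-only `addfile()` call raises `ValueError` for a non-empty regular file, while the two-argument form works on every supported version.

```python
# Minimal demo of the Python 3.13 tarfile change discussed above.
import io
import sys
import tarfile

data = b"hello zstash"
tarinfo = tarfile.TarInfo(name="demo.txt")
tarinfo.size = len(data)

buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tar:
    if sys.version_info >= (3, 13):
        try:
            tar.addfile(tarinfo)  # header-only call: rejected on 3.13+
        except ValueError as err:
            print("3.13+ behavior:", err)
    tar.addfile(tarinfo, io.BytesIO(data))  # works on all versions
```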
**Collaborator:** Also for future reference, Python and Python-based packages typically implement deprecation warnings ahead of breaking changes. I am usually able to catch these early by scanning the unit test suite and fixing them subsequently before it's too late. If unit tests are not implemented, then you may notice warnings in the logs during production usage.

**Collaborator (Author):** @tomvothecoder @xylar Thanks for the suggestions. I added a new commit: 884e994. Please review. That is passing on all the GitHub Actions -- tests for 3.11, 3.12, and 3.13.

It's very hard to do true "unit" testing on `zstash`. So ideally there would be both more testing and more robust testing, yes, but implementing that is infeasible on the eve of release.

The more I think about this, this is a more general concern. Perhaps something to add in the suggested/eventual unit test update.

I never noticed any such warnings; I have seen warnings in …

**Collaborator:** I'm not familiar with the functionalities of `zstash`. Just something to think about for future test suite enhancements.
```diff
     md5: Optional[str] = None
     # Only add files or hardlinks.
     # (So don't add directories or softlinks.)
     if tarinfo.isfile() or tarinfo.islnk():
         f: _io.TextIOWrapper = open(file_name, "rb")
         hash_md5: _hashlib.HASH = hashlib.md5()
-        if tar.fileobj is not None:
-            fileobj: _io.BufferedWriter = tar.fileobj
+        # For Python 3.13+, addfile() already wrote the content, so we only calculate MD5
+        if sys.version_info >= (3, 13) and tarinfo.size > 0:
+            # Just calculate MD5, don't write to tar (already done by addfile)
+            while True:
+                data: bytes = f.read(BLOCK_SIZE)
+                if len(data) > 0:
+                    hash_md5.update(data)
+                if len(data) < BLOCK_SIZE:
+                    break
+            md5 = hash_md5.hexdigest()
         else:
-            raise TypeError("Invalid tar.fileobj={}".format(tar.fileobj))
-        while True:
-            s: str = f.read(BLOCK_SIZE)
-            if len(s) > 0:
-                # If the block read in is non-empty, write it to fileobj and update the hash
-                fileobj.write(s)
-                hash_md5.update(s)
-            if len(s) < BLOCK_SIZE:
-                # If the block read in is smaller than BLOCK_SIZE,
-                # then we have reached the end of the file.
-                # blocks = how many blocks of tarfile.BLOCKSIZE fit in tarinfo.size
-                # remainder = how much more content is required to reach tarinfo.size
-                blocks: int
-                remainder: int
-                blocks, remainder = divmod(tarinfo.size, tarfile.BLOCKSIZE)
-                if remainder > 0:
-                    null_bytes: bytes = tarfile.NUL
-                    # Write null_bytes to get the last block to tarfile.BLOCKSIZE
-                    fileobj.write(null_bytes * (tarfile.BLOCKSIZE - remainder))
-                    blocks += 1
-                # Increase the offset by the amount already saved to the tar
-                tar.offset += blocks * tarfile.BLOCKSIZE
```
Comment on lines -304 to -317

**Collaborator (Author):** It appears we don't actually need to be tracking/updating `tar.offset` ourselves anymore.

**Collaborator (Author):** After following this code back through its history, the original implementation was:

```python
# Add file to tar archive while computing its hash
# Return file offset (in tar archive), size and md5 hash
def addfile(tar, file):
    offset = tar.offset
    tarinfo = tar.gettarinfo(file)
    tar.addfile(tarinfo)
    if tarinfo.isfile():
        f = open(file, "rb")
        hash_md5 = hashlib.md5()
        while True:
            s = f.read(BLOCK_SIZE)
            if len(s) > 0:
                tar.fileobj.write(s)
                hash_md5.update(s)
            if len(s) < BLOCK_SIZE:
                blocks, remainder = divmod(tarinfo.size, tarfile.BLOCKSIZE)
                if remainder > 0:
                    tar.fileobj.write(tarfile.NUL *
                                      (tarfile.BLOCKSIZE - remainder))
                    blocks += 1
                tar.offset += blocks * tarfile.BLOCKSIZE
                break
        f.close()
        md5 = hash_md5.hexdigest()
    else:
        md5 = None
    size = tarinfo.size
    mtime = datetime.utcfromtimestamp(tarinfo.mtime)
    return offset, size, mtime, md5
```

So, I just want to make sure the new `addfile(tarinfo, fileobj)` call really does write the file contents into the tar on its own.
**Contributor:** My reading of the documentation is that yes, the tar file is updated already by the `addfile()` call. The advantage of the original implementation was probably that each file got opened only once and was added to the archive at the same time as computing its md5 hash. The new code reopens the file to create the md5 hash, but I think that's fine (and seemingly unavoidable with the new API).

**Collaborator:** I'm sorry, but I cannot approve this change. The ability to stream data to tar files and compute hashes at the same time was a key functionality of zstash. Computation of md5 hashes is expensive; removing this functionality will have a significant performance impact.
**Collaborator (Author):** Claude's feedback:

If the old implementation breaks on Python 3.13 (likely due to changes in how `tarfile.addfile()` handles file objects), there are a few options.

**Options to Consider**

**1. Accept the performance hit (pragmatic choice)**

**2. Implement a hybrid solution (best of both worlds)**

```python
def add_file(tar, file_name, follow_symlinks):
    offset = tar.offset
    tarinfo = tar.gettarinfo(file_name)
    if tarinfo.islnk():
        tarinfo.size = os.path.getsize(file_name)
    md5 = None
    # For files/hardlinks, stream data while computing hash
    if (tarinfo.isfile() or tarinfo.islnk()) and tarinfo.size > 0:
        hash_md5 = hashlib.md5()

        # Create a wrapper that computes hash while data passes through
        class HashingFileWrapper:
            def __init__(self, fileobj, hasher):
                self.fileobj = fileobj
                self.hasher = hasher

            def read(self, size=-1):
                data = self.fileobj.read(size)
                if data:
                    self.hasher.update(data)
                return data

        with open(file_name, "rb") as f:
            wrapper = HashingFileWrapper(f, hash_md5)
            tar.addfile(tarinfo, wrapper)
        md5 = hash_md5.hexdigest()
    else:
        tar.addfile(tarinfo)
    size = tarinfo.size
    mtime = datetime.utcfromtimestamp(tarinfo.mtime)
    return offset, size, mtime, md5
```

This wrapper approach streams each file through once, computing the hash as `addfile()` reads the data.

**3. Version-specific implementations**

Use the old code for Python <3.13 and new code for 3.13+, but this adds maintenance burden.

**My Recommendation**

Try the hybrid solution first (Option 2). It should give you Python 3.13 compatibility while preserving the streaming performance that Reviewer 2 correctly identified as important. If the wrapper approach doesn't work for some reason, then you'll need to accept the performance regression as the cost of Python 3.13 support, but at least you'll have tried to preserve the optimization.
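A small self-contained check of the wrapper idea (a sketch with in-memory buffers, not from the PR): the md5 accumulated while `addfile()` pulls data through the wrapper should match a direct md5 of the payload, confirming the hash is computed in a single streaming pass.

```python
# Verify the HashingFileWrapper idea with in-memory data.
import hashlib
import io
import tarfile


class HashingFileWrapper:
    """Pass-through reader that updates a hash with every chunk read."""

    def __init__(self, fileobj, hasher):
        self.fileobj = fileobj
        self.hasher = hasher

    def read(self, size=-1):
        data = self.fileobj.read(size)
        if data:
            self.hasher.update(data)
        return data


payload = b"x" * 100_000
tarinfo = tarfile.TarInfo(name="payload.bin")
tarinfo.size = len(payload)

hasher = hashlib.md5()
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tar:
    # addfile() pulls tarinfo.size bytes through the wrapper in chunks
    tar.addfile(tarinfo, HashingFileWrapper(io.BytesIO(payload), hasher))

assert hasher.hexdigest() == hashlib.md5(payload).hexdigest()
print("single-pass md5:", hasher.hexdigest())
```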
**Contributor:** @golaz, I will veto any solution that does not allow us to advance to the latest stable Python (Python 3.13), since this is something I have been fighting to have happen in Unified for years, and it has finally become possible because of dropping CDAT. It seems like zstash has not been getting the TLC it has needed over the years to remain compatible with modern Python, and that's pretty disappointing. I think @forsyth2 is spread too thin and no one else has stepped up. Goodness knows I'm spread too thin. Hopefully, we can get things there for Unified, since testing should start on Monday. Hopefully, we can also find a longer term solution for zstash to have a "champion".

**Collaborator:** Sorry I haven't followed this thread too closely. Which Python version is causing the incompatibility with zstash? I think it's a solid achievement that we could update from 3.10 (now end-of-life) to 3.11 (or higher, if our tools are ready) for Unified with all the work we put in to migrate from CDAT. In this case, it seems zstash isn't ready yet, and we are time-constrained to update it with thorough testing. Can we target Python 3.11 or 3.12 (depending on zstash readiness) for this upcoming Unified? Both are still widely used, and 3.11 will continue receiving security updates until October 2027. Again, for a tool like E3SM-Unified, I believe we should continue prioritizing maximum compatibility.

**Contributor:** AAAAAAaargh!! Okay, I think it would be important to find out if zstash works with Python 3.12 without requiring these changes. If we only support Python 3.11 in zstash and Unified, that's not good. That's the state we just got out of with dropping CDAT. I also never want this to happen again with a future Unified. All our software must, must, must support the 3 latest stable versions of Python at all times. This is just necessary for our ecosystem to function well. Otherwise, the neglected tools really drag down the tools that are trying to take advantage of more modern capabilities.
**Collaborator (Author):**

> Try the hybrid solution first (Option 2).

I've implemented this in the latest commit: cc37237. It required an additional change to handle empty files (see the sketch after this comment). It now passes the GitHub Actions for 3.11, 3.12, and 3.13. I believe this means this PR should be good to merge. @xylar @golaz Can you please re-confirm based on this latest commit?

Yes, I agree with all this. After the Unified testing period, perhaps it would be useful to have a team discussion about this. The problem is certainly not a lack of ideas re: code maintainability (e.g., removing all those global variables or improving the testing).

> Which Python version is causing the incompatibility with zstash?

3.13

> find out if zstash works with Python 3.12

It does. I only ran into errors when I started matrix-testing on 3.13.

This is good to keep in mind, thanks.
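For completeness, a minimal sketch (not from the PR) of the empty-file edge case mentioned above: a zero-size entry can still be added without a `fileobj`, even on Python 3.13, so only non-empty files need the wrapper path.

```python
# Empty files need no fileobj, even on Python 3.13+.
import io
import tarfile

buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tar:
    empty = tarfile.TarInfo(name="empty.txt")  # size defaults to 0
    tar.addfile(empty)  # allowed on all versions: there is no payload

buf.seek(0)
with tarfile.open(fileobj=buf, mode="r") as tar:
    print(tar.getmember("empty.txt").size)  # -> 0
```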
**Collaborator:** I agree with @chengzhuzhang that maximizing compatibility is essential, and with @xylar that we must support the latest stable Python versions (currently 3.11, 3.12, 3.13). Falling behind with Python support (mainly due to CDAT) means we're not truly maximizing compatibility and leaves us playing catch-up with the broader ecosystem. Our sub-dependencies (e.g., …
```diff
-                break
+        else:
+            # Original logic for Python < 3.13 or empty files
+            if tar.fileobj is not None:
+                tar_fileobj: _io.BufferedWriter = tar.fileobj
+            else:
+                raise TypeError("Invalid tar.fileobj={}".format(tar.fileobj))
+            while True:
+                s: bytes = f.read(BLOCK_SIZE)
+                if len(s) > 0:
+                    # If the block read in is non-empty, write it to fileobj and update the hash
+                    tar_fileobj.write(s)
+                    hash_md5.update(s)
+                if len(s) < BLOCK_SIZE:
+                    # If the block read in is smaller than BLOCK_SIZE,
+                    # then we have reached the end of the file.
+                    # blocks = how many blocks of tarfile.BLOCKSIZE fit in tarinfo.size
+                    # remainder = how much more content is required to reach tarinfo.size
+                    blocks: int
+                    remainder: int
+                    blocks, remainder = divmod(tarinfo.size, tarfile.BLOCKSIZE)
+                    if remainder > 0:
+                        null_bytes: bytes = tarfile.NUL
+                        # Write null_bytes to get the last block to tarfile.BLOCKSIZE
+                        tar_fileobj.write(null_bytes * (tarfile.BLOCKSIZE - remainder))
+                        blocks += 1
+                    # Increase the offset by the amount already saved to the tar
+                    tar.offset += blocks * tarfile.BLOCKSIZE
+                    break
+            md5 = hash_md5.hexdigest()
         f.close()
-        md5 = hash_md5.hexdigest()
         size: int = tarinfo.size
         mtime: datetime = datetime.utcfromtimestamp(tarinfo.mtime)
         return offset, size, mtime, md5
```