Skip to content

Commit 48c2b88

Browse files
authored
Functions to write cutouts to a ZIP (#167)
* function to write cutouts to zip * style fixes, changelog * Test fix Additional test coverage Fix error message * Adding types * Applying copilot feedback * Refactor _make_cutout_filename into cutout.py, fix floating point precision Doctest fix * Comments, reworking inheritance of method * More descriptive changelog Fix indentation on changelog Sync changelog * Update docstrings for new zip filename format
1 parent db9a1ea commit 48c2b88

File tree

12 files changed

+418
-48
lines changed

12 files changed

+418
-48
lines changed

CHANGES.rst

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,35 @@
11
Unreleased
22
----------
33

4-
- Added support in `ra_dec_crossmatch` for a cutout size of zero, enabling single-point matching to FFIs that contain
4+
- Added support in ``ra_dec_crossmatch`` for a cutout size of zero, enabling single-point matching to FFIs that contain
55
the specified coordinates. [#166]
6+
- Added ``write_as_zip`` method to ``ASDFCutout``, ``FITSCutout``, ``TessCubeCutout``, and ``TessFootprintCutout`` classes to facilitate
7+
writing multiple cutouts into a single ZIP archive. [#167]
68
- Added ``get_tess_sectors`` function to return TESS sector information for sectors whose footprints overlap with
79
the given sky coordinates and cutout size. [#168]
810

11+
Breaking Changes
12+
^^^^^^^^^^^^^^^^
13+
14+
- Cube cutout filenames now use a hyphen between dimensions (e.g., ``10-x-10`` instead of ``10x10``). They also include unit suffixes when
15+
users request sizes as an ``astropy.units.Quantity`` object (e.g., ``5arcmin-x-4arcmin`` or ``30arcsec-x-20arcsec``). RA/Dec formatting within
16+
filenames now uses 7 decimal places (``{:.7f}``) for consistency across classes. These changes may break code that parses filenames or relies on
17+
old glob patterns. [#167]
18+
19+
Migration:
20+
21+
- Update glob patterns from ``*_<ra>_<dec>_<ny>x<nx>_astrocut.fits`` to ``*_<ra>_<dec>_*-x-*_astrocut.fits``.
22+
- If parsing filenames, switch to flexible regex patterns:
23+
24+
- RA/Dec: ``_(?P<ra>[-+]?\d+(?:\.\d+)?)_(?P<dec>[-+]?\d+(?:\.\d+)?)_``
25+
- Dimensions (with optional units): ``(?P<ny>\d+(?:\.\d+)?)(?P<ny_unit>arcsec|arcmin|deg|pixel|pix)?-x-(?P<nx>\d+(?:\.\d+)?)(?P<nx_unit>arcsec|arcmin|deg|pixel|pix)?``
26+
- Prefer reading RA/Dec, dimensions, and scales from file metadata (FITS headers/WCS) instead of relying on filenames.
27+
- Example transition:
28+
29+
- Old: ``..._83.406310_-62.489771_64x64_astrocut.fits``
30+
- New (no unit - pixels assumed): ``..._83.4063100_-62.4897710_64-x-64_astrocut.fits``
31+
- New (with units): ``..._83.4063100_-62.4897710_5arcmin-x-4arcmin_astrocut.fits``
32+
933

1034
1.1.0 (2025-09-15)
1135
------------------

astrocut/asdf_cutout.py

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,33 @@ def cutout(self) -> Union[str, List[str], List[fits.HDUList]]:
419419
log.debug('Total time: %.2f sec', monotonic() - start_time)
420420

421421
return self.cutouts
422+
423+
def _make_cutout_filename(self, file: Union[str, Path], output_format: str) -> str:
    """
    Build the standardized output filename for one cutout.

    Overrides the superclass method so that the name also carries the
    '_lite' tag (when ``self._lite`` is truthy) and the requested output
    extension.

    Parameters
    ----------
    file : str | Path
        The input file name. Only its stem is used.
    output_format : str
        The extension appended to the filename, e.g. '.fits' or '.asdf'.

    Returns
    -------
    filename : str
        The generated filename for the cutout.
    """
    template = '{}_{:.7f}_{:.7f}_{}-x-{}{}_astrocut{}'

    stem = Path(file).stem
    ra = self._coordinates.ra.value
    dec = self._coordinates.dec.value

    # Sizes may stringify with a space (e.g. a value followed by a unit); remove it
    first_dim = str(self._cutout_size[0]).replace(' ', '')
    second_dim = str(self._cutout_size[1]).replace(' ', '')

    lite_tag = '_lite' if self._lite else ''

    return template.format(stem, ra, dec, first_dim, second_dim, lite_tag, output_format)
422449

423450
def _write_as_format(self, output_format: str, output_dir: Union[str, Path] = '.') -> List[str]:
424451
"""
@@ -440,14 +467,7 @@ def _write_as_format(self, output_format: str, output_dir: Union[str, Path] = '.
440467
cutout_paths = [] # List to store paths to cutout files
441468
for i, file in enumerate(self.cutouts_by_file):
442469
# Determine the output path
443-
filename = '{}_{:.7f}_{:.7f}_{}-x-{}{}_astrocut{}'.format(
444-
Path(file).stem,
445-
self._coordinates.ra.value,
446-
self._coordinates.dec.value,
447-
str(self._cutout_size[0]).replace(' ', ''),
448-
str(self._cutout_size[1]).replace(' ', ''),
449-
'_lite' if self._lite else '',
450-
output_format)
470+
filename = self._make_cutout_filename(file, output_format)
451471
cutout_path = Path(output_dir, filename)
452472

453473
if output_format == '.fits':
@@ -496,6 +516,42 @@ def write_as_asdf(self, output_dir: Union[str, Path] = '.') -> List[str]:
496516
A list of paths to the cutout ASDF files.
497517
"""
498518
return self._write_as_format(output_format='.asdf', output_dir=output_dir)
519+
520+
def write_as_zip(self, output_dir: Union[str, Path] = '.', filename: Union[str, Path, None] = None,
                 *, output_format: str = '.asdf') -> str:
    """
    Bundle the ASDF or FITS cutouts into a single zip archive.

    The archive is produced by ``_write_cutouts_to_zip``; this method only
    validates the requested format and supplies the (name, object) entries.

    Parameters
    ----------
    output_dir : str | Path, optional
        Directory where the zip will be created. Default '.'.
    filename : str | Path | None, optional
        Name (or path) of the output zip file, passed through unchanged to
        ``_write_cutouts_to_zip``.
    output_format : str, optional
        Either '.asdf' (default) or '.fits'. Case, surrounding whitespace, and a
        missing leading dot are tolerated. Selects which in-memory cutouts are zipped.

    Returns
    -------
    str
        Path to the created zip file.

    Raises
    ------
    InvalidInputError
        If ``output_format`` is neither '.asdf' nor '.fits' after normalization.
    """
    # Normalize inputs such as 'FITS' or ' .asdf ' to a lowercase dotted extension
    extension = output_format.lower().strip()
    if not extension.startswith('.'):
        extension = '.' + extension

    if extension not in ('.asdf', '.fits'):
        raise InvalidInputError("File format must be either '.asdf' or '.fits'")

    def build_entries():
        # Choose the in-memory representation that matches the requested extension
        payloads = self.fits_cutouts if extension == '.fits' else self.asdf_cutouts

        for index, source_file in enumerate(self.cutouts_by_file):
            yield self._make_cutout_filename(source_file, extension), payloads[index]

    return self._write_cutouts_to_zip(output_dir=output_dir, filename=filename, build_entries=build_entries)
499555

500556

501557
def get_center_pixel(gwcsobj: gwcs.wcs.WCS, ra: float, dec: float) -> Tuple[Tuple[int, int], WCS]:

astrocut/cutout.py

Lines changed: 109 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
1+
import warnings
2+
import io
3+
import zipfile
14
from abc import abstractmethod, ABC
25
from pathlib import Path
3-
from typing import List, Union, Tuple
4-
import warnings
6+
from typing import List, Union, Tuple, Iterable, Callable, Any, Optional
57

6-
from astropy import wcs
8+
import asdf
79
import astropy.units as u
10+
import numpy as np
11+
from astropy import wcs
12+
from astropy.io import fits
813
from s3path import S3Path
914
from astropy.coordinates import SkyCoord
10-
import numpy as np
1115

1216
from astrocut.exceptions import InputWarning, InvalidInputError, InvalidQueryError
1317

@@ -148,6 +152,107 @@ def cutout(self):
148152
This method is abstract and should be defined in subclasses.
149153
"""
150154
raise NotImplementedError('Subclasses must implement this method.')
155+
156+
def _make_cutout_filename(self, file_stem: str) -> str:
    """
    Compose a FITS cutout filename from a stem, the cutout coordinates, and the cutout size.

    Parameters
    ----------
    file_stem : str
        The stem of the input file to use at the start of the cutout filename.

    Returns
    -------
    filename : str
        The generated cutout filename. RA and Dec are written with seven
        decimal places and the two size components are joined by '-x-'.
    """
    template = '{}_{:.7f}_{:.7f}_{}-x-{}_astrocut.fits'

    ra = self._coordinates.ra.value
    dec = self._coordinates.dec.value

    # Sizes may stringify with a space (e.g. a value followed by a unit); remove it
    first_dim = str(self._cutout_size[0]).replace(' ', '')
    second_dim = str(self._cutout_size[1]).replace(' ', '')

    return template.format(file_stem, ra, dec, first_dim, second_dim)
176+
177+
def _obj_to_bytes(self, obj: Union[fits.HDUList, asdf.AsdfFile]) -> bytes:
    """
    Serialize a supported cutout object into bytes for a zip entry.

    Parameters
    ----------
    obj : `astropy.io.fits.HDUList` | `asdf.AsdfFile`
        The object to serialize.

    Returns
    -------
    bytes
        The serialized contents of the object.

    Raises
    ------
    TypeError
        If ``obj`` is neither an `HDUList` nor an `AsdfFile`.
    """
    is_hdulist = isinstance(obj, fits.HDUList)

    # Reject unsupported payloads before allocating a buffer
    if not is_hdulist and not isinstance(obj, asdf.AsdfFile):
        raise TypeError(
            'Unsupported payload type for zip entry. Expected `HDUList` or `AsdfFile`.'
        )

    stream = io.BytesIO()
    if is_hdulist:
        # FITS verification warnings are silenced while writing to the buffer
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', fits.verify.VerifyWarning)
            obj.writeto(stream, overwrite=True, checksum=True)
    else:
        obj.write_to(stream)

    return stream.getvalue()
207+
208+
def _write_cutouts_to_zip(
    self,
    output_dir: Union[str, Path] = ".",
    filename: Optional[Union[str, Path]] = None,
    build_entries: Optional[Callable[[], Iterable[Tuple[str, Any]]]] = None
) -> str:
    """
    Create a zip archive containing all cutout files without writing intermediate files.

    Parameters
    ----------
    output_dir : str | Path, optional
        Directory where the zip will be created. Default '.'
    filename : str | Path | None, optional
        Name (or path) of the output zip file. If not provided, defaults to
        'astrocut_{ra}_{dec}_{size}.zip'. If provided without a '.zip' suffix,
        '.zip' is appended to the given name (nothing is stripped from it).
        An absolute path is used as-is; a relative one is placed under ``output_dir``.
    build_entries : callable -> iterable of (arcname, payload)
        Function that yields entries lazily. Each payload is converted with
        ``self._obj_to_bytes`` and stored under ``arcname``.

    Returns
    -------
    str
        Path to the created zip file.

    Raises
    ------
    TypeError
        If ``build_entries`` is not callable.
    """
    # Fail before touching the filesystem so a bad call does not leave an empty archive behind
    if not callable(build_entries):
        raise TypeError('`build_entries` must be a callable that yields (arcname, payload) pairs.')

    # Resolve zip path
    if filename is None:
        filename = 'astrocut_{:.7f}_{:.7f}_{}-x-{}.zip'.format(
            self._coordinates.ra.value,
            self._coordinates.dec.value,
            str(self._cutout_size[0]).replace(' ', ''),
            str(self._cutout_size[1]).replace(' ', ''))
    filename = Path(filename)
    if filename.suffix.lower() != '.zip':
        # Append instead of `with_suffix`, which would replace everything after the
        # last dot (e.g. 'cutouts_v1.2' would become 'cutouts_v1.zip')
        filename = filename.with_name(filename.name + '.zip')

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    zip_path = filename if filename.is_absolute() else output_dir / filename
    # `filename` may carry its own directory components; make sure they exist too
    zip_path.parent.mkdir(parents=True, exist_ok=True)

    # Stream entries directly into the zip
    with zipfile.ZipFile(zip_path, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
        for arcname, payload in build_entries():
            zf.writestr(arcname, self._obj_to_bytes(payload))

    return zip_path.as_posix()
151256

152257
@staticmethod
153258
def parse_size_input(cutout_size, *, allow_zero: bool = False) -> np.ndarray:

astrocut/fits_cutout.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -405,12 +405,7 @@ def write_as_fits(self, output_dir: Union[str, Path] = '.', cutout_prefix: str =
405405
log.debug('Returning cutout as a single FITS file.')
406406

407407
cutout_fits = self.fits_cutouts[0]
408-
filename = '{}_{:.7f}_{:.7f}_{}-x-{}_astrocut.fits'.format(
409-
cutout_prefix,
410-
self._coordinates.ra.value,
411-
self._coordinates.dec.value,
412-
str(self._cutout_size[0]).replace(' ', ''),
413-
str(self._cutout_size[1]).replace(' ', ''))
408+
filename = self._make_cutout_filename(cutout_prefix)
414409
cutout_path = Path(output_dir, filename)
415410
with warnings.catch_warnings():
416411
warnings.simplefilter('ignore')
@@ -424,12 +419,7 @@ def write_as_fits(self, output_dir: Union[str, Path] = '.', cutout_prefix: str =
424419
cutout_paths = []
425420
for i, file in enumerate(self.hdu_cutouts_by_file):
426421
cutout_fits = self.fits_cutouts[i]
427-
filename = '{}_{:.7f}_{:.7f}_{}-x-{}_astrocut.fits'.format(
428-
Path(file).stem,
429-
self._coordinates.ra.value,
430-
self._coordinates.dec.value,
431-
str(self._cutout_size[0]).replace(' ', ''),
432-
str(self._cutout_size[1]).replace(' ', ''))
422+
filename = self._make_cutout_filename(Path(file).stem)
433423
cutout_path = Path(output_dir, filename)
434424
with warnings.catch_warnings():
435425
warnings.simplefilter('ignore')
@@ -439,6 +429,39 @@ def write_as_fits(self, output_dir: Union[str, Path] = '.', cutout_prefix: str =
439429

440430
log.debug('Cutout filepaths: {}'.format(cutout_paths))
441431
return cutout_paths
432+
433+
def write_as_zip(self, output_dir: Union[str, Path] = '.', filename: Union[str, Path, None] = None) -> str:
    """
    Bundle the FITS cutouts into a single zip archive.

    The archive is produced by ``_write_cutouts_to_zip``; this method only
    supplies the (name, object) entries.

    Parameters
    ----------
    output_dir : str | Path, optional
        Directory where the zip will be created. Default '.'.
    filename : str | Path | None, optional
        Name (or path) of the output zip file, passed through unchanged to
        ``_write_cutouts_to_zip``.

    Returns
    -------
    str
        Path to the created zip file.
    """
    def build_entries():
        if self._single_outfile:
            # A combined cutout is stored once, under the fixed 'cutout' prefix
            stems = ['cutout']
        else:
            # Otherwise one entry per input file, named after that file's stem
            stems = [Path(input_file).stem for input_file in self.hdu_cutouts_by_file]

        for index, stem in enumerate(stems):
            yield self._make_cutout_filename(stem), self.fits_cutouts[index]

    return self._write_cutouts_to_zip(output_dir=output_dir, filename=filename, build_entries=build_entries)
442465

443466
class CutoutInstance:
444467
"""

astrocut/tess_cube_cutout.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -487,15 +487,7 @@ def write_as_tpf(self, output_dir: Union[str, Path] = '.', output_file: str = No
487487
for file, cutout in self.cutouts_by_file.items():
488488
# Determine file name
489489
if not output_file or len(self._input_files) > 1:
490-
cutout_lims = cutout.cutout_lims
491-
width = cutout_lims[0, 1] - cutout_lims[0, 0]
492-
height = cutout_lims[1, 1] - cutout_lims[1, 0]
493-
filename = '{}_{:7f}_{:7f}_{}x{}_astrocut.fits'.format(
494-
Path(file).stem.rstrip('-cube'),
495-
self._coordinates.ra.value,
496-
self._coordinates.dec.value,
497-
width,
498-
height)
490+
filename = self._make_cutout_filename(Path(file).stem.rstrip('-cube'))
499491
else:
500492
filename = output_file
501493

@@ -517,6 +509,31 @@ def write_as_tpf(self, output_dir: Union[str, Path] = '.', output_file: str = No
517509

518510
log.debug('Write time: %.2f sec', (monotonic() - write_time))
519511
return cutout_paths
512+
513+
def write_as_zip(self, output_dir: Union[str, Path] = '.', filename: Union[str, Path, None] = None) -> str:
    """
    Package the cutout TPF files into a zip archive without writing intermediate files.

    Parameters
    ----------
    output_dir : str | Path, optional
        Directory where the zip will be created. Default '.'.
    filename : str | Path | None, optional
        Name (or path) of the output zip file, passed through unchanged to
        ``_write_cutouts_to_zip``.

    Returns
    -------
    str
        Path to the created zip file.
    """
    cube_suffix = '-cube'

    def build_entries():
        for file, tpf in self.tpf_cutouts_by_file.items():
            stem = Path(file).stem
            # Drop only a literal trailing '-cube'. `str.rstrip('-cube')` treats its
            # argument as a set of characters and can eat into the rest of the stem.
            if stem.endswith(cube_suffix):
                stem = stem[:-len(cube_suffix)]
            yield self._make_cutout_filename(stem), tpf

    return self._write_cutouts_to_zip(output_dir=output_dir, filename=filename, build_entries=build_entries)
520537

521538
class CubeCutoutInstance(CubeCutout.CubeCutoutInstance):
522539
"""

0 commit comments

Comments
 (0)