Skip to content

Commit

Permalink
Merge pull request #10991 from SanjaySG:master
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 720485890
  • Loading branch information
The TensorFlow Datasets Authors committed Jan 28, 2025
2 parents b68aa45 + 80c4c48 commit 9969ce5
Showing 1 changed file with 10 additions and 8 deletions.
18 changes: 10 additions & 8 deletions tensorflow_datasets/image_classification/cats_vs_dogs.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,7 @@
"""Cats vs Dogs dataset."""

import io
+ import os
import re
import zipfile

Expand Down Expand Up @@ -43,7 +44,7 @@
_NUM_CORRUPT_IMAGES = 1738
_DESCRIPTION = (
"A large set of images of cats and dogs. "
- "There are %d corrupted images that are dropped." % _NUM_CORRUPT_IMAGES
+ f"There are {_NUM_CORRUPT_IMAGES} corrupted images that are dropped."
)

_NAME_RE = re.compile(r"^PetImages[\\/](Cat|Dog)[\\/]\d+\.jpg$")
Expand Down Expand Up @@ -94,7 +95,8 @@ def _generate_examples(self, archive):
"""Generate Cats vs Dogs images and labels given a directory path."""
num_skipped = 0
for fname, fobj in archive:
- res = _NAME_RE.match(fname)
+ norm_fname = os.path.normpath(fname)
+ res = _NAME_RE.match(norm_fname)
if not res: # README file, ...
continue
label = res.group(1).lower()
Expand All @@ -113,19 +115,19 @@ def _generate_examples(self, archive):
# Converting the recoded image back into a zip file container.
buffer = io.BytesIO()
with zipfile.ZipFile(buffer, "w") as new_zip:
- new_zip.writestr(fname, img_recoded.numpy())
- new_fobj = zipfile.ZipFile(buffer).open(fname)
+ new_zip.writestr(norm_fname, img_recoded.numpy())
+ new_fobj = zipfile.ZipFile(buffer).open(norm_fname)

record = {
"image": new_fobj,
- "image/filename": fname,
+ "image/filename": norm_fname,
"label": label,
}
- yield fname, record
+ yield norm_fname, record

if num_skipped != _NUM_CORRUPT_IMAGES:
raise ValueError(
- "Expected %d corrupt images, but found %d"
- % (_NUM_CORRUPT_IMAGES, num_skipped)
+ f"Expected {_NUM_CORRUPT_IMAGES} corrupt images, but found"
+ f" {num_skipped}."
)
logging.warning("%d images were corrupted and were skipped", num_skipped)

0 comments on commit 9969ce5

Please sign in to comment.