Skip to content

Commit 7915d1c

Browse files
authored
fix: avoid writing empty image files (#139)
* fix: avoid writing empty files
* fix: None
1 parent 0ac4142 commit 7915d1c

File tree

3 files changed

+40
-19
lines changed

3 files changed

+40
-19
lines changed

playa/cli.py

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787

8888
import playa
8989
from playa import Document, Page, PDFPasswordIncorrect, asobj
90+
from playa.color import ColorSpace
9091
from playa.data.content import Image
9192
from playa.data.metadata import asobj_document
9293
from playa.outline import Outline
@@ -517,30 +518,50 @@ def get_one_image(stream: ContentStream, path: Path) -> Path:
517518
else:
518519
# Otherwise, try to write a PNM file
519520
bits = stream.bits
520-
colorspace = stream.colorspace
521-
ncomponents = colorspace.ncomponents
522-
if colorspace.name == "Indexed":
523-
from playa.color import get_colorspace
524-
525-
assert isinstance(colorspace.spec, list)
526-
_, underlying, _, _ = colorspace.spec
527-
underlying = get_colorspace(resolve1(underlying))
528-
if underlying is not None:
529-
ncomponents = underlying.ncomponents
521+
colorspace: Union[ColorSpace, None] = stream.colorspace
522+
ncomponents = 1
523+
if colorspace is not None:
524+
ncomponents = colorspace.ncomponents
525+
if colorspace.name == "Indexed":
526+
from playa.color import get_colorspace
527+
528+
assert isinstance(colorspace.spec, list)
529+
_, underlying, _, _ = colorspace.spec
530+
colorspace = get_colorspace(resolve1(underlying))
531+
if colorspace is not None:
532+
ncomponents = colorspace.ncomponents
530533
if bits == 1:
531534
path = path.with_suffix(".pbm")
535+
elif colorspace is None or colorspace.name not in ("DeviceGray", "DeviceRGB"):
536+
path = path.with_suffix(".dat")
537+
LOG.warning(
538+
"Unsupported colorspace %s, writing data to %s", asobj(colorspace), path
539+
)
532540
elif ncomponents == 1:
533541
path = path.with_suffix(".pgm")
534542
elif ncomponents == 3:
535543
path = path.with_suffix(".ppm")
536-
try:
544+
elif ncomponents == 3:
545+
path = path.with_suffix(".ppm")
546+
else:
547+
path = path.with_suffix(".dat")
548+
LOG.warning(
549+
"Unsupported colorspace %s, writing data to %s", asobj(colorspace), path
550+
)
551+
552+
if path.suffix != ".dat":
553+
try:
554+
with open(path, "wb") as outfh:
555+
stream.write_pnm(outfh)
556+
except ValueError:
557+
datpath = path.with_suffix(".dat")
558+
LOG.exception(
559+
"Failed to write PNM to %s, writing data to %s", path, datpath
560+
)
561+
path = datpath
562+
563+
if path.suffix == ".dat":
537564
with open(path, "wb") as outfh:
538-
stream.write_pnm(outfh)
539-
except ValueError:
540-
# Fall back to a binary file
541-
datpath = path.with_suffix(".dat")
542-
LOG.warning("Failed to write PNM to %s, writing data to %s", path, datpath)
543-
with open(datpath, "wb") as outfh:
544565
outfh.write(stream.buffer)
545566
return path
546567

playa/pdftypes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,7 @@ def write_pnm(self, outfh: BinaryIO) -> None:
632632
bits = self.bits
633633
colorspace = self.colorspace
634634
data = self.buffer
635+
ftype: Union[bytes, None] = None
635636
if bits == 1:
636637
ftype = b"P4"
637638
elif colorspace.name == "DeviceGray":
@@ -662,7 +663,7 @@ def write_pnm(self, outfh: BinaryIO) -> None:
662663
for i in unpack_indexed_image_data(data, bits, self.width, self.height)
663664
for b in lookup[channels * i : channels * (i + 1)]
664665
)
665-
else:
666+
if ftype is None:
666667
raise ValueError("Unsupported colorspace: %r" % (self.colorspace,))
667668
max_value = (1 << bits) - 1
668669
outfh.write(b"%s %d %d\n" % (ftype, self.width, self.height))

tests/test_lazy_api.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
Test the ContentObject API for pages.
33
"""
44

5-
import sys
65
from pathlib import Path
76

87
import playa

0 commit comments

Comments
 (0)