Skip to content

Commit 7342868

Browse files
committed
Merge pull request #5 from sebdiem/master
Added support for iterable input instead of file
2 parents 9b27455 + 65f1f57 commit 7342868

File tree

4 files changed

+129
-40
lines changed

4 files changed

+129
-40
lines changed

README.markdown

+55-11
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,55 @@
77
zipstream.py is a zip archive generator based on python 3.3's zipfile.py. It was created to
88
generate a zip file generator for streaming (ie web apps). This is beneficial for when you
99
want to provide a downloadable archive of a large collection of regular files, which would be infeasible to
10-
generate the archive prior to downloading.
10+
generate the archive prior to downloading, or of a very large file that you do not want to store entirely on disk or in memory.
1111

1212
The archive is generated as an iterator of strings, which, when joined, form
1313
the zip archive. For example, the following code snippet would write a zip
1414
archive containing files from 'path' to a normal file:
1515

1616
```python
17-
zf = open('zipfile.zip', 'wb')
18-
for data in ZipStream(path):
19-
zf.write(data)
20-
zf.close()
17+
import zipstream
18+
19+
z = zipstream.ZipFile()
20+
z.write('path/to/files')
21+
22+
with open('zipfile.zip', 'wb') as f:
23+
for data in z:
24+
f.write(data)
25+
```
26+
27+
zipstream can also take a byte string iterable as input and generate
28+
the archive as an iterator.
29+
This avoids storing large files on disk or in memory.
30+
To do so you could use something like this snippet:
31+
32+
```python
33+
def iterable():
34+
for _ in xrange(10):
35+
yield b'this is a byte string\x01\n'
36+
37+
z = zipstream.ZipFile()
38+
z.write_iter('my_archive_iter', iterable())
39+
40+
with open('zipfile.zip', 'wb') as f:
41+
for data in z:
42+
f.write(data)
43+
```
44+
45+
Of course, both approaches can be combined:
46+
47+
```python
48+
def iterable():
49+
for _ in xrange(10):
50+
yield b'this is a byte string\x01\n'
51+
52+
z = zipstream.ZipFile()
53+
z.write('path/to/files', 'my_archive_files')
54+
z.write_iter('my_archive_iter', iterable())
55+
56+
with open('zipfile.zip', 'wb') as f:
57+
for data in z:
58+
f.write(data)
2159
```
2260

2361
Since recent versions of web.py support returning iterators of strings to be
@@ -31,10 +69,10 @@ def GET(self):
3169
web.header('Content-type' , 'application/zip')
3270
web.header('Content-Disposition', 'attachment; filename="%s"' % (
3371
zip_filename,))
34-
return ZipStream(path)
72+
return zipstream.ZipFile(path)
3573
```
3674

37-
If the zlib module is available, ZipStream can generate compressed zip
75+
If the zlib module is available, zipstream.ZipFile can generate compressed zip
3876
archives.
3977

4078
## Requirements
@@ -51,7 +89,7 @@ from flask import Response
5189
@app.route('/package.zip', methods=['GET'], endpoint='zipball')
5290
def zipball():
5391
def generator():
54-
z = ZipStream(mode='w', compression=ZIP_DEFLATED)
92+
z = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED)
5593

5694
z.write('/path/to/file')
5795

@@ -66,7 +104,7 @@ def zipball():
66104

67105
@app.route('/package.zip', methods=['GET'], endpoint='zipball')
68106
def zipball():
69-
z = ZipStream(mode='w', compression=ZIP_DEFLATED)
107+
z = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED)
70108
z.write('/path/to/file')
71109

72110
response = Response(z, mimetype='application/zip')
@@ -80,7 +118,7 @@ def zipball():
80118
from django.http import StreamingHttpResponse
81119

82120
def zipball(request):
83-
z = ZipStream(mode='w', compression=ZIP_DEFLATED)
121+
z = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED)
84122
z.write('/path/to/file')
85123

86124
response = StreamingHttpResponse(z, mimetype='application/zip')
@@ -97,5 +135,11 @@ def GET(self):
97135
web.header('Content-type' , 'application/zip')
98136
web.header('Content-Disposition', 'attachment; filename="%s"' % (
99137
zip_filename,))
100-
return ZipStream(path)
138+
return zipstream.ZipFile(path)
101139
```
140+
141+
## Running tests
142+
143+
With Python 2.7 or later, just run the following command: `python -m unittest discover`
144+
145+
Alternatively, you can use `nose`.

tests/__init__.py

Whitespace-only changes.

tests/test_zipstream.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,33 @@ def test_write_file(self):
5050
f.close()
5151

5252
z2 = zipfile.ZipFile(f.name, 'r')
53-
z2.testzip()
53+
self.assertFalse(z2.testzip())
5454

5555
os.remove(f.name)
5656

57+
def test_write_iterable(self):
58+
z = zipstream.ZipFile(mode='w')
59+
def string_generator():
60+
for _ in range(10):
61+
yield b'zipstream\x01\n'
62+
data = [string_generator(), string_generator()]
63+
for i, d in enumerate(data):
64+
z.write_iter(iterable=d, arcname='data_{0}'.format(i))
65+
66+
f = tempfile.NamedTemporaryFile(suffix='zip', delete=False)
67+
for chunk in z:
68+
f.write(chunk)
69+
f.close()
70+
71+
z2 = zipfile.ZipFile(f.name, 'r')
72+
self.assertFalse(z2.testzip())
73+
74+
os.remove(f.name)
75+
76+
77+
def test_write_iterable_no_archive(self):
78+
z = zipstream.ZipFile(mode='w')
79+
self.assertRaises(TypeError, z.write_iter, iterable=range(10))
5780

5881
if __name__ == '__main__':
59-
unittest.main()
82+
unittest.main()

zipstream/__init__.py

+49-27
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,8 @@ def __init__(self, fileobj=None, mode='w', compression=ZIP_STORED, allowZip64=Fa
170170
self.paths_to_write = []
171171

172172
def __iter__(self):
173-
for args, kwargs in self.paths_to_write:
174-
for data in self.__write(*args, **kwargs):
173+
for kwargs in self.paths_to_write:
174+
for data in self.__write(**kwargs):
175175
yield data
176176
for data in self.__close():
177177
yield data
@@ -204,21 +204,30 @@ def write(self, filename, arcname=None, compress_type=None):
204204
# TODO: Reflect python's Zipfile.write
205205
# - if filename is file, write as file
206206
# - if filename is directory, write an empty directory
207-
self.paths_to_write.append(
208-
((filename, ), {'arcname': arcname, 'compress_type': compress_type}),
209-
)
207+
kwargs = {'filename': filename, 'arcname': arcname, 'compress_type': compress_type}
208+
self.paths_to_write.append(kwargs)
210209

211-
def __write(self, filename, arcname=None, compress_type=None):
210+
def write_iter(self, arcname, iterable, compress_type=None):
211+
"""Write the bytes iterable `iterable` to the archive under the name `arcname`."""
212+
kwargs = {'arcname': arcname, 'iterable': iterable, 'compress_type': compress_type}
213+
self.paths_to_write.append(kwargs)
214+
215+
def __write(self, filename=None, iterable=None, arcname=None, compress_type=None):
212216
"""Put the bytes from filename into the archive under the name
213-
arcname."""
217+
`arcname`."""
214218
if not self.fp:
215219
raise RuntimeError(
216220
"Attempt to write to ZIP archive that was already closed")
217-
218-
st = os.stat(filename)
219-
isdir = stat.S_ISDIR(st.st_mode)
220-
mtime = time.localtime(st.st_mtime)
221-
date_time = mtime[0:6]
221+
if (filename is None and iterable is None) or (filename is not None and iterable is not None):
222+
raise ValueError("either (exclusively) filename or iterable shall be not None")
223+
224+
if filename:
225+
st = os.stat(filename)
226+
isdir = stat.S_ISDIR(st.st_mode)
227+
mtime = time.localtime(st.st_mtime)
228+
date_time = mtime[0:6]
229+
else:
230+
st, isdir, date_time = None, False, time.localtime()[0:6]
222231
# Create ZipInfo instance to store file information
223232
if arcname is None:
224233
arcname = filename
@@ -228,13 +237,16 @@ def __write(self, filename, arcname=None, compress_type=None):
228237
if isdir:
229238
arcname += '/'
230239
zinfo = ZipInfo(arcname, date_time)
231-
zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
240+
if st:
241+
zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
242+
else:
243+
zinfo.external_attr = 0o600 << 16 # ?rw-------
232244
if compress_type is None:
233245
zinfo.compress_type = self.compression
234246
else:
235247
zinfo.compress_type = compress_type
236248

237-
zinfo.file_size = st.st_size
249+
zinfo.file_size = 0
238250
zinfo.flag_bits = 0x00
239251
zinfo.flag_bits |= 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor
240252
zinfo.header_offset = self.fp.tell() # Start of header bytes
@@ -255,19 +267,29 @@ def __write(self, filename, arcname=None, compress_type=None):
255267
return
256268

257269
cmpr = _get_compressor(zinfo.compress_type)
258-
with open(filename, 'rb') as fp:
259-
# Must overwrite CRC and sizes with correct data later
260-
zinfo.CRC = CRC = 0
261-
zinfo.compress_size = compress_size = 0
262-
# Compressed size can be larger than uncompressed size
263-
zip64 = self._allowZip64 and \
264-
zinfo.file_size * 1.05 > ZIP64_LIMIT
265-
yield self.fp.write(zinfo.FileHeader(zip64))
266-
file_size = 0
267-
while 1:
268-
buf = fp.read(1024 * 8)
269-
if not buf:
270-
break
270+
271+
# Must overwrite CRC and sizes with correct data later
272+
zinfo.CRC = CRC = 0
273+
zinfo.compress_size = compress_size = 0
274+
# Compressed size can be larger than uncompressed size
275+
zip64 = self._allowZip64 and \
276+
zinfo.file_size * 1.05 > ZIP64_LIMIT
277+
yield self.fp.write(zinfo.FileHeader(zip64))
278+
file_size = 0
279+
if filename:
280+
with open(filename, 'rb') as fp:
281+
while 1:
282+
buf = fp.read(1024 * 8)
283+
if not buf:
284+
break
285+
file_size = file_size + len(buf)
286+
CRC = crc32(buf, CRC) & 0xffffffff
287+
if cmpr:
288+
buf = cmpr.compress(buf)
289+
compress_size = compress_size + len(buf)
290+
yield self.fp.write(buf)
291+
else: # we have an iterable
292+
for buf in iterable:
271293
file_size = file_size + len(buf)
272294
CRC = crc32(buf, CRC) & 0xffffffff
273295
if cmpr:

0 commit comments

Comments
 (0)