
Commit b493b1d

Use .iterator() to save RAM during export
1 parent 7d30696 commit b493b1d
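
Django querysets cache every row they fetch, so iterating over a large table during export materializes the whole result set in memory at once. QuerySet.iterator() instead streams rows straight from the database cursor without populating the result cache, which is the switch this commit makes. A minimal sketch of the difference (the model and the write_row helper are illustrative, not taken from this repository):

    # Default iteration: the queryset caches every instance it yields,
    # so memory usage grows with the size of the table.
    for trip in Trip.objects.filter(feed=feed).order_by('trip_id'):
        write_row(trip)  # hypothetical row writer

    # .iterator() streams rows from the database cursor and skips the
    # result cache, keeping memory usage roughly flat.
    for trip in Trip.objects.filter(feed=feed).order_by('trip_id').iterator():
        write_row(trip)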

File tree

1 file changed: +36, -61 lines


multigtfs/models/base.py

+36 -61
@@ -30,7 +30,6 @@
 logger = getLogger(__name__)
 re_point = re.compile(r'(?P<name>point)\[(?P<index>\d)\]')
 batch_size = 1000
-large_queryset_size = 100000
 CSV_BOM = BOM_UTF8.decode('utf-8') if PY3 else BOM_UTF8
 
 
@@ -365,70 +364,46 @@ def export_txt(cls, feed):
                     cache[field_name][None] = u''
                     model_to_field_name[model_name] = field_name
 
-        # For large querysets, break up by the first field
-        if total < large_queryset_size:
-            querysets = [objects.order_by(*sort_fields)]
-        else:  # pragma: no cover
-            field1_raw = sort_fields[0]
-            assert '__' in field1_raw
-            assert field1_raw in cache
-            field1 = field1_raw.split('__', 1)[0]
-            field1_id = field1 + '_id'
-
-            # Sort field1 ids by field1 values
-            val_to_id = dict((v, k) for k, v in cache[field1_raw].items())
-            assert len(val_to_id) == len(cache[field1_raw])
-            sorted_vals = sorted(val_to_id.keys())
-
-            querysets = []
-            for val in sorted_vals:
-                fid = val_to_id[val]
-                if fid:
-                    qs = objects.filter(
-                        **{field1_id: fid}).order_by(*sort_fields[1:])
-                    querysets.append(qs)
-
         # Assemble the rows, writing when we hit batch size
         count = 0
         rows = []
-        for queryset in querysets:
-            for item in queryset.order_by(*sort_fields):
-                row = []
-                for csv_name, field_name in column_map:
-                    obj = item
-                    point_match = re_point.match(field_name)
-                    if '__' in field_name:
-                        # Return relations from cache
-                        local_field_name = field_name.split('__', 1)[0]
-                        field_id = getattr(obj, local_field_name + '_id')
-                        row.append(cache[field_name][field_id])
-                    elif point_match:
-                        # Get the lat or long from the point
-                        name, index = point_match.groups()
-                        field = getattr(obj, name)
-                        row.append(field.coords[int(index)])
+        for item in objects.order_by(*sort_fields).iterator():
+            row = []
+            for csv_name, field_name in column_map:
+                obj = item
+                point_match = re_point.match(field_name)
+                if '__' in field_name:
+                    # Return relations from cache
+                    local_field_name = field_name.split('__', 1)[0]
+                    field_id = getattr(obj, local_field_name + '_id')
+                    row.append(cache[field_name][field_id])
+                elif point_match:
+                    # Get the lat or long from the point
+                    name, index = point_match.groups()
+                    field = getattr(obj, name)
+                    row.append(field.coords[int(index)])
+                else:
+                    # Handle other field types
+                    field = getattr(obj, field_name) if obj else ''
+                    if isinstance(field, date):
+                        formatted = field.strftime(u'%Y%m%d')
+                        row.append(text_type(formatted))
+                    elif isinstance(field, bool):
+                        row.append(1 if field else 0)
+                    elif field is None:
+                        row.append(u'')
                     else:
-                        # Handle other field types
-                        field = getattr(obj, field_name) if obj else ''
-                        if isinstance(field, date):
-                            formatted = field.strftime(u'%Y%m%d')
-                            row.append(text_type(formatted))
-                        elif isinstance(field, bool):
-                            row.append(1 if field else 0)
-                        elif field is None:
-                            row.append(u'')
-                        else:
-                            row.append(text_type(field))
-                for col in extra_columns:
-                    row.append(obj.extra_data.get(col, u''))
-                rows.append(row)
-                if len(rows) % batch_size == 0:  # pragma: no cover
-                    write_text_rows(csv_writer, rows)
-                    count += len(rows)
-                    logger.info(
-                        "Exported %d %s",
-                        count, cls._meta.verbose_name_plural)
-                    rows = []
+                        row.append(text_type(field))
+            for col in extra_columns:
+                row.append(obj.extra_data.get(col, u''))
+            rows.append(row)
+            if len(rows) % batch_size == 0:  # pragma: no cover
+                write_text_rows(csv_writer, rows)
+                count += len(rows)
+                logger.info(
+                    "Exported %d %s",
+                    count, cls._meta.verbose_name_plural)
+                rows = []
 
         # Write rows smaller than batch size
         write_text_rows(csv_writer, rows)
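
Note that .iterator() also makes the deleted workaround unnecessary: the old code kept memory bounded only for querysets above the removed large_queryset_size threshold, by splitting the export into one queryset per value of the first sort field, at the cost of extra queries and the assertions seen above. What survives the change is the batched-write pattern: accumulate rows, flush every batch_size rows, then flush the remainder. In isolation that pattern looks roughly like this (a sketch; build_row is a hypothetical stand-in for the per-column loop, not a helper in this module):

    rows = []
    count = 0
    for item in objects.order_by(*sort_fields).iterator():
        rows.append(build_row(item))  # hypothetical stand-in for the column loop
        if len(rows) % batch_size == 0:
            # Flush a full batch of rows and track progress.
            write_text_rows(csv_writer, rows)
            count += len(rows)
            rows = []
    # Flush the final batch, which is smaller than batch_size.
    write_text_rows(csv_writer, rows)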
