Skip to content

Commit c65aee3

Browse files
committed
Add laziness metadata to plugins
1 parent 7797ba3 commit c65aee3

23 files changed

+68
-5
lines changed

rows/plugins/dicts.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ def import_from_dicts(data, samples=1000, *args, **kwargs):
5151
*args, **kwargs)
5252

5353

54+
import_from_dicts.is_lazy = True
55+
56+
5457
def export_to_dicts(table, *args, **kwargs):
5558
return [{key: getattr(row, key) for key in table.field_names}
5659
for row in table]

rows/plugins/ods.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,5 +103,10 @@ def import_from_ods(filename_or_fobj, index=0, *args, **kwargs):
103103

104104
max_length = max(len(row) for row in table_rows)
105105
full_rows = complete_with_None(table_rows, max_length)
106+
106107
meta = {'imported_from': 'ods', 'filename': filename,}
108+
107109
return create_table(full_rows, meta=meta, *args, **kwargs)
110+
111+
112+
import_from_ods.is_lazy = False

rows/plugins/plugin_csv.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None,
7171
return create_table(reader, meta=meta, *args, **kwargs)
7272

7373

74+
import_from_csv.is_lazy = True
75+
76+
7477
def export_to_csv(table, filename_or_fobj=None, encoding='utf-8',
7578
dialect=unicodecsv.excel, *args, **kwargs):
7679
'''Export a table to a CSV file

rows/plugins/plugin_html.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ def import_from_html(filename_or_fobj, encoding='utf-8', index=0,
9898
return create_table(table_rows, meta=meta, *args, **kwargs)
9999

100100

101+
import_from_html.is_lazy = False
102+
103+
101104
def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
102105
**kwargs):
103106
serialized_table = serialize(table, *args, **kwargs)
@@ -106,6 +109,7 @@ def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
106109
header = [' <th> {} </th>\n'.format(field) for field in fields]
107110
result.extend(header)
108111
result.extend([' </tr>\n', ' </thead>\n', '\n', ' <tbody>\n', '\n'])
112+
# TODO: could be lazy so we don't need to store the whole table in memory
109113
for index, row in enumerate(serialized_table, start=1):
110114
css_class = 'odd' if index % 2 == 1 else 'even'
111115
result.append(' <tr class="{}">\n'.format(css_class))

rows/plugins/plugin_json.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
3838
filename, fobj = get_filename_and_fobj(filename_or_fobj)
3939

4040
json_obj = json.load(fobj, encoding=encoding)
41+
# TODO: may use import_from_dicts here
4142
field_names = list(json_obj[0].keys())
4243
table_rows = [[item[key] for key in field_names] for item in json_obj]
4344

@@ -47,6 +48,9 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
4748
return create_table([field_names] + table_rows, meta=meta, *args, **kwargs)
4849

4950

51+
import_from_json.is_lazy = False
52+
53+
5054
def _convert(value, field_type, *args, **kwargs):
5155
if value is None or field_type in (
5256
fields.BinaryField,
@@ -77,6 +81,8 @@ def export_to_json(table, filename_or_fobj=None, encoding='utf-8', indent=None,
7781
fields = table.fields
7882
prepared_table = prepare_to_export(table, *args, **kwargs)
7983
field_names = next(prepared_table)
84+
85+
# TODO: could be lazy so we don't need to store the whole table in memory
8086
data = [{field_name: _convert(value, fields[field_name], *args, **kwargs)
8187
for field_name, value in zip(field_names, row)}
8288
for row in prepared_table]

rows/plugins/plugin_parquet.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,12 @@ def import_from_parquet(filename_or_fobj, *args, **kwargs):
5656
for schema in parquet._read_footer(fobj).schema
5757
if schema.type is not None])
5858
header = list(types.keys())
59-
table_rows = list(parquet.reader(fobj)) # TODO: be lazy
59+
# TODO: make it lazy
60+
table_rows = list(parquet.reader(fobj))
6061

6162
meta = {'imported_from': 'parquet', 'filename': filename,}
6263
return create_table([header] + table_rows, meta=meta, force_types=types,
6364
*args, **kwargs)
65+
66+
67+
import_from_parquet.is_lazy = False

rows/plugins/sqlite.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ def import_from_sqlite(filename_or_connection, table_name='table1', query=None,
128128
return create_table(data, meta=meta, *args, **kwargs)
129129

130130

131+
import_from_sqlite.is_lazy = True
132+
133+
131134
def export_to_sqlite(table, filename_or_connection, table_name=None,
132135
table_name_format='table{index}', batch_size=100,
133136
*args, **kwargs):

rows/plugins/txt.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,24 @@ def import_from_txt(filename_or_fobj, encoding='utf-8', *args, **kwargs):
3434
filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
3535
contents = fobj.read().decode(encoding).strip().splitlines()
3636

37+
# TODO: make it lazy
38+
3739
# remove '+----+----+' lines
3840
contents = contents[1:-1]
3941
del contents[1]
4042

4143
table_rows = [[value.strip() for value in row.split(PIPE)[1:-1]]
4244
for row in contents]
45+
4346
meta = {'imported_from': 'txt',
4447
'filename': filename,
4548
'encoding': encoding,}
4649
return create_table(table_rows, meta=meta, *args, **kwargs)
4750

4851

52+
import_from_txt.is_lazy = False
53+
54+
4955
def export_to_txt(table, filename_or_fobj=None, encoding=None,
5056
*args, **kwargs):
5157
'''Export a `rows.Table` to text
@@ -70,6 +76,7 @@ def export_to_txt(table, filename_or_fobj=None, encoding=None,
7076
split_line = PLUS + PLUS.join(dashes) + PLUS
7177

7278
result = [split_line, header, split_line]
79+
# TODO: make it lazy
7380
for row in table_rows:
7481
values = [value.rjust(max_sizes[field_name])
7582
for field_name, value in zip(field_names, row)]

rows/plugins/utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,10 @@ def func(rows_data):
141141
def create_table(data, meta=None, fields=None, skip_header=True,
142142
import_fields=None, samples=None, force_types=None,
143143
lazy=False, *args, **kwargs):
144+
# TODO: change samples to be a fixed number
145+
# TODO: may change samples logic (`float('inf')` or `all`)
144146
# TODO: add auto_detect_types=True parameter
147+
145148
table_rows = iter(data)
146149
sample_rows = []
147150

@@ -163,6 +166,9 @@ def create_table(data, meta=None, fields=None, skip_header=True,
163166
if not isinstance(fields, OrderedDict):
164167
raise ValueError('`fields` must be an `OrderedDict`')
165168

169+
# TODO: if `fields` is set, we're going to have the wrong order,
170+
# compared to the first row (header).
171+
166172
if skip_header:
167173
_ = next(table_rows)
168174

@@ -187,6 +193,7 @@ def create_table(data, meta=None, fields=None, skip_header=True,
187193

188194
if not lazy:
189195
table = Table(fields=fields, meta=meta)
196+
190197
# TODO: put this inside Table.__init__
191198
for row in chain(sample_rows, table_rows):
192199
table.append({field_name: value

rows/plugins/xls.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ def import_from_xls(filename_or_fobj, sheet_name=None, sheet_index=0,
156156
return create_table(table_rows, meta=meta, *args, **kwargs)
157157

158158

159+
import_from_xls.is_lazy = False
160+
161+
159162
def export_to_xls(table, filename_or_fobj=None, sheet_name='Sheet1', *args,
160163
**kwargs):
161164

rows/plugins/xlsx.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,16 @@ def import_from_xlsx(filename_or_fobj, sheet_name=None, sheet_index=0,
7777
for row_index in range(start_row, end_row + 1)]
7878

7979
filename, _ = get_filename_and_fobj(filename_or_fobj, dont_open=True)
80+
8081
metadata = {'imported_from': 'xlsx',
8182
'filename': filename,
8283
'sheet_name': sheet_name, }
8384
return create_table(table_rows, meta=metadata, *args, **kwargs)
8485

8586

87+
import_from_xlsx.is_lazy = False
88+
89+
8690
FORMATTING_STYLES = {
8791
fields.DateField: 'YYYY-MM-DD',
8892
fields.DatetimeField: 'YYYY-MM-DD HH:MM:SS',

rows/plugins/xpath.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def import_from_xpath(filename_or_fobj, rows_xpath, fields_xpath,
6969

7070
filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
7171
xml = fobj.read().decode(encoding)
72+
# TODO: make it lazy (is it possible with lxml?)
7273
tree = tree_from_string(xml)
7374
row_elements = tree.xpath(rows_xpath)
7475

@@ -80,3 +81,6 @@ def import_from_xpath(filename_or_fobj, rows_xpath, fields_xpath,
8081
'filename': filename,
8182
'encoding': encoding,}
8283
return create_table([header] + result_rows, meta=meta, *args, **kwargs)
84+
85+
86+
import_from_xpath.is_lazy = False

tests/tests_plugin_csv.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class PluginCsvTestCase(utils.RowsTestMixIn, unittest.TestCase):
5454
def test_imports(self):
5555
self.assertIs(rows.import_from_csv, rows.plugins.plugin_csv.import_from_csv)
5656
self.assertIs(rows.export_to_csv, rows.plugins.plugin_csv.export_to_csv)
57+
self.assertTrue(rows.import_from_csv.is_lazy)
5758

5859
@mock.patch('rows.plugins.plugin_csv.create_table')
5960
def test_import_from_csv_uses_create_table(self, mocked_create_table):

tests/tests_plugin_dicts.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def test_imports(self):
4646
self.assertIs(rows.import_from_dicts,
4747
rows.plugins.dicts.import_from_dicts)
4848
self.assertIs(rows.export_to_dicts, rows.plugins.dicts.export_to_dicts)
49+
self.assertTrue(rows.import_from_dicts.is_lazy)
4950

5051
@mock.patch('rows.plugins.dicts.create_table')
5152
def test_import_from_dicts_uses_create_table(self, mocked_create_table):

tests/tests_plugin_html.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# coding: utf-8
22

3-
# Copyright 2014-2016 Álvaro Justen <https://github.com/turicas/rows/>
3+
# Copyright 2014-2017 Álvaro Justen <https://github.com/turicas/rows/>
44
#
55
# This program is free software: you can redistribute it and/or modify
66
# it under the terms of the GNU General Public License as published by
@@ -54,6 +54,7 @@ def test_imports(self):
5454
self.assertIs(rows.import_from_html,
5555
rows.plugins.plugin_html.import_from_html)
5656
self.assertIs(rows.export_to_html, rows.plugins.plugin_html.export_to_html)
57+
self.assertFalse(rows.import_from_html.is_lazy)
5758

5859
def test_import_from_html_filename(self):
5960
table = rows.import_from_html(self.filename, encoding=self.encoding)
@@ -89,7 +90,7 @@ def test_import_from_html_uses_create_table(self, mocked_create_table):
8990
call = mocked_create_table.call_args
9091
kwargs['meta'] = {'imported_from': 'html',
9192
'filename': self.filename,
92-
'encoding': 'iso-8859-1',}
93+
'encoding': 'iso-8859-1', }
9394
self.assertEqual(call[1], kwargs)
9495

9596
def test_export_to_html_filename(self):

tests/tests_plugin_json.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def test_imports(self):
4747
rows.plugins.plugin_json.import_from_json)
4848
self.assertIs(rows.export_to_json,
4949
rows.plugins.plugin_json.export_to_json)
50+
self.assertFalse(rows.import_from_json.is_lazy)
5051

5152
@mock.patch('rows.plugins.plugin_json.create_table')
5253
def test_import_from_json_uses_create_table(self, mocked_create_table):

tests/tests_plugin_ods.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class PluginOdsTestCase(utils.RowsTestMixIn, unittest.TestCase):
3535

3636
def test_imports(self):
3737
self.assertIs(rows.import_from_ods, rows.plugins.ods.import_from_ods)
38+
self.assertFalse(rows.import_from_ods.is_lazy)
3839

3940
@mock.patch('rows.plugins.ods.create_table')
4041
def test_import_from_ods_uses_create_table(self, mocked_create_table):

tests/tests_plugin_parquet.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class PluginParquetTestCase(unittest.TestCase):
6363
def test_imports(self):
6464
self.assertIs(rows.import_from_parquet,
6565
rows.plugins.plugin_parquet.import_from_parquet)
66+
self.assertFalse(rows.import_from_parquet.is_lazy)
6667

6768
@mock.patch('rows.plugins.plugin_parquet.create_table')
6869
def test_import_from_parquet_uses_create_table(self, mocked_create_table):

tests/tests_plugin_sqlite.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def test_imports(self):
5050
rows.plugins.sqlite.import_from_sqlite)
5151
self.assertIs(rows.export_to_sqlite,
5252
rows.plugins.sqlite.export_to_sqlite)
53+
self.assertTrue(rows.import_from_sqlite.is_lazy)
5354

5455
@mock.patch('rows.plugins.sqlite.create_table')
5556
def test_import_from_sqlite_uses_create_table(self, mocked_create_table):

tests/tests_plugin_txt.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class PluginTxtTestCase(utils.RowsTestMixIn, unittest.TestCase):
4141
def test_imports(self):
4242
self.assertIs(rows.import_from_txt, rows.plugins.txt.import_from_txt)
4343
self.assertIs(rows.export_to_txt, rows.plugins.txt.export_to_txt)
44+
self.assertFalse(rows.import_from_txt.is_lazy)
4445

4546
@mock.patch('rows.plugins.txt.create_table')
4647
def test_import_from_txt_uses_create_table(self, mocked_create_table):

tests/tests_plugin_xls.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class PluginXlsTestCase(utils.RowsTestMixIn, unittest.TestCase):
4646
def test_imports(self):
4747
self.assertIs(rows.import_from_xls, rows.plugins.xls.import_from_xls)
4848
self.assertIs(rows.export_to_xls, rows.plugins.xls.export_to_xls)
49+
self.assertFalse(rows.import_from_xls.is_lazy)
4950

5051
@mock.patch('rows.plugins.xls.create_table')
5152
def test_import_from_xls_uses_create_table(self, mocked_create_table):

tests/tests_plugin_xlsx.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def test_imports(self):
4242
rows.plugins.xlsx.import_from_xlsx)
4343
self.assertIs(rows.export_to_xlsx,
4444
rows.plugins.xlsx.export_to_xlsx)
45+
self.assertFalse(rows.import_from_xlsx.is_lazy)
4546

4647
@mock.patch('rows.plugins.xlsx.create_table')
4748
def test_import_from_xlsx_uses_create_table(self, mocked_create_table):

tests/tests_plugin_xpath.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,9 @@ def test_import_from_xpath_unescape_and_extract_text(self):
107107
fields_xpath = OrderedDict([('name', './/text()'),
108108
('link', './/a/@href')])
109109
table = rows.import_from_xpath(BytesIO(html),
110+
encoding='utf-8',
110111
rows_xpath=rows_xpath,
111-
fields_xpath=fields_xpath,
112-
encoding='utf-8')
112+
fields_xpath=fields_xpath)
113113
self.assertEqual(table[0].name, 'Abadia de Goiás (GO)')
114114
self.assertEqual(table[1].name, 'Abadiânia (GO)')
115115

0 commit comments

Comments
 (0)