Commit c65aee3

Add lazyness metadata to plugins

1 parent 7797ba3 · commit c65aee3

23 files changed: +68 -5 lines
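This commit tags each plugin's import function with an `is_lazy` attribute, so callers can tell whether a plugin can stream rows (dicts, CSV, SQLite) or must load the whole input up front (ODS, HTML, JSON, Parquet, TXT, XLS). A minimal sketch of how that metadata could be consulted, assuming the plugins' optional dependencies (e.g. lxml for the HTML plugin) are installed; the `supports_lazy_import` helper is hypothetical, not part of this commit:

from rows.plugins import dicts, plugin_csv, plugin_html

# Hypothetical helper: report whether an import function was flagged as lazy;
# plugins without the attribute default to False.
def supports_lazy_import(import_function):
    return getattr(import_function, 'is_lazy', False)

for function in (dicts.import_from_dicts,        # flagged True in this commit
                 plugin_csv.import_from_csv,     # flagged True in this commit
                 plugin_html.import_from_html):  # flagged False in this commit
    print(function.__name__, supports_lazy_import(function))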

rows/plugins/dicts.py

Lines changed: 3 additions & 0 deletions

@@ -51,6 +51,9 @@ def import_from_dicts(data, samples=1000, *args, **kwargs):
                         *args, **kwargs)


+import_from_dicts.is_lazy = True
+
+
 def export_to_dicts(table, *args, **kwargs):
     return [{key: getattr(row, key) for key in table.field_names}
             for row in table]
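`import_from_dicts` is marked lazy, and it accepts an iterable of dicts (note the `samples=1000` default in the signature above). A short usage sketch with made-up data, assuming the module path shown in this diff:

from rows.plugins.dicts import import_from_dicts

# The dicts plugin takes an iterable of dicts; after this commit the function
# also carries the new laziness flag.
data = [
    {'name': 'Alice', 'age': 31},
    {'name': 'Bob', 'age': 27},
]
table = import_from_dicts(data)
print(import_from_dicts.is_lazy)  # True
print(table.field_names)          # field names detected from the dict keys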

rows/plugins/ods.py

Lines changed: 5 additions & 0 deletions

@@ -103,5 +103,10 @@ def import_from_ods(filename_or_fobj, index=0, *args, **kwargs):

     max_length = max(len(row) for row in table_rows)
     full_rows = complete_with_None(table_rows, max_length)
+
     meta = {'imported_from': 'ods', 'filename': filename,}
+
     return create_table(full_rows, meta=meta, *args, **kwargs)
+
+
+import_from_ods.is_lazy = False

rows/plugins/plugin_csv.py

Lines changed: 3 additions & 0 deletions

@@ -71,6 +71,9 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None,
     return create_table(reader, meta=meta, *args, **kwargs)


+import_from_csv.is_lazy = True
+
+
 def export_to_csv(table, filename_or_fobj=None, encoding='utf-8',
                   dialect=unicodecsv.excel, *args, **kwargs):
     '''Export a table to a CSV file
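`import_from_csv` can be flagged lazy because `unicodecsv.reader` already yields decoded rows on demand, so `create_table` can pull them as needed. A standalone illustration of that behaviour ('data.csv' is a placeholder path):

import unicodecsv

# A csv reader is an iterator over rows: reading the header and the first data
# row does not load the rest of the file.
with open('data.csv', 'rb') as fobj:
    reader = unicodecsv.reader(fobj, encoding='utf-8')
    field_names = next(reader)  # header row
    first_row = next(reader)    # later rows stay on disk until requested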

rows/plugins/plugin_html.py

Lines changed: 4 additions & 0 deletions

@@ -98,6 +98,9 @@ def import_from_html(filename_or_fobj, encoding='utf-8', index=0,
     return create_table(table_rows, meta=meta, *args, **kwargs)


+import_from_html.is_lazy = False
+
+
 def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
                    **kwargs):
     serialized_table = serialize(table, *args, **kwargs)
@@ -106,6 +109,7 @@ def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
     header = [' <th> {} </th>\n'.format(field) for field in fields]
     result.extend(header)
     result.extend([' </tr>\n', ' </thead>\n', '\n', ' <tbody>\n', '\n'])
+    # TODO: could be lazy so we don't need to store the whole table into memory
     for index, row in enumerate(serialized_table, start=1):
         css_class = 'odd' if index % 2 == 1 else 'even'
         result.append(' <tr class="{}">\n'.format(css_class))
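The new TODO in `export_to_html` notes that the exporter could be lazy instead of building the whole `result` list in memory. One possible shape for that is a generator that yields HTML chunks per row; this is a hypothetical sketch, not part of the commit, and the `<td>` markup is assumed (only the `css_class` logic is taken from the plugin):

# Hypothetical generator for the TODO above: emit one HTML chunk per table row
# so the whole table never has to sit in memory at once.
def iter_html_rows(serialized_table):
    for index, row in enumerate(serialized_table, start=1):
        css_class = 'odd' if index % 2 == 1 else 'even'
        yield ' <tr class="{}">\n'.format(css_class)
        for value in row:
            yield ' <td> {} </td>\n'.format(value)  # assumed cell markup
        yield ' </tr>\n'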

rows/plugins/plugin_json.py

Lines changed: 6 additions & 0 deletions

@@ -38,6 +38,7 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
     filename, fobj = get_filename_and_fobj(filename_or_fobj)

     json_obj = json.load(fobj, encoding=encoding)
+    # TODO: may use import_from_dicts here
     field_names = list(json_obj[0].keys())
     table_rows = [[item[key] for key in field_names] for item in json_obj]

@@ -47,6 +48,9 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
     return create_table([field_names] + table_rows, meta=meta, *args, **kwargs)


+import_from_json.is_lazy = False
+
+
 def _convert(value, field_type, *args, **kwargs):
     if value is None or field_type in (
             fields.BinaryField,
@@ -77,6 +81,8 @@ def export_to_json(table, filename_or_fobj=None, encoding='utf-8', indent=None,
     fields = table.fields
     prepared_table = prepare_to_export(table, *args, **kwargs)
     field_names = next(prepared_table)
+
+    # TODO: could be lazy so we don't need to store the whole table into memory
     data = [{field_name: _convert(value, fields[field_name], *args, **kwargs)
              for field_name, value in zip(field_names, row)}
             for row in prepared_table]
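The new "may use import_from_dicts here" comment hints that `import_from_json` could delegate to the dicts plugin once the JSON document is parsed, instead of re-deriving field names by hand. A rough sketch of that refactoring (hypothetical, not part of this commit; it assumes `get_filename_and_fobj` can be imported from rows.plugins.utils, and it leaves metadata handling out):

import json

from rows.plugins.dicts import import_from_dicts
from rows.plugins.utils import get_filename_and_fobj

# Hypothetical variant of import_from_json that builds the table via the
# dicts plugin instead of extracting rows by hand.
def import_from_json_via_dicts(filename_or_fobj, encoding='utf-8',
                               *args, **kwargs):
    _, fobj = get_filename_and_fobj(filename_or_fobj)
    json_obj = json.load(fobj, encoding=encoding)  # a list of dicts
    return import_from_dicts(json_obj, *args, **kwargs)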

rows/plugins/plugin_parquet.py

Lines changed: 5 additions & 1 deletion

@@ -56,8 +56,12 @@ def import_from_parquet(filename_or_fobj, *args, **kwargs):
                       for schema in parquet._read_footer(fobj).schema
                       if schema.type is not None])
     header = list(types.keys())
-    table_rows = list(parquet.reader(fobj))  # TODO: be lazy
+    # TODO: make it lazy
+    table_rows = list(parquet.reader(fobj))

     meta = {'imported_from': 'parquet', 'filename': filename,}
     return create_table([header] + table_rows, meta=meta, force_types=types,
                         *args, **kwargs)
+
+
+import_from_parquet.is_lazy = False

rows/plugins/sqlite.py

Lines changed: 3 additions & 0 deletions

@@ -128,6 +128,9 @@ def import_from_sqlite(filename_or_connection, table_name='table1', query=None,
     return create_table(data, meta=meta, *args, **kwargs)


+import_from_sqlite.is_lazy = True
+
+
 def export_to_sqlite(table, filename_or_connection, table_name=None,
                      table_name_format='table{index}', batch_size=100,
                      *args, **kwargs):
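`import_from_sqlite` can be marked lazy because a database cursor only fetches result rows as they are consumed. A standalone illustration with the standard library ('people.sqlite' is a placeholder; 'table1' mirrors the plugin's default table name):

import sqlite3

# A cursor is an iterator over result rows, so rows are fetched on demand,
# which is the property that makes a lazy SQLite importer possible.
connection = sqlite3.connect('people.sqlite')
cursor = connection.execute('SELECT * FROM table1')
field_names = [item[0] for item in cursor.description]
first_row = cursor.fetchone()  # remaining rows are not loaded yet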

rows/plugins/txt.py

Lines changed: 7 additions & 0 deletions

@@ -34,18 +34,24 @@ def import_from_txt(filename_or_fobj, encoding='utf-8', *args, **kwargs):
     filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
     contents = fobj.read().decode(encoding).strip().splitlines()

+    # TODO: make it lazy
+
     # remove '+----+----+' lines
     contents = contents[1:-1]
     del contents[1]

     table_rows = [[value.strip() for value in row.split(PIPE)[1:-1]]
                   for row in contents]
+
     meta = {'imported_from': 'txt',
             'filename': filename,
             'encoding': encoding,}
     return create_table(table_rows, meta=meta, *args, **kwargs)


+import_from_txt.is_lazy = False
+
+
 def export_to_txt(table, filename_or_fobj=None, encoding=None,
                   *args, **kwargs):
     '''Export a `rows.Table` to text
@@ -70,6 +76,7 @@ def export_to_txt(table, filename_or_fobj=None, encoding=None,
     split_line = PLUS + PLUS.join(dashes) + PLUS

     result = [split_line, header, split_line]
+    # TODO: make it lazy
     for row in table_rows:
         values = [value.rjust(max_sizes[field_name])
                   for field_name, value in zip(field_names, row)]

rows/plugins/utils.py

Lines changed: 7 additions & 0 deletions

@@ -141,7 +141,10 @@ def func(rows_data):
 def create_table(data, meta=None, fields=None, skip_header=True,
                  import_fields=None, samples=None, force_types=None,
                  lazy=False, *args, **kwargs):
+    # TODO: change samples to be a fixed number
+    # TODO: may change samples logic (`float('inf')` or `all`)
     # TODO: add auto_detect_types=True parameter
+
     table_rows = iter(data)
     sample_rows = []

@@ -163,6 +166,9 @@ def create_table(data, meta=None, fields=None, skip_header=True,
         if not isinstance(fields, OrderedDict):
             raise ValueError('`fields` must be an `OrderedDict`')

+        # TODO: if `fields` is set, we're going to have the wrong order,
+        #       compared to the first row (header).
+
         if skip_header:
             _ = next(table_rows)

@@ -187,6 +193,7 @@ def create_table(data, meta=None, fields=None, skip_header=True,

     if not lazy:
         table = Table(fields=fields, meta=meta)
+
         # TODO: put this inside Table.__init__
         for row in chain(sample_rows, table_rows):
             table.append({field_name: value
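`create_table` is the function every plugin funnels into: the first row of `data` provides the field names, a sample of the following rows drives type detection, and with the default `lazy=False` every remaining row is appended to a `Table`. A minimal usage sketch with made-up data, assuming the import path shown in this hunk:

from rows.plugins.utils import create_table

# Eager path (lazy=False): header row first, then data rows; field types are
# detected from the sampled values.
data = [
    ['name', 'birth_year'],
    ['Alice', '1987'],
    ['Bob', '1990'],
]
table = create_table(data)
print(table.field_names)  # ['name', 'birth_year']
print(table.fields)       # OrderedDict mapping names to detected field types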

rows/plugins/xls.py

Lines changed: 3 additions & 0 deletions

@@ -156,6 +156,9 @@ def import_from_xls(filename_or_fobj, sheet_name=None, sheet_index=0,
     return create_table(table_rows, meta=meta, *args, **kwargs)


+import_from_xls.is_lazy = False
+
+
 def export_to_xls(table, filename_or_fobj=None, sheet_name='Sheet1', *args,
                   **kwargs):

