Skip to content

Commit 38fab52

Browse files
Merge pull request #77 from Blockchain-Technology-Lab/output_directory
Restructure output directory
2 parents 0ddc4e0 + a50edda commit 38fab52

File tree

3 files changed

+78
-84
lines changed

3 files changed

+78
-84
lines changed

tests/test_helper.py

+29-47
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,9 @@ def test_input_directories():
6868
assert len(input_dirs) > 0
6969

7070

71-
def test_output_directories():
72-
output_dirs = hlp.get_output_directories()
73-
assert isinstance(output_dirs, list)
74-
assert len(output_dirs) > 0
71+
def test_output_directory():
72+
output_dir = hlp.get_output_directory()
73+
assert isinstance(output_dir, pathlib.Path)
7574

7675

7776
def test_tau_thresholds():
@@ -301,13 +300,6 @@ def test_get_circulation_from_entries():
301300
assert circulation == 21
302301

303302

304-
def test_get_output_files(mocker):
305-
get_config_mock = mocker.patch("tokenomics_decentralization.helper.get_output_directories")
306-
get_config_mock.return_value = [pathlib.Path(__file__).resolve().parent]
307-
output_files = hlp.get_output_files()
308-
assert isinstance(output_files, list)
309-
310-
311303
def test_get_special_addresses():
312304
ethereum_special_addresses = hlp.get_special_addresses('ethereum')
313305
assert isinstance(ethereum_special_addresses, list)
@@ -395,63 +387,53 @@ def test_get_output_row(mocker):
395387
assert csv_row == ['bitcoin', '2010-01-01', False, True, 'absolute', 1, False, True, 1, 0]
396388

397389

398-
def test_write_csv_output(mocker):
399-
get_metrics_mock = mocker.patch('tokenomics_decentralization.helper.get_metrics')
400-
get_metrics_mock.return_value = ['hhi']
401-
402-
get_output_directories_mock = mocker.patch('tokenomics_decentralization.helper.get_output_directories')
403-
get_output_directories_mock.return_value = [pathlib.Path(__file__).resolve().parent]
404-
405-
get_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_clustering_flag')
390+
def test_get_output_filename(mocker):
391+
get_output_directory_mock = mocker.patch('tokenomics_decentralization.helper.get_output_directory')
392+
get_output_directory_mock.return_value = pathlib.Path(__file__).resolve().parent
406393
get_exclude_contracts_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_contracts_flag')
407-
get_exclude_below_fees_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_fees_flag')
408-
get_exclude_below_usd_cent_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_usd_cent_flag')
409-
get_top_limit_type_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_type')
410-
get_top_limit_value_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_value')
411-
412-
get_clustering_mock.return_value = True
413394
get_exclude_contracts_mock.return_value = False
395+
get_exclude_below_fees_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_fees_flag')
414396
get_exclude_below_fees_mock.return_value = False
397+
get_exclude_below_usd_cent_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_usd_cent_flag')
415398
get_exclude_below_usd_cent_mock.return_value = False
399+
get_top_limit_type_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_type')
416400
get_top_limit_type_mock.return_value = 'absolute'
401+
get_top_limit_value_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_value')
417402
get_top_limit_value_mock.return_value = 0
418403

419-
hlp.write_csv_output([
420-
['bitcoin', '2010-01-01', True, False, 'absolute', 0, False, False, 100],
421-
['ethereum', '2010-01-01', True, False, 'absolute', 0, False, False, 200],
422-
])
423-
with open(pathlib.Path(__file__).resolve().parent / 'output.csv') as f:
424-
lines = f.readlines()
425-
assert lines[0] == ','.join(['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses',
426-
'top_limit_type', 'top_limit_value', 'exclude_below_fees',
427-
'exclude_below_usd_cent', 'hhi']) + '\n'
428-
assert lines[1] == ','.join(['bitcoin', '2010-01-01', 'True', 'False', 'absolute', '0', 'False', 'False',
429-
'100']) + '\n'
430-
assert lines[2] == ','.join(['ethereum', '2010-01-01', 'True', 'False', 'absolute', '0', 'False', 'False',
431-
'200']) + '\n'
432-
os.remove(pathlib.Path(__file__).resolve().parent / 'output.csv')
404+
output_filename = hlp.get_output_filename()
405+
assert output_filename == pathlib.Path(__file__).resolve().parent / 'output.csv'
433406

434-
get_clustering_mock.return_value = False
435407
get_exclude_contracts_mock.return_value = True
436408
get_exclude_below_fees_mock.return_value = True
437409
get_exclude_below_usd_cent_mock.return_value = True
438-
get_top_limit_type_mock.return_value = 'absolute'
439410
get_top_limit_value_mock.return_value = 10
440411

412+
output_filename = hlp.get_output_filename()
413+
assert output_filename == pathlib.Path(__file__).resolve().parent / 'output-exclude_contract_addresses-absolute_10-exclude_below_fees-exclude_below_usd_cent.csv'
414+
415+
416+
def test_write_csv_output(mocker):
417+
get_metrics_mock = mocker.patch('tokenomics_decentralization.helper.get_metrics')
418+
get_metrics_mock.return_value = ['hhi']
419+
420+
get_output_filename_mock = mocker.patch('tokenomics_decentralization.helper.get_output_filename')
421+
get_output_filename_mock.return_value = pathlib.Path(__file__).resolve().parent / 'output.csv'
422+
441423
hlp.write_csv_output([
442-
['bitcoin', '2010-01-01', False, False, 'absolute', 0, False, False, 100],
443-
['ethereum', '2010-01-01', False, False, 'absolute', 0, False, False, 200],
424+
['bitcoin', '2010-01-01', True, False, 'absolute', 0, False, False, 100],
425+
['ethereum', '2010-01-01', True, False, 'absolute', 0, False, False, 200],
444426
])
445-
with open(pathlib.Path(__file__).resolve().parent / 'output-no_clustering-exclude_contract_addresses-absolute_10-exclude_below_fees-exclude_below_usd_cent.csv') as f:
427+
with open(pathlib.Path(__file__).resolve().parent / 'output.csv') as f:
446428
lines = f.readlines()
447429
assert lines[0] == ','.join(['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses',
448430
'top_limit_type', 'top_limit_value', 'exclude_below_fees',
449431
'exclude_below_usd_cent', 'hhi']) + '\n'
450-
assert lines[1] == ','.join(['bitcoin', '2010-01-01', 'False', 'False', 'absolute', '0', 'False', 'False',
432+
assert lines[1] == ','.join(['bitcoin', '2010-01-01', 'True', 'False', 'absolute', '0', 'False', 'False',
451433
'100']) + '\n'
452-
assert lines[2] == ','.join(['ethereum', '2010-01-01', 'False', 'False', 'absolute', '0', 'False', 'False',
434+
assert lines[2] == ','.join(['ethereum', '2010-01-01', 'True', 'False', 'absolute', '0', 'False', 'False',
453435
'200']) + '\n'
454-
os.remove(pathlib.Path(__file__).resolve().parent / 'output-no_clustering-exclude_contract_addresses-absolute_10-exclude_below_fees-exclude_below_usd_cent.csv')
436+
os.remove(pathlib.Path(__file__).resolve().parent / 'output.csv')
455437

456438

457439
def test_get_active_source_keywords(mocker):

tokenomics_decentralization/analyze.py

+29-14
Original file line numberDiff line numberDiff line change
@@ -122,20 +122,35 @@ def analyze_ledger_snapshot(ledger, date, output_rows, sema):
122122
:param output_rows: a list of strings in the form of csv output rows
123123
:param sema: a multiprocessing semaphore
124124
"""
125-
input_filename = None
126-
input_paths = [input_dir / f'{ledger}_{date}_raw_data.csv' for input_dir in hlp.get_input_directories()]
127-
for filename in input_paths:
128-
if os.path.isfile(filename):
129-
input_filename = filename
130-
break
131-
if input_filename:
132-
logging.info(f'[*] {ledger} - {date}')
133-
134-
entries = get_entries(ledger, date, filename)
135-
metrics_values = analyze_snapshot(entries)
136-
del entries
137-
138-
row = hlp.get_output_row(ledger, date, metrics_values)
125+
row = None
126+
127+
try:
128+
with open(hlp.get_output_filename()) as f:
129+
csv_reader = csv.reader(f)
130+
for line in csv_reader:
131+
if line[0] == ledger and line[1] == date:
132+
row = line
133+
break
134+
except FileNotFoundError:
135+
pass
136+
137+
if not row:
138+
input_filename = None
139+
input_paths = [input_dir / f'{ledger}_{date}_raw_data.csv' for input_dir in hlp.get_input_directories()]
140+
for filename in input_paths:
141+
if os.path.isfile(filename):
142+
input_filename = filename
143+
break
144+
if input_filename:
145+
logging.info(f'[*] {ledger} - {date}')
146+
147+
entries = get_entries(ledger, date, filename)
148+
metrics_values = analyze_snapshot(entries)
149+
del entries
150+
151+
row = hlp.get_output_row(ledger, date, metrics_values)
152+
153+
if row:
139154
output_rows.append(row)
140155

141156
sema.release() # Release the semaphore s.t. the loop in analyze() can continue

tokenomics_decentralization/helper.py

+20-23
Original file line numberDiff line numberDiff line change
@@ -141,13 +141,16 @@ def increment_date(date, by):
141141
raise ValueError(f'Invalid granularity: {by}')
142142

143143

144-
def get_output_directories():
144+
def get_output_directory():
145145
"""
146146
Reads the config file and retrieves the output directory
147147
:returns: a pathlib path of the directory where output files are written (the configured base output directory joined with a subdirectory named after the active clustering sources, or 'No clustering')
148148
"""
149149
config = get_config_data()
150-
return [pathlib.Path(db_dir).resolve() for db_dir in config['output_directories']]
150+
sources = ' - '.join(get_active_source_keywords())
151+
if not sources:
152+
sources = 'No clustering'
153+
return [pathlib.Path(db_dir).resolve() for db_dir in config['output_directories']][0] / sources
151154

152155

153156
def get_input_directories():
@@ -348,15 +351,6 @@ def get_plot_config_data():
348351
return get_config_data()['plot_parameters']
349352

350353

351-
def get_output_files():
352-
"""
353-
Retrieves all output files produced by some run
354-
:returns: a list of filenames
355-
"""
356-
output_dir = str(get_output_directories()[0])
357-
return [filename for filename in os.listdir(output_dir) if filename.startswith('output') and filename.endswith('.csv')]
358-
359-
360354
def get_special_addresses(ledger):
361355
"""
362356
Retrieves the ledger's special addresses that should be excluded from the analysis
@@ -485,24 +479,17 @@ def get_output_row(ledger, date, metrics):
485479
return csv_row
486480

487481

488-
def write_csv_output(output_rows):
482+
def get_output_filename():
489483
"""
490-
Produces the output csv file for the given data.
491-
:param output_rows: a list of lists, where each list corresponds to a line in the output csv file
484+
Produces the name (full path) of the output file.
485+
:returns output_filename: a pathlib path of the output file
492486
"""
493-
header = ['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses', 'top_limit_type',
494-
'top_limit_value', 'exclude_below_fees', 'exclude_below_usd_cent']
495-
header += get_metrics()
496-
497-
clustering = get_clustering_flag()
498487
exclude_contract_addresses_flag = get_exclude_contracts_flag()
499488
top_limit_type = get_top_limit_type()
500489
top_limit_value = get_top_limit_value()
501490
exclude_below_fees_flag = get_exclude_below_fees_flag()
502491
exclude_below_usd_cent_flag = get_exclude_below_usd_cent_flag()
503492
output_filename = 'output'
504-
if not clustering:
505-
output_filename += '-no_clustering'
506493
if exclude_contract_addresses_flag:
507494
output_filename += '-exclude_contract_addresses'
508495
if top_limit_value:
@@ -512,9 +499,19 @@ def write_csv_output(output_rows):
512499
if exclude_below_usd_cent_flag:
513500
output_filename += '-exclude_below_usd_cent'
514501
output_filename += '.csv'
502+
return get_output_directory() / output_filename
503+
504+
505+
def write_csv_output(output_rows):
506+
"""
507+
Produces the output csv file for the given data.
508+
:param output_rows: a list of lists, where each list corresponds to a line in the output csv file
509+
"""
510+
header = ['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses', 'top_limit_type',
511+
'top_limit_value', 'exclude_below_fees', 'exclude_below_usd_cent']
512+
header += get_metrics()
515513

516-
output_dir = get_output_directories()[0]
517-
with open(output_dir / output_filename, 'w') as f:
514+
with open(get_output_filename(), 'w') as f:
518515
csv_writer = csv.writer(f)
519516
csv_writer.writerow(header)
520517
csv_writer.writerows(output_rows)

0 commit comments

Comments
 (0)