Skip to content

Commit 832c9c5

Browse files
Merge pull request #70 from Blockchain-Technology-Lab/rename_flag
Rename clustering flag
2 parents 9ca4a23 + ed5a0bb commit 832c9c5

File tree

7 files changed

+49
-48
lines changed

7 files changed

+49
-48
lines changed

config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ execution_flags:
2828

2929
# Analyze flags
3030
analyze_flags:
31-
no_clustering: false
31+
clustering: true
3232
top_limit_type: "absolute" # one of two types: "absolute" or "percentage"; if absolute then value should be integer; if percentage then value should be float in [0, 1]
3333
top_limit_value: 0
3434
exclude_contract_addresses: false
@@ -61,7 +61,7 @@ plot_parameters:
6161
# if true, then all possible combinations of all params are plotted
6262
# if false, then starting from a baseline where clustering is true, the other bools are false and top limits are 0, each param is plotted sequentially while keeping the rest at their defaults
6363
combine_params: false
64-
no_clustering:
64+
clustering:
6565
- true
6666
- false
6767
top_limit_absolute:

docs/setup.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ page](https://blockchain-technology-lab.github.io/tokenomics-decentralization/co
4949

5050
`analyze_flags` defines various analysis-related flags:
5151

52-
* `no_clustering`: a boolean that disables clustering of addresses (under the
53-
same entity, as defined in the mapping information)
52+
* `clustering`: a boolean that determines whether addresses will be clustered into entities
53+
(as defined in the mapping information). If set to False, no clustering takes
54+
place and the addresses are treated as distinct entities.
5455
* `top_limit_type`: a string of two values (`absolute` or `percentage`) that
5556
enables applying a threshold on the addresses that will be considered
5657
* `top_limit_value`: the value of the top limit that should be applied; if 0,

plot.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def plot():
6666
# Filter rows with boolean flag params defined in config.
6767
# If no value is set for a flag, False is used by default
6868
# If the param consists of more than 2 and/or non-boolean entries, a ValueError is raised
69-
for flag in ['no_clustering', 'exclude_contract_addresses']:
69+
for flag in ['clustering', 'exclude_contract_addresses']:
7070
if plot_line_params[flag] is None:
7171
plot_line_params[flag] = [False]
7272
if len(plot_line_params[flag]) == 1:
@@ -79,22 +79,22 @@ def plot():
7979
# Plot each param in a line sequentially (keeping the other params at the default), instead of plotting the param combinations
8080
if plot_line_params['combine_params'] is False:
8181
dataframes = []
82-
for flag_value in plot_line_params['no_clustering']:
82+
for flag_value in plot_line_params['clustering']:
8383
dataframes.append(output_df[
84-
(output_df['no_clustering'] == flag_value) &
84+
(output_df['clustering'] == flag_value) &
8585
(output_df['exclude_contract_addresses'] == False) & # noqa
8686
(output_df['top_limit_value'] == 0)
8787
])
8888
for flag_value in plot_line_params['exclude_contract_addresses']:
8989
dataframes.append(output_df[
90-
(output_df['no_clustering'] == False) & # noqa
90+
(output_df['clustering'] == True) & # noqa
9191
(output_df['exclude_contract_addresses'] == flag_value) &
9292
(output_df['top_limit_value'] == 0)
9393
])
9494
for limit_type in top_limits.keys():
9595
for limit_val in top_limits[limit_type]:
9696
dataframes.append(output_df[
97-
(output_df['no_clustering'] == False) & # noqa
97+
(output_df['clustering'] == True) & # noqa
9898
(output_df['exclude_contract_addresses'] == False) & # noqa
9999
(output_df['top_limit_type'] == limit_type) &
100100
(output_df['top_limit_value'] == limit_val)
@@ -108,7 +108,7 @@ def plot():
108108
# This column will be used as the plot's legend
109109
for i, row in output_df.iterrows():
110110
output_df.at[i, 'ledger'] = tickers[row['ledger']]
111-
if row['no_clustering']:
111+
if not row['clustering']:
112112
output_df.at[i, 'ledger'] += '_nocluster'
113113
if row['exclude_contract_addresses']:
114114
output_df.at[i, 'ledger'] += '_nocontracts'
@@ -118,9 +118,9 @@ def plot():
118118
limit_val = int(limit_val)
119119
output_df.at[i, 'ledger'] += f'_top_{limit_val}'
120120

121-
output_df['snapshot date'] = pd.to_datetime(output_df['snapshot date'])
121+
output_df['snapshot_date'] = pd.to_datetime(output_df['snapshot_date'])
122122

123-
output_df = output_df.drop_duplicates(subset=['ledger', 'snapshot date'])
123+
output_df = output_df.drop_duplicates(subset=['ledger', 'snapshot_date'])
124124

125125
params = {'legend.fontsize': 14,
126126
'figure.titlesize': 40,
@@ -145,7 +145,7 @@ def plot():
145145

146146
metric_cols = output_df.columns[6:]
147147
for metric in metric_cols:
148-
df_pivot = output_df.pivot(index='snapshot date', columns='ledger', values=metric)
148+
df_pivot = output_df.pivot(index='snapshot_date', columns='ledger', values=metric)
149149
df_pivot.plot(figsize=(25, 13), grid=True, xlabel='Date', ylabel=metric, lw=2)
150150
plt.title(metric.upper(), fontsize=30)
151151
plt.gca().legend().set_title('')

tests/test_analyze.py

+21-21
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@ def test_get_output_row(mocker):
77
get_metrics_mock = mocker.patch('tokenomics_decentralization.helper.get_metrics')
88
get_metrics_mock.return_value = ['hhi', 'gini']
99

10-
get_no_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_no_clustering_flag')
10+
get_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_clustering_flag')
1111
get_exclude_contracts_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_contracts_flag')
1212
get_exclude_below_fees_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_fees_flag')
1313
get_exclude_below_usd_cent_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_usd_cent_flag')
1414
get_top_limit_type_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_type')
1515
get_top_limit_value_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_value')
1616

17-
get_no_clustering_mock.return_value = False
17+
get_clustering_mock.return_value = True
1818
get_exclude_contracts_mock.return_value = False
1919
get_exclude_below_fees_mock.return_value = False
2020
get_exclude_below_usd_cent_mock.return_value = False
@@ -23,33 +23,33 @@ def test_get_output_row(mocker):
2323

2424
metrics = {'hhi': 1, 'gini': 0}
2525
csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
26-
assert csv_row == ['bitcoin', '2010-01-01', False, False, 'absolute', 0, False, False, 1, 0]
26+
assert csv_row == ['bitcoin', '2010-01-01', True, False, 'absolute', 0, False, False, 1, 0]
2727

28-
get_no_clustering_mock.return_value = True
28+
get_clustering_mock.return_value = False
2929
metrics = {'non-clustered hhi': 1, 'non-clustered gini': 0}
3030
csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
31-
assert csv_row == ['bitcoin', '2010-01-01', True, False, 'absolute', 0, False, False, 1, 0]
31+
assert csv_row == ['bitcoin', '2010-01-01', False, False, 'absolute', 0, False, False, 1, 0]
3232

3333
get_exclude_contracts_mock.return_value = True
3434
metrics = {'exclude_contracts non-clustered hhi': 1, 'exclude_contracts non-clustered gini': 0}
3535
csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
36-
assert csv_row == ['bitcoin', '2010-01-01', True, True, 'absolute', 0, False, False, 1, 0]
36+
assert csv_row == ['bitcoin', '2010-01-01', False, True, 'absolute', 0, False, False, 1, 0]
3737

3838
get_top_limit_value_mock.return_value = 1
3939
metrics = {'top-1_absolute exclude_contracts non-clustered hhi': 1, 'top-1_absolute exclude_contracts non-clustered gini': 0}
4040
csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
41-
assert csv_row == ['bitcoin', '2010-01-01', True, True, 'absolute', 1, False, False, 1, 0]
41+
assert csv_row == ['bitcoin', '2010-01-01', False, True, 'absolute', 1, False, False, 1, 0]
4242

4343
get_exclude_below_fees_mock.return_value = True
4444
get_top_limit_value_mock.return_value = 1
4545
metrics = {'top-1_absolute exclude_below_fees exclude_contracts non-clustered hhi': 1, 'top-1_absolute exclude_below_fees exclude_contracts non-clustered gini': 0}
4646
csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
47-
assert csv_row == ['bitcoin', '2010-01-01', True, True, 'absolute', 1, True, False, 1, 0]
47+
assert csv_row == ['bitcoin', '2010-01-01', False, True, 'absolute', 1, True, False, 1, 0]
4848

4949

5050
def test_analyze_snapshot(mocker):
5151
get_force_analyze_mock = mocker.patch('tokenomics_decentralization.helper.get_force_analyze_flag')
52-
get_no_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_no_clustering_flag')
52+
get_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_clustering_flag')
5353
get_exclude_contracts_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_contracts_flag')
5454
get_exclude_below_fees_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_fees_flag')
5555
get_exclude_below_usd_cent_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_usd_cent_flag')
@@ -68,7 +68,7 @@ def test_analyze_snapshot(mocker):
6868
compute_tau_mock = mocker.patch('tokenomics_decentralization.analyze.compute_tau')
6969

7070
get_force_analyze_mock.return_value = False
71-
get_no_clustering_mock.return_value = False
71+
get_clustering_mock.return_value = True
7272
get_exclude_contracts_mock.return_value = False
7373
get_exclude_below_fees_mock.return_value = False
7474
get_exclude_below_usd_cent_mock.return_value = False
@@ -85,7 +85,7 @@ def test_analyze_snapshot(mocker):
8585
output = analyze_snapshot(None, 'bitcoin', '2010-01-01')
8686
assert output == {'hhi': 1}
8787

88-
get_no_clustering_mock.return_value = True
88+
get_clustering_mock.return_value = False
8989
get_exclude_contracts_mock.return_value = True
9090
get_exclude_below_fees_mock.return_value = True
9191
get_top_limit_type_mock.return_value = 'absolute'
@@ -106,7 +106,7 @@ def test_analyze_snapshot(mocker):
106106
output = analyze_snapshot(None, 'bitcoin', '2010-01-01')
107107
assert output == {'top-1_absolute exclude_below_fees exclude_contracts non-clustered hhi': 2}
108108

109-
get_no_clustering_mock.return_value = False
109+
get_clustering_mock.return_value = True
110110

111111
compute_hhi_mock.return_value = 3
112112
output = analyze_snapshot(None, 'bitcoin', '2010-01-01')
@@ -140,36 +140,36 @@ def test_write_csv_output(mocker):
140140
get_output_directories_mock = mocker.patch('tokenomics_decentralization.helper.get_output_directories')
141141
get_output_directories_mock.return_value = [pathlib.Path(__file__).resolve().parent]
142142

143-
get_no_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_no_clustering_flag')
143+
get_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_clustering_flag')
144144
get_exclude_contracts_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_contracts_flag')
145145
get_exclude_below_fees_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_fees_flag')
146146
get_exclude_below_usd_cent_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_usd_cent_flag')
147147
get_top_limit_type_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_type')
148148
get_top_limit_value_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_value')
149149

150-
get_no_clustering_mock.return_value = False
150+
get_clustering_mock.return_value = True
151151
get_exclude_contracts_mock.return_value = False
152152
get_exclude_below_fees_mock.return_value = False
153153
get_exclude_below_usd_cent_mock.return_value = False
154154
get_top_limit_type_mock.return_value = 'absolute'
155155
get_top_limit_value_mock.return_value = 0
156156

157157
write_csv_output([
158-
['bitcoin', '2010-01-01', False, False, 'absolute', 0, False, False, 100],
159-
['ethereum', '2010-01-01', False, False, 'absolute', 0, False, False, 200],
158+
['bitcoin', '2010-01-01', True, False, 'absolute', 0, False, False, 100],
159+
['ethereum', '2010-01-01', True, False, 'absolute', 0, False, False, 200],
160160
])
161161
with open(pathlib.Path(__file__).resolve().parent / 'output.csv') as f:
162162
lines = f.readlines()
163-
assert lines[0] == ','.join(['ledger', 'snapshot date', 'no_clustering', 'exclude_contract_addresses',
163+
assert lines[0] == ','.join(['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses',
164164
'top_limit_type', 'top_limit_value', 'exclude_below_fees',
165165
'exclude_below_usd_cent', 'hhi']) + '\n'
166-
assert lines[1] == ','.join(['bitcoin', '2010-01-01', 'False', 'False', 'absolute', '0', 'False', 'False',
166+
assert lines[1] == ','.join(['bitcoin', '2010-01-01', 'True', 'False', 'absolute', '0', 'False', 'False',
167167
'100']) + '\n'
168-
assert lines[2] == ','.join(['ethereum', '2010-01-01', 'False', 'False', 'absolute', '0', 'False', 'False',
168+
assert lines[2] == ','.join(['ethereum', '2010-01-01', 'True', 'False', 'absolute', '0', 'False', 'False',
169169
'200']) + '\n'
170170
os.remove(pathlib.Path(__file__).resolve().parent / 'output.csv')
171171

172-
get_no_clustering_mock.return_value = True
172+
get_clustering_mock.return_value = False
173173
get_exclude_contracts_mock.return_value = True
174174
get_exclude_below_fees_mock.return_value = True
175175
get_top_limit_type_mock.return_value = 'absolute'
@@ -181,7 +181,7 @@ def test_write_csv_output(mocker):
181181
])
182182
with open(pathlib.Path(__file__).resolve().parent / 'output-no_clustering-exclude_contract_addresses-absolute_10-exclude_below_fees.csv') as f:
183183
lines = f.readlines()
184-
assert lines[0] == ','.join(['ledger', 'snapshot date', 'no_clustering', 'exclude_contract_addresses',
184+
assert lines[0] == ','.join(['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses',
185185
'top_limit_type', 'top_limit_value', 'exclude_below_fees',
186186
'exclude_below_usd_cent', 'hhi']) + '\n'
187187
assert lines[1] == ','.join(['bitcoin', '2010-01-01', 'False', 'False', 'absolute', '0', 'False', 'False',

tests/test_helper.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def test_config_flags(mocker):
135135
hlp.get_force_map_addresses_flag,
136136
hlp.get_force_map_balances_flag,
137137
hlp.get_force_analyze_flag,
138-
hlp.get_no_clustering_flag,
138+
hlp.get_clustering_flag,
139139
hlp.get_exclude_contracts_flag,
140140
hlp.get_exclude_below_fees_flag,
141141
]

tokenomics_decentralization/analyze.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
def analyze_snapshot(conn, ledger, snapshot):
1414
force_analyze = hlp.get_force_analyze_flag()
15-
no_clustering = hlp.get_no_clustering_flag()
15+
clustering = hlp.get_clustering_flag()
1616
top_limit_type = hlp.get_top_limit_type()
1717
top_limit_value = hlp.get_top_limit_value()
1818
exclude_contract_addresses_flag = hlp.get_exclude_contracts_flag()
@@ -46,7 +46,7 @@ def analyze_snapshot(conn, ledger, snapshot):
4646
metrics_results = {}
4747
for default_metric_name in metric_names:
4848
flagged_metric = default_metric_name
49-
if no_clustering:
49+
if not clustering:
5050
flagged_metric = 'non-clustered ' + flagged_metric
5151
if exclude_contract_addresses_flag:
5252
flagged_metric = 'exclude_contracts ' + flagged_metric
@@ -62,7 +62,7 @@ def analyze_snapshot(conn, ledger, snapshot):
6262
metric_value = val[0]
6363
else:
6464
if not entries:
65-
if no_clustering:
65+
if not clustering:
6666
entries = db_hlp.get_non_clustered_balance_entries(conn, snapshot, ledger, balance_threshold=balance_threshold)
6767
else:
6868
entries = db_hlp.get_balance_entries(conn, snapshot, ledger, balance_threshold=balance_threshold)
@@ -97,19 +97,19 @@ def analyze_snapshot(conn, ledger, snapshot):
9797

9898

9999
def get_output_row(ledger, date, metrics):
100-
no_clustering = hlp.get_no_clustering_flag()
100+
clustering = hlp.get_clustering_flag()
101101
exclude_contract_addresses_flag = hlp.get_exclude_contracts_flag()
102102
exclude_below_fees_flag = hlp.get_exclude_below_fees_flag()
103103
exclude_below_usd_cent_flag = hlp.get_exclude_below_usd_cent_flag()
104104
top_limit_type = hlp.get_top_limit_type()
105105
top_limit_value = hlp.get_top_limit_value()
106106

107-
csv_row = [ledger, date, no_clustering, exclude_contract_addresses_flag, top_limit_type, top_limit_value,
107+
csv_row = [ledger, date, clustering, exclude_contract_addresses_flag, top_limit_type, top_limit_value,
108108
exclude_below_fees_flag, exclude_below_usd_cent_flag]
109109

110110
for metric_name in hlp.get_metrics():
111111
val = metric_name
112-
if no_clustering:
112+
if not clustering:
113113
val = 'non-clustered ' + val
114114
if exclude_contract_addresses_flag:
115115
val = 'exclude_contracts ' + val
@@ -124,18 +124,18 @@ def get_output_row(ledger, date, metrics):
124124

125125

126126
def write_csv_output(output_rows):
127-
header = ['ledger', 'snapshot_date', 'no_clustering', 'exclude_contract_addresses', 'top_limit_type',
127+
header = ['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses', 'top_limit_type',
128128
'top_limit_value', 'exclude_below_fees', 'exclude_below_usd_cent']
129129
header += hlp.get_metrics()
130130

131-
no_clustering = hlp.get_no_clustering_flag()
131+
clustering = hlp.get_clustering_flag()
132132
exclude_contract_addresses_flag = hlp.get_exclude_contracts_flag()
133133
top_limit_type = hlp.get_top_limit_type()
134134
top_limit_value = hlp.get_top_limit_value()
135135
exclude_below_fees_flag = hlp.get_exclude_below_fees_flag()
136136
exclude_below_usd_cent_flag = hlp.get_exclude_below_usd_cent_flag()
137137
output_filename = 'output'
138-
if no_clustering:
138+
if not clustering:
139139
output_filename += '-no_clustering'
140140
if exclude_contract_addresses_flag:
141141
output_filename += '-exclude_contract_addresses'

tokenomics_decentralization/helper.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -199,17 +199,17 @@ def get_force_analyze_flag():
199199
raise ValueError('Flag "force_analyze" not in config file')
200200

201201

202-
def get_no_clustering_flag():
202+
def get_clustering_flag():
203203
"""
204-
Gets the flag that determines whether to forcefully recreate metrics
204+
Gets the flag that determines whether to cluster addresses into entities
205205
:returns: boolean
206206
:raises ValueError: if the flag is not set in the config file
207207
"""
208208
config = get_config_data()
209209
try:
210-
return config['analyze_flags']['no_clustering']
210+
return config['analyze_flags']['clustering']
211211
except KeyError:
212-
raise ValueError('Flag "no_clustering" not in config file')
212+
raise ValueError('Flag "clustering" not in config file')
213213

214214

215215
def get_metrics():

0 commit comments

Comments
 (0)