Merge pull request #70 from Blockchain-Technology-Lab/rename_flag

dimkarakostas · web-flow · commit 832c9c541e79 · 2024-04-30T12:59:22.000+03:00
Rename clustering flag
diff --git a/config.yaml b/config.yaml
@@ -28,7 +28,7 @@ execution_flags:
 
 # Analyze flags
 analyze_flags:
-  no_clustering: false
+  clustering: true
   top_limit_type: "absolute"  # one of two types: "absolute" or "percentage"; if absolute then value should be integer; if percentage then value should be float in [0, 1]
   top_limit_value: 0
   exclude_contract_addresses: false
@@ -61,7 +61,7 @@ plot_parameters:
     # if true, then all possible combinations of all params are plotted
     # if false, then starting from a baseline where bools are false and top limits are 0, each other param is plotted sequencially while keeping the rest on the default
     combine_params: false  
-    no_clustering:
+    clustering:
       - true
       - false
     top_limit_absolute:
diff --git a/docs/setup.md b/docs/setup.md
@@ -49,8 +49,9 @@ page](https://blockchain-technology-lab.github.io/tokenomics-decentralization/co
 
 `analyze_flags` defines various analysis-related flags:
 
-* `no_clustering`: a boolean that disables clustering of addresses (under the
-  same entity, as defined in the mapping information)
+* `clustering`: a boolean that determines whether addresses are clustered into
+  entities (as defined in the mapping information). If set to `false`, no
+  clustering takes place and each address is treated as a distinct entity.
 * `top_limit_type`: a string of two values (`absolute` or `percentage`) that
   enables applying a threshold on the addresses that will be considered
 * `top_limit_value`: the value of the top limit that should be applied; if 0,
diff --git a/plot.py b/plot.py
@@ -66,7 +66,7 @@ def plot():
     # Filter rows with boolean flag params defined in config.
     # If no value is set for a flag, False is used by default
     # If the param consists of more than 2 and/or non-boolean entries, a ValueError is raised
-    for flag in ['no_clustering', 'exclude_contract_addresses']:
+    for flag in ['clustering', 'exclude_contract_addresses']:
         if plot_line_params[flag] is None:
             plot_line_params[flag] = [False]
         if len(plot_line_params[flag]) == 1:
@@ -79,22 +79,22 @@ def plot():
     # Plot each param in a line sequentially (keeping the other params at the default), instead of plotting the param combinations
     if plot_line_params['combine_params'] is False:
         dataframes = []
-        for flag_value in plot_line_params['no_clustering']:
+        for flag_value in plot_line_params['clustering']:
             dataframes.append(output_df[
-                (output_df['no_clustering'] == flag_value) &
+                (output_df['clustering'] == flag_value) &
                 (output_df['exclude_contract_addresses'] == False) &  # noqa
                 (output_df['top_limit_value'] == 0)
             ])
         for flag_value in plot_line_params['exclude_contract_addresses']:
             dataframes.append(output_df[
-                (output_df['no_clustering'] == False) &  # noqa
+                (output_df['clustering'] == True) &  # noqa
                 (output_df['exclude_contract_addresses'] == flag_value) &
                 (output_df['top_limit_value'] == 0)
             ])
         for limit_type in top_limits.keys():
             for limit_val in top_limits[limit_type]:
                 dataframes.append(output_df[
-                    (output_df['no_clustering'] == False) &  # noqa
+                    (output_df['clustering'] == True) &  # noqa
                     (output_df['exclude_contract_addresses'] == False) &  # noqa
                     (output_df['top_limit_type'] == limit_type) &
                     (output_df['top_limit_value'] == limit_val)
@@ -108,7 +108,7 @@ def plot():
     # This column will be used as the plot's legend
     for i, row in output_df.iterrows():
         output_df.at[i, 'ledger'] = tickers[row['ledger']]
-        if row['no_clustering']:
+        if not row['clustering']:
             output_df.at[i, 'ledger'] += '_nocluster'
         if row['exclude_contract_addresses']:
             output_df.at[i, 'ledger'] += '_nocontracts'
@@ -118,9 +118,9 @@ def plot():
                 limit_val = int(limit_val)
             output_df.at[i, 'ledger'] += f'_top_{limit_val}'
 
-    output_df['snapshot date'] = pd.to_datetime(output_df['snapshot date'])
+    output_df['snapshot_date'] = pd.to_datetime(output_df['snapshot_date'])
 
-    output_df = output_df.drop_duplicates(subset=['ledger', 'snapshot date'])
+    output_df = output_df.drop_duplicates(subset=['ledger', 'snapshot_date'])
 
     params = {'legend.fontsize': 14,
               'figure.titlesize': 40,
@@ -145,7 +145,7 @@ def plot():
 
     metric_cols = output_df.columns[6:]
     for metric in metric_cols:
-        df_pivot = output_df.pivot(index='snapshot date', columns='ledger', values=metric)
+        df_pivot = output_df.pivot(index='snapshot_date', columns='ledger', values=metric)
         df_pivot.plot(figsize=(25, 13), grid=True, xlabel='Date', ylabel=metric, lw=2)
         plt.title(metric.upper(), fontsize=30)
         plt.gca().legend().set_title('')
diff --git a/tests/test_analyze.py b/tests/test_analyze.py
@@ -7,14 +7,14 @@ def test_get_output_row(mocker):
     get_metrics_mock = mocker.patch('tokenomics_decentralization.helper.get_metrics')
     get_metrics_mock.return_value = ['hhi', 'gini']
 
-    get_no_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_no_clustering_flag')
+    get_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_clustering_flag')
     get_exclude_contracts_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_contracts_flag')
     get_exclude_below_fees_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_fees_flag')
     get_exclude_below_usd_cent_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_usd_cent_flag')
     get_top_limit_type_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_type')
     get_top_limit_value_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_value')
 
-    get_no_clustering_mock.return_value = False
+    get_clustering_mock.return_value = True
     get_exclude_contracts_mock.return_value = False
     get_exclude_below_fees_mock.return_value = False
     get_exclude_below_usd_cent_mock.return_value = False
@@ -23,33 +23,33 @@ def test_get_output_row(mocker):
 
     metrics = {'hhi': 1, 'gini': 0}
     csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
-    assert csv_row == ['bitcoin', '2010-01-01', False, False, 'absolute', 0, False, False, 1, 0]
+    assert csv_row == ['bitcoin', '2010-01-01', True, False, 'absolute', 0, False, False, 1, 0]
 
-    get_no_clustering_mock.return_value = True
+    get_clustering_mock.return_value = False
     metrics = {'non-clustered hhi': 1, 'non-clustered gini': 0}
     csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
-    assert csv_row == ['bitcoin', '2010-01-01', True, False, 'absolute', 0, False, False, 1, 0]
+    assert csv_row == ['bitcoin', '2010-01-01', False, False, 'absolute', 0, False, False, 1, 0]
 
     get_exclude_contracts_mock.return_value = True
     metrics = {'exclude_contracts non-clustered hhi': 1, 'exclude_contracts non-clustered gini': 0}
     csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
-    assert csv_row == ['bitcoin', '2010-01-01', True, True, 'absolute', 0, False, False, 1, 0]
+    assert csv_row == ['bitcoin', '2010-01-01', False, True, 'absolute', 0, False, False, 1, 0]
 
     get_top_limit_value_mock.return_value = 1
     metrics = {'top-1_absolute exclude_contracts non-clustered hhi': 1, 'top-1_absolute exclude_contracts non-clustered gini': 0}
     csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
-    assert csv_row == ['bitcoin', '2010-01-01', True, True, 'absolute', 1, False, False, 1, 0]
+    assert csv_row == ['bitcoin', '2010-01-01', False, True, 'absolute', 1, False, False, 1, 0]
 
     get_exclude_below_fees_mock.return_value = True
     get_top_limit_value_mock.return_value = 1
     metrics = {'top-1_absolute exclude_below_fees exclude_contracts non-clustered hhi': 1, 'top-1_absolute exclude_below_fees exclude_contracts non-clustered gini': 0}
     csv_row = get_output_row('bitcoin', '2010-01-01', metrics)
-    assert csv_row == ['bitcoin', '2010-01-01', True, True, 'absolute', 1, True, False, 1, 0]
+    assert csv_row == ['bitcoin', '2010-01-01', False, True, 'absolute', 1, True, False, 1, 0]
 
 
 def test_analyze_snapshot(mocker):
     get_force_analyze_mock = mocker.patch('tokenomics_decentralization.helper.get_force_analyze_flag')
-    get_no_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_no_clustering_flag')
+    get_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_clustering_flag')
     get_exclude_contracts_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_contracts_flag')
     get_exclude_below_fees_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_fees_flag')
     get_exclude_below_usd_cent_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_usd_cent_flag')
@@ -68,7 +68,7 @@ def test_analyze_snapshot(mocker):
     compute_tau_mock = mocker.patch('tokenomics_decentralization.analyze.compute_tau')
 
     get_force_analyze_mock.return_value = False
-    get_no_clustering_mock.return_value = False
+    get_clustering_mock.return_value = True
     get_exclude_contracts_mock.return_value = False
     get_exclude_below_fees_mock.return_value = False
     get_exclude_below_usd_cent_mock.return_value = False
@@ -85,7 +85,7 @@ def test_analyze_snapshot(mocker):
     output = analyze_snapshot(None, 'bitcoin', '2010-01-01')
     assert output == {'hhi': 1}
 
-    get_no_clustering_mock.return_value = True
+    get_clustering_mock.return_value = False
     get_exclude_contracts_mock.return_value = True
     get_exclude_below_fees_mock.return_value = True
     get_top_limit_type_mock.return_value = 'absolute'
@@ -106,7 +106,7 @@ def test_analyze_snapshot(mocker):
     output = analyze_snapshot(None, 'bitcoin', '2010-01-01')
     assert output == {'top-1_absolute exclude_below_fees exclude_contracts non-clustered hhi': 2}
 
-    get_no_clustering_mock.return_value = False
+    get_clustering_mock.return_value = True
 
     compute_hhi_mock.return_value = 3
     output = analyze_snapshot(None, 'bitcoin', '2010-01-01')
@@ -140,36 +140,36 @@ def test_write_csv_output(mocker):
     get_output_directories_mock = mocker.patch('tokenomics_decentralization.helper.get_output_directories')
     get_output_directories_mock.return_value = [pathlib.Path(__file__).resolve().parent]
 
-    get_no_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_no_clustering_flag')
+    get_clustering_mock = mocker.patch('tokenomics_decentralization.helper.get_clustering_flag')
     get_exclude_contracts_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_contracts_flag')
     get_exclude_below_fees_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_fees_flag')
     get_exclude_below_usd_cent_mock = mocker.patch('tokenomics_decentralization.helper.get_exclude_below_usd_cent_flag')
     get_top_limit_type_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_type')
     get_top_limit_value_mock = mocker.patch('tokenomics_decentralization.helper.get_top_limit_value')
 
-    get_no_clustering_mock.return_value = False
+    get_clustering_mock.return_value = True
     get_exclude_contracts_mock.return_value = False
     get_exclude_below_fees_mock.return_value = False
     get_exclude_below_usd_cent_mock.return_value = False
     get_top_limit_type_mock.return_value = 'absolute'
     get_top_limit_value_mock.return_value = 0
 
     write_csv_output([
-        ['bitcoin', '2010-01-01', False, False, 'absolute', 0, False, False, 100],
-        ['ethereum', '2010-01-01', False, False, 'absolute', 0, False, False, 200],
+        ['bitcoin', '2010-01-01', True, False, 'absolute', 0, False, False, 100],
+        ['ethereum', '2010-01-01', True, False, 'absolute', 0, False, False, 200],
         ])
     with open(pathlib.Path(__file__).resolve().parent / 'output.csv') as f:
         lines = f.readlines()
-        assert lines[0] == ','.join(['ledger', 'snapshot date', 'no_clustering', 'exclude_contract_addresses',
+        assert lines[0] == ','.join(['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses',
                                      'top_limit_type', 'top_limit_value', 'exclude_below_fees',
                                      'exclude_below_usd_cent', 'hhi']) + '\n'
-        assert lines[1] == ','.join(['bitcoin', '2010-01-01', 'False', 'False', 'absolute', '0', 'False', 'False',
+        assert lines[1] == ','.join(['bitcoin', '2010-01-01', 'True', 'False', 'absolute', '0', 'False', 'False',
                                      '100']) + '\n'
-        assert lines[2] == ','.join(['ethereum', '2010-01-01', 'False', 'False', 'absolute', '0', 'False', 'False',
+        assert lines[2] == ','.join(['ethereum', '2010-01-01', 'True', 'False', 'absolute', '0', 'False', 'False',
                                      '200']) + '\n'
     os.remove(pathlib.Path(__file__).resolve().parent / 'output.csv')
 
-    get_no_clustering_mock.return_value = True
+    get_clustering_mock.return_value = False
     get_exclude_contracts_mock.return_value = True
     get_exclude_below_fees_mock.return_value = True
     get_top_limit_type_mock.return_value = 'absolute'
@@ -181,7 +181,7 @@ def test_write_csv_output(mocker):
         ])
     with open(pathlib.Path(__file__).resolve().parent / 'output-no_clustering-exclude_contract_addresses-absolute_10-exclude_below_fees.csv') as f:
         lines = f.readlines()
-        assert lines[0] == ','.join(['ledger', 'snapshot date', 'no_clustering', 'exclude_contract_addresses',
+        assert lines[0] == ','.join(['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses',
                                      'top_limit_type', 'top_limit_value', 'exclude_below_fees',
                                      'exclude_below_usd_cent', 'hhi']) + '\n'
         assert lines[1] == ','.join(['bitcoin', '2010-01-01', 'False', 'False', 'absolute', '0', 'False', 'False',
diff --git a/tests/test_helper.py b/tests/test_helper.py
@@ -135,7 +135,7 @@ def test_config_flags(mocker):
         hlp.get_force_map_addresses_flag,
         hlp.get_force_map_balances_flag,
         hlp.get_force_analyze_flag,
-        hlp.get_no_clustering_flag,
+        hlp.get_clustering_flag,
         hlp.get_exclude_contracts_flag,
         hlp.get_exclude_below_fees_flag,
     ]
diff --git a/tokenomics_decentralization/analyze.py b/tokenomics_decentralization/analyze.py
@@ -12,7 +12,7 @@
 
 def analyze_snapshot(conn, ledger, snapshot):
     force_analyze = hlp.get_force_analyze_flag()
-    no_clustering = hlp.get_no_clustering_flag()
+    clustering = hlp.get_clustering_flag()
     top_limit_type = hlp.get_top_limit_type()
     top_limit_value = hlp.get_top_limit_value()
     exclude_contract_addresses_flag = hlp.get_exclude_contracts_flag()
@@ -46,7 +46,7 @@ def analyze_snapshot(conn, ledger, snapshot):
     metrics_results = {}
     for default_metric_name in metric_names:
         flagged_metric = default_metric_name
-        if no_clustering:
+        if not clustering:
             flagged_metric = 'non-clustered ' + flagged_metric
         if exclude_contract_addresses_flag:
             flagged_metric = 'exclude_contracts ' + flagged_metric
@@ -62,7 +62,7 @@ def analyze_snapshot(conn, ledger, snapshot):
             metric_value = val[0]
         else:
             if not entries:
-                if no_clustering:
+                if not clustering:
                     entries = db_hlp.get_non_clustered_balance_entries(conn, snapshot, ledger, balance_threshold=balance_threshold)
                 else:
                     entries = db_hlp.get_balance_entries(conn, snapshot, ledger, balance_threshold=balance_threshold)
@@ -97,19 +97,19 @@ def analyze_snapshot(conn, ledger, snapshot):
 
 
 def get_output_row(ledger, date, metrics):
-    no_clustering = hlp.get_no_clustering_flag()
+    clustering = hlp.get_clustering_flag()
     exclude_contract_addresses_flag = hlp.get_exclude_contracts_flag()
     exclude_below_fees_flag = hlp.get_exclude_below_fees_flag()
     exclude_below_usd_cent_flag = hlp.get_exclude_below_usd_cent_flag()
     top_limit_type = hlp.get_top_limit_type()
     top_limit_value = hlp.get_top_limit_value()
 
-    csv_row = [ledger, date, no_clustering, exclude_contract_addresses_flag, top_limit_type, top_limit_value,
+    csv_row = [ledger, date, clustering, exclude_contract_addresses_flag, top_limit_type, top_limit_value,
                exclude_below_fees_flag, exclude_below_usd_cent_flag]
 
     for metric_name in hlp.get_metrics():
         val = metric_name
-        if no_clustering:
+        if not clustering:
             val = 'non-clustered ' + val
         if exclude_contract_addresses_flag:
             val = 'exclude_contracts ' + val
@@ -124,18 +124,18 @@ def get_output_row(ledger, date, metrics):
 
 
 def write_csv_output(output_rows):
-    header = ['ledger', 'snapshot_date', 'no_clustering', 'exclude_contract_addresses', 'top_limit_type',
+    header = ['ledger', 'snapshot_date', 'clustering', 'exclude_contract_addresses', 'top_limit_type',
               'top_limit_value', 'exclude_below_fees', 'exclude_below_usd_cent']
     header += hlp.get_metrics()
 
-    no_clustering = hlp.get_no_clustering_flag()
+    clustering = hlp.get_clustering_flag()
     exclude_contract_addresses_flag = hlp.get_exclude_contracts_flag()
     top_limit_type = hlp.get_top_limit_type()
     top_limit_value = hlp.get_top_limit_value()
     exclude_below_fees_flag = hlp.get_exclude_below_fees_flag()
     exclude_below_usd_cent_flag = hlp.get_exclude_below_usd_cent_flag()
     output_filename = 'output'
-    if no_clustering:
+    if not clustering:
         output_filename += '-no_clustering'
     if exclude_contract_addresses_flag:
         output_filename += '-exclude_contract_addresses'
diff --git a/tokenomics_decentralization/helper.py b/tokenomics_decentralization/helper.py
@@ -199,17 +199,17 @@ def get_force_analyze_flag():
         raise ValueError('Flag "force_analyze" not in config file')
 
 
-def get_no_clustering_flag():
+def get_clustering_flag():
     """
-    Gets the flag that determines whether to forcefully recreate metrics
+    Gets the flag that determines whether to cluster addresses into entities
     :returns: boolean
     :raises ValueError: if the flag is not set in the config file
     """
     config = get_config_data()
     try:
-        return config['analyze_flags']['no_clustering']
+        return config['analyze_flags']['clustering']
     except KeyError:
-        raise ValueError('Flag "no_clustering" not in config file')
+        raise ValueError('Flag "clustering" not in config file')
 
 
 def get_metrics():

Original file line number	Diff line number	Diff line change
`@@ -135,7 +135,7 @@ def test_config_flags(mocker):`
`135`	`135`	`hlp.get_force_map_addresses_flag,`
`136`	`136`	`hlp.get_force_map_balances_flag,`
`137`	`137`	`hlp.get_force_analyze_flag,`
`138`		`- hlp.get_no_clustering_flag,`
	`138`	`+ hlp.get_clustering_flag,`
`139`	`139`	`hlp.get_exclude_contracts_flag,`
`140`	`140`	`hlp.get_exclude_below_fees_flag,`
`141`	`141`	`]`