-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path__main__.py
More file actions
84 lines (50 loc) · 2.39 KB
/
__main__.py
File metadata and controls
84 lines (50 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#region Imports
import logging, argparse, json
from .utils import *
from .tae import TAE
#endregion
#region Constants
# Names of the top-level entries read from the JSON configuration file
CORPUS_CONFIG_KEY = "corpus"
ANONYMIZATIONS_CONFIG_KEY = "anonymizations"
RESULTS_CONFIG_KEY = "results_file_path"
METRICS_CONFIG_KEY = "metrics"

# Entries that have to be present in the configuration; a missing one is
# reported as an error when the configuration is loaded
MANDATORY_CONFIG_KEYS = [
    CORPUS_CONFIG_KEY,
    ANONYMIZATIONS_CONFIG_KEY,
    RESULTS_CONFIG_KEY,
    METRICS_CONFIG_KEY,
]
#endregion
#region Main
def load_config(config_file_path: str, mandatory_keys) -> dict:
    """Load the evaluation configuration from a JSON file and validate it.

    Args:
        config_file_path: Path to the JSON configuration file (read as UTF-8).
        mandatory_keys: Iterable of top-level keys the configuration must
            contain.

    Returns:
        The parsed configuration dictionary.

    Raises:
        RuntimeError: If the top-level JSON value is not an object, or if one
            of the mandatory keys is missing (the first missing key, in the
            order given, is reported).

    Errors raised by ``open`` and ``json.load`` (missing file, malformed
    JSON) propagate unchanged.
    """
    with open(config_file_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    # A JSON list/string/number would otherwise fail later with an opaque
    # error (or a misleading membership test), so reject it explicitly
    if not isinstance(config, dict):
        raise RuntimeError("Configuration JSON file must contain a JSON object at its top level")
    for key in mandatory_keys:
        if key not in config:
            raise RuntimeError(f"Configuration JSON file misses a mandatory key: {key}")
    return config


def main() -> None:
    """Run the standalone evaluation.

    Configures logging, parses the command line (a single positional
    ``config_file_path`` argument), loads and validates the configuration,
    builds the ``TAE`` instance from the configured corpus and calls its
    ``evaluate`` method with the configured anonymizations, metrics and
    results file path.
    """
    #region Additional configurations for standalone execution
    logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s', level=logging.INFO)  # Configure logging
    # Suppress INFO logs from chatty third-party libraries
    # (httpx is used by huggingface_hub)
    for noisy_logger in ('sentence_transformers', 'transformers', 'huggingface_hub', 'httpx', 'torch'):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)
    #endregion

    #region Arguments parsing
    parser = argparse.ArgumentParser(description='Computes evaluation metrics for text anonymization')
    parser.add_argument('config_file_path', type=str,
                        help='the path to the JSON file containing the evaluation configuration')
    args = parser.parse_args()
    # Load configuration dictionary
    config = load_config(args.config_file_path, MANDATORY_CONFIG_KEYS)
    #endregion

    #region Initialization
    # DEVICE is not defined in this file; presumably provided by the
    # `.utils` star import -- verify against that module
    logging.info("Selected device: %s", DEVICE)
    # Create TAE from corpus file path
    tae = TAE(config[CORPUS_CONFIG_KEY])
    # Get anonymizations file paths
    anonymizations = config[ANONYMIZATIONS_CONFIG_KEY]
    # Get metrics
    metrics = config[METRICS_CONFIG_KEY]
    # Get file path for the results CSV file
    results_file_path = config[RESULTS_CONFIG_KEY]
    #endregion

    #region Evaluate
    tae.evaluate(anonymizations, metrics, results_file_path)
    #endregion


if __name__ == "__main__":
    main()
#endregion