-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathgeneral.py
More file actions
58 lines (49 loc) · 1.54 KB
/
general.py
File metadata and controls
58 lines (49 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# valid data usage policies
import re
"""
Null Constant definitions
"""
# Canonical marker for a null value; its lowercase form is guaranteed to be
# present in null_values_list below.
NA_STRING = "NA"

# Lowercase spellings that are treated as null. The list is sorted in place
# further down, so the order written here is not the final order.
null_values_list = [
    "nan",
    "na",
    "none",
    "",
    "undefined",
    "n/a",
    "null",
    "nil",
    "-",
]

# Enforce the assumption that these are all lowercase values: the regex below
# relies on re.IGNORECASE instead of listing every case variant.
assert all(s.lower() == s for s in null_values_list)

# Add the NA_STRING only if it's not already in the list.
if NA_STRING.lower() not in null_values_list:
    null_values_list.append(NA_STRING.lower())
null_values_list.sort()

# Matches, case-insensitively, either a string made only of whitespace or one
# of the non-empty entries of null_values_list. The empty entry is left out of
# the alternation and "\s+" needs at least one character, so "" itself does
# not match this pattern. Every entry goes through re.escape so that it is
# always matched literally, even if a value containing regex metacharacters
# (".", "?", "(", ...) is added to the list later.
null_values_re = re.compile(
    r"^\s+$|" + "|".join(f"^{re.escape(s)}$" for s in null_values_list if s),
    flags=re.IGNORECASE,
)

# Independent copy of the sorted list with an extra "whitespace" entry
# appended; null_values_list itself is not modified by this.
readable_null_values_list = [*null_values_list, "whitespace"]
# --- Column-name constants -------------------------------------------------
hgvs_nt_column = "hgvs_nt"
hgvs_splice_column = "hgvs_splice"
hgvs_pro_column = "hgvs_pro"
guide_sequence_column = "guide_sequence"
required_score_column = "score"
calibration_variant_column_name = "variant_urn"
calibration_class_column_name = "class_name"

# The three hgvs column names in alphabetical order (always a fresh list).
hgvs_columns = sorted((hgvs_nt_column, hgvs_pro_column, hgvs_splice_column))

# --- Key-name constants ----------------------------------------------------
# NOTE(review): how these keys are consumed is not visible in this file.
meta_data = "meta_data"
score_columns = "score_columns"
count_columns = "count_columns"
variant_score_data = "score_data"
variant_count_data = "count_data"

multi_value_keys = ["molecular mechanism assessed"]

# The two lists below are index-aligned: position i of one corresponds to
# position i of the other (score first, count second).
valid_dataset_columns = [score_columns, count_columns]
valid_variant_columns = [variant_score_data, variant_count_data]

# Forward lookup ("score_data" -> "score_columns", "count_data" ->
# "count_columns") and its inverse, both built from the aligned lists above.
variant_to_score_set_column = dict(zip(valid_variant_columns, valid_dataset_columns))
score_set_to_variant_column = dict(zip(valid_dataset_columns, valid_variant_columns))