-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhelpers.py
92 lines (75 loc) · 2.88 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import sqlite3
import json
import IPAParser_3_0
# Module-level setup: everything below runs once, at import time, and the
# JSON files are resolved relative to the current working directory.

# Single parser instance shared by the whole module.
PARSER = IPAParser_3_0.IPAParser()

# Contents of parses_cache.json, decoded from UTF-8 JSON.
with open('parses_cache.json', encoding='utf-8') as inp:
    PARSES_CACHE = json.load(inp)

# Contents of parses_cache_phoible.json, decoded from UTF-8 JSON.
with open('parses_cache_phoible.json', encoding='utf-8') as inp:
    PARSES_CACHE_PHOIBLE = json.load(inp)

# Contents of inventories_phoible.json, decoded from UTF-8 JSON.
with open('inventories_phoible.json', encoding='utf-8') as inp:
    INVENTORIES_PHOIBLE = json.load(inp)
def get_all_language_ids(db_connection: sqlite3.Connection, query_phoible=False):
    """Return the set of all available language identifiers.

    Args:
        db_connection: Open SQLite connection; only used when
            ``query_phoible`` is false.
        query_phoible: When true, ignore the database and build the result
            from ``INVENTORIES_PHOIBLE`` instead.

    Returns:
        A new ``set``. In database mode it holds the values of the ``id``
        column of the ``languages`` table; otherwise it holds the items
        produced by iterating ``INVENTORIES_PHOIBLE`` (its keys, if that
        object is a mapping).
    """
    if not query_phoible:
        rows = db_connection.cursor().execute("SELECT id FROM languages").fetchall()
        # Each row is a one-element tuple; keep only the id itself.
        return {row[0] for row in rows}
    return set(INVENTORIES_PHOIBLE)
def get_parse(segment):
    """Return the parse of ``segment`` as a set.

    Segments present in ``PARSES_CACHE`` are answered from the cache; the
    cached entry is copied into a fresh ``set`` on every call, so the caller
    may modify the result without affecting the cache. Any other segment is
    handed to ``PARSER.parse`` and the result's ``as_set()`` is returned.

    NOTE(review): the set elements are presumably feature names, as the
    commented-out legacy code below tests them against feature sets; confirm
    against ``IPAParser_3_0``.
    """
    if segment not in PARSES_CACHE:
        # Cache miss: fall back to the parser.
        return PARSER.parse(segment).as_set()
    return set(PARSES_CACHE[segment])
#
# Slow legacy code; replaced with Go binaries
#
# def get_consonants_for_language(language_id: int, db_connection: sqlite3.Connection):
# cursor = db_connection.cursor()
# return set(el[0].replace('(', '').replace(')', '') for el in cursor.execute(
# """
# SELECT ipa FROM segments
# WHERE `is_consonant` = 1
# AND `language_id` = ?
# """,
# (language_id,)
# ).fetchall())
# def get_vowels_for_language(language_id: int, db_connection: sqlite3.Connection):
# cursor = db_connection.cursor()
# return set(el[0].replace('(', '').replace(')', '') for el in cursor.execute(
# """
# SELECT ipa FROM segments
# WHERE `is_consonant` = 0
# AND `language_id` = ?
# """,
# (language_id,)
# ).fetchall())
# def get_count_for_features(language_id, features, db_connection, hit_tmp):
# pos_features = set(el[1] for el in features if el[0] == '+')
# neg_features = set(el[1] for el in features if el[0] == '-')
# if 'approximant' in pos_features and 'lateral' not in pos_features:
# neg_features.add('lateral')
# if 'plosive' in pos_features and 'nasal' not in pos_features:
# neg_features.add('nasal')
# consonants = get_consonants_for_language(language_id, db_connection)
# vowels = get_vowels_for_language(language_id, db_connection)
# hit_count = 0
# for segment in consonants | vowels:
# if segment not in hit_tmp:
# parse = get_parse(segment)
# hit_tmp[segment] = pos_features.issubset(parse) and\
# not neg_features & parse
# if hit_tmp[segment]:
# hit_count += 1
# return hit_count
# def check_eq(diff, op):
# if op == '=':
# return diff == 0
# elif op == '<':
# return diff < 0
# elif op == '<=':
# return diff <= 0
# elif op == '>':
# return diff > 0
# elif op == '>=':
# return diff >= 0
# else:
# raise NotImplementedError(f'Comparison operator not recognised: {op}')