-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbmmtools.py
More file actions
25 lines (21 loc) · 774 Bytes
/
bmmtools.py
File metadata and controls
25 lines (21 loc) · 774 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import re
def searchstringtofts(searchstring):
    """Convert a free-text search string into a full-text-search expression.

    Parentheses and hyphens are removed, and whitespace is trimmed and
    collapsed to single spaces. If the cleaned text contains none of the
    characters ``"``, ``+`` or ``*`` (presumably operators the user typed
    deliberately -- confirm against the search backend), the words are
    joined with `` + `` and a trailing ``*`` is appended. Otherwise the
    cleaned text is returned unchanged.

    Args:
        searchstring: The raw search input. Any value that is not a ``str``
            yields an empty string.

    Returns:
        The search expression, or ``''`` when nothing searchable remains.
    """
    if not isinstance(searchstring, str):
        return ''

    # Remove the unwanted characters *before* normalising whitespace.
    # Removing them afterwards can leave double or trailing spaces behind
    # (e.g. "foo - bar" or "(foo) -"), and every leftover space would then
    # become its own " + ", producing empty terms such as "foo +  + bar*".
    keresoszo = re.sub(r'[()\-]', '', searchstring)
    keresoszo = re.sub(r'\s+', ' ', keresoszo).strip()

    # Only build the expression when the user supplied no operator characters.
    if keresoszo and not re.search(r'["+*]', keresoszo):
        keresoszo = keresoszo.replace(' ', ' + ') + '*'
    return keresoszo
def mnvtimestamp(tstamp):
    """Return ``tstamp`` converted to ``int`` and multiplied by 1000.

    Presumably this turns a timestamp in seconds into milliseconds --
    confirm against the callers. Any fractional part of ``tstamp`` is
    discarded by the ``int`` conversion before scaling.
    """
    whole = int(tstamp)
    return whole * 1000
def lemmatize(nlp, texts):
    """Return the lowercased lemmas of selected tokens found in ``texts``.

    Each document yielded by ``nlp.pipe(texts)`` is scanned token by token.
    A token's lemma is kept when its ``pos_`` tag is one of NOUN, ADJ,
    PROPN, ADP, ADV or VERB and the lemma consists of alphabetic
    characters only.

    Args:
        nlp: An object exposing ``pipe(texts)`` that yields iterables of
            tokens with ``pos_`` and ``lemma_`` attributes (presumably a
            spaCy ``Language`` pipeline -- confirm against the callers).
        texts: The input passed straight through to ``nlp.pipe``.

    Returns:
        A flat list of lowercased lemmas in document and token order.
    """
    kept_pos = ('NOUN', 'ADJ', 'PROPN', 'ADP', 'ADV', 'VERB')
    # Iterate nlp.pipe() directly instead of wrapping it in list(): the
    # documents are only walked once, so there is no need to hold all of
    # them in memory at the same time.
    return [
        token.lemma_.lower()
        for doc in nlp.pipe(texts)
        for token in doc
        if token.pos_ in kept_pos and token.lemma_.isalpha()
    ]