-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbmm_birosagdb.py
More file actions
136 lines (105 loc) · 5.11 KB
/
bmm_birosagdb.py
File metadata and controls
136 lines (105 loc) · 5.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import os
import re
import sqlite3
from itertools import chain
from bmm_tools import gather_snippets
class BmmBirosagDB:
def __init__(self, databasename) -> None:
self.databasename = databasename
if not os.path.exists(self.databasename):
self.connection = sqlite3.connect(self.databasename)
c = self.connection.cursor()
# JSON keys to Columns:
# - Azonosito: egyedi_azonosito
# - MeghozoBirosag: birosag
# - Kollegium: kollegium
# - JogTerulet: jogterulet
# - KapcsolodoHatarozatok: -
# - Jogszabalyhelyek: -
# - HatarozatEve: year
# - Szoveg: - content
# - Rezume (elvi tartalom): rezume
c.execute('''CREATE TABLE IF NOT EXISTS hatarozatok (
egyedi_azonosito TEXT,
azonosito TEXT,
birosag TEXT,
kollegium TEXT,
jogterulet TEXT,
year TEXT,
jogszabalyhelyek TEXT,
rezume TEXT,
index_id TEXT,
url TEXT,
download_url TEXT,
content TEXT,
lemmacontent TEXT,
scrape_date TEXT,
isnew INTEGER)''')
c.execute('''CREATE VIRTUAL TABLE IF NOT EXISTS hatarozatok_fts
USING FTS5 (egyedi_azonosito UNINDEXED, content, lemmacontent, rezume,
tokenize="unicode61 remove_diacritics 2")''')
c.execute('''CREATE TRIGGER hatarozatok_ai AFTER INSERT ON hatarozatok BEGIN
INSERT INTO hatarozatok_fts(egyedi_azonosito, content, lemmacontent, rezume)
VALUES (new.egyedi_azonosito, new.content, new.lemmacontent, new.rezume);
END;''')
c.execute('''CREATE TRIGGER hatarozatok_ad AFTER DELETE ON hatarozatok BEGIN
INSERT INTO hatarozatok_fts(hatarozatok_fts, egyedi_azonosito, content, lemmacontent, rezume)
VALUES('delete', old.egyedi_azonosito, old.content, old.lemmacontent, old.rezume);
END;''')
self.commit_connection()
c.close()
else:
self.connection = sqlite3.connect(self.databasename)
def close_connection(self):
self.connection.close()
def commit_connection(self):
self.connection.commit()
def get_hatarozat(self, egyedi_azonosito):
c = self.connection.cursor()
c.execute('SELECT * FROM contracts WHERE egyedi_azonosito=?', (egyedi_azonosito,))
res = c.fetchone()
c.close()
return res
def save_hatarozat(self, entry):
c = self.connection.cursor()
c.execute(
'INSERT INTO hatarozatok (egyedi_azonosito, azonosito, birosag, kollegium, jogterulet, year, '
'jogszabalyhelyek, rezume, index_id, url, download_url, content, lemmacontent, scrape_date, isnew) '
'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1)',
(entry['EgyediAzonosito'], entry['Azonosito'], entry['MeghozoBirosag'], entry['Kollegium'], entry['JogTerulet'],
entry['HatarozatEve'], entry['Jogszabalyhelyek'], entry['Rezume'], entry['IndexId'], entry.get('url', None),
entry.get('download_url', None), entry.get('content', None), entry.get('lemmacontent', None), entry.get('scrape_date', None)))
c.close()
def clear_is_new(self, egyedi_azonosito):
c = self.connection.cursor()
c.execute('UPDATE hatarozatok SET isnew=0 WHERE egyedi_azonosito=?', (egyedi_azonosito,))
c.close()
def search_records(self, keyword, original_keyword):
c = self.connection.cursor()
c.execute('SELECT * FROM hatarozatok WHERE isnew=1 AND egyedi_azonosito IN '
'(SELECT egyedi_azonosito FROM hatarozatok_fts WHERE hatarozatok_fts MATCH ?)',
(keyword,))
results = c.fetchall()
snippets = []
if len(results) > 0:
snippets = gather_snippets(original_keyword, results, snippets)
c.close()
return results, snippets
def get_all_new(self):
"""
:return: list of dictionaries containing all new records with column names as keys
"""
c = self.connection.cursor()
c.execute('SELECT * FROM hatarozatok WHERE isnew=1')
# Get column names from cursor description
columns = [desc[0] for desc in c.description]
# Convert tuples to dictionaries using column names as keys
results = [dict(zip(columns, row)) for row in c.fetchall()]
c.close()
return results
def get_existing_azonosito_set(self):
c = self.connection.cursor()
c.execute('SELECT egyedi_azonosito FROM hatarozatok')
results = c.fetchall()
c.close()
return set(chain(*results))