-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathjson_to_csv.py
More file actions
34 lines (27 loc) · 955 Bytes
/
json_to_csv.py
File metadata and controls
34 lines (27 loc) · 955 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# coding=utf-8
import json
import csv
import re
# Code points of every precomposed Hangul syllable, from '가' through '힣'
# inclusive. NOTE(review): nothing in the visible script reads this list.
korean_set = list(range(ord('가'), ord('힣') + 1))

# Matches any single character that lies outside the '가'-'힣' range.
pattern_is_not_hangul = re.compile('[^가-힣]')

# Matches one ASCII punctuation character from the ranges '!'-'/', ':'-'@'
# and '['-'`'. Written as a raw string so that the backslash in '\[' reaches
# the regex engine verbatim instead of being an invalid string escape
# (which newer Python versions warn about at compile time).
pattern_special = re.compile(r'([!-/]|[:-@]|[\[-`])')
def is_available_word(word, part):
    """Return True when *word* passes every export filter, else False.

    A word passes only if all of the following hold:
      * it is at least two characters long,
      * it contains no character outside the '가'-'힣' range, and
      * *part* is exactly '명사' (noun).
    """
    long_enough = len(word) >= 2
    return (
        long_enough
        and pattern_is_not_hangul.search(word) is None
        and part == '명사'
    )
# Load the input table; every entry is unpacked below as (word, part, meaning).
with open('async_output.json', encoding='utf-8') as f:
    table = json.load(f)

# Collect accepted words, keeping only the first meaning seen for each one.
word_dict = {}
for word, part, meaning in table:
    # Strip special characters before the word is validated.
    word = pattern_special.sub('', word)
    if not is_available_word(word, part):
        continue
    # setdefault leaves an existing entry untouched, so the first meaning wins.
    word_dict.setdefault(word, meaning)

# newline='' lets the csv module control line endings itself; 'utf-8-sig'
# writes a byte-order mark at the start of the file.
with open('sqlite_input.csv', 'w', newline='', encoding='utf-8-sig') as csv_file:
    writer = csv.writer(csv_file)
    # NOTE(review): the original comment on this row read "North Korean word";
    # presumably the constant trailing columns 0 and 1 are flags related to
    # that. Confirm against the schema that consumes this CSV.
    writer.writerows(
        [word, meaning, 0, 1] for word, meaning in word_dict.items()
    )