1
1
import os
2
2
import re
3
- from pyknp import KNP
4
- from collections import defaultdict
5
3
from argparse import ArgumentParser
4
+ from collections import defaultdict
5
+ from logging import FileHandler , Formatter , getLogger
6
6
7
7
from progressbar import progressbar
8
-
9
- from logging import getLogger , FileHandler , Formatter
8
+ from pyknp import KNP
10
9
11
10
logger = getLogger (__name__ )
12
11
handler = FileHandler ("disc.log" )
@@ -30,10 +29,10 @@ def organize_knp_features(knp_result):
30
29
sid = knp_result .sid
31
30
clause_tids = []
32
31
# remove bnst's features
33
- for i , bnst in enumerate ( knp_result .bnst_list ()):
32
+ for i in range ( len ( knp_result .bnst_list () )):
34
33
knp_result .bnst_list ()[i ].fstring = ""
35
34
# remove mrph's features
36
- for i , mrph in enumerate ( knp_result .mrph_list ()):
35
+ for i in range ( len ( knp_result .mrph_list () )):
37
36
knp_result .mrph_list ()[i ].fstring = ""
38
37
# Organize tag's features
39
38
for i , tag in enumerate (knp_result .tag_list ()):
@@ -61,7 +60,7 @@ def add_discourse_info_to_gold_knp(ann_data):
61
60
knp_results = []
62
61
clause_tids = []
63
62
# Load knp file
64
- with open (knp_path , "r" ) as f :
63
+ with open (knp_path ) as f :
65
64
data = ""
66
65
for line in f :
67
66
data += line
@@ -137,14 +136,14 @@ def make_knp_from_textfile(disc_ann):
137
136
org_path = os .path .join (GOLD_ORG_DIR , doc ["A-ID" ][:13 ], f'{ doc ["A-ID" ]} .org' )
138
137
if os .path .exists (org_path ):
139
138
# Found -> read org file
140
- with open (org_path , "r" ) as f :
139
+ with open (org_path ) as f :
141
140
sents = []
142
141
insert_point = - 1
143
142
for line in f .readlines ():
144
143
if re .match ("#" , line .strip ()):
145
144
if paren_cidx := re .search (r"括弧位置:(\d+)" , line .strip ()):
146
145
insert_point = paren_cidx .group (1 )
147
- else :
146
+ else : # noqa: PLR5501
148
147
if insert_point != - 1 :
149
148
# insert paren
150
149
sents [- 1 ] = (
@@ -238,7 +237,7 @@ def remove_duplicate_data(
238
237
239
238
def read_disc_ann_file (filepath ):
240
239
result = []
241
- with open (filepath , "r" ) as f :
240
+ with open (filepath ) as f :
242
241
doc = {"A-ID" : "" , "clause" : [], "rel" : defaultdict (lambda : defaultdict (list ))}
243
242
for line in f .readlines ():
244
243
if line .strip () == "" :
0 commit comments