Skip to content

Commit c31499b

Browse files
authored
Add files via upload
1 parent 41773b8 commit c31499b

File tree

1 file changed

+114
-0
lines changed

1 file changed

+114
-0
lines changed

parsent.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import copy
2+
import re
3+
from enum import IntEnum, auto
4+
from typing import Self
5+
6+
7+
class DelimiterHandlingMode(IntEnum):
    """Selects what happens to delimiter text found while analyzing a sentence.

    The numeric values are spelled out explicitly; they are the same values
    that ``auto()`` assigns (1, 2, 3 in declaration order).
    """

    # Delimiters are dropped from the resulting fragments.
    REMOVE = 1
    # The opening delimiter is attached to the fragment before the delimited
    # region and the closing delimiter to the fragment after it.
    ADDITIONAL_TO_AROUND = 2
    # The delimiters are attached to the fragments inside the delimited region.
    ADDITIONAL_TO_BODY = 3
11+
12+
13+
class SentenceStructureInformation:
    """A sentence together with its fragment/hierarchy breakdown.

    ``structure_data`` is a list of records whose element ``0`` is a text
    fragment and whose element ``1`` is that fragment's hierarchy level
    (larger means more deeply nested). The instance keeps a private deep copy
    of the data and only ever hands out deep copies, so callers cannot mutate
    its internal state by accident.
    """

    def __init__(self, sentence, structure_data) -> None:
        """Store *sentence* and a deep copy of *structure_data*."""
        self._sentence = sentence
        # Copied so later changes to the caller's list do not leak in here.
        self._structure_data = copy.deepcopy(structure_data)

    def __str__(self) -> str:
        """Return the original sentence."""
        return self._sentence

    @property
    def sentence(self) -> str:
        """The original, unsplit sentence."""
        return self._sentence

    @property
    def structure_data(self) -> list:
        """A deep copy of the fragment/hierarchy records."""
        return copy.deepcopy(self._structure_data)

    def get(self, omit_empty_strings=False) -> list:
        """Return a deep copy of the records.

        Args:
            omit_empty_strings: When truthy, records whose text fragment is
                the empty string are left out of the result.
        """
        structure_data = copy.deepcopy(self._structure_data)
        if omit_empty_strings:
            structure_data = [s for s in structure_data if s[0] != ""]
        return structure_data

    def shrink_analysis_results(self, bottom_hierarchy) -> "SentenceStructureInformation":
        """Return a new instance with levels deeper than *bottom_hierarchy* folded away.

        A record whose level is greater than *bottom_hierarchy* is concatenated
        onto the text of the previously kept record, and so is the record that
        immediately follows it (the continuation of the shallower text).

        Args:
            bottom_hierarchy: Deepest hierarchy level that keeps its own record.

        Returns:
            A new ``SentenceStructureInformation`` for the same sentence; this
            instance is left unchanged.
        """
        merged = []
        absorb_next = False
        for record in copy.deepcopy(self._structure_data):
            if absorb_next:
                merged[-1][0] += record[0]
                absorb_next = False
            elif record[1] <= bottom_hierarchy:
                merged.append(record)
            elif merged:
                merged[-1][0] += record[0]
                absorb_next = True
            else:
                # Nothing shallower precedes this record (e.g. bottom_hierarchy
                # is below every level), so there is no record to merge into.
                # Keep it as the seed instead of indexing an empty list.
                merged.append(record)
                absorb_next = True
        return SentenceStructureInformation(self._sentence, merged)
48+
49+
50+
def analyze_sentence(
    sentence,
    delimiter,
    delimiter_handling_mode=DelimiterHandlingMode.ADDITIONAL_TO_AROUND,
    consider_escaping=False,
) -> SentenceStructureInformation:
    """Split *sentence* into fragments tagged with their delimiter nesting level.

    Text outside every delimiter pair gets level 0; text inside one pair gets
    level 1, inside two nested pairs level 2, and so on. For example, with
    ``delimiter=("(", ")")`` the sentence ``"a(b)c"`` yields, depending on the
    handling mode::

        REMOVE                -> [["a", 0],  ["b", 1],   ["c", 0]]
        ADDITIONAL_TO_AROUND  -> [["a(", 0], ["b", 1],   [")c", 0]]
        ADDITIONAL_TO_BODY    -> [["a", 0],  ["(b)", 1], ["c", 0]]

    Matching is done with a greedy regular expression (``open.*close``), so a
    delimited region runs from an opening delimiter to the *last* closing
    delimiter that follows it, and ``.`` does not cross newlines.

    Args:
        sentence: The text to analyze.
        delimiter: One ``(opening, closing)`` pair, or a collection of such
            pairs. Delimiters may be longer than one character.
        delimiter_handling_mode: A ``DelimiterHandlingMode`` member deciding
            where the delimiter text ends up (see the example above).
        consider_escaping: When truthy, a closing delimiter directly preceded
            by a backslash is not accepted as the end of a region.

    Returns:
        A ``SentenceStructureInformation`` holding *sentence* and the list of
        ``[fragment, level]`` records.
    """
    # A bare tuple is treated as one (opening, closing) pair unless it is
    # clearly a tuple *of* pairs.
    if isinstance(delimiter, tuple) and not (
        delimiter and isinstance(delimiter[0], (tuple, list))
    ):
        delimiter = [delimiter]
    pairs = [(d[0], d[1]) for d in delimiter]

    template = r"{0}.*(?<!\\){1}" if consider_escaping else "{0}.*{1}"
    alternatives = "|".join(
        template.format(re.escape(opening), re.escape(closing))
        for opening, closing in pairs
    )
    # Exactly one capturing group: re.split then returns the alternating
    # sequence outside, match, outside, ..., outside (always odd length).
    pattern = re.compile("(" + alternatives + ")")

    def split_delimited(part):
        """Return (opening, body, closing) for a fragment matched by the pattern."""
        # Pairs are tried in the same order as the regex alternatives.
        for opening, closing in pairs:
            if (
                len(part) >= len(opening) + len(closing)
                and part.startswith(opening)
                and part.endswith(closing)
            ):
                return opening, part[len(opening):len(part) - len(closing)], closing
        # Should not happen for a real match; fall back to one-character delimiters.
        return part[0], part[1:-1], part[-1]

    def hierarchize(text, hierarchy):
        """Return the [fragment, level] records for *text* below level *hierarchy*."""
        parts = pattern.split(text)
        if len(parts) == 1:
            # No delimited region left: this is a leaf fragment.
            return [[text, hierarchy + 1]]

        records = []
        pending_closing = None  # closing delimiter waiting for the next outside part
        for index, part in enumerate(parts):
            if index % 2:
                # Odd positions are the captured, delimited regions.
                opening, body, closing = split_delimited(part)
                inner = hierarchize(body, hierarchy + 1)
                if delimiter_handling_mode == DelimiterHandlingMode.ADDITIONAL_TO_AROUND:
                    records[-1][0] += opening
                    pending_closing = closing
                elif delimiter_handling_mode == DelimiterHandlingMode.ADDITIONAL_TO_BODY:
                    # The opening delimiter goes *before* the first inner fragment.
                    inner[0][0] = opening + inner[0][0]
                    inner[-1][0] += closing
            else:
                inner = hierarchize(part, hierarchy)
                if pending_closing is not None:
                    inner[0][0] = pending_closing + inner[0][0]
                    pending_closing = None
            records += inner
        # The last part is always an outside part, so a pending closing
        # delimiter has necessarily been consumed by now.
        return records

    return SentenceStructureInformation(sentence, hierarchize(sentence, -1))
97+
98+
99+
def ab_to_ba(data) -> list:
    """Return a new list holding each record of *data* in reversed order."""
    flipped = []
    for record in data:
        flipped.append(record[::-1])
    return flipped
101+
102+
103+
def ab_to_a(data) -> list:
    """Return a list with element 0 of every record in *data*."""
    firsts = []
    for record in data:
        firsts.append(record[0])
    return firsts
105+
106+
107+
def ab_to_b(data) -> list:
    """Return a list with element 1 of every record in *data*."""
    seconds = []
    for record in data:
        seconds.append(record[1])
    return seconds
109+
110+
111+
def a_b_to_ab(a, b) -> list:
    """Pair up the elements of *a* and *b* into a list of two-element lists.

    Args:
        a: Sized collection supplying element 0 of each record.
        b: Sized collection supplying element 1 of each record.

    Returns:
        ``[[a[0], b[0]], [a[1], b[1]], ...]``.

    Raises:
        ValueError: If *a* and *b* have different lengths.
    """
    if len(a) != len(b):
        # ValueError is still caught by callers using ``except Exception``.
        raise ValueError("The number of elements in list a and list b do not match.")
    return [list(pair) for pair in zip(a, b)]

0 commit comments

Comments
 (0)