1+ import copy
2+ import re
3+ from enum import IntEnum , auto
4+ from typing import Self
5+
6+
class DelimiterHandlingMode(IntEnum):
    """Selects what happens to delimiter characters when a sentence is split.

    The members carry the consecutive integer values 1, 2 and 3.
    """

    # Delimiters are left out of every resulting segment.
    REMOVE = 1
    # Delimiters are attached to the segments surrounding the enclosed text.
    ADDITIONAL_TO_AROUND = 2
    # Delimiters are attached to the enclosed text itself.
    ADDITIONAL_TO_BODY = 3
11+
12+
13+ class SentenceStructureInformation ():
14+ def __init__ (self , sentence , structure_data ) -> None :
15+ self ._sentence = sentence
16+ self ._structure_data = copy .deepcopy (structure_data )
17+
18+ def __str__ (self ) -> str :
19+ return self ._sentence
20+
21+ @property
22+ def sentence (self ) -> str :
23+ return self ._sentence
24+
25+ @property
26+ def structure_data (self ) -> list :
27+ return copy .deepcopy (self ._structure_data )
28+
29+ def get (self , omit_empty_strings = False ) -> list :
30+ structure_data = copy .deepcopy (self ._structure_data )
31+ if omit_empty_strings == True :
32+ structure_data = [s for s in structure_data if s [0 ] != "" ]
33+ return structure_data
34+
35+ def shrink_analysis_results (self , bottom_hierarchy ) -> Self :
36+ structure_data = []
37+ join_next_time_with_previous = False
38+ for s in copy .deepcopy (self ._structure_data ):
39+ if join_next_time_with_previous == True :
40+ structure_data [- 1 ][0 ] += s [0 ]
41+ join_next_time_with_previous = False
42+ elif s [1 ] <= bottom_hierarchy :
43+ structure_data .append (s )
44+ else :
45+ structure_data [- 1 ][0 ] += s [0 ]
46+ join_next_time_with_previous = True
47+ return SentenceStructureInformation (self ._sentence , structure_data )
48+
49+
def analyze_sentence(
    sentence,
    delimiter,
    delimiter_handling_mode=DelimiterHandlingMode.ADDITIONAL_TO_AROUND,
    consider_escaping=False,
) -> SentenceStructureInformation:
    """Split *sentence* into segments labelled with their delimiter nesting depth.

    The result wraps a list of ``[text, depth]`` entries in sentence order.
    Text outside every delimiter pair has depth 0 and each level of nesting
    adds 1.  Empty segments (for example before a leading delimiter) are kept.

    Args:
        sentence: The text to analyse.
        delimiter: One ``(opening, closing)`` tuple, or an iterable of such
            pairs.  An empty iterable yields the whole sentence as a single
            depth-0 segment.
        delimiter_handling_mode: ``REMOVE`` drops the delimiters,
            ``ADDITIONAL_TO_AROUND`` attaches the opening delimiter to the end
            of the preceding segment and the closing delimiter to the start of
            the following one, ``ADDITIONAL_TO_BODY`` attaches them to the
            start and end of the enclosed text.
        consider_escaping: When truthy, a closing delimiter directly preceded
            by a backslash is not accepted as the end of a group.

    Returns:
        A ``SentenceStructureInformation`` for *sentence*.

    Limitations:
        * Matching is greedy: a group runs from the first opening delimiter to
          the last closing delimiter on the same line, so sibling groups such
          as ``"(a)b(c)"`` are treated as one group.  No regex flags are used,
          so a group never spans a newline.
        * Exactly one character is removed from each end of a matched group,
          i.e. delimiters are assumed to be single characters.
    """

    def prepare_pattern(template, delimiter) -> str | None:
        """Build one capturing alternation over all delimiter pairs (None if there are none)."""
        # A bare (opening, closing) tuple is shorthand for a single pair.
        if isinstance(delimiter, tuple):
            delimiter = [delimiter]
        alternatives = [
            template.format(re.escape(d[0]), re.escape(d[1])) for d in delimiter
        ]
        if not alternatives:
            return None
        # The outer capturing group makes re.split() return the matched groups too.
        return "(" + "|".join(alternatives) + ")"

    def hierarchize_sentence(pattern, sentence, delimiter_handling_mode, hierarchy) -> list:
        """Recursively turn *sentence* into ``[text, depth]`` entries."""
        parts = re.split(pattern, sentence)
        if len(parts) == 1:
            # No delimited group in here: a single leaf one level below the caller.
            return [[parts[0], hierarchy + 1]]

        segments = []
        # Closing delimiter waiting to be prepended to the next outer segment
        # (ADDITIONAL_TO_AROUND only).
        pending_closer = None
        # With a single capturing group re.split() alternates
        # plain text, matched group, plain text, ... and always ends on plain
        # text, so a pending closer is always consumed inside the loop.
        for index, part in enumerate(parts):
            if index % 2:
                # Matched group: strip its delimiters and descend one level.
                inner = hierarchize_sentence(
                    pattern, part[1:-1], delimiter_handling_mode, hierarchy + 1
                )
                if delimiter_handling_mode == DelimiterHandlingMode.ADDITIONAL_TO_AROUND:
                    segments[-1][0] += part[0]
                    pending_closer = part[-1]
                elif delimiter_handling_mode == DelimiterHandlingMode.ADDITIONAL_TO_BODY:
                    # The opening delimiter goes in front of the enclosed text.
                    inner[0][0] = part[0] + inner[0][0]
                    inner[-1][0] += part[-1]
            else:
                inner = hierarchize_sentence(
                    pattern, part, delimiter_handling_mode, hierarchy
                )
                if pending_closer is not None:
                    inner[0][0] = pending_closer + inner[0][0]
                    pending_closer = None
            segments += inner
        return segments

    if consider_escaping:
        # Negative lookbehind: the closing delimiter must not follow a backslash.
        template = r"{0}.*(?<!\\){1}"
    else:
        template = "{0}.*{1}"
    pattern = prepare_pattern(template, delimiter)
    if pattern is None:
        # Nothing to split on: the whole sentence is one top-level segment.
        structure_data = [[sentence, 0]]
    else:
        structure_data = hierarchize_sentence(pattern, sentence, delimiter_handling_mode, -1)
    return SentenceStructureInformation(sentence, structure_data)
97+
98+
def ab_to_ba(data) -> list:
    """Return a list holding each element of *data* with its items in reverse order."""
    flipped = []
    for pair in data:
        # Slicing keeps the element's own sequence type (list stays list, ...).
        flipped.append(pair[::-1])
    return flipped
101+
102+
def ab_to_a(data) -> list:
    """Return a list of the item at index 0 of every element of *data*."""
    firsts = []
    for pair in data:
        firsts.append(pair[0])
    return firsts
105+
106+
def ab_to_b(data) -> list:
    """Return a list of the item at index 1 of every element of *data*."""
    seconds = []
    for pair in data:
        seconds.append(pair[1])
    return seconds
109+
110+
def a_b_to_ab(a, b) -> list:
    """Pair up *a* and *b* element by element.

    Args:
        a: Sized iterable supplying the first item of every pair.
        b: Sized iterable supplying the second item of every pair.

    Returns:
        A list of two-item lists ``[a[i], b[i]]``.

    Raises:
        ValueError: If *a* and *b* differ in length.
    """
    if len(a) != len(b):
        # ValueError is still caught by callers that handle Exception.
        raise ValueError("The number of elements in list a and list b do not match.")
    return [list(pair) for pair in zip(a, b)]
0 commit comments