Skip to content

Commit 192586b

Browse files
authored
Merge pull request #20 from KubEF/master
Rework RSA
2 parents 86ac5b5 + 0768274 commit 192586b

File tree

3 files changed

+141
-190
lines changed

3 files changed

+141
-190
lines changed

Diff for: pyformlang/rsa/box.py

+43-43
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Representation of a box for recursive automaton
33
"""
4+
from typing import Union
45

56
from pyformlang.finite_automaton.epsilon_nfa import EpsilonNFA
67
from pyformlang.finite_automaton.finite_automaton import to_symbol
@@ -16,56 +17,59 @@ class Box:
1617
----------
1718
enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA`
1819
An epsilon NFA
19-
label : :class:`~pyformlang.finite_automaton.Symbol`
20-
A label for epsilon nfa
20+
nonterminal : :class:`~pyformlang.finite_automaton.Symbol`
21+
A nonterminal for epsilon nfa
2122
2223
"""
2324

24-
def __init__(self, enfa: EpsilonNFA = None, label: Symbol = None):
25-
if enfa is not None:
26-
enfa = enfa.minimize()
27-
self._dfa = enfa or EpsilonNFA()
28-
29-
if label is not None:
30-
label = to_symbol(label)
31-
self._label = label or Symbol("")
32-
33-
def change_label(self, label: Symbol):
34-
""" Set a new label
35-
36-
Parameters
37-
-----------
38-
label : :class:`~pyformlang.finite_automaton.Symbol`
39-
The new label for automaton
40-
41-
"""
42-
self._label = to_symbol(label)
43-
44-
def change_dfa(self, enfa: EpsilonNFA):
45-
""" Set an epsilon finite automaton
46-
47-
Parameters
48-
-----------
49-
enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA`
50-
The new epsilon finite automaton
51-
52-
"""
53-
enfa = enfa.minimize()
25+
def __init__(self, enfa: EpsilonNFA, nonterminal: Union[Symbol, str]):
5426
self._dfa = enfa
5527

28+
nonterminal = to_symbol(nonterminal)
29+
self._nonterminal = nonterminal
30+
31+
def to_subgraph_dot(self):
32+
"""Creates a named subgraph representing a box"""
33+
graph = self._dfa.to_networkx()
34+
strange_nodes = []
35+
nonterminal = self.nonterminal.value.replace('"', '').replace("'", "").replace(".", "")
36+
dot_string = (f'subgraph cluster_{nonterminal}\n{{ label="{nonterminal}"\n'
37+
f'fontname="Helvetica,Arial,sans-serif"\n'
38+
f'node [fontname="Helvetica,Arial,sans-serif"]\n'
39+
f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n'
40+
f'node [shape = circle style=filled fillcolor=white]')
41+
for node, data in graph.nodes(data=True):
42+
node = node.replace('"', '').replace("'", "")
43+
if 'is_start' not in data.keys() or 'is_final' not in data.keys():
44+
strange_nodes.append(node)
45+
continue
46+
if data['is_start']:
47+
dot_string += f'\n"{node}" [fillcolor = green];'
48+
if data['is_final']:
49+
dot_string += f'\n"{node}" [shape = doublecircle];'
50+
for strange_node in strange_nodes:
51+
graph.remove_node(strange_node)
52+
for node_from, node_to, data in graph.edges(data=True):
53+
node_from = node_from.replace('"', '').replace("'", "")
54+
node_to = node_to.replace('"', '').replace("'", "")
55+
label = data['label'].replace('"', '').replace("'", "")
56+
dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];'
57+
dot_string += "\n}"
58+
return dot_string
59+
5660
@property
5761
def dfa(self):
5862
""" Box's dfa """
5963
return self._dfa
6064

6165
@property
62-
def label(self):
63-
""" Box's label """
64-
return self._label
66+
def nonterminal(self):
67+
""" Box's nonterminal """
68+
return self._nonterminal
6569

6670
@property
67-
def start_state(self):
68-
""" The start state """
71+
def start_states(self):
72+
""" The start states """
6973
return self._dfa.start_states
7074

7175
@property
@@ -90,14 +94,10 @@ def is_equivalent_to(self, other):
9094
if not isinstance(other, Box):
9195
return False
9296

93-
if self._dfa.is_equivalent_to(other.dfa) and \
94-
self._label == other.label:
95-
return True
96-
97-
return False
97+
return self._dfa.is_equivalent_to(other.dfa) and self.nonterminal == other.nonterminal
9898

9999
def __eq__(self, other):
100100
return self.is_equivalent_to(other)
101101

102102
def __hash__(self):
103-
return self._label.__hash__()
103+
return self._nonterminal.__hash__()

Diff for: pyformlang/rsa/recursive_automaton.py

+72-104
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Representation of a recursive automaton
33
"""
44

5-
from typing import AbstractSet
5+
from typing import AbstractSet, Union
66

77
from pyformlang.finite_automaton.finite_automaton import to_symbol
88
from pyformlang.finite_automaton.symbol import Symbol
@@ -19,143 +19,121 @@ class RecursiveAutomaton:
1919
2020
Parameters
2121
----------
22-
labels : set of :class:`~pyformlang.finite_automaton.Symbol`, optional
23-
A finite set of labels for boxes
24-
initial_label : :class:`~pyformlang.finite_automaton.Symbol`, optional
25-
A start label for automaton
26-
boxes : set of :class:`~pyformlang.rsa.Box`, optional
22+
start_box : :class:`~pyformlang.rsa.Box`
23+
Start box
24+
boxes : set of :class:`~pyformlang.rsa.Box`
2725
A finite set of boxes
2826
2927
"""
3028

3129
def __init__(self,
32-
labels: AbstractSet[Symbol] = None,
33-
initial_label: Symbol = None,
34-
boxes: AbstractSet[Box] = None):
35-
36-
if labels is not None:
37-
labels = {to_symbol(x) for x in labels}
38-
self._labels = labels or set()
39-
40-
if initial_label is not None:
41-
initial_label = to_symbol(initial_label)
42-
if initial_label not in self._labels:
43-
self._labels.add(initial_label)
44-
self._initial_label = initial_label or Symbol("")
45-
46-
self._boxes = {}
47-
if boxes is not None:
48-
for box in boxes:
49-
self._boxes.update({to_symbol(box.label): box})
50-
self._labels.add(box.label)
51-
52-
for label in self._labels:
53-
box = self.get_box(label)
54-
if box is None:
55-
raise ValueError(
56-
"RSA must have the same number of labels and DFAs")
57-
58-
def get_box(self, label: Symbol):
59-
""" Box by label """
60-
61-
label = to_symbol(label)
62-
if label in self._boxes:
63-
return self._boxes[label]
64-
65-
return None
66-
67-
def add_box(self, new_box: Box):
68-
""" Set a box
30+
start_box: Box,
31+
boxes: AbstractSet[Box]):
32+
self._nonterminal_to_box = {}
33+
if start_box not in boxes:
34+
self._nonterminal_to_box[to_symbol(start_box.nonterminal)] = start_box
35+
self._start_nonterminal = to_symbol(start_box.nonterminal)
36+
for box in boxes:
37+
self._nonterminal_to_box[to_symbol(box.nonterminal)] = box
38+
39+
def get_box_by_nonterminal(self, nonterminal: Union[Symbol, str]):
40+
"""
41+
Box by nonterminal
6942
7043
Parameters
71-
-----------
72-
new_box : :class:`~pyformlang.rsa.Box`
73-
The new box
44+
----------
45+
nonterminal: :class:`~pyformlang.finite_automaton.Symbol` | str
46+
The nonterminal that identifies the box
7447
48+
Returns
49+
-----------
50+
box : :class:`~pyformlang.rsa.Box` | None
51+
The box registered for the given nonterminal, or None if there is none
7552
"""
7653

77-
self._boxes.update({new_box.label: new_box})
78-
self._labels.add(to_symbol(new_box.label))
54+
nonterminal = to_symbol(nonterminal)
55+
if nonterminal in self._nonterminal_to_box:
56+
return self._nonterminal_to_box[nonterminal]
7957

80-
def get_number_of_boxes(self):
81-
""" Size of set of boxes """
58+
return None
8259

83-
return len(self._boxes)
60+
def get_number_boxes(self):
61+
""" Size of set of boxes """
8462

85-
def change_initial_label(self, new_initial_label: Symbol):
86-
""" Set an initial label
63+
return len(self._nonterminal_to_box)
8764

88-
Parameters
89-
-----------
90-
new_initial_label : :class:`~pyformlang.finite_automaton.Symbol`
91-
The new initial label
92-
93-
"""
94-
95-
new_initial_label = to_symbol(new_initial_label)
96-
if new_initial_label not in self._labels:
97-
raise ValueError(
98-
"New initial label not in set of labels for boxes")
65+
def to_dot(self):
66+
""" Create dot representation of recursive automaton """
67+
dot_string = 'digraph "" {'
68+
for box in self._nonterminal_to_box.values():
69+
dot_string += f'\n{box.to_subgraph_dot()}'
70+
dot_string += "\n}"
71+
return dot_string
9972

10073
@property
101-
def labels(self) -> set:
102-
""" The set of labels """
74+
def nonterminals(self) -> set:
75+
""" The set of nonterminals """
10376

104-
return self._labels
77+
return set(self._nonterminal_to_box.keys())
10578

10679
@property
10780
def boxes(self) -> dict:
10881
""" The set of boxes """
10982

110-
return self._boxes
83+
return self._nonterminal_to_box
84+
85+
@property
86+
def start_nonterminal(self) -> Symbol:
87+
""" The start nonterminal """
88+
89+
return self._start_nonterminal
11190

11291
@property
113-
def initial_label(self) -> Symbol:
114-
""" The initial label """
92+
def start_box(self):
93+
""" The start box """
11594

116-
return self._initial_label
95+
return self.boxes[self.start_nonterminal]
11796

11897
@classmethod
119-
def from_regex(cls, regex: Regex, initial_label: Symbol):
98+
def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]):
12099
""" Create a recursive automaton from regular expression
121100
122101
Parameters
123102
-----------
124103
regex : :class:`~pyformlang.regular_expression.Regex`
125104
The regular expression
126-
initial_label : :class:`~pyformlang.finite_automaton.Symbol`
127-
The initial label for the recursive automaton
105+
start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str
106+
The start nonterminal for the recursive automaton
128107
129108
Returns
130109
-----------
131110
rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
132111
The new recursive automaton built from regular expression
133112
"""
134-
135-
initial_label = to_symbol(initial_label)
136-
box = Box(regex.to_epsilon_nfa().minimize(), initial_label)
137-
return RecursiveAutomaton({initial_label}, initial_label, {box})
113+
start_nonterminal = to_symbol(start_nonterminal)
114+
box = Box(regex.to_epsilon_nfa().minimize(), start_nonterminal)
115+
return RecursiveAutomaton(box, {box})
138116

139117
@classmethod
140-
def from_text(cls, text, start_symbol: Symbol = Symbol("S")):
141-
""" Create a recursive automaton from text
118+
def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")):
119+
""" Create a recursive automaton from EBNF (Extended Backus-Naur Form)
142120
143121
Parameters
144122
-----------
145123
text : str
146124
The text to transform
147-
start_symbol : str, optional
148-
The start symbol, S by default
125+
start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str, optional
126+
The start nonterminal, S by default
149127
150128
Returns
151129
-----------
152130
rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
153131
The new recursive automaton built from context-free grammar
154132
"""
155-
133+
start_nonterminal = to_symbol(start_nonterminal)
156134
productions = {}
157135
boxes = set()
158-
labels = set()
136+
nonterminals = set()
159137
for production in text.splitlines():
160138
production = production.strip()
161139
if "->" not in production:
@@ -164,7 +142,7 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")):
164142
head, body = production.split("->")
165143
head = head.strip()
166144
body = body.strip()
167-
labels.add(to_symbol(head))
145+
nonterminals.add(to_symbol(head))
168146

169147
if body == "":
170148
body = Epsilon().to_text()
@@ -177,11 +155,13 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")):
177155
for head, body in productions.items():
178156
boxes.add(Box(Regex(body).to_epsilon_nfa().minimize(),
179157
to_symbol(head)))
158+
start_box = Box(Regex(productions[start_nonterminal.value]).to_epsilon_nfa().minimize(), start_nonterminal)
159+
return RecursiveAutomaton(start_box, boxes)
180160

181-
return RecursiveAutomaton(labels, start_symbol, boxes)
182-
183-
def is_equivalent_to(self, other):
184-
""" Check whether two recursive automata are equivalent
161+
def is_equals_to(self, other):
162+
"""
163+
Check whether two recursive automata are equal, comparing them box by box.
164+
This is not language equivalence in the formal-language-theory sense; only the nonterminal-to-box mappings are compared
185165
186166
Parameters
187167
----------
@@ -191,23 +171,11 @@ def is_equivalent_to(self, other):
191171
Returns
192172
----------
193173
are_equivalent : bool
194-
Whether the two recursive automata are equivalent or not
174+
Whether the two recursive automata are equal or not
195175
"""
196-
197176
if not isinstance(other, RecursiveAutomaton):
198177
return False
199-
200-
if self._labels != other._labels:
201-
return False
202-
203-
for label in self._labels:
204-
box_1 = self.get_box(label)
205-
box_2 = other.get_box(label)
206-
207-
if box_1 != box_2:
208-
return False
209-
210-
return True
178+
return self.boxes == other.boxes
211179

212180
def __eq__(self, other):
213-
return self.is_equivalent_to(other)
181+
return self.is_equals_to(other)

0 commit comments

Comments
 (0)