Skip to content

Commit 298bc3e

Browse files
authored
Merge pull request #22 from FormalLanguageConstrainedPathQuerying/master
Add string generator for finite automata
2 parents 9d576c8 + 0c9930d commit 298bc3e

10 files changed

+460
-27
lines changed

.github/workflows/python-package.yml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,7 @@
33

44
name: Python package
55

6-
on:
7-
push:
8-
branches: [ master ]
9-
pull_request:
10-
branches: [ master ]
6+
on: [push, pull_request]
117

128
jobs:
139
build:
@@ -51,7 +47,7 @@ jobs:
5147
junitxml-path: ./pytest.xml
5248
default-branch: master
5349
- name: Create coverage Badge
54-
if: ${{ matrix.python-version == '3.8'}}
50+
if: ${{ github.ref_name == 'master' && matrix.python-version == '3.8'}}
5551
uses: schneegans/[email protected]
5652
with:
5753
auth: ${{ secrets.GIST_SECRET }}

pyformlang/finite_automaton/deterministic_finite_automaton.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -299,23 +299,6 @@ def _get_previous_transitions(self):
299299
previous_transitions.add(None, symbol, None)
300300
return previous_transitions
301301

302-
def _get_reachable_states(self) -> AbstractSet[State]:
303-
""" Get all states which are reachable """
304-
to_process = []
305-
processed = set()
306-
for state in self._start_state:
307-
to_process.append(state)
308-
processed.add(state)
309-
while to_process:
310-
current = to_process.pop()
311-
for symbol in self._input_symbols:
312-
next_state = self._transition_function(current, symbol)
313-
if not next_state or next_state[0] in processed:
314-
continue
315-
to_process.append(next_state[0])
316-
processed.add(next_state[0])
317-
return processed
318-
319302
def minimize(self) -> "DeterministicFiniteAutomaton":
320303
""" Minimize the current DFA
321304

pyformlang/finite_automaton/finite_automaton.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
""" A general finite automaton representation """
22

3-
from typing import List, Any, Union
3+
from typing import List, Iterable, Set, Optional, Union, Any
4+
from collections import deque
45

56
import networkx as nx
67
from networkx.drawing.nx_pydot import write_dot
@@ -594,6 +595,86 @@ def is_equivalent_to(self, other):
594595
self_dfa = self.to_deterministic()
595596
return self_dfa.is_equivalent_to(other)
596597

598+
def get_accepted_words(self, max_length: Optional[int] = None) \
599+
-> Iterable[List[Symbol]]:
600+
"""
601+
Gets words accepted by the finite automaton.
602+
"""
603+
if max_length is not None and max_length < 0:
604+
return []
605+
states_to_visit = deque((start_state, [])
606+
for start_state in self.start_states)
607+
states_leading_to_final = self._get_states_leading_to_final()
608+
words_by_state = {state: set() for state in self.states}
609+
yielded_words = set()
610+
while states_to_visit:
611+
current_state, current_word = states_to_visit.popleft()
612+
if max_length is not None and len(current_word) > max_length:
613+
continue
614+
word_to_add = tuple(current_word)
615+
if not self.__try_add(words_by_state[current_state], word_to_add):
616+
continue
617+
transitions = self._transition_function.get_transitions_from(
618+
current_state)
619+
for symbol, next_state in transitions:
620+
if next_state in states_leading_to_final:
621+
temp_word = current_word.copy()
622+
if symbol != Epsilon():
623+
temp_word.append(symbol)
624+
states_to_visit.append((next_state, temp_word))
625+
if self.is_final_state(current_state):
626+
if self.__try_add(yielded_words, word_to_add):
627+
yield current_word
628+
629+
def _get_states_leading_to_final(self) -> Set[State]:
630+
"""
631+
Gets a set of states from which one
632+
of the final states can be reached.
633+
"""
634+
leading_to_final = self.final_states.copy()
635+
visited = set()
636+
states_to_process = deque((None, start_state)
637+
for start_state in self.start_states)
638+
delayed_states = deque()
639+
while states_to_process:
640+
previous_state, current_state = states_to_process.pop()
641+
if previous_state and current_state in leading_to_final:
642+
leading_to_final.add(previous_state)
643+
continue
644+
if current_state in visited:
645+
delayed_states.append((previous_state, current_state))
646+
continue
647+
visited.add(current_state)
648+
next_states = self._get_next_states_from(current_state)
649+
if next_states:
650+
states_to_process.append((previous_state, current_state))
651+
for next_state in next_states:
652+
states_to_process.append((current_state, next_state))
653+
for previous_state, current_state in delayed_states:
654+
if previous_state and current_state in leading_to_final:
655+
leading_to_final.add(previous_state)
656+
return leading_to_final
657+
658+
def _get_reachable_states(self) -> Set[State]:
659+
""" Get all states which are reachable """
660+
visited = set()
661+
states_to_process = deque(self.start_states)
662+
while states_to_process:
663+
current_state = states_to_process.popleft()
664+
visited.add(current_state)
665+
for next_state in self._get_next_states_from(current_state):
666+
if next_state not in visited:
667+
states_to_process.append(next_state)
668+
return visited
669+
670+
def _get_next_states_from(self, state_from: State) -> Set[State]:
671+
""" Gets a set of states that are next to the given one """
672+
next_states = set()
673+
for _, next_state in \
674+
self._transition_function.get_transitions_from(state_from):
675+
next_states.add(next_state)
676+
return next_states
677+
597678
def to_deterministic(self):
598679
""" Turns the automaton into a deterministic one"""
599680
raise NotImplementedError
@@ -637,6 +718,16 @@ def to_dict(self):
637718
"""
638719
return self._transition_function.to_dict()
639720

721+
@staticmethod
722+
def __try_add(set_to_add_to: Set[Any], element_to_add: Any) -> bool:
723+
"""
724+
Tries to add a given element to the given set.
725+
Returns True if element was added, otherwise False.
726+
"""
727+
initial_length = len(set_to_add_to)
728+
set_to_add_to.add(element_to_add)
729+
return len(set_to_add_to) != initial_length
730+
640731

641732
def to_state(given: Any) -> Union[State, None]:
642733
""" Transforms the input into a state

pyformlang/finite_automaton/nondeterministic_transition_function.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
A nondeterministic transition function
33
"""
44
import copy
5-
from typing import Set
5+
from typing import Set, Iterable, Tuple
66

77
from .state import State
88
from .symbol import Symbol
@@ -201,3 +201,11 @@ def to_dict(self):
201201
The transitions as a dictionary.
202202
"""
203203
return copy.deepcopy(self._transitions)
204+
205+
def get_transitions_from(self, state_from: State) \
206+
-> Iterable[Tuple[Symbol, State]]:
207+
""" Gets transitions from the given state """
208+
if state_from in self._transitions:
209+
for symb_by, states_to in self._transitions[state_from].items():
210+
for state_to in states_to:
211+
yield symb_by, state_to

pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,28 @@ def test_regex_dfa(self):
280280
dfa_regex = dfa1.to_regex().to_epsilon_nfa()
281281
assert dfa1 == dfa_regex
282282

283+
def test_word_generation(self):
284+
dfa = get_dfa_example_for_word_generation()
285+
accepted_words = list(dfa.get_accepted_words())
286+
assert [] in accepted_words
287+
assert [Symbol("b"), Symbol("c")] in accepted_words
288+
assert [Symbol("b"), Symbol("d")] in accepted_words
289+
assert len(accepted_words) == 3
290+
291+
def test_cyclic_word_generation(self):
292+
dfa = get_cyclic_dfa_example()
293+
accepted_words = list(dfa.get_accepted_words(5))
294+
assert ["a", "f"] in accepted_words
295+
assert ["a", "b", "e", "f"] in accepted_words
296+
assert ["a", "b", "c", "e", "f"] in accepted_words
297+
assert ["a", "b", "d", "a", "f"] in accepted_words
298+
assert len(accepted_words) == 4
299+
300+
def test_dfa_generating_no_words(self):
301+
dfa = get_dfa_example_without_accepted_words()
302+
accepted_words = list(dfa.get_accepted_words())
303+
assert not accepted_words
304+
283305

284306
def get_example0():
285307
""" Gives a dfa """
@@ -326,3 +348,54 @@ def get_dfa_example():
326348
dfa1.add_start_state(State("A"))
327349
dfa1.add_final_state(State("D"))
328350
return dfa1
351+
352+
353+
def get_dfa_example_for_word_generation():
354+
""" DFA example for the word generation test """
355+
dfa = DeterministicFiniteAutomaton()
356+
states = [State(x) for x in range(4)]
357+
symbol_a = Symbol("a")
358+
symbol_b = Symbol("b")
359+
symbol_c = Symbol("c")
360+
symbol_d = Symbol("d")
361+
dfa.add_transitions([
362+
(states[0], symbol_a, states[1]),
363+
(states[0], symbol_b, states[2]),
364+
(states[1], symbol_a, states[1]),
365+
(states[2], symbol_c, states[3]),
366+
(states[2], symbol_d, states[3]),
367+
])
368+
dfa.add_start_state(states[0])
369+
dfa.add_final_state(states[0])
370+
dfa.add_final_state(states[3])
371+
return dfa
372+
373+
374+
def get_cyclic_dfa_example():
375+
""" Gets DFA example with several cycles on path to final """
376+
dfa = DeterministicFiniteAutomaton(start_state=0,
377+
final_states={3})
378+
dfa.add_transitions([
379+
(0, "a", 1),
380+
(1, "b", 2),
381+
(2, "c", 2),
382+
(2, "d", 0),
383+
(2, "e", 1),
384+
(1, "f", 3),
385+
])
386+
return dfa
387+
388+
389+
def get_dfa_example_without_accepted_words():
390+
""" DFA example accepting no words """
391+
dfa = DeterministicFiniteAutomaton()
392+
states = [State(x) for x in range(4)]
393+
symbol_a = Symbol("a")
394+
symbol_b = Symbol("b")
395+
dfa.add_transitions([
396+
(states[0], symbol_a, states[1]),
397+
(states[2], symbol_b, states[3]),
398+
])
399+
dfa.add_start_state(states[0])
400+
dfa.add_final_state(states[3])
401+
return dfa

pyformlang/finite_automaton/tests/test_epsilon_nfa.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,47 @@ def test_remove_epsilon_transitions(self):
622622
assert nfa.get_number_transitions() == 3
623623
assert nfa.is_equivalent_to(enfa)
624624

625+
def test_word_generation(self):
626+
enfa = get_enfa_example_for_word_generation()
627+
accepted_words = list(enfa.get_accepted_words())
628+
assert [] in accepted_words
629+
assert [Symbol("b")] in accepted_words
630+
assert [Symbol("c")] in accepted_words
631+
assert [Symbol("d"), Symbol("e")] in accepted_words
632+
assert [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words
633+
assert len(accepted_words) == 5
634+
635+
def test_cyclic_word_generation(self):
636+
enfa = get_cyclic_enfa_example()
637+
max_length = 10
638+
accepted_words = [[Symbol("a")] +
639+
[Symbol("b")] * (i + 1) +
640+
[Symbol("c")]
641+
for i in range(max_length - 2)]
642+
actual_accepted_words = list(enfa.get_accepted_words(max_length))
643+
assert accepted_words == actual_accepted_words
644+
645+
def test_epsilon_cycle_word_generation(self):
646+
enfa = get_epsilon_cycle_enfa_example()
647+
max_length = 4
648+
accepted_words = list(enfa.get_accepted_words(max_length))
649+
assert [] in accepted_words
650+
assert [Symbol("a"), Symbol("c")] in accepted_words
651+
assert [Symbol("a"), Symbol("b"), Symbol("c")] in accepted_words
652+
assert [Symbol("a"), Symbol("b"),
653+
Symbol("b"), Symbol("c")] in accepted_words
654+
assert len(accepted_words) == 4
655+
656+
def test_max_length_zero_accepting_empty_string(self):
657+
enfa = get_enfa_example_for_word_generation()
658+
accepted_words = list(enfa.get_accepted_words(0))
659+
assert accepted_words == [[]]
660+
661+
def test_max_length_zero_not_accepting_empty_string(self):
662+
enfa = get_cyclic_enfa_example()
663+
accepted_words = list(enfa.get_accepted_words(0))
664+
assert not accepted_words
665+
625666

626667
def get_digits_enfa():
627668
""" An epsilon NFA to recognize digits """
@@ -730,3 +771,75 @@ def get_example_non_minimal():
730771
enfa0.add_transition(state5, symb_b, state3)
731772
enfa0.add_transition(state6, symb_b, state4)
732773
return enfa0
774+
775+
776+
def get_enfa_example_for_word_generation():
777+
""" ENFA example for the word generation test """
778+
enfa = EpsilonNFA()
779+
states = [State(x) for x in range(9)]
780+
symbol_a = Symbol("a")
781+
symbol_b = Symbol("b")
782+
symbol_c = Symbol("c")
783+
symbol_d = Symbol("d")
784+
symbol_e = Symbol("e")
785+
symbol_f = Symbol("f")
786+
epsilon = Epsilon()
787+
enfa.add_transitions([
788+
(states[0], symbol_a, states[1]),
789+
(states[0], epsilon, states[2]),
790+
(states[1], symbol_a, states[1]),
791+
(states[2], symbol_b, states[3]),
792+
(states[2], symbol_c, states[3]),
793+
(states[4], symbol_d, states[5]),
794+
(states[5], symbol_e, states[6]),
795+
(states[5], symbol_e, states[7]),
796+
(states[7], symbol_f, states[8]),
797+
])
798+
enfa.add_start_state(states[0])
799+
enfa.add_start_state(states[4])
800+
enfa.add_final_state(states[3])
801+
enfa.add_final_state(states[4])
802+
enfa.add_final_state(states[6])
803+
enfa.add_final_state(states[8])
804+
return enfa
805+
806+
807+
def get_cyclic_enfa_example():
808+
""" ENFA example with a cycle on the path to the final state """
809+
enfa = EpsilonNFA()
810+
states = [State(x) for x in range(4)]
811+
symbol_a = Symbol("a")
812+
symbol_b = Symbol("b")
813+
symbol_c = Symbol("c")
814+
epsilon = Epsilon()
815+
enfa.add_transitions([
816+
(states[0], symbol_a, states[1]),
817+
(states[1], symbol_b, states[2]),
818+
(states[2], epsilon, states[1]),
819+
(states[2], symbol_c, states[3]),
820+
])
821+
enfa.add_start_state(states[0])
822+
enfa.add_final_state(states[3])
823+
return enfa
824+
825+
826+
def get_epsilon_cycle_enfa_example():
827+
""" ENFA example with an epsilon cycle """
828+
enfa = EpsilonNFA()
829+
states = [State(x) for x in range(4)]
830+
symbol_a = Symbol("a")
831+
symbol_b = Symbol("b")
832+
symbol_c = Symbol("c")
833+
epsilon = Epsilon()
834+
enfa.add_transitions([
835+
(states[0], epsilon, states[0]),
836+
(states[0], symbol_a, states[1]),
837+
(states[1], symbol_b, states[1]),
838+
(states[1], epsilon, states[2]),
839+
(states[2], epsilon, states[1]),
840+
(states[1], symbol_c, states[3]),
841+
])
842+
enfa.add_start_state(states[0])
843+
enfa.add_final_state(states[0])
844+
enfa.add_final_state(states[3])
845+
return enfa

0 commit comments

Comments
 (0)