Skip to content

Commit 7432eb4

Browse files
committed
Merge branch 'parsing-reg-ex-to-tree'
2 parents 1de9611 + 166d418 commit 7432eb4

File tree

4 files changed

+289
-1
lines changed

4 files changed

+289
-1
lines changed

input.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
5
2+
star(symbol(a))
3+
aaaa
4+
concat(star(symbol(b)),symbol(a))
5+
bba
6+
concat(star(symbol(a)),union(symbol(b),symbol(c)))
7+
aab
8+
concat(star(union(symbol(a),union(symbol(b),symbol(c)))),symbol(d))
9+
dabcd
10+
concat(concat(symbol(0),symbol(1)),star(union(symbol(0),symbol(1))))
11+
1011

output.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
5
2+
star(symbol(a))
3+
aaaa
4+
concat(star(symbol(b)),symbol(a))
5+
bba
6+
concat(star(symbol(a)),union(symbol(b),symbol(c)))
7+
aab
8+
concat(star(union(symbol(a),union(symbol(b),symbol(c)))),symbol(d))
9+
dabcd
10+
concat(concat(symbol(0),symbol(1)),star(union(symbol(0),symbol(1))))
11+
1011
12+
Hello, world!

src/main.rs

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
#![allow(dead_code)]
22

3+
use std::io::{self, BufRead};
4+
35
mod custom_errors;
46
mod d_transition_function;
57
mod dfa;
68
mod disjoint_set_union;
79
mod n_transition_function;
810
mod nfa;
11+
mod parsing;
912
mod state;
1013
mod symbol_table;
1114
mod transition_function;
@@ -14,5 +17,56 @@ mod transition_function;
1417
// mod nfa;
1518

1619
fn main() {
17-
println!("Hello, world!");
20+
let stdin = io::stdin();
21+
let mut iter = stdin.lock().lines();
22+
23+
let num_test_cases = iter
24+
.next()
25+
.unwrap_or_else(|| {
26+
panic!("No number of test cases given");
27+
})
28+
.unwrap_or_else(|err| {
29+
panic!("Error in std input");
30+
})
31+
.parse::<usize>()
32+
.unwrap_or_else(|err| {
33+
panic!("Error in parsing number {}", err.to_string());
34+
});
35+
36+
for _ in 0..num_test_cases {
37+
let regex = iter
38+
.next()
39+
.unwrap_or_else(|| {
40+
panic!("No number of test cases given");
41+
})
42+
.unwrap_or_else(|err| {
43+
panic!("Error in std input");
44+
});
45+
let input_string = iter
46+
.next()
47+
.unwrap_or_else(|| {
48+
panic!("No number of test cases given");
49+
})
50+
.unwrap_or_else(|err| {
51+
panic!("Error in std input");
52+
});
53+
54+
let dfa = parsing::create_dfa_from_reg_ex(&regex);
55+
let dfa = match dfa {
56+
Ok(dfa) => dfa,
57+
Err(err) => {
58+
println!("{}", err.to_string());
59+
continue;
60+
}
61+
};
62+
let result = dfa.run(&input_string);
63+
match result {
64+
Ok(res) => {
65+
println!("{}", if res { "Yes" } else { "No" });
66+
}
67+
Err(err) => {
68+
println!("{}", err.to_string());
69+
}
70+
}
71+
}
1872
}

src/parsing.rs

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
use std::collections::HashSet;
2+
3+
use thiserror::Error;
4+
5+
use crate::{
6+
dfa::DFA,
7+
nfa::NFA,
8+
symbol_table::{Symbol, SymbolTable},
9+
};
10+
11+
type Stack<T> = Vec<T>;
12+
13+
#[derive(Clone, Debug, Error)]
14+
pub enum ParsingError {
15+
#[error("Parsing Error")]
16+
ParseError,
17+
}
18+
19+
pub fn create_nfa_from_reg_ex(input: &str) -> Result<NFA, ParsingError> {
20+
let symbol_table = create_symbol_table(input)?;
21+
22+
let bytes = input.as_bytes();
23+
24+
let mut string_stack: Stack<&str> = Stack::new();
25+
let mut nfa_stack: Stack<NFA> = Stack::new();
26+
27+
let mut i = 0;
28+
let n = bytes.len();
29+
while i < n {
30+
if bytes[i] == b'c' {
31+
// has to start with concat
32+
if i + 7 >= n {
33+
return Err(ParsingError::ParseError);
34+
}
35+
if &input[i..i + 7] == "concat(" {
36+
string_stack.push("(");
37+
string_stack.push("concat");
38+
i += 7;
39+
} else {
40+
return Err(ParsingError::ParseError);
41+
}
42+
} else if bytes[i] == b'u' {
43+
// has to be union
44+
if i + 6 >= n {
45+
return Err(ParsingError::ParseError);
46+
}
47+
48+
if &input[i..i + 6] == "union(" {
49+
string_stack.push("(");
50+
string_stack.push("union");
51+
i += 6;
52+
} else {
53+
return Err(ParsingError::ParseError);
54+
}
55+
} else if bytes[i] == b's' {
56+
// must be star or symbol
57+
if i + 5 >= n {
58+
return Err(ParsingError::ParseError);
59+
}
60+
61+
if &input[i..i + 5] == "star(" {
62+
string_stack.push("(");
63+
string_stack.push("star");
64+
i += 5;
65+
} else if i + 7 >= n {
66+
return Err(ParsingError::ParseError);
67+
} else if &input[i..i + 7] == "symbol(" && bytes[i + 8] == b')' {
68+
// since its a symbol it will be only a single character
69+
// skip by length of symbol(a)
70+
let nfa_from_symbol =
71+
NFA::from_symbol(&Symbol::Character(bytes[i + 7] as char), &symbol_table);
72+
nfa_stack.push(nfa_from_symbol);
73+
74+
i += 9;
75+
} else {
76+
return Err(ParsingError::ParseError);
77+
}
78+
} else if bytes[i] == b')' {
79+
i += 1;
80+
81+
while let Some(string) = string_stack.pop() {
82+
match string {
83+
"star" => {
84+
if let Some(nfa) = nfa_stack.pop() {
85+
// push kleene star onto stack
86+
let nfa_kleene_star = nfa.kleene_star();
87+
nfa_stack.push(nfa_kleene_star);
88+
} else {
89+
return Err(ParsingError::ParseError);
90+
}
91+
}
92+
"union" => {
93+
if nfa_stack.len() < 2 {
94+
return Err(ParsingError::ParseError);
95+
}
96+
let second_nfa = nfa_stack.pop().unwrap();
97+
let first_nfa = nfa_stack.pop().unwrap();
98+
let nfa_union = first_nfa.union(second_nfa);
99+
100+
nfa_stack.push(nfa_union);
101+
}
102+
"concat" => {
103+
if nfa_stack.len() < 2 {
104+
return Err(ParsingError::ParseError);
105+
}
106+
let second_nfa = nfa_stack.pop().unwrap();
107+
let first_nfa = nfa_stack.pop().unwrap();
108+
let nfa_concat = first_nfa.concat(second_nfa);
109+
110+
nfa_stack.push(nfa_concat);
111+
}
112+
"(" => {
113+
break;
114+
}
115+
_ => {
116+
return Err(ParsingError::ParseError);
117+
}
118+
}
119+
}
120+
} else if bytes[i] == b',' {
121+
// comma is just a separator
122+
i += 1;
123+
} else {
124+
return Err(ParsingError::ParseError);
125+
}
126+
}
127+
128+
if nfa_stack.len() != 1 {
129+
return Err(ParsingError::ParseError);
130+
}
131+
132+
Ok(nfa_stack.pop().unwrap())
133+
}
134+
135+
/// Parses `input` into an NFA, converts that NFA to a DFA and returns the
/// minimized DFA.
///
/// # Errors
///
/// Propagates the [`ParsingError`] reported by `create_nfa_from_reg_ex`.
pub fn create_dfa_from_reg_ex(input: &str) -> Result<DFA, ParsingError> {
    create_nfa_from_reg_ex(input).map(|nfa| DFA::convert_to_dfa(nfa).minimized_dfa())
}
142+
143+
fn extract_symbols(input: &str) -> Result<HashSet<char>, ParsingError> {
144+
let mut result = HashSet::new();
145+
let bytes = input.as_bytes();
146+
147+
let mut i = 0;
148+
while i + 8 < bytes.len() {
149+
if &input[i..i + 7] == "symbol(" && bytes[i + 8] != b')' {
150+
return Err(ParsingError::ParseError);
151+
}
152+
if &input[i..i + 7] == "symbol(" && bytes[i + 8] == b')' {
153+
// The character at position i+7 is the one inside symbol(...)
154+
result.insert(input.chars().nth(i + 7).unwrap());
155+
i += 9; // move past "symbol(x)"
156+
} else {
157+
i += 1;
158+
}
159+
}
160+
161+
Ok(result)
162+
}
163+
164+
/// Builds a [`SymbolTable`] holding every character returned by
/// `extract_symbols` for `input`.
///
/// # Errors
///
/// Propagates the [`ParsingError`] reported by `extract_symbols`.
fn create_symbol_table(input: &str) -> Result<SymbolTable, ParsingError> {
    // Extract first so a malformed input fails before any table is created.
    let characters = extract_symbols(input)?;

    let mut table = SymbolTable::new();
    characters.into_iter().for_each(|character| {
        table.add_character(character);
    });

    Ok(table)
}
175+
176+
#[cfg(test)]
mod tests {
    use crate::dfa::DFA;

    use super::*;

    /// Parses `regex` into an NFA and converts it to a DFA, panicking if the
    /// expression does not parse.
    fn dfa_for(regex: &str) -> DFA {
        let nfa = create_nfa_from_reg_ex(regex).unwrap();
        DFA::convert_to_dfa(nfa)
    }

    /// Every distinct `symbol(x)` character in the expression is collected.
    #[test]
    fn check_extracting_symbols() {
        let found = extract_symbols(
            "concat(concat(symbol(a),symbol(1)),star(union(symbol(0),symbol(1))))",
        )
        .unwrap();

        for expected in ['a', '0', '1'] {
            assert!(found.contains(&expected));
        }
    }

    /// DFAs built from parsed expressions accept and reject the expected
    /// strings.
    #[test]
    fn check_dfa() {
        let repeated_a = dfa_for("star(symbol(a))");
        assert!(matches!(repeated_a.run("aaaa"), Ok(true)));

        let starts_with_01 =
            dfa_for("concat(concat(symbol(0),symbol(1)),star(union(symbol(0),symbol(1))))");
        assert!(matches!(starts_with_01.run("1011"), Ok(false)));
        assert!(matches!(starts_with_01.run("01"), Ok(true)));
        assert!(matches!(starts_with_01.run("010011"), Ok(true)));
    }
}

0 commit comments

Comments
 (0)