|
| 1 | +package week4.regex; |
| 2 | + |
| 3 | +import week4.regex.ast.*; |
| 4 | + |
| 5 | +import java.util.Collections; |
| 6 | +import java.util.HashSet; |
| 7 | +import java.util.Set; |
| 8 | + |
| 9 | +public class RegexAnalyzer { |
| 10 | + |
| 11 | + public static boolean matchesEmptyWord(RegexNode regex) { |
| 12 | + return switch (regex) { |
| 13 | + case Letter _ -> false; |
| 14 | + case Epsilon _, Repetition _ -> true; |
| 15 | + case Concatenation(RegexNode left, RegexNode right) -> matchesEmptyWord(left) && matchesEmptyWord(right); |
| 16 | + case Alternation(RegexNode left, RegexNode right) -> matchesEmptyWord(left) || matchesEmptyWord(right); |
| 17 | + }; |
| 18 | + } |
| 19 | + |
| 20 | + |
| 21 | + public static Set<Character> getFirst(String regex) { |
| 22 | + RegexNode node = RegexParser.parse(regex); |
| 23 | + return getFirstResult(node).firstSet; |
| 24 | + } |
| 25 | + |
| 26 | + private record ResultType(Set<Character> firstSet, boolean matchesEmpty) { |
| 27 | + } |
| 28 | + |
| 29 | + private static ResultType getFirstResult(RegexNode regex) { |
| 30 | + return switch (regex) { |
| 31 | + case Alternation(RegexNode left, RegexNode right) -> { |
| 32 | + ResultType leftResult = getFirstResult(left); |
| 33 | + ResultType rightResult = getFirstResult(right); |
| 34 | + Set<Character> chars = new HashSet<>(leftResult.firstSet); |
| 35 | + chars.addAll(rightResult.firstSet); |
| 36 | + yield new ResultType(chars, leftResult.matchesEmpty || rightResult.matchesEmpty); |
| 37 | + } |
| 38 | + case Concatenation(RegexNode left, RegexNode right) -> { |
| 39 | + ResultType leftResult = getFirstResult(left); |
| 40 | + ResultType rightResult = getFirstResult(right); |
| 41 | + Set<Character> chars = new HashSet<>(leftResult.firstSet); |
| 42 | + // Siin on see oluline koht, kus kontrollime tühjust: |
| 43 | + if (leftResult.matchesEmpty) chars.addAll(rightResult.firstSet); |
| 44 | + yield new ResultType(chars, leftResult.matchesEmpty && rightResult.matchesEmpty); |
| 45 | + } |
| 46 | + case Epsilon _ -> new ResultType(Collections.emptySet(), true); |
| 47 | + case Letter(char symbol) -> new ResultType(Collections.singleton(symbol), false); |
| 48 | + case Repetition(RegexNode child) -> { |
| 49 | + ResultType childResult = getFirstResult(child); |
| 50 | + yield new ResultType(childResult.firstSet, true); |
| 51 | + } |
| 52 | + }; |
| 53 | + } |
| 54 | + |
| 55 | + |
| 56 | + public static Set<String> getAllWords(RegexNode regex) { |
| 57 | + return switch (regex) { |
| 58 | + case Alternation(RegexNode left, RegexNode right) -> { |
| 59 | + Set<String> ret = new HashSet<>(getAllWords(left)); |
| 60 | + ret.addAll(getAllWords(right)); |
| 61 | + yield ret; |
| 62 | + } |
| 63 | + case Concatenation(RegexNode left, RegexNode right) -> combine(getAllWords(left), getAllWords(right)); |
| 64 | + case Epsilon _ -> Collections.singleton(""); |
| 65 | + case Letter(char symbol) -> Collections.singleton(Character.toString(symbol)); |
| 66 | + case Repetition(RegexNode child) -> { |
| 67 | + Set<String> childWords = getAllWords(child); |
| 68 | + if (childWords.equals(Collections.singleton(""))) |
| 69 | + yield childWords; |
| 70 | + else |
| 71 | + throw new RuntimeException("Infinite language"); |
| 72 | + } |
| 73 | + }; |
| 74 | + } |
| 75 | + |
| 76 | + private static Set<String> combine(Set<String> s1, Set<String> s2) { |
| 77 | + Set<String> result = new HashSet<>(); |
| 78 | + for (String w1 : s1) { |
| 79 | + for (String w2 : s2) { |
| 80 | + result.add(w1 + w2); |
| 81 | + } |
| 82 | + } |
| 83 | + return result; |
| 84 | + } |
| 85 | +} |
0 commit comments