|
1 |
| -pub mod base; |
2 | 1 | pub mod common;
|
3 | 2 | pub mod init;
|
4 | 3 | pub mod syntax;
|
| 4 | + |
| 5 | +use std::borrow::Cow; |
| 6 | +use std::collections::hash_map::Entry; |
| 7 | +use std::fmt::Debug; |
| 8 | + |
| 9 | +use ahash::{AHashMap, AHashSet}; |
| 10 | + |
| 11 | +use crate::dialects::init::DialectKind; |
| 12 | +use crate::dialects::syntax::SyntaxKind; |
| 13 | +use crate::helpers::{ToMatchable, capitalize}; |
| 14 | +use crate::parser::lexer::{Lexer, Matcher}; |
| 15 | +use crate::parser::matchable::Matchable; |
| 16 | +use crate::parser::parsers::StringParser; |
| 17 | +use crate::parser::types::DialectElementType; |
| 18 | + |
| 19 | +#[derive(Debug, Clone, Default)] |
| 20 | +pub struct Dialect { |
| 21 | + pub name: DialectKind, |
| 22 | + lexer_matchers: Option<Vec<Matcher>>, |
| 23 | + // TODO: Can we use PHF here? https://crates.io/crates/phf |
| 24 | + library: AHashMap<Cow<'static, str>, DialectElementType>, |
| 25 | + sets: AHashMap<&'static str, AHashSet<&'static str>>, |
| 26 | + pub bracket_collections: AHashMap<&'static str, AHashSet<BracketPair>>, |
| 27 | + lexer: Option<Lexer>, |
| 28 | +} |
| 29 | + |
| 30 | +impl PartialEq for Dialect { |
| 31 | + fn eq(&self, other: &Self) -> bool { |
| 32 | + self.name == other.name |
| 33 | + } |
| 34 | +} |
| 35 | + |
| 36 | +impl Dialect { |
| 37 | + pub fn new() -> Self { |
| 38 | + Dialect { |
| 39 | + name: DialectKind::Ansi, |
| 40 | + ..Default::default() |
| 41 | + } |
| 42 | + } |
| 43 | + |
| 44 | + pub fn name(&self) -> DialectKind { |
| 45 | + self.name |
| 46 | + } |
| 47 | + |
| 48 | + pub fn add( |
| 49 | + &mut self, |
| 50 | + iter: impl IntoIterator<Item = (Cow<'static, str>, DialectElementType)> + Clone, |
| 51 | + ) { |
| 52 | + self.library.extend(iter); |
| 53 | + } |
| 54 | + |
| 55 | + pub fn grammar(&self, name: &str) -> Matchable { |
| 56 | + match self |
| 57 | + .library |
| 58 | + .get(name) |
| 59 | + .unwrap_or_else(|| panic!("not found {name}")) |
| 60 | + { |
| 61 | + DialectElementType::Matchable(matchable) => matchable.clone(), |
| 62 | + DialectElementType::SegmentGenerator(_) => { |
| 63 | + unreachable!("Attempted to fetch non grammar [{name}] with `Dialect::grammar`.") |
| 64 | + } |
| 65 | + } |
| 66 | + } |
| 67 | + |
| 68 | + #[track_caller] |
| 69 | + pub fn replace_grammar(&mut self, name: &'static str, match_grammar: Matchable) { |
| 70 | + match self |
| 71 | + .library |
| 72 | + .get_mut(name) |
| 73 | + .unwrap_or_else(|| panic!("Failed to get mutable reference for {name}")) |
| 74 | + { |
| 75 | + DialectElementType::Matchable(matchable) => { |
| 76 | + matchable.as_node_matcher().unwrap().match_grammar = match_grammar; |
| 77 | + } |
| 78 | + DialectElementType::SegmentGenerator(_) => { |
| 79 | + unreachable!("Attempted to fetch non grammar [{name}] with `Dialect::grammar`.") |
| 80 | + } |
| 81 | + } |
| 82 | + } |
| 83 | + |
| 84 | + pub fn lexer_matchers(&self) -> &[Matcher] { |
| 85 | + match &self.lexer_matchers { |
| 86 | + Some(lexer_matchers) => lexer_matchers, |
| 87 | + None => panic!("Lexing struct has not been set for dialect {self:?}"), |
| 88 | + } |
| 89 | + } |
| 90 | + |
| 91 | + pub fn insert_lexer_matchers(&mut self, lexer_patch: Vec<Matcher>, before: &str) { |
| 92 | + let mut buff = Vec::new(); |
| 93 | + let mut found = false; |
| 94 | + |
| 95 | + if self.lexer_matchers.is_none() { |
| 96 | + panic!("Lexer struct must be defined before it can be patched!"); |
| 97 | + } |
| 98 | + |
| 99 | + for elem in self.lexer_matchers.take().unwrap() { |
| 100 | + if elem.name() == before { |
| 101 | + found = true; |
| 102 | + for patch in lexer_patch.clone() { |
| 103 | + buff.push(patch); |
| 104 | + } |
| 105 | + buff.push(elem); |
| 106 | + } else { |
| 107 | + buff.push(elem); |
| 108 | + } |
| 109 | + } |
| 110 | + |
| 111 | + if !found { |
| 112 | + panic!("Lexer struct insert before '{before}' failed because tag never found."); |
| 113 | + } |
| 114 | + |
| 115 | + self.lexer_matchers = Some(buff); |
| 116 | + } |
| 117 | + |
| 118 | + pub fn patch_lexer_matchers(&mut self, lexer_patch: Vec<Matcher>) { |
| 119 | + let mut buff = Vec::with_capacity(self.lexer_matchers.as_ref().map_or(0, Vec::len)); |
| 120 | + if self.lexer_matchers.is_none() { |
| 121 | + panic!("Lexer struct must be defined before it can be patched!"); |
| 122 | + } |
| 123 | + |
| 124 | + let patch_dict: AHashMap<&'static str, Matcher> = lexer_patch |
| 125 | + .into_iter() |
| 126 | + .map(|elem| (elem.name(), elem)) |
| 127 | + .collect(); |
| 128 | + |
| 129 | + for elem in self.lexer_matchers.take().unwrap() { |
| 130 | + if let Some(patch) = patch_dict.get(elem.name()) { |
| 131 | + buff.push(patch.clone()); |
| 132 | + } else { |
| 133 | + buff.push(elem); |
| 134 | + } |
| 135 | + } |
| 136 | + |
| 137 | + self.lexer_matchers = Some(buff); |
| 138 | + } |
| 139 | + |
| 140 | + pub fn set_lexer_matchers(&mut self, lexer_matchers: Vec<Matcher>) { |
| 141 | + self.lexer_matchers = lexer_matchers.into(); |
| 142 | + } |
| 143 | + |
| 144 | + pub fn sets(&self, label: &str) -> AHashSet<&'static str> { |
| 145 | + match label { |
| 146 | + "bracket_pairs" | "angle_bracket_pairs" => { |
| 147 | + panic!("Use `bracket_sets` to retrieve {} set.", label); |
| 148 | + } |
| 149 | + _ => (), |
| 150 | + } |
| 151 | + |
| 152 | + self.sets.get(label).cloned().unwrap_or_default() |
| 153 | + } |
| 154 | + |
| 155 | + pub fn sets_mut(&mut self, label: &'static str) -> &mut AHashSet<&'static str> { |
| 156 | + assert!( |
| 157 | + label != "bracket_pairs" && label != "angle_bracket_pairs", |
| 158 | + "Use `bracket_sets` to retrieve {} set.", |
| 159 | + label |
| 160 | + ); |
| 161 | + |
| 162 | + match self.sets.entry(label) { |
| 163 | + Entry::Occupied(entry) => entry.into_mut(), |
| 164 | + Entry::Vacant(entry) => entry.insert(<_>::default()), |
| 165 | + } |
| 166 | + } |
| 167 | + |
| 168 | + pub fn update_keywords_set_from_multiline_string( |
| 169 | + &mut self, |
| 170 | + set_label: &'static str, |
| 171 | + values: &'static str, |
| 172 | + ) { |
| 173 | + let keywords = values.lines().map(str::trim); |
| 174 | + self.sets_mut(set_label).extend(keywords); |
| 175 | + } |
| 176 | + |
| 177 | + pub fn add_keyword_to_set(&mut self, set_label: &'static str, value: &'static str) { |
| 178 | + self.sets_mut(set_label).insert(value); |
| 179 | + } |
| 180 | + |
| 181 | + pub fn bracket_sets(&self, label: &str) -> AHashSet<BracketPair> { |
| 182 | + assert!( |
| 183 | + label == "bracket_pairs" || label == "angle_bracket_pairs", |
| 184 | + "Invalid bracket set. Consider using another identifier instead." |
| 185 | + ); |
| 186 | + |
| 187 | + self.bracket_collections |
| 188 | + .get(label) |
| 189 | + .cloned() |
| 190 | + .unwrap_or_default() |
| 191 | + } |
| 192 | + |
| 193 | + pub fn bracket_sets_mut(&mut self, label: &'static str) -> &mut AHashSet<BracketPair> { |
| 194 | + assert!( |
| 195 | + label == "bracket_pairs" || label == "angle_bracket_pairs", |
| 196 | + "Invalid bracket set. Consider using another identifier instead." |
| 197 | + ); |
| 198 | + |
| 199 | + self.bracket_collections.entry(label).or_default() |
| 200 | + } |
| 201 | + |
| 202 | + pub fn update_bracket_sets(&mut self, label: &'static str, pairs: Vec<BracketPair>) { |
| 203 | + let set = self.bracket_sets_mut(label); |
| 204 | + for pair in pairs { |
| 205 | + set.insert(pair); |
| 206 | + } |
| 207 | + } |
| 208 | + |
| 209 | + pub fn r#ref(&self, name: &str) -> Matchable { |
| 210 | + match self.library.get(name) { |
| 211 | + Some(DialectElementType::Matchable(matchable)) => matchable.clone(), |
| 212 | + Some(DialectElementType::SegmentGenerator(_)) => { |
| 213 | + panic!("Unexpected SegmentGenerator while fetching '{}'", name); |
| 214 | + } |
| 215 | + None => { |
| 216 | + if let Some(keyword) = name.strip_suffix("KeywordSegment") { |
| 217 | + let keyword_tip = "\ |
| 218 | + \n\nThe syntax in the query is not (yet?) supported. Try to \ |
| 219 | + narrow down your query to a minimal, reproducible case and \ |
| 220 | + raise an issue on GitHub.\n\n\ |
| 221 | + Or, even better, see this guide on how to help contribute \ |
| 222 | + keyword and/or dialect updates:\n\ |
| 223 | + https://github.com/quarylabs/sqruff"; |
| 224 | + panic!( |
| 225 | + "Grammar refers to the '{keyword}' keyword which was not found in the \ |
| 226 | + dialect.{keyword_tip}", |
| 227 | + ); |
| 228 | + } else { |
| 229 | + panic!("Grammar refers to '{name}' which was not found in the dialect.",); |
| 230 | + } |
| 231 | + } |
| 232 | + } |
| 233 | + } |
| 234 | + |
| 235 | + pub fn expand(&mut self) { |
| 236 | + // Temporarily take ownership of 'library' from 'self' to avoid borrow checker |
| 237 | + // errors during mutation. |
| 238 | + let mut library = std::mem::take(&mut self.library); |
| 239 | + for element in library.values_mut() { |
| 240 | + if let DialectElementType::SegmentGenerator(generator) = element { |
| 241 | + *element = DialectElementType::Matchable(generator.expand(self)); |
| 242 | + } |
| 243 | + } |
| 244 | + self.library = library; |
| 245 | + |
| 246 | + for keyword_set in ["unreserved_keywords", "reserved_keywords"] { |
| 247 | + if let Some(keywords) = self.sets.get(keyword_set) { |
| 248 | + for kw in keywords { |
| 249 | + let n = format!("{}KeywordSegment", capitalize(kw)); |
| 250 | + if !self.library.contains_key(n.as_str()) { |
| 251 | + let parser = StringParser::new(&kw.to_lowercase(), SyntaxKind::Keyword); |
| 252 | + |
| 253 | + self.library.insert( |
| 254 | + n.into(), |
| 255 | + DialectElementType::Matchable(parser.to_matchable()), |
| 256 | + ); |
| 257 | + } |
| 258 | + } |
| 259 | + } |
| 260 | + } |
| 261 | + |
| 262 | + self.lexer = Lexer::new(self.lexer_matchers()).into(); |
| 263 | + } |
| 264 | + |
| 265 | + pub fn lexer(&self) -> &Lexer { |
| 266 | + self.lexer.as_ref().unwrap() |
| 267 | + } |
| 268 | +} |
| 269 | + |
/// One entry of a dialect's bracket collections (see `Dialect::bracket_sets`).
///
/// NOTE(review): the meaning of the individual elements is not visible in this file —
/// presumably `(bracket type, start bracket reference, end bracket reference, persists)`;
/// confirm against the dialect definitions that call `Dialect::update_bracket_sets`.
pub type BracketPair = (&'static str, &'static str, &'static str, bool);
0 commit comments