-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild.rs
More file actions
executable file
·266 lines (245 loc) · 8.34 KB
/
build.rs
File metadata and controls
executable file
·266 lines (245 loc) · 8.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#![warn(clippy::all, clippy::pedantic, clippy::nursery)]
use std::{
collections::{HashMap, HashSet},
env,
error::Error,
fmt::Write as _,
fs,
path::Path,
result,
};
use indexmap::IndexMap;
use serde::{Deserialize, Deserializer, de};
type Result<T> = result::Result<T, Box<dyn Error>>;
fn validate_no_whitespace(s: &str, field: &str, idx: Option<usize>) -> result::Result<(), String> {
if s.trim() == s {
Ok(())
} else {
Err(idx.map_or_else(
|| format!("{field}: has leading/trailing whitespace"),
|i| format!("{field} {}: has leading/trailing whitespace", i + 1),
))
}
}
fn deserialize_vec_strings<'de, D>(
deserializer: D,
field: &'static str,
allow_empty_vec: bool,
validate: impl Fn(&str, usize) -> result::Result<(), String>,
) -> result::Result<Vec<String>, D::Error>
where
D: Deserializer<'de>,
{
let values: Vec<String> = Vec::deserialize(deserializer)?;
if !allow_empty_vec && values.is_empty() {
return Err(de::Error::custom(format!("{field} cannot be empty")));
}
for (idx, value) in values.iter().enumerate() {
validate(value, idx).map_err(de::Error::custom)?;
}
Ok(values)
}
fn deserialize_file_patterns<'de, D>(deserializer: D) -> result::Result<Vec<String>, D::Error>
where
D: Deserializer<'de>,
{
deserialize_vec_strings(deserializer, "file_patterns", false, |s, idx| {
if s.is_empty() {
Err(format!("pattern {}: cannot be empty", idx + 1))
} else {
validate_no_whitespace(s, "pattern", Some(idx))
}
})
}
fn deserialize_line_comments<'de, D>(deserializer: D) -> result::Result<Vec<String>, D::Error>
where
D: Deserializer<'de>,
{
deserialize_vec_strings(deserializer, "line_comments", true, |s, idx| {
if s.is_empty() { Err(format!("line comment {}: cannot be empty", idx + 1)) } else { Ok(()) }
})
}
fn deserialize_block_comments<'de, D>(deserializer: D) -> result::Result<Vec<(String, String)>, D::Error>
where
D: Deserializer<'de>,
{
let pairs: Vec<Vec<String>> = Vec::deserialize(deserializer)?;
let mut out: Vec<(String, String)> = Vec::with_capacity(pairs.len());
for (idx, pair) in pairs.into_iter().enumerate() {
let err = |msg| de::Error::custom(format!("block comment {}: {msg}", idx + 1));
if pair.len() != 2 {
return Err(err("must contain exactly start and end delimiters"));
}
let mut iter = pair.into_iter();
let start = iter.next().unwrap();
let end = iter.next().unwrap();
if start.is_empty() {
return Err(err("start cannot be empty"));
}
if end.is_empty() {
return Err(err("end cannot be empty"));
}
out.push((start, end));
}
Ok(out)
}
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
struct LanguageConfig {
#[serde(skip)]
name: String,
#[serde(deserialize_with = "deserialize_file_patterns")]
file_patterns: Vec<String>,
#[serde(default, deserialize_with = "deserialize_line_comments")]
line_comments: Vec<String>,
#[serde(default, deserialize_with = "deserialize_block_comments")]
block_comments: Vec<(String, String)>,
#[serde(default)]
nested_blocks: bool,
#[serde(default)]
shebangs: Vec<String>,
#[serde(default)]
keywords: Vec<String>,
}
const LANGUAGE_SCHEMA: &[(&str, &str)] = &[
("index", "usize"),
("name", "&'static str"),
("file_patterns", "&'static [&'static str]"),
("line_comments", "&'static [&'static str]"),
("block_comments", "&'static [(&'static str, &'static str)]"),
("nested_blocks", "bool"),
("shebangs", "&'static [&'static str]"),
("keywords", "&'static [&'static str]"),
];
fn write_field(output: &mut String, name: &str, value: impl std::fmt::Display) {
let _ = writeln!(output, "\t\t{name}: {value},");
}
fn render_slice<T>(values: &[T], render: impl Fn(&T) -> String) -> String {
if values.is_empty() {
"&[]".to_string()
} else {
let joined = values.iter().map(render).collect::<Vec<_>>().join(", ");
format!("&[{joined}]")
}
}
fn render_languages(languages: &[LanguageConfig]) -> String {
let mut output = String::new();
output.push_str("#[derive(Debug, Clone, PartialEq, Eq)]\n");
output.push_str("/// Holds information about a single programming language.\n");
output.push_str("pub struct Language {\n");
for (field, ty) in LANGUAGE_SCHEMA {
let _ = writeln!(output, "\tpub {field}: {ty},");
}
output.push_str("}\n\n");
output.push_str("pub static LANGUAGES: &[Language] = &[\n");
for (idx, lang) in languages.iter().enumerate() {
output.push_str("\tLanguage {\n");
write_field(&mut output, "index", idx);
write_field(&mut output, "name", format_args!("{:?}", lang.name));
write_field(&mut output, "file_patterns", render_slice(&lang.file_patterns, |v| format!("{v:?}")));
write_field(&mut output, "line_comments", render_slice(&lang.line_comments, |v| format!("{v:?}")));
write_field(
&mut output,
"block_comments",
render_slice(&lang.block_comments, |(s, e)| format!("({s:?}, {e:?})")),
);
write_field(&mut output, "nested_blocks", lang.nested_blocks);
write_field(&mut output, "shebangs", render_slice(&lang.shebangs, |v| format!("{v:?}")));
write_field(&mut output, "keywords", render_slice(&lang.keywords, |v| format!("{v:?}")));
output.push_str("\t},\n");
}
output.push_str("];\n\n");
output
}
fn normalize_languages(entries: IndexMap<String, LanguageConfig>) -> Result<Vec<LanguageConfig>> {
let mut errors = Vec::new();
let mut prev_name: Option<String> = None;
let mut languages = Vec::with_capacity(entries.len());
for (index, (name, mut config)) in entries.into_iter().enumerate() {
if name.trim().is_empty() {
errors.push(format!("Language at position {}: name cannot be empty", index + 1));
continue;
}
if let Some(prev) = &prev_name
&& name.to_lowercase() < prev.to_lowercase()
{
// Enforce a stable ordering so generated indices remain consistent across edits.
errors.push(format!("Language '{name}' is not in alphabetical order (should come before '{prev}')"));
}
prev_name = Some(name.clone());
config.name = name;
languages.push(config);
}
if errors.is_empty() { Ok(languages) } else { Err(errors.join("\n").into()) }
}
struct PatternInfo {
names: Vec<String>,
all_have_keywords: bool,
}
struct LanguageValidator {
errors: Vec<String>,
seen_names: HashSet<String>,
seen_patterns: HashMap<String, PatternInfo>,
}
impl LanguageValidator {
fn new() -> Self {
Self { errors: Vec::new(), seen_names: HashSet::new(), seen_patterns: HashMap::new() }
}
fn validate_all(&mut self, languages: &[LanguageConfig]) {
for lang in languages {
if !self.seen_names.insert(lang.name.clone()) {
self.errors.push(format!("Duplicate language name '{}'", lang.name));
}
if lang.name.trim() != lang.name {
self.errors.push(format!("Language '{}': name has leading/trailing whitespace", lang.name));
}
for pattern in &lang.file_patterns {
let info = self
.seen_patterns
.entry(pattern.clone())
.or_insert(PatternInfo { names: Vec::new(), all_have_keywords: true });
info.names.push(lang.name.clone());
info.all_have_keywords &= !lang.keywords.is_empty();
}
}
for (pattern, info) in &self.seen_patterns {
if info.names.len() > 1 && !info.all_have_keywords {
// Shared patterns need keyword disambiguation to avoid random selection.
self.errors.push(format!(
"Duplicate pattern '{}' in [{}] - all must have 'keywords' for disambiguation",
pattern,
info.names.join(", ")
));
}
}
}
fn into_result(self) -> Result<()> {
if self.errors.is_empty() {
Ok(())
} else {
Err(format!("Language validation failed with {} error(s):\n{}", self.errors.len(), self.errors.join("\n"))
.into())
}
}
}
fn validate_languages(languages: &[LanguageConfig]) -> Result<()> {
let mut validator = LanguageValidator::new();
validator.validate_all(languages);
validator.into_result()
}
fn main() -> Result<()> {
let manifest_dir = env::var("CARGO_MANIFEST_DIR")?;
let json_path = Path::new(&manifest_dir).join("languages.json5");
println!("cargo:rerun-if-changed={}", json_path.display());
let json_content = fs::read_to_string(&json_path)?;
let language_map: IndexMap<String, LanguageConfig> =
json5::from_str(&json_content).map_err(|e| format!("Failed to parse languages.json5: {e}"))?;
let languages = normalize_languages(language_map)?;
validate_languages(&languages)?;
let rendered = render_languages(&languages);
let out_dir = env::var("OUT_DIR")?;
let dest_path = Path::new(&out_dir).join("languages.rs");
fs::write(dest_path, rendered)?;
println!("Generated language data for {} languages", languages.len());
Ok(())
}