syntastica_query_preprocessor/
lib.rs1#![doc = include_str!("../README.md")]
2#![warn(rust_2018_idioms)]
3#![deny(missing_docs)]
4
5use std::{fmt::Write as _, fs};
6
7use lazy_regex::regex_replace_all;
8use rsexpr::{OwnedSexpr, OwnedSexprs};
9
10#[inline]
11fn _process(strip_comment: &str, nvim_like: bool, src: &str, proc: fn(&mut OwnedSexprs)) -> String {
12 process(
13 src,
14 if nvim_like { Some(proc) } else { None },
15 strip_comment,
16 )
17 .unwrap_or_else(|err| panic!("invalid input queries: {err}"))
18}
19
20#[inline]
27pub fn process_highlights(strip_comment: &str, nvim_like: bool, src: &str) -> String {
28 _process(strip_comment, nvim_like, src, _process_highlights)
29}
30
31#[inline]
38pub fn process_injections(strip_comment: &str, nvim_like: bool, src: &str) -> String {
39 _process(strip_comment, nvim_like, src, _process_injections)
40}
41
42#[inline]
49pub fn process_locals(strip_comment: &str, nvim_like: bool, src: &str) -> String {
50 _process(strip_comment, nvim_like, src, _process_locals)
51}
52
53#[inline]
54fn _process_with_inherits(
55 strip_comment: &str,
56 nvim_like: bool,
57 lang_name: &str,
58 base_dir: &str,
59 proc: fn(&mut OwnedSexprs),
60 filename: &str,
61) -> String {
62 process_with_inherits(
63 base_dir,
64 lang_name,
65 filename,
66 if nvim_like { Some(proc) } else { None },
67 strip_comment,
68 )
69}
70
71#[inline]
78pub fn process_highlights_with_inherits(
79 strip_comment: &str,
80 nvim_like: bool,
81 lang_name: &str,
82 base_dir: &str,
83) -> String {
84 _process_with_inherits(
85 strip_comment,
86 nvim_like,
87 lang_name,
88 base_dir,
89 _process_highlights,
90 "highlights.scm",
91 )
92}
93
94#[inline]
101pub fn process_injections_with_inherits(
102 strip_comment: &str,
103 nvim_like: bool,
104 lang_name: &str,
105 base_dir: &str,
106) -> String {
107 _process_with_inherits(
108 strip_comment,
109 nvim_like,
110 lang_name,
111 base_dir,
112 _process_injections,
113 "injections.scm",
114 )
115}
116
117#[inline]
124pub fn process_locals_with_inherits(
125 strip_comment: &str,
126 nvim_like: bool,
127 lang_name: &str,
128 base_dir: &str,
129) -> String {
130 _process_with_inherits(
131 strip_comment,
132 nvim_like,
133 lang_name,
134 base_dir,
135 _process_locals,
136 "locals.scm",
137 )
138}
139
140fn process(
141 src: &str,
142 processor: Option<fn(&mut OwnedSexprs)>,
143 strip_comment: &str,
144) -> Result<String, String> {
145 let mut new_queries = rsexpr::from_slice_multi(src)
146 .map_err(|errs| {
147 errs.iter()
148 .map(rsexpr::Error::to_string)
149 .collect::<Vec<_>>()
150 .join(", ")
151 })?
152 .into_iter()
153 .map(OwnedSexpr::from)
154 .collect();
155 new_queries = group_root_level_captures(new_queries);
156 strip(&mut new_queries, strip_comment.as_bytes());
157 remove_comments(&mut new_queries);
158 if let Some(func) = processor {
159 func(&mut new_queries);
160 }
161 new_queries = ungroup_root_level_captures(new_queries);
162 let new_queries = format!("{new_queries:}");
163
164 Ok(new_queries)
165}
166
167fn process_with_inherits(
168 base_dir: &str,
169 lang_name: &str,
170 filename: &str,
171 processor: Option<fn(&mut OwnedSexprs)>,
172 strip_comment: &str,
173) -> String {
174 let queries = read_queries(base_dir, lang_name, filename);
175 process(&queries, processor, strip_comment).unwrap_or_else(|err| {
176 panic!("invalid queries in file '{base_dir}/{lang_name}/{filename}': {err}")
177 })
178}
179
180fn read_queries(base_dir: &str, lang_name: &str, filename: &str) -> String {
181 let path = format!("{base_dir}/{lang_name}/{filename}");
182 let queries = match fs::read_to_string(&path) {
183 Ok(queries) => queries,
184 Err(err) => {
185 eprintln!("warning: failed to read '{path}': {err}");
186 String::new()
187 }
188 };
189 regex_replace_all!(
190 r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*",
191 &queries,
192 |_, langs: &str| {
193 langs.split(',').fold(String::new(), |mut out, lang| {
194 _ = write!(out, "\n{}\n", read_queries(base_dir, lang.trim(), filename));
195 out
196 })
197 }
198 )
199 .into_owned()
200}
201
202fn group_root_level_captures(queries: OwnedSexprs) -> OwnedSexprs {
203 let mut new_queries = OwnedSexprs::from(Vec::with_capacity(queries.len()));
204 let mut iter = queries.into_iter().peekable();
205
206 while let Some(sexpr) = iter.next() {
207 if let OwnedSexpr::List(_) | OwnedSexpr::Group(_) | OwnedSexpr::String(_) = sexpr {
209 let mut group = OwnedSexprs::from(vec![sexpr]);
210 while let Some(OwnedSexpr::Atom(_)) = iter.peek() {
212 group.push(iter.next().unwrap());
213 }
214 new_queries.push(match group.len() {
215 1 => group.swap_remove(0),
217 _ => OwnedSexpr::List(group),
218 });
219 } else {
220 new_queries.push(sexpr);
221 }
222 }
223
224 new_queries
225}
226
227fn ungroup_root_level_captures(queries: OwnedSexprs) -> OwnedSexprs {
228 let mut new_queries = OwnedSexprs::from(Vec::with_capacity(queries.len()));
229
230 for query in queries {
231 match query {
232 OwnedSexpr::List(list) if list.is_empty() => {}
234 OwnedSexpr::List(list)
237 if list.first().is_some_and(|sexpr| {
238 matches!(
239 sexpr,
240 OwnedSexpr::List(_) | OwnedSexpr::Group(_) | OwnedSexpr::String(_)
241 )
242 }) && list
243 .iter()
244 .skip(1)
245 .all(|sexpr| matches!(sexpr, OwnedSexpr::Atom(_))) =>
246 {
247 new_queries.extend(list);
248 }
249 _ => new_queries.push(query),
250 }
251 }
252
253 new_queries
254}
255
256fn remove_comments(queries: &mut OwnedSexprs) {
258 queries.retain(|sexpr| !matches!(sexpr, OwnedSexpr::Comment(_)));
259 for query in queries {
260 if let OwnedSexpr::List(children) | OwnedSexpr::Group(children) = query {
261 remove_comments(children);
262 }
263 }
264}
265
266fn strip(queries: &mut OwnedSexprs, skip_comment: &[u8]) {
267 let mut delete_next = false;
268 queries.retain(|query| {
269 let delete_this = delete_next;
270 delete_next = matches!(query, OwnedSexpr::Comment(comment) if comment == skip_comment);
271 !delete_this
272 });
273
274 for query in queries {
275 if let OwnedSexpr::List(children) | OwnedSexpr::Group(children) = query {
276 strip(children, skip_comment);
277 }
278 }
279}
280
281fn _process_locals(queries: &mut OwnedSexprs) {
282 for query in queries {
283 replace_locals_captures(query);
284 replace_predicates(query);
285 }
286}
287
288fn replace_locals_captures(tree: &mut OwnedSexpr) {
289 match tree {
290 OwnedSexpr::Atom(atom) => match atom.as_slice() {
291 b"@scope" => *atom = b"@local.scope".to_vec(),
292 b"@reference" => *atom = b"@local.reference".to_vec(),
293 other => {
294 if std::str::from_utf8(other).is_ok_and(|str| {
295 str == "@definition"
296 || str.starts_with("@definition.")
297 || str.starts_with("@local.definition.")
298 }) {
299 *atom = b"@local.definition".to_vec()
300 }
301 }
302 },
303 OwnedSexpr::String(_) => {}
304 OwnedSexpr::Comment(_) => {}
305 OwnedSexpr::List(list) | OwnedSexpr::Group(list) => {
306 for subtree in list {
307 replace_locals_captures(subtree);
308 }
309 }
310 }
311}
312
313fn _process_injections(queries: &mut OwnedSexprs) {
314 for query in queries {
315 replace_injection_captures(query, 0);
316 replace_predicates(query);
317 }
318}
319
320fn replace_injection_captures(
321 tree: &mut OwnedSexpr,
322 mut predicate_count: usize,
323) -> (bool, Option<OwnedSexpr>) {
324 let mut is_predicate = false;
325 let mut additional_sexp = None;
326 match tree {
327 OwnedSexpr::String(_) => {}
328 OwnedSexpr::Comment(_) => {}
329 OwnedSexpr::Atom(atom) => match atom.as_slice() {
330 [b'@', capture @ ..] if !capture.starts_with(b"_") => match capture {
331 b"injection.content" | b"injection.language" => {}
332 b"content" => *atom = b"@injection.content".to_vec(),
333 b"language" => *atom = b"@injection.language".to_vec(),
334 b"combined" => {
335 *tree = OwnedSexpr::List(
336 vec![
337 OwnedSexpr::Atom(b"#set!".to_vec()),
338 OwnedSexpr::Atom(b"injection.combined".to_vec()),
339 ]
340 .into(),
341 )
342 }
343 lang_name => {
344 if predicate_count == 0 {
345 additional_sexp = Some(OwnedSexpr::List(
346 vec![
347 OwnedSexpr::Atom(b"#set!".to_vec()),
348 OwnedSexpr::Atom(b"injection.language".to_vec()),
349 OwnedSexpr::String(lang_name.to_owned()),
350 ]
351 .into(),
352 ));
353 }
354 *atom = b"@injection.content".to_vec();
355 }
356 },
357 [b'#', ..] => is_predicate = true,
358 _ => {}
359 },
360 OwnedSexpr::List(subtrees) | OwnedSexpr::Group(subtrees) => {
361 let mut insertions = vec![];
362 for (index, subtree) in subtrees.iter_mut().enumerate() {
363 let (is_predicate, additional_sexp) =
364 replace_injection_captures(subtree, predicate_count);
365 if is_predicate {
366 predicate_count += 1;
367 }
368 if let Some(additional_sexp) = additional_sexp {
369 insertions.push((index + 1 + insertions.len(), additional_sexp));
370 }
371 }
372 for (index, sexp) in insertions {
373 subtrees.insert(index, sexp);
374 }
375 }
376 }
377 (is_predicate, additional_sexp)
378}
379
380fn _process_highlights(queries: &mut OwnedSexprs) {
381 for query in queries {
382 replace_predicates(query);
383 }
384}
385
386fn replace_predicates(tree: &mut OwnedSexpr) {
387 if let OwnedSexpr::List(list) | OwnedSexpr::Group(list) = tree {
388 match list.first() {
389 Some(OwnedSexpr::Atom(atom)) if atom.first() == Some(&b'#') => {
390 let match_predicate = OwnedSexpr::Atom(match atom.starts_with(b"#not-") {
391 false => b"#match?".to_vec(),
392 true => b"#not-match?".to_vec(),
393 });
394 match atom.as_slice() {
395 b"#gsub!" => {
396 list[0] = OwnedSexpr::Atom(b"#replace!".to_vec());
397 list[2] = OwnedSexpr::String(
398 lua_to_regex(std::str::from_utf8(list[2].unwrap_string_ref()).unwrap())
399 .into_bytes(),
400 );
401 list[3] = OwnedSexpr::String(
402 regex_replace_all!(
403 r"%(\d)",
404 &std::str::from_utf8(list[3].unwrap_string_ref())
405 .unwrap()
406 .replace("%%", "%")
407 .replace('$', "$$"),
408 |_, i| format!("${{{i}}}")
409 )
410 .into_owned()
411 .into_bytes(),
412 );
413 list.truncate(4);
414 }
415 b"#lua-match?" | b"#not-lua-match?" => {
416 list[0] = match_predicate;
417 list[2] = OwnedSexpr::String(
418 lua_to_regex(std::str::from_utf8(list[2].unwrap_string_ref()).unwrap())
419 .into_bytes(),
420 );
421 list.truncate(3);
422 }
423 b"#any-of?" | b"#not-any-of?" => {
424 list[0] = match_predicate;
425 list[2] = OwnedSexpr::String(
426 format!(
427 "^({})$",
428 list[2..]
429 .iter()
430 .map(|arg| std::str::from_utf8(arg.unwrap_string_ref())
431 .unwrap()
432 .chars()
433 .fold(String::new(), |mut out, char| {
434 const SPECIAL_CHARS: &str = "\\.()[]{}|*+?^$/";
435
436 match SPECIAL_CHARS.contains(char) {
437 true => _ = write!(out, "\\{char}"),
438 false => _ = write!(out, "{char}"),
439 }
440 out
441 }))
442 .collect::<Vec<_>>()
443 .join("|")
444 )
445 .into_bytes(),
446 );
447 list.truncate(3);
448 }
449 b"#contains?" | b"#not-contains?" => list[0] = match_predicate,
450 _ => {}
451 }
452 }
453 _ => {
454 for subtree in list {
455 replace_predicates(subtree);
456 }
457 }
458 }
459 }
460}
461
462fn lua_to_regex(pattern: &str) -> String {
463 lua_pattern::try_to_regex(
464 &lua_pattern::parse(pattern)
465 .unwrap_or_else(|err| panic!("Lua pattern `{pattern}` could not be parsed: {err}")),
466 false,
467 false,
468 )
469 .unwrap_or_else(|err| {
470 panic!("Lua pattern `{pattern}` could not be converted into a regex: {err}")
471 })
472}