syntastica_query_preprocessor/
lib.rs

1#![doc = include_str!("../README.md")]
2#![warn(rust_2018_idioms)]
3#![deny(missing_docs)]
4
5use std::{fmt::Write as _, fs};
6
7use lazy_regex::regex_replace_all;
8use rsexpr::{OwnedSexpr, OwnedSexprs};
9
10#[inline]
11fn _process(strip_comment: &str, nvim_like: bool, src: &str, proc: fn(&mut OwnedSexprs)) -> String {
12    process(
13        src,
14        if nvim_like { Some(proc) } else { None },
15        strip_comment,
16    )
17    .unwrap_or_else(|err| panic!("invalid input queries: {err}"))
18}
19
20/// Pre-process highlight queries.
21///
22/// See [the crate docs](crate#usage) for information on the parameters.
23///
24/// # Panics
25/// The function panics if the query string cannot be parsed by [`rsexpr`].
26#[inline]
27pub fn process_highlights(strip_comment: &str, nvim_like: bool, src: &str) -> String {
28    _process(strip_comment, nvim_like, src, _process_highlights)
29}
30
31/// Pre-process injection queries.
32///
33/// See [the crate docs](crate#usage) for information on the parameters.
34///
35/// # Panics
36/// The function panics if the query string cannot be parsed by [`rsexpr`].
37#[inline]
38pub fn process_injections(strip_comment: &str, nvim_like: bool, src: &str) -> String {
39    _process(strip_comment, nvim_like, src, _process_injections)
40}
41
42/// Pre-process locals queries.
43///
44/// See [the crate docs](crate#usage) for information on the parameters.
45///
46/// # Panics
47/// The function panics if the query string cannot be parsed by [`rsexpr`].
48#[inline]
49pub fn process_locals(strip_comment: &str, nvim_like: bool, src: &str) -> String {
50    _process(strip_comment, nvim_like, src, _process_locals)
51}
52
53#[inline]
54fn _process_with_inherits(
55    strip_comment: &str,
56    nvim_like: bool,
57    lang_name: &str,
58    base_dir: &str,
59    proc: fn(&mut OwnedSexprs),
60    filename: &str,
61) -> String {
62    process_with_inherits(
63        base_dir,
64        lang_name,
65        filename,
66        if nvim_like { Some(proc) } else { None },
67        strip_comment,
68    )
69}
70
71/// Pre-process highlight queries with support for `; inherits <lang>` comments.
72///
73/// See [the crate docs](crate#usage) for information on the parameters.
74///
75/// # Panics
76/// The function panics if any query file cannot be parsed by [`rsexpr`].
77#[inline]
78pub fn process_highlights_with_inherits(
79    strip_comment: &str,
80    nvim_like: bool,
81    lang_name: &str,
82    base_dir: &str,
83) -> String {
84    _process_with_inherits(
85        strip_comment,
86        nvim_like,
87        lang_name,
88        base_dir,
89        _process_highlights,
90        "highlights.scm",
91    )
92}
93
94/// Pre-process injection queries with support for `; inherits <lang>` comments.
95///
96/// See [the crate docs](crate#usage) for information on the parameters.
97///
98/// # Panics
99/// The function panics if any query file cannot be parsed by [`rsexpr`].
100#[inline]
101pub fn process_injections_with_inherits(
102    strip_comment: &str,
103    nvim_like: bool,
104    lang_name: &str,
105    base_dir: &str,
106) -> String {
107    _process_with_inherits(
108        strip_comment,
109        nvim_like,
110        lang_name,
111        base_dir,
112        _process_injections,
113        "injections.scm",
114    )
115}
116
117/// Pre-process locals queries with support for `; inherits <lang>` comments.
118///
119/// See [the crate docs](crate#usage) for information on the parameters.
120///
121/// # Panics
122/// The function panics if any query file cannot be parsed by [`rsexpr`].
123#[inline]
124pub fn process_locals_with_inherits(
125    strip_comment: &str,
126    nvim_like: bool,
127    lang_name: &str,
128    base_dir: &str,
129) -> String {
130    _process_with_inherits(
131        strip_comment,
132        nvim_like,
133        lang_name,
134        base_dir,
135        _process_locals,
136        "locals.scm",
137    )
138}
139
140fn process(
141    src: &str,
142    processor: Option<fn(&mut OwnedSexprs)>,
143    strip_comment: &str,
144) -> Result<String, String> {
145    let mut new_queries = rsexpr::from_slice_multi(src)
146        .map_err(|errs| {
147            errs.iter()
148                .map(rsexpr::Error::to_string)
149                .collect::<Vec<_>>()
150                .join(", ")
151        })?
152        .into_iter()
153        .map(OwnedSexpr::from)
154        .collect();
155    new_queries = group_root_level_captures(new_queries);
156    strip(&mut new_queries, strip_comment.as_bytes());
157    remove_comments(&mut new_queries);
158    if let Some(func) = processor {
159        func(&mut new_queries);
160    }
161    new_queries = ungroup_root_level_captures(new_queries);
162    let new_queries = format!("{new_queries:}");
163
164    Ok(new_queries)
165}
166
167fn process_with_inherits(
168    base_dir: &str,
169    lang_name: &str,
170    filename: &str,
171    processor: Option<fn(&mut OwnedSexprs)>,
172    strip_comment: &str,
173) -> String {
174    let queries = read_queries(base_dir, lang_name, filename);
175    process(&queries, processor, strip_comment).unwrap_or_else(|err| {
176        panic!("invalid queries in file '{base_dir}/{lang_name}/{filename}': {err}")
177    })
178}
179
180fn read_queries(base_dir: &str, lang_name: &str, filename: &str) -> String {
181    let path = format!("{base_dir}/{lang_name}/{filename}");
182    let queries = match fs::read_to_string(&path) {
183        Ok(queries) => queries,
184        Err(err) => {
185            eprintln!("warning: failed to read '{path}': {err}");
186            String::new()
187        }
188    };
189    regex_replace_all!(
190        r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*",
191        &queries,
192        |_, langs: &str| {
193            langs.split(',').fold(String::new(), |mut out, lang| {
194                _ = write!(out, "\n{}\n", read_queries(base_dir, lang.trim(), filename));
195                out
196            })
197        }
198    )
199    .into_owned()
200}
201
202fn group_root_level_captures(queries: OwnedSexprs) -> OwnedSexprs {
203    let mut new_queries = OwnedSexprs::from(Vec::with_capacity(queries.len()));
204    let mut iter = queries.into_iter().peekable();
205
206    while let Some(sexpr) = iter.next() {
207        // groups start with `List`, `Group`, or `String` nodes
208        if let OwnedSexpr::List(_) | OwnedSexpr::Group(_) | OwnedSexpr::String(_) = sexpr {
209            let mut group = OwnedSexprs::from(vec![sexpr]);
210            // and include all following `Atom` nodes
211            while let Some(OwnedSexpr::Atom(_)) = iter.peek() {
212                group.push(iter.next().unwrap());
213            }
214            new_queries.push(match group.len() {
215                // if the group only consists of one item, there is no need to wrap it
216                1 => group.swap_remove(0),
217                _ => OwnedSexpr::List(group),
218            });
219        } else {
220            new_queries.push(sexpr);
221        }
222    }
223
224    new_queries
225}
226
227fn ungroup_root_level_captures(queries: OwnedSexprs) -> OwnedSexprs {
228    let mut new_queries = OwnedSexprs::from(Vec::with_capacity(queries.len()));
229
230    for query in queries {
231        match query {
232            // remove empty groups
233            OwnedSexpr::List(list) if list.is_empty() => {}
234            // list doesn't start with an atom, and contains at most one list or group or starts
235            // with a string
236            OwnedSexpr::List(list)
237                if list.first().is_some_and(|sexpr| {
238                    matches!(
239                        sexpr,
240                        OwnedSexpr::List(_) | OwnedSexpr::Group(_) | OwnedSexpr::String(_)
241                    )
242                }) && list
243                    .iter()
244                    .skip(1)
245                    .all(|sexpr| matches!(sexpr, OwnedSexpr::Atom(_))) =>
246            {
247                new_queries.extend(list);
248            }
249            _ => new_queries.push(query),
250        }
251    }
252
253    new_queries
254}
255
256// TODO: preserve "Forked from" comments
257fn remove_comments(queries: &mut OwnedSexprs) {
258    queries.retain(|sexpr| !matches!(sexpr, OwnedSexpr::Comment(_)));
259    for query in queries {
260        if let OwnedSexpr::List(children) | OwnedSexpr::Group(children) = query {
261            remove_comments(children);
262        }
263    }
264}
265
266fn strip(queries: &mut OwnedSexprs, skip_comment: &[u8]) {
267    let mut delete_next = false;
268    queries.retain(|query| {
269        let delete_this = delete_next;
270        delete_next = matches!(query, OwnedSexpr::Comment(comment) if comment == skip_comment);
271        !delete_this
272    });
273
274    for query in queries {
275        if let OwnedSexpr::List(children) | OwnedSexpr::Group(children) = query {
276            strip(children, skip_comment);
277        }
278    }
279}
280
281fn _process_locals(queries: &mut OwnedSexprs) {
282    for query in queries {
283        replace_locals_captures(query);
284        replace_predicates(query);
285    }
286}
287
288fn replace_locals_captures(tree: &mut OwnedSexpr) {
289    match tree {
290        OwnedSexpr::Atom(atom) => match atom.as_slice() {
291            b"@scope" => *atom = b"@local.scope".to_vec(),
292            b"@reference" => *atom = b"@local.reference".to_vec(),
293            other => {
294                if std::str::from_utf8(other).is_ok_and(|str| {
295                    str == "@definition"
296                        || str.starts_with("@definition.")
297                        || str.starts_with("@local.definition.")
298                }) {
299                    *atom = b"@local.definition".to_vec()
300                }
301            }
302        },
303        OwnedSexpr::String(_) => {}
304        OwnedSexpr::Comment(_) => {}
305        OwnedSexpr::List(list) | OwnedSexpr::Group(list) => {
306            for subtree in list {
307                replace_locals_captures(subtree);
308            }
309        }
310    }
311}
312
313fn _process_injections(queries: &mut OwnedSexprs) {
314    for query in queries {
315        replace_injection_captures(query, 0);
316        replace_predicates(query);
317    }
318}
319
320fn replace_injection_captures(
321    tree: &mut OwnedSexpr,
322    mut predicate_count: usize,
323) -> (bool, Option<OwnedSexpr>) {
324    let mut is_predicate = false;
325    let mut additional_sexp = None;
326    match tree {
327        OwnedSexpr::String(_) => {}
328        OwnedSexpr::Comment(_) => {}
329        OwnedSexpr::Atom(atom) => match atom.as_slice() {
330            [b'@', capture @ ..] if !capture.starts_with(b"_") => match capture {
331                b"injection.content" | b"injection.language" => {}
332                b"content" => *atom = b"@injection.content".to_vec(),
333                b"language" => *atom = b"@injection.language".to_vec(),
334                b"combined" => {
335                    *tree = OwnedSexpr::List(
336                        vec![
337                            OwnedSexpr::Atom(b"#set!".to_vec()),
338                            OwnedSexpr::Atom(b"injection.combined".to_vec()),
339                        ]
340                        .into(),
341                    )
342                }
343                lang_name => {
344                    if predicate_count == 0 {
345                        additional_sexp = Some(OwnedSexpr::List(
346                            vec![
347                                OwnedSexpr::Atom(b"#set!".to_vec()),
348                                OwnedSexpr::Atom(b"injection.language".to_vec()),
349                                OwnedSexpr::String(lang_name.to_owned()),
350                            ]
351                            .into(),
352                        ));
353                    }
354                    *atom = b"@injection.content".to_vec();
355                }
356            },
357            [b'#', ..] => is_predicate = true,
358            _ => {}
359        },
360        OwnedSexpr::List(subtrees) | OwnedSexpr::Group(subtrees) => {
361            let mut insertions = vec![];
362            for (index, subtree) in subtrees.iter_mut().enumerate() {
363                let (is_predicate, additional_sexp) =
364                    replace_injection_captures(subtree, predicate_count);
365                if is_predicate {
366                    predicate_count += 1;
367                }
368                if let Some(additional_sexp) = additional_sexp {
369                    insertions.push((index + 1 + insertions.len(), additional_sexp));
370                }
371            }
372            for (index, sexp) in insertions {
373                subtrees.insert(index, sexp);
374            }
375        }
376    }
377    (is_predicate, additional_sexp)
378}
379
380fn _process_highlights(queries: &mut OwnedSexprs) {
381    for query in queries {
382        replace_predicates(query);
383    }
384}
385
386fn replace_predicates(tree: &mut OwnedSexpr) {
387    if let OwnedSexpr::List(list) | OwnedSexpr::Group(list) = tree {
388        match list.first() {
389            Some(OwnedSexpr::Atom(atom)) if atom.first() == Some(&b'#') => {
390                let match_predicate = OwnedSexpr::Atom(match atom.starts_with(b"#not-") {
391                    false => b"#match?".to_vec(),
392                    true => b"#not-match?".to_vec(),
393                });
394                match atom.as_slice() {
395                    b"#gsub!" => {
396                        list[0] = OwnedSexpr::Atom(b"#replace!".to_vec());
397                        list[2] = OwnedSexpr::String(
398                            lua_to_regex(std::str::from_utf8(list[2].unwrap_string_ref()).unwrap())
399                                .into_bytes(),
400                        );
401                        list[3] = OwnedSexpr::String(
402                            regex_replace_all!(
403                                r"%(\d)",
404                                &std::str::from_utf8(list[3].unwrap_string_ref())
405                                    .unwrap()
406                                    .replace("%%", "%")
407                                    .replace('$', "$$"),
408                                |_, i| format!("${{{i}}}")
409                            )
410                            .into_owned()
411                            .into_bytes(),
412                        );
413                        list.truncate(4);
414                    }
415                    b"#lua-match?" | b"#not-lua-match?" => {
416                        list[0] = match_predicate;
417                        list[2] = OwnedSexpr::String(
418                            lua_to_regex(std::str::from_utf8(list[2].unwrap_string_ref()).unwrap())
419                                .into_bytes(),
420                        );
421                        list.truncate(3);
422                    }
423                    b"#any-of?" | b"#not-any-of?" => {
424                        list[0] = match_predicate;
425                        list[2] = OwnedSexpr::String(
426                            format!(
427                                "^({})$",
428                                list[2..]
429                                    .iter()
430                                    .map(|arg| std::str::from_utf8(arg.unwrap_string_ref())
431                                        .unwrap()
432                                        .chars()
433                                        .fold(String::new(), |mut out, char| {
434                                            const SPECIAL_CHARS: &str = "\\.()[]{}|*+?^$/";
435
436                                            match SPECIAL_CHARS.contains(char) {
437                                                true => _ = write!(out, "\\{char}"),
438                                                false => _ = write!(out, "{char}"),
439                                            }
440                                            out
441                                        }))
442                                    .collect::<Vec<_>>()
443                                    .join("|")
444                            )
445                            .into_bytes(),
446                        );
447                        list.truncate(3);
448                    }
449                    b"#contains?" | b"#not-contains?" => list[0] = match_predicate,
450                    _ => {}
451                }
452            }
453            _ => {
454                for subtree in list {
455                    replace_predicates(subtree);
456                }
457            }
458        }
459    }
460}
461
462fn lua_to_regex(pattern: &str) -> String {
463    lua_pattern::try_to_regex(
464        &lua_pattern::parse(pattern)
465            .unwrap_or_else(|err| panic!("Lua pattern `{pattern}` could not be parsed: {err}")),
466        false,
467        false,
468    )
469    .unwrap_or_else(|err| {
470        panic!("Lua pattern `{pattern}` could not be converted into a regex: {err}")
471    })
472}