syntastica/
processor.rs

1use syntastica_core::ts_runtime::Node;
2use syntastica_core::{
3    language_set::{LanguageSet, SupportedLanguage},
4    theme::THEME_KEYS,
5    Result,
6};
7use syntastica_highlight::{Highlight, HighlightEvent, Highlighter};
8
9use crate::Highlights;
10
11/// A type for easy reuse of resources when highlighting multiple inputs.
12///
/// When planning to process multiple different inputs, potentially in multiple different
/// languages, create and store an instance of this type, so that the reusable resources will be
/// reused.
16///
17/// Additionally, the [`Processor::process_once`] function provides a quick way to process an input
18/// once without keeping the reusable resources.
19///
20/// # Instantiation
21///
22/// A [`Processor`] can be created by calling [`Processor::new`] with an implementation of
23/// [`LanguageSet`].
24///
25/// # Examples
26///
27/// ## Example: process once
28///
29/// This example uses the [`Processor::process_once`] function to process one input without keeping
30/// a [`Processor`] instance.
31///
32/// ```
33/// use syntastica::{style::Style, Processor};
34/// use syntastica_parsers::{Lang, LanguageSetImpl};
35///
36/// let highlights = Processor::process_once(
37///     "fn",       // the code to process
38///     Lang::Rust, // the code's language
39///     // any valid `LanguageSet` supporting the required language
40///     &LanguageSetImpl::new(),
41/// )
42/// .unwrap();
43///
44/// assert_eq!(highlights, vec![vec![("fn", Some("keyword.function"))]]);
45/// ```
46///
47/// ## Example: instantiation with `Processor::new`
48///
49/// This example uses the [`Processor::new`] function to create a [`Processor`]
50/// instance which can then be used to process multiple different inputs.
51///
52/// ```
53/// use syntastica::{style::Style, Processor};
54/// use syntastica_parsers_git::{Lang, LanguageSetImpl};
55///
56/// // get a `LanguageSet`
57/// let language_set = LanguageSetImpl::new();
58///
59/// // create a `Processor` using that `LanguageSet`
60/// let mut processor = Processor::new(&language_set);
61///
62/// // process some input
63/// let highlights = processor.process("# comment", Lang::Python).unwrap();
64/// assert_eq!(highlights, vec![vec![("# comment", Some("comment"))]]);
65///
66/// // process input with injections
67/// let highlights = processor
68///     .process(r#"Regex::new(r".")"#, Lang::Rust)
69///     .unwrap();
70/// assert_eq!(
71///     highlights,
72///     vec![vec![
73///         ("Regex", Some("type")),
74///         ("::", Some("punctuation.delimiter")),
75///         ("new", Some("function.call")),
76///         ("(", Some("punctuation.bracket")),
77///         ("r\"", Some("string")),
78///         (".", Some("variable.builtin")), // this is the injected regex language
79///         ("\"", Some("string")),
80///         (")", Some("punctuation.bracket")),
81///     ]]
82/// );
83/// ```
pub struct Processor<'set, Set: LanguageSet<'set>> {
    /// The language set passed to [`Processor::new`]; it is queried for the requested language
    /// and for any injected languages encountered while processing.
    set: &'set Set,
    /// The highlighter created once in [`Processor::new`] and reused for every processed input.
    highlighter: Highlighter,
}
88
89impl<'set, Set: LanguageSet<'set>> Processor<'set, Set> {
90    /// Create a new [`Processor`] given a [`LanguageSet`].
91    ///
92    /// See [the type documentation](Processor) for other means of instantiation and an example.
93    pub fn new(set: &'set Set) -> Self {
94        Self {
95            set,
96            highlighter: Highlighter::new(),
97        }
98    }
99
100    /// Create a temporary [`Processor`] and run [`process`](Processor::process) once.
101    ///
102    /// **Only use this function if you do not plan to process multiple inputs!**
103    ///
104    /// See the documentation for [`process`](Processor::process) and
105    /// [`new`](Processor::new) for more information on the parameters,
106    /// return type, and possible errors.
107    pub fn process_once<'src>(
108        code: &'src str,
109        language: impl Into<Set::Language>,
110        set: &'set Set,
111    ) -> Result<Highlights<'src>> {
112        Self::new(set).process(code, language)
113    }
114
115    /// Process the given `code` using the language specified by `language_name`.
116    ///
117    /// # Returns
118    ///
119    /// On success, the function returns [`Highlights`] which can be used by
120    /// [`render`](crate::render) for rendering to end users.
121    ///
122    /// # Errors
123    ///
124    /// The function may result in the following errors:
125    ///
126    /// - [`Error::UnsupportedLanguage`](crate::Error::UnsupportedLanguage) if the given
127    ///   `language_name` is not supported by the [`LanguageSet`] which was passed during
128    ///   instantiation of this [`Processor`].
129    /// - [`Error::Highlight`](crate::Error::Highlight) if highlighting fails (mainly because of
130    ///   tree-sitter version mismatches).
131    pub fn process<'src>(
132        &mut self,
133        code: &'src str,
134        language: impl Into<Set::Language>,
135    ) -> Result<Highlights<'src>> {
136        self.process_impl(code, language.into(), None)
137    }
138
139    /// Process the given `code` using the language specified by `language_name` using an already
140    /// parsed tree.
141    ///
142    /// Unlike [`process`](Processor::process), this does not parse the input text, but instead
143    /// uses a parsed tree that is provided by the caller. This also means that **language
144    /// injections must be handled by the caller**.
145    ///
146    /// This allows for incremental parsing, useful for e.g. text editors. See the
147    /// [tree-sitter Rust documentation](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust#editing)
148    /// for more information.
149    ///
150    /// # Example
151    /// ```
152    /// use syntastica::{language_set::LanguageSet, renderer::TerminalRenderer, Processor};
153    /// use syntastica_parsers::{Lang, LanguageSetImpl};
154    /// use tree_sitter::{InputEdit, Parser, Point};
155    ///
156    /// // create a LanguageSet, Processor, Renderer, and ResolvedTheme
157    /// let set = LanguageSetImpl::new();
158    /// let mut processor = Processor::new(&set);
159    /// let mut renderer = TerminalRenderer::new(None);
160    /// let theme = syntastica_themes::one::dark();
161    ///
162    /// // create a tree-sitter parser
163    /// let mut parser = Parser::new();
164    /// // and set the desired language
165    /// parser.set_language(&Lang::Rust.get())?;
166    ///
167    /// // parse, process, and render source code
168    /// let code = "fn test() {}";
169    /// let mut tree = parser.parse(code, None).unwrap();
170    /// println!(
171    ///     "{}",
172    ///     syntastica::render(
173    ///         &processor.process_tree(code, Lang::Rust, &tree.root_node())?,
174    ///         &mut renderer,
175    ///         &theme,
176    ///     )
177    /// );
178    ///
179    /// // edit the code and tree
180    /// let new_code = "fn test(a: u32) {}";
181    /// tree.edit(&InputEdit {
182    ///     start_byte: 8,
183    ///     old_end_byte: 8,
184    ///     new_end_byte: 14,
185    ///     start_position: Point::new(0, 8),
186    ///     old_end_position: Point::new(0, 8),
187    ///     new_end_position: Point::new(0, 14),
188    /// });
189    ///
190    /// // re-parse, process, and render the code
191    /// let new_tree = parser.parse(new_code, Some(&tree)).unwrap();
192    /// println!(
193    ///     "{}",
194    ///     syntastica::render(
195    ///         &processor.process_tree(new_code, Lang::Rust, &new_tree.root_node())?,
196    ///         &mut renderer,
197    ///         &theme,
198    ///     )
199    /// );
200    ///
201    /// # Ok::<(), Box<dyn std::error::Error>>(())
202    /// ```
203    ///
204    /// # Returns
205    ///
206    /// On success, the function returns [`Highlights`] which can be used by
207    /// [`render`](crate::render) for rendering to end users.
208    ///
209    /// # Errors
210    ///
211    /// The function may result in the following errors:
212    ///
213    /// - [`Error::UnsupportedLanguage`](crate::Error::UnsupportedLanguage) if the given
214    ///   `language_name` is not supported by the [`LanguageSet`] which was passed during
215    ///   instantiation of this [`Processor`].
216    /// - [`Error::Highlight`](crate::Error::Highlight) if highlighting fails (mainly because of
217    ///   tree-sitter version mismatches).
218    pub fn process_tree<'src>(
219        &mut self,
220        code: &'src str,
221        language: impl Into<Set::Language>,
222        tree: &Node<'_>,
223    ) -> Result<Highlights<'src>> {
224        self.process_impl(code, language.into(), Some(tree))
225    }
226
227    fn process_impl<'src>(
228        &mut self,
229        code: &'src str,
230        language: Set::Language,
231        tree: Option<&Node<'_>>,
232    ) -> Result<Highlights<'src>> {
233        let highlight_config = self.set.get_language(language)?;
234
235        let injection_callback = |lang_name: &str| {
236            let lang_name = lang_name.to_ascii_lowercase();
237            // if `lang_name` is a supported language in the set, use that
238            Set::Language::for_name(&lang_name, self.set)
239                .ok()
240                // else if `for_injection` returns a name, try getting a language for that name
241                .or_else(|| Set::Language::for_injection(&lang_name, self.set))
242                // else, `lang_name` might be a mimetype like `application/json`, so try both again
243                // with the text after the last `/`
244                .or_else(|| {
245                    lang_name.rsplit_once('/').and_then(|(_, name)| {
246                        Set::Language::for_name(name, self.set)
247                            .ok()
248                            .or_else(|| Set::Language::for_injection(name, self.set))
249                    })
250                })
251                .and_then(|lang| self.set.get_language(lang).ok())
252        };
253
254        match tree {
255            Some(tree) => process_highlight_iter(
256                self.highlighter.highlight_existing_tree(
257                    highlight_config,
258                    code.as_bytes(),
259                    None,
260                    tree,
261                )?,
262                code,
263            ),
264            None => process_highlight_iter(
265                self.highlighter.highlight(
266                    highlight_config,
267                    code.as_bytes(),
268                    None,
269                    injection_callback,
270                )?,
271                code,
272            ),
273        }
274    }
275}
276
277fn process_highlight_iter(
278    iter: impl Iterator<Item = std::result::Result<HighlightEvent, syntastica_highlight::Error>>,
279    code: &str,
280) -> Result<Highlights<'_>> {
281    let mut out = vec![vec![]];
282    let mut style_stack = vec![];
283
284    for event in iter {
285        match event? {
286            HighlightEvent::HighlightStart(Highlight(highlight)) => style_stack.push(highlight),
287            HighlightEvent::HighlightEnd => {
288                style_stack.pop();
289            }
290            HighlightEvent::Source { start, end } => {
291                let ends_with_newline = code[start..end].ends_with('\n');
292                let mut lines = code[start..end].lines().peekable();
293                while let Some(line) = lines.next() {
294                    let style = style_stack.last().and_then(|idx| {
295                        let key = THEME_KEYS[*idx];
296                        match key {
297                            "none" => None,
298                            _ => Some(key),
299                        }
300                    });
301                    out.last_mut()
302                        .expect("`out` is initialized with one element and never shrinks in size")
303                        .push((line, style));
304
305                    if lines.peek().is_some() || ends_with_newline {
306                        out.push(vec![]);
307                    }
308                }
309            }
310        }
311    }
312
313    Ok(out)
314}