syntastica/processor.rs
1use syntastica_core::ts_runtime::Node;
2use syntastica_core::{
3 language_set::{LanguageSet, SupportedLanguage},
4 theme::THEME_KEYS,
5 Result,
6};
7use syntastica_highlight::{Highlight, HighlightEvent, Highlighter};
8
9use crate::Highlights;
10
11/// A type for easy reuse of resources when highlighting multiple inputs.
12///
13/// When planning to process multiple different inputs, potentially in multiple different
14/// languages, create and store an instance of this type, so that the resusable resources will be
15/// reused.
16///
17/// Additionally, the [`Processor::process_once`] function provides a quick way to process an input
18/// once without keeping the reusable resources.
19///
20/// # Instantiation
21///
22/// A [`Processor`] can be created by calling [`Processor::new`] with an implementation of
23/// [`LanguageSet`].
24///
25/// # Examples
26///
27/// ## Example: process once
28///
29/// This example uses the [`Processor::process_once`] function to process one input without keeping
30/// a [`Processor`] instance.
31///
32/// ```
33/// use syntastica::{style::Style, Processor};
34/// use syntastica_parsers::{Lang, LanguageSetImpl};
35///
36/// let highlights = Processor::process_once(
37/// "fn", // the code to process
38/// Lang::Rust, // the code's language
39/// // any valid `LanguageSet` supporting the required language
40/// &LanguageSetImpl::new(),
41/// )
42/// .unwrap();
43///
44/// assert_eq!(highlights, vec![vec![("fn", Some("keyword.function"))]]);
45/// ```
46///
47/// ## Example: instantiation with `Processor::new`
48///
49/// This example uses the [`Processor::new`] function to create a [`Processor`]
50/// instance which can then be used to process multiple different inputs.
51///
52/// ```
53/// use syntastica::{style::Style, Processor};
54/// use syntastica_parsers_git::{Lang, LanguageSetImpl};
55///
56/// // get a `LanguageSet`
57/// let language_set = LanguageSetImpl::new();
58///
59/// // create a `Processor` using that `LanguageSet`
60/// let mut processor = Processor::new(&language_set);
61///
62/// // process some input
63/// let highlights = processor.process("# comment", Lang::Python).unwrap();
64/// assert_eq!(highlights, vec![vec![("# comment", Some("comment"))]]);
65///
66/// // process input with injections
67/// let highlights = processor
68/// .process(r#"Regex::new(r".")"#, Lang::Rust)
69/// .unwrap();
70/// assert_eq!(
71/// highlights,
72/// vec![vec![
73/// ("Regex", Some("type")),
74/// ("::", Some("punctuation.delimiter")),
75/// ("new", Some("function.call")),
76/// ("(", Some("punctuation.bracket")),
77/// ("r\"", Some("string")),
78/// (".", Some("variable.builtin")), // this is the injected regex language
79/// ("\"", Some("string")),
80/// (")", Some("punctuation.bracket")),
81/// ]]
82/// );
83/// ```
84pub struct Processor<'set, Set: LanguageSet<'set>> {
85 set: &'set Set,
86 highlighter: Highlighter,
87}
88
89impl<'set, Set: LanguageSet<'set>> Processor<'set, Set> {
90 /// Create a new [`Processor`] given a [`LanguageSet`].
91 ///
92 /// See [the type documentation](Processor) for other means of instantiation and an example.
93 pub fn new(set: &'set Set) -> Self {
94 Self {
95 set,
96 highlighter: Highlighter::new(),
97 }
98 }
99
100 /// Create a temporary [`Processor`] and run [`process`](Processor::process) once.
101 ///
102 /// **Only use this function if you do not plan to process multiple inputs!**
103 ///
104 /// See the documentation for [`process`](Processor::process) and
105 /// [`new`](Processor::new) for more information on the parameters,
106 /// return type, and possible errors.
107 pub fn process_once<'src>(
108 code: &'src str,
109 language: impl Into<Set::Language>,
110 set: &'set Set,
111 ) -> Result<Highlights<'src>> {
112 Self::new(set).process(code, language)
113 }
114
115 /// Process the given `code` using the language specified by `language_name`.
116 ///
117 /// # Returns
118 ///
119 /// On success, the function returns [`Highlights`] which can be used by
120 /// [`render`](crate::render) for rendering to end users.
121 ///
122 /// # Errors
123 ///
124 /// The function may result in the following errors:
125 ///
126 /// - [`Error::UnsupportedLanguage`](crate::Error::UnsupportedLanguage) if the given
127 /// `language_name` is not supported by the [`LanguageSet`] which was passed during
128 /// instantiation of this [`Processor`].
129 /// - [`Error::Highlight`](crate::Error::Highlight) if highlighting fails (mainly because of
130 /// tree-sitter version mismatches).
131 pub fn process<'src>(
132 &mut self,
133 code: &'src str,
134 language: impl Into<Set::Language>,
135 ) -> Result<Highlights<'src>> {
136 self.process_impl(code, language.into(), None)
137 }
138
139 /// Process the given `code` using the language specified by `language_name` using an already
140 /// parsed tree.
141 ///
142 /// Unlike [`process`](Processor::process), this does not parse the input text, but instead
143 /// uses a parsed tree that is provided by the caller. This also means that **language
144 /// injections must be handled by the caller**.
145 ///
146 /// This allows for incremental parsing, useful for e.g. text editors. See the
147 /// [tree-sitter Rust documentation](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust#editing)
148 /// for more information.
149 ///
150 /// # Example
151 /// ```
152 /// use syntastica::{language_set::LanguageSet, renderer::TerminalRenderer, Processor};
153 /// use syntastica_parsers::{Lang, LanguageSetImpl};
154 /// use tree_sitter::{InputEdit, Parser, Point};
155 ///
156 /// // create a LanguageSet, Processor, Renderer, and ResolvedTheme
157 /// let set = LanguageSetImpl::new();
158 /// let mut processor = Processor::new(&set);
159 /// let mut renderer = TerminalRenderer::new(None);
160 /// let theme = syntastica_themes::one::dark();
161 ///
162 /// // create a tree-sitter parser
163 /// let mut parser = Parser::new();
164 /// // and set the desired language
165 /// parser.set_language(&Lang::Rust.get())?;
166 ///
167 /// // parse, process, and render source code
168 /// let code = "fn test() {}";
169 /// let mut tree = parser.parse(code, None).unwrap();
170 /// println!(
171 /// "{}",
172 /// syntastica::render(
173 /// &processor.process_tree(code, Lang::Rust, &tree.root_node())?,
174 /// &mut renderer,
175 /// &theme,
176 /// )
177 /// );
178 ///
179 /// // edit the code and tree
180 /// let new_code = "fn test(a: u32) {}";
181 /// tree.edit(&InputEdit {
182 /// start_byte: 8,
183 /// old_end_byte: 8,
184 /// new_end_byte: 14,
185 /// start_position: Point::new(0, 8),
186 /// old_end_position: Point::new(0, 8),
187 /// new_end_position: Point::new(0, 14),
188 /// });
189 ///
190 /// // re-parse, process, and render the code
191 /// let new_tree = parser.parse(new_code, Some(&tree)).unwrap();
192 /// println!(
193 /// "{}",
194 /// syntastica::render(
195 /// &processor.process_tree(new_code, Lang::Rust, &new_tree.root_node())?,
196 /// &mut renderer,
197 /// &theme,
198 /// )
199 /// );
200 ///
201 /// # Ok::<(), Box<dyn std::error::Error>>(())
202 /// ```
203 ///
204 /// # Returns
205 ///
206 /// On success, the function returns [`Highlights`] which can be used by
207 /// [`render`](crate::render) for rendering to end users.
208 ///
209 /// # Errors
210 ///
211 /// The function may result in the following errors:
212 ///
213 /// - [`Error::UnsupportedLanguage`](crate::Error::UnsupportedLanguage) if the given
214 /// `language_name` is not supported by the [`LanguageSet`] which was passed during
215 /// instantiation of this [`Processor`].
216 /// - [`Error::Highlight`](crate::Error::Highlight) if highlighting fails (mainly because of
217 /// tree-sitter version mismatches).
218 pub fn process_tree<'src>(
219 &mut self,
220 code: &'src str,
221 language: impl Into<Set::Language>,
222 tree: &Node<'_>,
223 ) -> Result<Highlights<'src>> {
224 self.process_impl(code, language.into(), Some(tree))
225 }
226
227 fn process_impl<'src>(
228 &mut self,
229 code: &'src str,
230 language: Set::Language,
231 tree: Option<&Node<'_>>,
232 ) -> Result<Highlights<'src>> {
233 let highlight_config = self.set.get_language(language)?;
234
235 let injection_callback = |lang_name: &str| {
236 let lang_name = lang_name.to_ascii_lowercase();
237 // if `lang_name` is a supported language in the set, use that
238 Set::Language::for_name(&lang_name, self.set)
239 .ok()
240 // else if `for_injection` returns a name, try getting a language for that name
241 .or_else(|| Set::Language::for_injection(&lang_name, self.set))
242 // else, `lang_name` might be a mimetype like `application/json`, so try both again
243 // with the text after the last `/`
244 .or_else(|| {
245 lang_name.rsplit_once('/').and_then(|(_, name)| {
246 Set::Language::for_name(name, self.set)
247 .ok()
248 .or_else(|| Set::Language::for_injection(name, self.set))
249 })
250 })
251 .and_then(|lang| self.set.get_language(lang).ok())
252 };
253
254 match tree {
255 Some(tree) => process_highlight_iter(
256 self.highlighter.highlight_existing_tree(
257 highlight_config,
258 code.as_bytes(),
259 None,
260 tree,
261 )?,
262 code,
263 ),
264 None => process_highlight_iter(
265 self.highlighter.highlight(
266 highlight_config,
267 code.as_bytes(),
268 None,
269 injection_callback,
270 )?,
271 code,
272 ),
273 }
274 }
275}
276
277fn process_highlight_iter(
278 iter: impl Iterator<Item = std::result::Result<HighlightEvent, syntastica_highlight::Error>>,
279 code: &str,
280) -> Result<Highlights<'_>> {
281 let mut out = vec![vec![]];
282 let mut style_stack = vec![];
283
284 for event in iter {
285 match event? {
286 HighlightEvent::HighlightStart(Highlight(highlight)) => style_stack.push(highlight),
287 HighlightEvent::HighlightEnd => {
288 style_stack.pop();
289 }
290 HighlightEvent::Source { start, end } => {
291 let ends_with_newline = code[start..end].ends_with('\n');
292 let mut lines = code[start..end].lines().peekable();
293 while let Some(line) = lines.next() {
294 let style = style_stack.last().and_then(|idx| {
295 let key = THEME_KEYS[*idx];
296 match key {
297 "none" => None,
298 _ => Some(key),
299 }
300 });
301 out.last_mut()
302 .expect("`out` is initialized with one element and never shrinks in size")
303 .push((line, style));
304
305 if lines.peek().is_some() || ends_with_newline {
306 out.push(vec![]);
307 }
308 }
309 }
310 }
311 }
312
313 Ok(out)
314}