syntastica_highlight/
lib.rs

1//! Forked from <https://github.com/tree-sitter/tree-sitter/blob/v0.25.2/highlight/src/lib.rs>
2//!
3//! The MIT License (MIT)
4//!
5//! Copyright (c) 2018-2024 Max Brunsfeld
6//!
7//! Permission is hereby granted, free of charge, to any person obtaining a copy
8//! of this software and associated documentation files (the "Software"), to deal
9//! in the Software without restriction, including without limitation the rights
10//! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11//! copies of the Software, and to permit persons to whom the Software is
12//! furnished to do so, subject to the following conditions:
13//!
14//! The above copyright notice and this permission notice shall be included in all
15//! copies or substantial portions of the Software.
16//!
17//! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18//! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19//! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20//! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21//! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22//! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23//! SOFTWARE.
24
25#[cfg(all(not(feature = "runtime-c"), not(feature = "runtime-c2rust")))]
26compile_error!("Either `runtime-c` or `runtime-c2rust` must be enabled!");
27#[cfg(feature = "runtime-c")]
28use tree_sitter as ts_runtime;
29#[cfg(all(
30    feature = "runtime-c2rust",
31    not(feature = "runtime-c"), // if both features are enabled, use the c runtime
32))]
33use tree_sitter_c2rust as ts_runtime;
34
35use regex::Regex;
36use std::borrow::Cow;
37use std::marker::PhantomData;
38use std::mem::MaybeUninit;
39use std::sync::atomic::{AtomicUsize, Ordering};
40use std::{iter, mem, ops, slice, str};
41use streaming_iterator::StreamingIterator as _;
42use thiserror::Error;
43use ts_runtime::{
44    ffi, Language, Node, ParseOptions, Parser, Point, Query, QueryCapture, QueryCaptures,
45    QueryCursor, QueryError, QueryMatch, QueryPredicateArg, Range, TextProvider, Tree,
46};
47
// NOTE(review): this constant is not referenced in the visible part of the file; presumably
// it is the number of iterator steps between polls of the cancellation flag — confirm at the
// use site.
const CANCELLATION_CHECK_INTERVAL: usize = 100;
49
/// Indicates which highlight should be applied to a region of source code.
///
/// The wrapped value is the index of the matched name within the list of recognized
/// highlight names passed to `HighlightConfiguration::configure`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);
53
/// Represents the reason why syntax highlighting failed.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum Error {
    /// Highlighting was manually cancelled by flipping the cancellation flag.
    ///
    /// When a layer is constructed, this is reported whenever the parser returns no tree.
    #[error("cancelled")]
    Cancelled,

    /// The provided language uses an incompatible version of tree-sitter.
    ///
    /// Reported when `Parser::set_language` rejects the configuration's language.
    #[error("invalid language: incompatible tree-sitter version")]
    InvalidLanguage,
}
65
/// Represents a single step in rendering a syntax-highlighted document.
#[derive(Copy, Clone, Debug)]
pub enum HighlightEvent {
    /// A run of source text, given as the byte range `start..end` of the input.
    Source { start: usize, end: usize },
    // NOTE(review): the two variants below are emitted outside the visible part of this file;
    // their descriptions are inferred from the names — confirm against the iterator impl.
    /// Presumably opens a region to which the given `Highlight` applies.
    HighlightStart(Highlight),
    /// Presumably closes the most recently opened highlight region.
    HighlightEnd,
}
73
/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
pub struct HighlightConfiguration {
    /// The language handed to the parser when a layer for this configuration is created.
    pub language: Language,
    /// The name given to `HighlightConfiguration::new`.
    pub language_name: String,
    /// A single query built from the injection, locals and highlights query sources,
    /// concatenated in that order.
    pub query: Query,
    /// A second query built from the injection source alone, in which only the patterns
    /// carrying the `injection.combined` property stay enabled. `None` when no injection
    /// pattern has that property.
    combined_injections_query: Option<Query>,
    /// Number of patterns in `query` that start before the locals section, i.e. the index of
    /// the first locals pattern.
    locals_pattern_index: usize,
    /// Number of patterns in `query` that start before the highlights section, i.e. the index
    /// of the first highlights pattern.
    highlights_pattern_index: usize,
    /// One entry per capture name of `query`; all `None` until `configure` is called.
    highlight_indices: Vec<Option<Highlight>>,
    /// One entry per pattern of `query`: `true` when the pattern has a negated `local`
    /// property predicate.
    non_local_variable_patterns: Vec<bool>,
    /// Capture index of `@injection.content`, if the query uses it.
    injection_content_capture_index: Option<u32>,
    /// Capture index of `@injection.language`, if the query uses it.
    injection_language_capture_index: Option<u32>,
    /// Capture index of `@local.scope`, if the query uses it.
    local_scope_capture_index: Option<u32>,
    /// Capture index of `@local.definition`, if the query uses it.
    local_def_capture_index: Option<u32>,
    /// Capture index of `@local.definition-value`, if the query uses it.
    local_def_value_capture_index: Option<u32>,
    /// Capture index of `@local.reference`, if the query uses it.
    local_ref_capture_index: Option<u32>,
}
93
/// Performs syntax highlighting, recognizing a given list of highlight names.
///
/// For the best performance `Highlighter` values should be reused between
/// syntax highlighting calls. A separate highlighter is needed for each thread that
/// is performing highlighting.
pub struct Highlighter {
    /// The parser used to build the syntax tree of every highlighting layer.
    pub parser: Parser,
    /// Pool of idle query cursors: a new layer takes one from here (or creates a fresh one
    /// when the pool is empty), and `sort_layers` puts a layer's cursor back once the layer
    /// has no events left.
    cursors: Vec<QueryCursor>,
}
103
// A local definition recorded inside a `LocalScope`.
// NOTE(review): the code that fills and reads these fields is outside the visible part of
// this file; the field notes below are inferred from names and types — confirm there.
#[derive(Debug)]
struct LocalDef<'a> {
    // Presumably the text of the defined identifier.
    name: &'a str,
    // Presumably the byte range of the definition's value node.
    value_range: ops::Range<usize>,
    // Presumably the highlight assigned to the definition, if any.
    highlight: Option<Highlight>,
}
110
// One entry of a layer's scope stack. Every layer starts with a single root scope that has
// `inherits: false`, the range `0..usize::MAX` and no definitions.
#[derive(Debug)]
struct LocalScope<'a> {
    // NOTE(review): presumably whether lookups may continue into the enclosing scope; the
    // code that reads this flag is outside the visible part of this file.
    inherits: bool,
    // Range covered by the scope (`0..usize::MAX` for the root scope).
    range: ops::Range<usize>,
    // Definitions recorded in this scope.
    local_defs: Vec<LocalDef<'a>>,
}
117
// Iterator state behind `Highlighter::highlight` and `Highlighter::highlight_existing_tree`.
struct HighlightIter<'a, F>
where
    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
{
    // The source text being highlighted.
    source: &'a [u8],
    // `language_name` of the root configuration.
    language_name: &'a str,
    // End of the source text already reported through `HighlightEvent::Source`; advanced by
    // `emit_event`.
    byte_offset: usize,
    // The highlighter this iterator borrows; exhausted layers hand their cursor back to it.
    highlighter: &'a mut Highlighter,
    // Maps an injected language name to its configuration (`None` if unknown).
    injection_callback: F,
    // Optional cancellation flag supplied by the caller.
    cancellation_flag: Option<&'a AtomicUsize>,
    // Active layers; `sort_layers` keeps the layer with the smallest sort key at index 0.
    layers: Vec<HighlightIterLayer<'a>>,
    // Starts at 0. NOTE(review): updated outside the visible part of this file.
    iter_count: usize,
    // Event held back by `emit_event` when a `Source` event has to be returned first.
    next_event: Option<HighlightEvent>,
    // Starts as `None`. NOTE(review): maintained outside the visible part of this file.
    last_highlight_range: Option<(usize, usize, usize)>,
}
133
// One highlighting layer: the root document or an injected region, together with the
// capture iterator that produces its events.
struct HighlightIterLayer<'a> {
    // Needed to keep the tree in memory. `None` for layers built by `new_from_tree`, where
    // the caller owns the tree.
    _tree: Option<Tree>,
    // The cursor `captures` was created from.
    cursor: QueryCursor,
    // Peekable captures of `config.query` over this layer's tree.
    captures: iter::Peekable<_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
    // Configuration of the language this layer is written in.
    config: &'a HighlightConfiguration,
    // Stack of offsets; `sort_key` treats the top entry as the position of the layer's next
    // highlight end.
    highlight_end_stack: Vec<usize>,
    // Stack of local scopes; starts with one root scope spanning `0..usize::MAX`.
    scope_stack: Vec<LocalScope<'a>>,
    // The included ranges this layer covers.
    ranges: Vec<Range>,
    // Nesting depth: 0 for the root layer, parent depth + 1 for combined injections.
    depth: usize,
}
144
// Additional option to only include unnamed children. Borrowed from helix-editor.
// (syntastica addition)
//
// Controls which children of an injection's content node stay part of the injected ranges
// computed by `HighlightIterLayer::intersect_ranges`.
#[derive(Clone, Copy)]
enum IncludedChildren {
    // Every child's range is cut out of the injected ranges.
    None,
    // No child is cut out; the content node's whole range is used.
    All,
    // Only named children are cut out; unnamed children stay included.
    Unnamed,
}
153
/// Local stand-in for the runtime's `QueryCaptures`.
///
/// Values of this type are only obtained by `mem::transmute`-ing a real `QueryCaptures`, so
/// that the captures can be driven through the ordinary `Iterator` implementation provided
/// in this file.
// NOTE(review): because of the transmute, the field list presumably has to mirror the
// runtime's private `QueryCaptures` definition exactly — verify when updating tree-sitter.
pub struct _QueryCaptures<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> {
    // Raw cursor passed to `ts_query_cursor_next_capture`.
    ptr: *mut ffi::TSQueryCursor,
    // Query whose text predicates each candidate match is checked against.
    query: &'query Query,
    // Supplies node text for the predicate check.
    text_provider: T,
    // Scratch buffers handed to the predicate check.
    buffer1: Vec<u8>,
    buffer2: Vec<u8>,
    // Not read by the code in this file.
    _current_match: Option<(QueryMatch<'query, 'tree>, usize)>,
    // Not read by the code in this file.
    _options: Option<*mut ffi::TSQueryCursorOptions>,
    _phantom: PhantomData<(&'tree (), I)>,
}
164
// Local stand-in for the runtime's `QueryMatch`; instances are built from a raw
// `ffi::TSQueryMatch` and then `mem::transmute`d into a real `QueryMatch`.
// NOTE(review): because of the transmute, the field list presumably has to mirror the
// runtime's `QueryMatch` definition exactly — verify when updating tree-sitter.
struct _QueryMatch<'cursor, 'tree> {
    // Index of the matched pattern within the query.
    pub _pattern_index: usize,
    // The match's captures, viewed directly in the memory owned by the cursor.
    pub _captures: &'cursor [QueryCapture<'tree>],
    // Match id reported by the runtime.
    _id: u32,
    // Cursor that produced the match.
    _cursor: *mut ffi::TSQueryCursor,
}
171
172impl<'tree> _QueryMatch<'_, 'tree> {
173    fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self {
174        _QueryMatch {
175            _cursor: cursor,
176            _id: m.id,
177            _pattern_index: m.pattern_index as usize,
178            _captures: (m.capture_count > 0)
179                .then(|| unsafe {
180                    slice::from_raw_parts(
181                        m.captures.cast::<QueryCapture<'tree>>(),
182                        m.capture_count as usize,
183                    )
184                })
185                .unwrap_or_default(),
186        }
187    }
188}
189
190impl<'query, 'tree: 'query, T: TextProvider<I>, I: AsRef<[u8]>> Iterator
191    for _QueryCaptures<'query, 'tree, T, I>
192{
193    type Item = (QueryMatch<'query, 'tree>, usize);
194
195    fn next(&mut self) -> Option<Self::Item> {
196        unsafe {
197            loop {
198                let mut capture_index = 0u32;
199                let mut m = MaybeUninit::<ffi::TSQueryMatch>::uninit();
200                if ffi::ts_query_cursor_next_capture(
201                    self.ptr,
202                    m.as_mut_ptr(),
203                    core::ptr::addr_of_mut!(capture_index),
204                ) {
205                    let result = std::mem::transmute::<_QueryMatch, QueryMatch>(_QueryMatch::new(
206                        &m.assume_init(),
207                        self.ptr,
208                    ));
209                    if result.satisfies_text_predicates(
210                        self.query,
211                        &mut self.buffer1,
212                        &mut self.buffer2,
213                        &mut self.text_provider,
214                    ) {
215                        return Some((result, capture_index as usize));
216                    }
217                    result.remove();
218                } else {
219                    return None;
220                }
221            }
222        }
223    }
224}
225
226impl Default for Highlighter {
227    fn default() -> Self {
228        Self::new()
229    }
230}
231
232impl Highlighter {
233    #[must_use]
234    pub fn new() -> Self {
235        Self {
236            parser: Parser::new(),
237            cursors: Vec::new(),
238        }
239    }
240
    /// Returns a mutable reference to the parser owned by this highlighter.
    pub fn parser(&mut self) -> &mut Parser {
        &mut self.parser
    }
244
245    /// Iterate over the highlighted regions for a given slice of source code.
246    pub fn highlight<'a>(
247        &'a mut self,
248        config: &'a HighlightConfiguration,
249        source: &'a [u8],
250        cancellation_flag: Option<&'a AtomicUsize>,
251        mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
252    ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
253        let layers = HighlightIterLayer::new(
254            source,
255            None,
256            self,
257            cancellation_flag,
258            &mut injection_callback,
259            config,
260            0,
261            vec![Range {
262                start_byte: 0,
263                end_byte: usize::MAX,
264                start_point: Point::new(0, 0),
265                end_point: Point::new(usize::MAX, usize::MAX),
266            }],
267        )?;
268        assert_ne!(layers.len(), 0);
269        let mut result = HighlightIter {
270            source,
271            language_name: &config.language_name,
272            byte_offset: 0,
273            injection_callback,
274            cancellation_flag,
275            highlighter: self,
276            iter_count: 0,
277            layers,
278            next_event: None,
279            last_highlight_range: None,
280        };
281        result.sort_layers();
282        Ok(result)
283    }
284
285    /// Iterate over the highlighted regions for a given node.
286    /// Does not parse anything (and therefore does not support injections).
287    // (syntastica addition)
288    pub fn highlight_existing_tree<'a>(
289        &'a mut self,
290        config: &'a HighlightConfiguration,
291        source: &'a [u8],
292        cancellation_flag: Option<&'a AtomicUsize>,
293        node: &'a Node,
294    ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
295        let layers = vec![HighlightIterLayer::new_from_tree(source, node, config)];
296        let mut result = HighlightIter {
297            source,
298            language_name: &config.language_name,
299            byte_offset: 0,
300            injection_callback: |_| None,
301            cancellation_flag,
302            highlighter: self,
303            iter_count: 0,
304            layers,
305            next_event: None,
306            last_highlight_range: None,
307        };
308        result.sort_layers();
309        Ok(result)
310    }
311}
312
313impl HighlightConfiguration {
314    /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting
315    /// queries.
316    ///
317    /// # Parameters
318    ///
319    /// * `language`  - The Tree-sitter `Language` that should be used for parsing.
320    /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
321    ///   should be non-empty, otherwise no syntax highlights will be added.
322    /// * `injections_query` -  A string containing tree patterns for injecting other languages into
323    ///   the document. This can be empty if no injections are desired.
324    /// * `locals_query` - A string containing tree patterns for tracking local variable definitions
325    ///   and references. This can be empty if local variable tracking is not needed.
326    ///
327    /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
328    pub fn new(
329        language: Language,
330        name: impl Into<String>,
331        highlights_query: &str,
332        injection_query: &str,
333        locals_query: &str,
334    ) -> Result<Self, QueryError> {
335        // Concatenate the query strings, keeping track of the start offset of each section.
336        let mut query_source = String::new();
337        query_source.push_str(injection_query);
338        let locals_query_offset = query_source.len();
339        query_source.push_str(locals_query);
340        let highlights_query_offset = query_source.len();
341        query_source.push_str(highlights_query);
342
343        // Construct a single query by concatenating the three query strings, but record the
344        // range of pattern indices that belong to each individual string.
345        let mut query = Query::new(&language, &query_source)?;
346        let mut locals_pattern_index = 0;
347        let mut highlights_pattern_index = 0;
348        for i in 0..(query.pattern_count()) {
349            let pattern_offset = query.start_byte_for_pattern(i);
350            if pattern_offset < highlights_query_offset {
351                if pattern_offset < highlights_query_offset {
352                    highlights_pattern_index += 1;
353                }
354                if pattern_offset < locals_query_offset {
355                    locals_pattern_index += 1;
356                }
357            }
358        }
359
360        // Construct a separate query just for dealing with the 'combined injections'.
361        // Disable the combined injection patterns in the main query.
362        let mut combined_injections_query = Query::new(&language, injection_query)?;
363        let mut has_combined_queries = false;
364        for pattern_index in 0..locals_pattern_index {
365            let settings = query.property_settings(pattern_index);
366            if settings.iter().any(|s| &*s.key == "injection.combined") {
367                has_combined_queries = true;
368                query.disable_pattern(pattern_index);
369            } else {
370                combined_injections_query.disable_pattern(pattern_index);
371            }
372        }
373        let combined_injections_query = if has_combined_queries {
374            Some(combined_injections_query)
375        } else {
376            None
377        };
378
379        // Find all of the highlighting patterns that are disabled for nodes that
380        // have been identified as local variables.
381        let non_local_variable_patterns = (0..query.pattern_count())
382            .map(|i| {
383                query
384                    .property_predicates(i)
385                    .iter()
386                    .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local")
387            })
388            .collect();
389
390        // Store the numeric ids for all of the special captures.
391        let mut injection_content_capture_index = None;
392        let mut injection_language_capture_index = None;
393        let mut local_def_capture_index = None;
394        let mut local_def_value_capture_index = None;
395        let mut local_ref_capture_index = None;
396        let mut local_scope_capture_index = None;
397        for (i, name) in query.capture_names().iter().enumerate() {
398            let i = Some(i as u32);
399            match *name {
400                "injection.content" => injection_content_capture_index = i,
401                "injection.language" => injection_language_capture_index = i,
402                "local.definition" => local_def_capture_index = i,
403                "local.definition-value" => local_def_value_capture_index = i,
404                "local.reference" => local_ref_capture_index = i,
405                "local.scope" => local_scope_capture_index = i,
406                _ => {}
407            }
408        }
409
410        let highlight_indices = vec![None; query.capture_names().len()];
411        Ok(Self {
412            language,
413            language_name: name.into(),
414            query,
415            combined_injections_query,
416            locals_pattern_index,
417            highlights_pattern_index,
418            highlight_indices,
419            non_local_variable_patterns,
420            injection_content_capture_index,
421            injection_language_capture_index,
422            local_def_capture_index,
423            local_def_value_capture_index,
424            local_ref_capture_index,
425            local_scope_capture_index,
426        })
427    }
428
    /// Get a slice containing all of the highlight names used in the configuration.
    ///
    /// These are the capture names of the combined query, so the slice also contains the
    /// special injection and locals capture names when the queries use them.
    #[must_use]
    pub const fn names(&self) -> &[&str] {
        self.query.capture_names()
    }
434
435    /// Set the list of recognized highlight names.
436    ///
437    /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated
438    /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of
439    /// these queries can choose to recognize highlights with different levels of specificity.
440    /// For example, the string `function.builtin` will match against `function.method.builtin`
441    /// and `function.builtin.constructor`, but will not match `function.method`.
442    ///
443    /// When highlighting, results are returned as `Highlight` values, which contain the index
444    /// of the matched highlight this list of highlight names.
445    pub fn configure(&mut self, recognized_names: &[impl AsRef<str>]) {
446        let mut capture_parts = Vec::new();
447        self.highlight_indices.clear();
448        self.highlight_indices
449            .extend(self.query.capture_names().iter().map(move |capture_name| {
450                capture_parts.clear();
451                capture_parts.extend(capture_name.split('.'));
452
453                let mut best_index = None;
454                let mut best_match_len = 0;
455                for (i, recognized_name) in recognized_names.iter().enumerate() {
456                    let mut len = 0;
457                    let mut matches = true;
458                    for part in recognized_name.as_ref().split('.') {
459                        len += 1;
460                        if !capture_parts.contains(&part) {
461                            matches = false;
462                            break;
463                        }
464                    }
465                    if matches && len > best_match_len {
466                        best_index = Some(i);
467                        best_match_len = len;
468                    }
469                }
470                best_index.map(Highlight)
471            }));
472    }
473}
474
475impl<'a> HighlightIterLayer<'a> {
476    /// Create a new 'layer' of highlighting for this document.
477    ///
478    /// In the event that the new layer contains "combined injections" (injections where multiple
479    /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
480    /// added to the returned vector.
481    #[allow(clippy::too_many_arguments)]
482    fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
483        source: &'a [u8],
484        parent_name: Option<&str>,
485        highlighter: &mut Highlighter,
486        cancellation_flag: Option<&'a AtomicUsize>,
487        injection_callback: &mut F,
488        mut config: &'a HighlightConfiguration,
489        mut depth: usize,
490        mut ranges: Vec<Range>,
491    ) -> Result<Vec<Self>, Error> {
492        let mut result = Vec::with_capacity(1);
493        let mut queue = Vec::new();
494        loop {
495            if highlighter.parser.set_included_ranges(&ranges).is_ok() {
496                highlighter
497                    .parser
498                    .set_language(&config.language)
499                    .map_err(|_| Error::InvalidLanguage)?;
500
501                let tree = highlighter
502                    .parser
503                    .parse_with_options(
504                        &mut |i, _| if i < source.len() { &source[i..] } else { &[] },
505                        None,
506                        Some(ParseOptions::new().progress_callback(&mut |_| {
507                            if let Some(cancellation_flag) = cancellation_flag {
508                                cancellation_flag.load(Ordering::SeqCst) != 0
509                            } else {
510                                false
511                            }
512                        })),
513                    )
514                    .ok_or(Error::Cancelled)?;
515                let mut cursor = highlighter.cursors.pop().unwrap_or_default();
516
517                // Process combined injections.
518                if let Some(combined_injections_query) = &config.combined_injections_query {
519                    let mut injections_by_pattern_index =
520                        vec![
521                            (None, Vec::new(), IncludedChildren::None);
522                            combined_injections_query.pattern_count()
523                        ];
524                    let mut matches =
525                        cursor.matches(combined_injections_query, tree.root_node(), source);
526                    while let Some(mat) = matches.next() {
527                        let entry = &mut injections_by_pattern_index[mat.pattern_index];
528                        let (language_name, content_node, included_children) = injection_for_match(
529                            config,
530                            parent_name,
531                            combined_injections_query,
532                            mat,
533                            source,
534                        );
535                        if language_name.is_some() {
536                            entry.0 = language_name;
537                        }
538                        if let Some(content_node) = content_node {
539                            entry.1.push(content_node);
540                        }
541                        entry.2 = included_children;
542                    }
543                    for (lang_name, content_nodes, included_children) in injections_by_pattern_index
544                    {
545                        if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
546                            if let Some(next_config) = (injection_callback)(&lang_name) {
547                                let ranges = Self::intersect_ranges(
548                                    &ranges,
549                                    &content_nodes,
550                                    included_children,
551                                );
552                                if !ranges.is_empty() {
553                                    queue.push((next_config, depth + 1, ranges));
554                                }
555                            }
556                        }
557                    }
558                }
559
560                // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
561                // prevents them from being moved. But both of these values are really just
562                // pointers, so it's actually ok to move them.
563                let tree_ref = unsafe { mem::transmute::<&Tree, &'static Tree>(&tree) };
564                let cursor_ref = unsafe {
565                    mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor)
566                };
567                let captures = unsafe {
568                    mem::transmute::<QueryCaptures<_, _>, _QueryCaptures<_, _>>(
569                        cursor_ref.captures(&config.query, tree_ref.root_node(), source),
570                    )
571                }
572                .peekable();
573
574                result.push(HighlightIterLayer {
575                    highlight_end_stack: Vec::new(),
576                    scope_stack: vec![LocalScope {
577                        inherits: false,
578                        range: 0..usize::MAX,
579                        local_defs: Vec::new(),
580                    }],
581                    cursor,
582                    depth,
583                    _tree: Some(tree),
584                    captures,
585                    config,
586                    ranges,
587                });
588            }
589
590            if queue.is_empty() {
591                break;
592            }
593
594            let (next_config, next_depth, next_ranges) = queue.remove(0);
595            config = next_config;
596            depth = next_depth;
597            ranges = next_ranges;
598        }
599
600        Ok(result)
601    }
602
603    // (syntastica addition)
604    fn new_from_tree(source: &'a [u8], node: &'a Node, config: &'a HighlightConfiguration) -> Self {
605        let mut cursor = QueryCursor::new();
606
607        // `QueryCursor` is really just a pointer, so it's ok to move.
608        let cursor_ref =
609            unsafe { mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor) };
610        let captures = unsafe {
611            mem::transmute::<QueryCaptures<_, _>, _QueryCaptures<_, _>>(cursor_ref.captures(
612                &config.query,
613                *node,
614                source,
615            ))
616        }
617        .peekable();
618
619        HighlightIterLayer {
620            highlight_end_stack: Vec::new(),
621            scope_stack: vec![LocalScope {
622                inherits: false,
623                range: 0..usize::MAX,
624                local_defs: Vec::new(),
625            }],
626            cursor,
627            depth: 0,
628            _tree: None,
629            captures,
630            config,
631            ranges: vec![Range {
632                start_byte: 0,
633                end_byte: usize::MAX,
634                start_point: Point::new(0, 0),
635                end_point: Point::new(usize::MAX, usize::MAX),
636            }],
637        }
638    }
639
640    // Compute the ranges that should be included when parsing an injection.
641    // This takes into account three things:
642    // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
643    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges are the
644    //   ranges of those nodes.
645    // * `included_children` - For some injections, the content nodes' children should be excluded
646    //   from the nested document, so that only the content nodes' *own* content is reparsed. For
647    //   other injections, the content nodes' entire ranges should be reparsed, including the ranges
648    //   of their children.
649    fn intersect_ranges(
650        parent_ranges: &[Range],
651        nodes: &[Node],
652        included_children: IncludedChildren,
653    ) -> Vec<Range> {
654        let mut cursor = nodes[0].walk();
655        let mut result = Vec::new();
656        let mut parent_range_iter = parent_ranges.iter();
657        let mut parent_range = parent_range_iter
658            .next()
659            .expect("Layers should only be constructed with non-empty ranges vectors");
660        for node in nodes {
661            let mut preceding_range = Range {
662                start_byte: 0,
663                start_point: Point::new(0, 0),
664                end_byte: node.start_byte(),
665                end_point: node.start_position(),
666            };
667            let following_range = Range {
668                start_byte: node.end_byte(),
669                start_point: node.end_position(),
670                end_byte: usize::MAX,
671                end_point: Point::new(usize::MAX, usize::MAX),
672            };
673
674            for excluded_range in node
675                .children(&mut cursor)
676                .filter_map(|child| match included_children {
677                    IncludedChildren::None => Some(child.range()),
678                    IncludedChildren::All => None,
679                    IncludedChildren::Unnamed => {
680                        if child.is_named() {
681                            Some(child.range())
682                        } else {
683                            None
684                        }
685                    }
686                })
687                .chain(std::iter::once(following_range))
688            {
689                let mut range = Range {
690                    start_byte: preceding_range.end_byte,
691                    start_point: preceding_range.end_point,
692                    end_byte: excluded_range.start_byte,
693                    end_point: excluded_range.start_point,
694                };
695                preceding_range = excluded_range;
696
697                if range.end_byte < parent_range.start_byte {
698                    continue;
699                }
700
701                while parent_range.start_byte <= range.end_byte {
702                    if parent_range.end_byte > range.start_byte {
703                        if range.start_byte < parent_range.start_byte {
704                            range.start_byte = parent_range.start_byte;
705                            range.start_point = parent_range.start_point;
706                        }
707
708                        if parent_range.end_byte < range.end_byte {
709                            if range.start_byte < parent_range.end_byte {
710                                result.push(Range {
711                                    start_byte: range.start_byte,
712                                    start_point: range.start_point,
713                                    end_byte: parent_range.end_byte,
714                                    end_point: parent_range.end_point,
715                                });
716                            }
717                            range.start_byte = parent_range.end_byte;
718                            range.start_point = parent_range.end_point;
719                        } else {
720                            if range.start_byte < range.end_byte {
721                                result.push(range);
722                            }
723                            break;
724                        }
725                    }
726
727                    if let Some(next_range) = parent_range_iter.next() {
728                        parent_range = next_range;
729                    } else {
730                        return result;
731                    }
732                }
733            }
734        }
735        result
736    }
737
738    // First, sort scope boundaries by their byte offset in the document. At a
739    // given position, emit scope endings before scope beginnings. Finally, emit
740    // scope boundaries from deeper layers first.
741    fn sort_key(&mut self) -> Option<(usize, bool, isize)> {
742        let depth = -(self.depth as isize);
743        let next_start = self
744            .captures
745            .peek()
746            .map(|(m, i)| m.captures[*i].node.start_byte());
747        let next_end = self.highlight_end_stack.last().copied();
748        match (next_start, next_end) {
749            (Some(start), Some(end)) => {
750                if start < end {
751                    Some((start, true, depth))
752                } else {
753                    Some((end, false, depth))
754                }
755            }
756            (Some(i), None) => Some((i, true, depth)),
757            (None, Some(j)) => Some((j, false, depth)),
758            _ => None,
759        }
760    }
761}
762
763impl<'a, F> HighlightIter<'a, F>
764where
765    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
766{
767    fn emit_event(
768        &mut self,
769        offset: usize,
770        event: Option<HighlightEvent>,
771    ) -> Option<Result<HighlightEvent, Error>> {
772        let result;
773        if self.byte_offset < offset {
774            result = Some(Ok(HighlightEvent::Source {
775                start: self.byte_offset,
776                end: offset,
777            }));
778            self.byte_offset = offset;
779            self.next_event = event;
780        } else {
781            result = event.map(Ok);
782        }
783        self.sort_layers();
784        result
785    }
786
787    fn sort_layers(&mut self) {
788        while !self.layers.is_empty() {
789            if let Some(sort_key) = self.layers[0].sort_key() {
790                let mut i = 0;
791                while i + 1 < self.layers.len() {
792                    if let Some(next_offset) = self.layers[i + 1].sort_key() {
793                        if next_offset < sort_key {
794                            i += 1;
795                            continue;
796                        }
797                    }
798                    break;
799                }
800                if i > 0 {
801                    self.layers[0..=i].rotate_left(1);
802                }
803                break;
804            }
805            let layer = self.layers.remove(0);
806            self.highlighter.cursors.push(layer.cursor);
807        }
808    }
809
810    fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) {
811        if let Some(sort_key) = layer.sort_key() {
812            let mut i = 1;
813            while i < self.layers.len() {
814                if let Some(sort_key_i) = self.layers[i].sort_key() {
815                    if sort_key_i > sort_key {
816                        self.layers.insert(i, layer);
817                        return;
818                    }
819                    i += 1;
820                } else {
821                    self.layers.remove(i);
822                }
823            }
824            self.layers.push(layer);
825        }
826    }
827}
828
// Iterator implementation that turns the captures of all active layers into a
// single, ordered stream of `HighlightEvent`s.
829impl<'a, F> Iterator for HighlightIter<'a, F>
830where
831    F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
832{
833    type Item = Result<HighlightEvent, Error>;
834
    /// Returns the next highlight event, an error, or `None` once the whole
    /// source has been reported.
    ///
    /// Each pass of the main loop:
    /// 1. returns the event stashed in `next_event`, if any;
    /// 2. if a cancellation flag was supplied, checks it once every
    ///    `CANCELLATION_CHECK_INTERVAL` passes and returns `Error::Cancelled`
    ///    when it is non-zero;
    /// 3. if no layers remain, reports the rest of the source (once) and then
    ///    ends the iteration;
    /// 4. otherwise takes the next capture of `self.layers[0]` and handles it
    ///    as an injection, as local-variable tracking, or as a highlight.
    ///
    /// An error is also returned when creating the layers for an injection
    /// fails.
835    fn next(&mut self) -> Option<Self::Item> {
836        'main: loop {
837            // If we've already determined the next highlight boundary, just return it.
838            if let Some(e) = self.next_event.take() {
839                return Some(Ok(e));
840            }
841
842            // Periodically check for cancellation, returning `Cancelled` error if the
843            // cancellation flag was flipped.
844            if let Some(cancellation_flag) = self.cancellation_flag {
845                self.iter_count += 1;
846                if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
847                    self.iter_count = 0;
848                    if cancellation_flag.load(Ordering::Relaxed) != 0 {
849                        return Some(Err(Error::Cancelled));
850                    }
851                }
852            }
853
854            // If none of the layers have any more highlight boundaries, terminate.
            // Any source after the last boundary is emitted first as one final
            // `Source` event; the call after that returns `None`.
855            if self.layers.is_empty() {
856                return if self.byte_offset < self.source.len() {
857                    let result = Some(Ok(HighlightEvent::Source {
858                        start: self.byte_offset,
859                        end: self.source.len(),
860                    }));
861                    self.byte_offset = self.source.len();
862                    result
863                } else {
864                    None
865                };
866            }
867
868            // Get the next capture from whichever layer has the earliest highlight boundary.
            // `range` is only assigned in the branch that falls through to the code
            // below; every other branch returns.
869            let range;
870            let layer = &mut self.layers[0];
871            if let Some((next_match, capture_index)) = layer.captures.peek() {
872                let next_capture = next_match.captures[*capture_index];
873                range = next_capture.node.byte_range();
874
875                // If any previous highlight ends before this node starts, then before
876                // processing this capture, emit the source code up until the end of the
877                // previous highlight, and an end event for that highlight.
878                if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
879                    if end_byte <= range.start {
880                        layer.highlight_end_stack.pop();
881                        return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
882                    }
883                }
884            }
885            // If there are no more captures, then emit any remaining highlight end events.
886            // And if there are none of those, then just advance to the end of the document.
887            else if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
888                layer.highlight_end_stack.pop();
889                return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
890            } else {
891                return self.emit_event(self.source.len(), None);
892            }
893
            // The `peek()` above returned `Some`, so this `unwrap()` is expected to
            // succeed and to yield that same capture.
894            let (mut match_, capture_index) = layer.captures.next().unwrap();
895            let mut capture = match_.captures[capture_index];
896
897            // If this capture represents an injection, then process the injection.
            // Injection patterns are those whose index is below `locals_pattern_index`.
898            if match_.pattern_index < layer.config.locals_pattern_index {
899                let (language_name, content_node, included_children) = injection_for_match(
900                    layer.config,
901                    Some(self.language_name),
902                    &layer.config.query,
903                    &match_,
904                    self.source,
905                );
906
907                // If a language is found with the given name, then add a new language layer
908                // to the highlighted document.
909                if let (Some(language_name), Some(content_node)) = (language_name, content_node) {
910                    // Explicitly remove this match so that none of its other captures will remain
911                    // in the stream of captures.
912                    match_.remove();
913
914                    if let Some(config) = (self.injection_callback)(&language_name) {
                        // Restrict the injected content to the ranges already covered
                        // by the current layer.
915                        let ranges = HighlightIterLayer::intersect_ranges(
916                            &self.layers[0].ranges,
917                            &[content_node],
918                            included_children,
919                        );
920                        if !ranges.is_empty() {
                            // The new layers sit one level deeper than the layer that
                            // produced the injection.
921                            match HighlightIterLayer::new(
922                                self.source,
923                                Some(self.language_name),
924                                self.highlighter,
925                                self.cancellation_flag,
926                                &mut self.injection_callback,
927                                config,
928                                self.layers[0].depth + 1,
929                                ranges,
930                            ) {
931                                Ok(layers) => {
932                                    for layer in layers {
933                                        self.insert_layer(layer);
934                                    }
935                                }
936                                Err(e) => return Some(Err(e)),
937                            }
938                        }
939                    }
940                }
941
                // An injection capture never produces an event itself.
942                self.sort_layers();
943                continue 'main;
944            }
945
946            // Remove from the local scope stack any local scopes that have already ended.
            // NOTE(review): the `unwrap()` assumes the scope stack is never empty
            // (presumably a root scope is pushed when the layer is created) — confirm.
947            while range.start > layer.scope_stack.last().unwrap().range.end {
948                layer.scope_stack.pop();
949            }
950
951            // If this capture is for tracking local variables, then process the
952            // local variable info.
            // Local-variable patterns are those whose index is below
            // `highlights_pattern_index`. The loop keeps consuming captures for the
            // same node and exits normally only once it reaches a highlight pattern.
953            let mut reference_highlight = None;
954            let mut definition_highlight = None;
955            while match_.pattern_index < layer.config.highlights_pattern_index {
956                // If the node represents a local scope, push a new local scope onto
957                // the scope stack.
958                if Some(capture.index) == layer.config.local_scope_capture_index {
959                    definition_highlight = None;
960                    let mut scope = LocalScope {
961                        inherits: true,
962                        range: range.clone(),
963                        local_defs: Vec::new(),
964                    };
                    // `local.scope-inherits` without a value counts as `true`.
965                    for prop in layer.config.query.property_settings(match_.pattern_index) {
966                        if prop.key.as_ref() == "local.scope-inherits" {
967                            scope.inherits =
968                                prop.value.as_ref().is_none_or(|r| r.as_ref() == "true");
969                        }
970                    }
971                    layer.scope_stack.push(scope);
972                }
973                // If the node represents a definition, add a new definition to the
974                // local scope at the top of the scope stack.
975                else if Some(capture.index) == layer.config.local_def_capture_index {
976                    reference_highlight = None;
977                    definition_highlight = None;
978                    let scope = layer.scope_stack.last_mut().unwrap();
979
                    // Use the range of the definition's value capture if the match has
                    // one; otherwise the value range stays empty (`0..0`).
980                    let mut value_range = 0..0;
981                    for capture in match_.captures {
982                        if Some(capture.index) == layer.config.local_def_value_capture_index {
983                            value_range = capture.node.byte_range();
984                        }
985                    }
986
                    // Names that are not valid UTF-8 are not recorded.
987                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
988                        scope.local_defs.push(LocalDef {
989                            name,
990                            value_range,
991                            highlight: None,
992                        });
                        // Keep a handle to the new entry so that the highlight chosen
                        // further below can be stored on it.
993                        definition_highlight =
994                            scope.local_defs.last_mut().map(|s| &mut s.highlight);
995                    }
996                }
997                // If the node represents a reference, then try to find the corresponding
998                // definition in the scope stack.
999                else if Some(capture.index) == layer.config.local_ref_capture_index
1000                    && definition_highlight.is_none()
1001                {
                    // Redundant: the branch condition already guarantees that
                    // `definition_highlight` is `None`.
1002                    definition_highlight = None;
1003                    if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
                        // Search the most recently pushed scope first and, within a
                        // scope, the most recently added definition first. A definition
                        // only matches references that start at or after the end of its
                        // value range.
1004                        for scope in layer.scope_stack.iter().rev() {
1005                            if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
1006                                if def.name == name && range.start >= def.value_range.end {
1007                                    Some(def.highlight)
1008                                } else {
1009                                    None
1010                                }
1011                            }) {
1012                                reference_highlight = highlight;
1013                                break;
1014                            }
                            // A scope that does not inherit ends the lookup.
1015                            if !scope.inherits {
1016                                break;
1017                            }
1018                        }
1019                    }
1020                }
1021
1022                // Continue processing any additional matches for the same node.
1023                if let Some((next_match, next_capture_index)) = layer.captures.peek() {
1024                    let next_capture = next_match.captures[*next_capture_index];
1025                    if next_capture.node == capture.node {
1026                        capture = next_capture;
1027                        match_ = layer.captures.next().unwrap().0;
1028                        continue;
1029                    }
1030                }
1031
                // The next capture is for a different node (or there is none), so no
                // highlight pattern was found for this node: nothing is emitted.
1032                self.sort_layers();
1033                continue 'main;
1034            }
1035
1036            // Otherwise, this capture must represent a highlight.
1037            // If this exact range was just highlighted by a deeper layer (the last
1038            // highlight's depth is greater than this layer's), then skip over this one.
1039            if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
1040                if range.start == last_start && range.end == last_end && layer.depth < last_depth {
1041                    self.sort_layers();
1042                    continue 'main;
1043                }
1044            }
1045
1046            // Once a highlighting pattern is found for the current node, keep iterating over
1047            // any later highlighting patterns that also match this node and set the match to it.
1048            // Captures for a given node are ordered by pattern index, so these subsequent
1049            // captures are guaranteed to be for highlighting, not injections or
1050            // local variables.
1051            while let Some((next_match, next_capture_index)) = layer.captures.peek() {
1052                let next_capture = next_match.captures[*next_capture_index];
1053                if next_capture.node == capture.node {
1054                    let (following_match, capture_index) = layer.captures.next().unwrap();
1055                    // If the current node was found to be a local variable, then ignore
1056                    // the following match if it's a highlighting pattern that is disabled
1057                    // for local variables.
1058                    if (definition_highlight.is_some() || reference_highlight.is_some())
1059                        && layer.config.non_local_variable_patterns[following_match.pattern_index]
1060                    {
1061                        continue;
1062                    }
1063
1064                    // Skip captures starting with `_`.
1065                    // (syntastica addition)
1066                    if layer.config.names()[following_match.captures[capture_index].index as usize]
1067                        .starts_with('_')
1068                    {
1069                        continue;
1070                    }
1071
                    // The later pattern wins: remove the superseded match and adopt
                    // the following one.
1072                    match_.remove();
1073                    capture = next_capture;
1074                    match_ = following_match;
1075                } else {
1076                    break;
1077                }
1078            }
1079
            // The highlight configured for this capture, if any.
1080            let current_highlight = layer.config.highlight_indices[capture.index as usize];
1081
1082            // If this node represents a local definition, then store the current
1083            // highlight value on the local scope entry representing this node.
1084            if let Some(definition_highlight) = definition_highlight {
1085                *definition_highlight = current_highlight;
1086            }
1087
1088            // Emit a scope start event and push the node's end position to the stack.
            // The highlight of a resolved local reference takes precedence over the
            // capture's own highlight.
1089            if let Some(highlight) = reference_highlight.or(current_highlight) {
1090                self.last_highlight_range = Some((range.start, range.end, layer.depth));
1091                layer.highlight_end_stack.push(range.end);
1092                return self
1093                    .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
1094            }
1095
            // No highlight applies to this capture: re-sort and look at the next one.
1096            self.sort_layers();
1097        }
1098    }
1099}
1100
1101fn injection_for_match<'a>(
1102    config: &'a HighlightConfiguration,
1103    parent_name: Option<&'a str>,
1104    query: &'a Query,
1105    query_match: &QueryMatch<'a, 'a>,
1106    source: &'a [u8],
1107) -> (Option<Cow<'a, str>>, Option<Node<'a>>, IncludedChildren) {
1108    let content_capture_index = config.injection_content_capture_index;
1109    let language_capture_index = config.injection_language_capture_index;
1110
1111    let mut language_name = None;
1112    let mut content_node = None;
1113
1114    for capture in query_match.captures {
1115        let index = Some(capture.index);
1116        if index == language_capture_index {
1117            language_name = capture.node.utf8_text(source).ok();
1118        } else if index == content_capture_index {
1119            content_node = Some(capture.node);
1120        }
1121    }
1122
1123    let mut included_children = IncludedChildren::None;
1124    for prop in query.property_settings(query_match.pattern_index) {
1125        match prop.key.as_ref() {
1126            // In addition to specifying the language name via the text of a
1127            // captured node, it can also be hard-coded via a `#set!` predicate
1128            // that sets the injection.language key.
1129            "injection.language" => {
1130                if language_name.is_none() {
1131                    language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref);
1132                }
1133            }
1134
1135            // Setting the `injection.self` key can be used to specify that the
1136            // language name should be the same as the language of the current
1137            // layer.
1138            "injection.self" => {
1139                if language_name.is_none() {
1140                    language_name = Some(config.language_name.as_str());
1141                }
1142            }
1143
1144            // Setting the `injection.parent` key can be used to specify that
1145            // the language name should be the same as the language of the
1146            // parent layer
1147            "injection.parent" => {
1148                if language_name.is_none() {
1149                    language_name = parent_name;
1150                }
1151            }
1152
1153            // By default, injections do not include the *children* of an
1154            // `injection.content` node - only the ranges that belong to the
1155            // node itself. This can be changed using a `#set!` predicate that
1156            // sets the `injection.include-children` key.
1157            "injection.include-children" => included_children = IncludedChildren::All,
1158
1159            // Some queries might only exclude named children but include unnamed
1160            // children in their `injection.content` node. This can be enabled using
1161            // a `#set!` predicate that sets the `injection.include-unnamed-children` key.
1162            // (syntastica addition)
1163            "injection.include-unnamed-children" => included_children = IncludedChildren::Unnamed,
1164            _ => {}
1165        }
1166    }
1167
1168    // (syntastica addition)
1169    let mut language_name = language_name.map(Cow::Borrowed);
1170    if let Some(language) = &mut language_name {
1171        for predicate in query.general_predicates(query_match.pattern_index) {
1172            if predicate.operator == "replace!".into() {
1173                let [QueryPredicateArg::Capture(capture), QueryPredicateArg::String(pattern), QueryPredicateArg::String(replacement)] =
1174                    &*predicate.args
1175                else {
1176                    // TODO: maybe don't ignore errors
1177                    continue;
1178                };
1179                if Some(*capture) != language_capture_index {
1180                    continue;
1181                }
1182                let Ok(re) = Regex::new(pattern) else {
1183                    continue;
1184                };
1185                *language = re.replace_all(language, &**replacement).into_owned().into();
1186            }
1187        }
1188    }
1189
1190    (language_name, content_node, included_children)
1191}