syntastica_parsers_dynamic/
loader.rs

1//! Forked from <https://github.com/tree-sitter/tree-sitter/blob/v0.25.2/cli/loader/src/lib.rs>
2//!
3//! The MIT License (MIT)
4//!
5//! Copyright (c) 2018-2023 Max Brunsfeld
6//!
7//! Permission is hereby granted, free of charge, to any person obtaining a copy
8//! of this software and associated documentation files (the "Software"), to deal
9//! in the Software without restriction, including without limitation the rights
10//! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11//! copies of the Software, and to permit persons to whom the Software is
12//! furnished to do so, subject to the following conditions:
13//!
14//! The above copyright notice and this permission notice shall be included in all
15//! copies or substantial portions of the Software.
16//!
17//! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18//! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19//! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20//! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21//! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22//! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23//! SOFTWARE.
24
25use std::{
26    borrow::Borrow,
27    collections::HashMap,
28    env, fs,
29    io::{BufRead, BufReader},
30    mem,
31    path::{Path, PathBuf},
32    process::Command,
33    sync::{LazyLock, Mutex},
34    time::SystemTime,
35};
36
37use anyhow::{anyhow, Context, Result};
38use etcetera::BaseStrategy as _;
39use fs4::fs_std::FileExt;
40use indoc::indoc;
41use libloading::{Library, Symbol};
42use once_cell::unsync::OnceCell;
43use regex::{Regex, RegexBuilder};
44use serde::Deserialize;
45use syntastica_core::language_set::HighlightConfiguration;
46use tree_sitter::Language;
47
/// Matches a JSON `"name": "<value>"` pair and captures the value (non-greedily) in group 1.
///
/// Compiled lazily on first use; `Loader::grammar_json_name` applies it to the first
/// lines of a `grammar.json` file.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());
50
/// Configuration consumed by [`Loader::find_all_languages`].
#[derive(Default)]
pub struct Config {
    /// Directories whose direct children are scanned; only entries whose name starts
    /// with `tree-sitter-` are treated as grammar directories.
    pub parser_directories: Vec<PathBuf>,
}
55
/// A path-valued JSON field that may be missing, a single path, or a list of paths.
///
/// The enum is deserialized untagged, so no variant name appears in the JSON.
#[derive(Deserialize, Clone, Default)]
#[serde(untagged)]
pub enum PathsJSON {
    /// No paths given; this is also the `Default` value.
    #[default]
    Empty,
    /// Exactly one path.
    Single(PathBuf),
    /// A list of paths.
    Multiple(Vec<PathBuf>),
}
64
65impl PathsJSON {
66    fn into_vec(self) -> Option<Vec<PathBuf>> {
67        match self {
68            Self::Empty => None,
69            Self::Single(s) => Some(vec![s]),
70            Self::Multiple(s) => Some(s),
71        }
72    }
73}
74
/// The subset of a `tree-sitter.json` file that this loader reads.
#[derive(Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// The grammar entries listed in the file.
    pub grammars: Vec<Grammar>,
}
80
81impl TreeSitterJSON {
82    pub fn from_file(path: &Path) -> Result<Self> {
83        Ok(serde_json::from_str(&fs::read_to_string(
84            path.join("tree-sitter.json"),
85        )?)?)
86    }
87}
88
/// One entry of the `grammars` list in `tree-sitter.json`.
///
/// Field names are read in kebab-case (for example `external-files`).
#[derive(Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    /// Language name; becomes `LanguageConfiguration::language_name`.
    pub name: String,
    /// Grammar directory relative to the directory containing `tree-sitter.json`;
    /// the loader falls back to `.` when absent.
    pub path: Option<PathBuf>,
    /// Extra files, joined onto the parser directory by the loader and stored
    /// alongside the language entry.
    #[serde(default)]
    pub external_files: PathsJSON,
    /// Keys under which the configuration is registered for file-name lookup.
    pub file_types: Option<Vec<String>>,
    /// Highlight query file name(s).
    #[serde(default)]
    pub highlights: PathsJSON,
    /// Injection query file name(s).
    #[serde(default)]
    pub injections: PathsJSON,
    /// Locals query file name(s).
    #[serde(default)]
    pub locals: PathsJSON,
    /// Pattern matched against injection strings to select this language.
    pub injection_regex: Option<String>,
    /// Pattern stored as the configuration's first-line regex.
    pub first_line_regex: Option<String>,
    /// Pattern matched against file contents to break ties between configurations.
    pub content_regex: Option<String>,
}
107
/// Value of the `BUILD_TARGET` environment variable at compile time; passed to
/// `cc::Build::target` when compiling parsers.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
/// Value of the `BUILD_HOST` environment variable at compile time; passed to
/// `cc::Build::host` when compiling parsers.
const BUILD_HOST: &str = env!("BUILD_HOST");
110
/// A language configuration discovered by
/// [`Loader::find_language_configurations_at_path`].
pub struct LanguageConfiguration<'a> {
    /// Compiled `content-regex`; used to choose between several configurations that
    /// match the same file name.
    pub _content_regex: Option<Regex>,
    /// Compiled `first-line-regex`.
    pub first_line_regex: Option<Regex>,
    /// Compiled `injection-regex`; matched against injection strings.
    pub injection_regex: Option<Regex>,
    /// File-type keys this configuration is registered under.
    pub file_types: Vec<String>,
    /// The parser directory the configuration was found in.
    pub root_path: PathBuf,
    /// Highlight query file name(s), if configured.
    pub highlights_filenames: Option<Vec<PathBuf>>,
    /// Injection query file name(s), if configured.
    pub injections_filenames: Option<Vec<PathBuf>>,
    /// Locals query file name(s), if configured.
    pub locals_filenames: Option<Vec<PathBuf>>,
    /// Name of the language.
    pub language_name: String,
    /// Index into the loader's `languages_by_id` list.
    language_id: usize,
    /// Reference to the owning loader's highlight-name list.
    highlight_names: &'a Mutex<Vec<String>>,
    /// Copy of the loader's `use_all_highlight_names` flag at the time the
    /// configuration was created.
    use_all_highlight_names: bool,
}
125
/// Discovers tree-sitter grammars on disk, compiles them to dynamic libraries on
/// demand, and loads the resulting languages.
pub struct Loader {
    /// Directory that receives compiled libraries when no explicit output path is given.
    pub parser_lib_path: PathBuf,
    /// Per language: the grammar directory, the lazily loaded `Language`, and the
    /// optional external files.
    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
    /// Every configuration found so far. The `'static` lifetime is produced with a
    /// transmute in `find_language_configurations_at_path`; the references actually
    /// point at `highlight_names` below.
    language_configurations: Vec<LanguageConfiguration<'static>>,
    /// File-type key -> indices into `language_configurations`.
    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
    /// Index of the first configuration found by a call that passed
    /// `set_current_path_config = true`.
    language_configuration_in_current_path: Option<usize>,
    /// First-line regex source text -> indices into `language_configurations`.
    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
    /// Highlight names shared with every configuration; boxed, and referenced by the
    /// configurations stored above.
    highlight_names: Box<Mutex<Vec<String>>>,
    /// Starts as `true`; `configure_highlights` sets it to `false`.
    use_all_highlight_names: bool,
}
136
/// Inputs for compiling one parser into a dynamic library.
pub struct CompileConfig<'a> {
    /// The grammar's `src` directory.
    pub src_path: &'a Path,
    /// Include directories handed to the C compiler.
    pub header_paths: Vec<&'a Path>,
    /// Path of the parser source file handed to the C compiler.
    pub parser_path: PathBuf,
    /// Path of the external scanner source, if any; filled in by
    /// `Loader::load_language_at_path_with_name`.
    pub scanner_path: Option<PathBuf>,
    /// Additional files whose timestamps are considered when deciding to recompile.
    pub external_files: Option<&'a [PathBuf]>,
    /// Where to write the library; when `None`, the loader derives a path under its
    /// `parser_lib_path`.
    pub output_path: Option<PathBuf>,
    /// Preprocessor symbols to define (without a value) during compilation.
    pub flags: &'a [&'a str],
    /// Language name, used for the library file name and the exported symbol name.
    pub name: String,
}
147
148impl<'a> CompileConfig<'a> {
149    #[must_use]
150    pub fn new(
151        src_path: &'a Path,
152        externals: Option<&'a [PathBuf]>,
153        output_path: Option<PathBuf>,
154    ) -> Self {
155        Self {
156            src_path,
157            header_paths: vec![src_path],
158            parser_path: src_path.join("parser.c"),
159            scanner_path: None,
160            external_files: externals,
161            output_path,
162            flags: &[],
163            name: String::new(),
164        }
165    }
166}
167
// NOTE(review): `Loader` stores `once_cell::unsync::OnceCell` values in
// `languages_by_id`, and `language_for_id` initializes them through `&self`. Nothing
// visible in this file synchronizes that initialization, so this impl presumably relies
// on callers never loading languages from several threads at once — confirm.
unsafe impl Sync for Loader {}
169
170impl Loader {
171    pub fn new() -> Result<Self> {
172        let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
173            PathBuf::from(path)
174        } else {
175            if cfg!(target_os = "macos") {
176                let legacy_apple_path = etcetera::base_strategy::Apple::new()?
177                    .cache_dir() // `$HOME/Library/Caches/`
178                    .join("tree-sitter");
179                if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
180                    std::fs::remove_dir_all(legacy_apple_path)?;
181                }
182            }
183
184            etcetera::choose_base_strategy()?
185                .cache_dir()
186                .join("tree-sitter")
187                .join("lib")
188        };
189        Ok(Self::with_parser_lib_path(parser_lib_path))
190    }
191
192    #[must_use]
193    pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
194        Self {
195            parser_lib_path,
196            languages_by_id: Vec::new(),
197            language_configurations: Vec::new(),
198            language_configuration_ids_by_file_type: HashMap::new(),
199            language_configuration_in_current_path: None,
200            language_configuration_ids_by_first_line_regex: HashMap::new(),
201            highlight_names: Box::new(Mutex::new(Vec::new())),
202            use_all_highlight_names: true,
203        }
204    }
205
206    pub fn configure_highlights(&mut self, names: &[impl Borrow<str>]) {
207        self.use_all_highlight_names = false;
208        let mut highlights = self.highlight_names.lock().unwrap();
209        highlights.clear();
210        highlights.extend(names.iter().map(|s| s.borrow().to_owned()));
211    }
212
213    pub fn find_all_languages(&mut self, config: &Config) -> Result<()> {
214        // if config.parser_directories.is_empty() {
215        //     eprintln!("Warning: You have not configured any parser directories!");
216        //     eprintln!("Please run `tree-sitter init-config` and edit the resulting");
217        //     eprintln!("configuration file to indicate where we should look for");
218        //     eprintln!("language grammars.");
219        //     eprintln!();
220        // }
221        for parser_container_dir in &config.parser_directories {
222            if let Ok(entries) = fs::read_dir(parser_container_dir) {
223                for entry in entries {
224                    let entry = entry?;
225                    if let Some(parser_dir_name) = entry.file_name().to_str() {
226                        if parser_dir_name.starts_with("tree-sitter-") {
227                            self.find_language_configurations_at_path(
228                                &parser_container_dir.join(parser_dir_name),
229                                false,
230                            )
231                            .ok();
232                        }
233                    }
234                }
235            }
236        }
237        Ok(())
238    }
239
240    pub fn language_configuration_for_name(
241        &self,
242        name: &str,
243    ) -> Result<Option<(Language, &LanguageConfiguration<'_>)>> {
244        for configuration in &self.language_configurations {
245            if configuration.language_name == name {
246                let language = self.language_for_id(configuration.language_id)?;
247                return Ok(Some((language, configuration)));
248            }
249        }
250        Ok(None)
251    }
252
253    pub fn _language_configuration_for_file_name(
254        &self,
255        path: &Path,
256    ) -> Result<Option<(Language, &LanguageConfiguration<'_>)>> {
257        // Find all the language configurations that match this file name
258        // or a suffix of the file name.
259        let configuration_ids = path
260            .file_name()
261            .and_then(|n| n.to_str())
262            .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
263            .or_else(|| {
264                let mut path = path.to_owned();
265                let mut extensions = Vec::with_capacity(2);
266                while let Some(extension) = path.extension() {
267                    extensions.push(extension.to_str()?.to_string());
268                    path = PathBuf::from(path.file_stem()?.to_os_string());
269                }
270                extensions.reverse();
271                self.language_configuration_ids_by_file_type
272                    .get(&extensions.join("."))
273            });
274
275        if let Some(configuration_ids) = configuration_ids {
276            if !configuration_ids.is_empty() {
277                let configuration = if configuration_ids.len() == 1 {
278                    &self.language_configurations[configuration_ids[0]]
279                }
280                // If multiple language configurations match, then determine which
281                // one to use by applying the configurations' content regexes.
282                else {
283                    let file_contents =
284                        fs::read(path).with_context(|| format!("Failed to read path {path:?}"))?;
285                    let file_contents = String::from_utf8_lossy(&file_contents);
286                    let mut best_score = -2isize;
287                    let mut best_configuration_id = None;
288                    for configuration_id in configuration_ids {
289                        let config = &self.language_configurations[*configuration_id];
290
291                        // If the language configuration has a content regex, assign
292                        // a score based on the length of the first match.
293                        let score;
294                        if let Some(content_regex) = &config._content_regex {
295                            if let Some(mat) = content_regex.find(&file_contents) {
296                                score = (mat.end() - mat.start()) as isize;
297                            }
298                            // If the content regex does not match, then *penalize* this
299                            // language configuration, so that language configurations
300                            // without content regexes are preferred over those with
301                            // non-matching content regexes.
302                            else {
303                                score = -1;
304                            }
305                        } else {
306                            score = 0;
307                        }
308                        if score > best_score {
309                            best_configuration_id = Some(*configuration_id);
310                            best_score = score;
311                        }
312                    }
313
314                    &self.language_configurations[best_configuration_id.unwrap()]
315                };
316
317                let language = self.language_for_id(configuration.language_id)?;
318                return Ok(Some((language, configuration)));
319            }
320        }
321
322        Ok(None)
323    }
324
325    pub fn language_configuration_for_injection_string(
326        &self,
327        string: &str,
328    ) -> Result<Option<(Language, &LanguageConfiguration<'_>)>> {
329        let mut best_match_length = 0;
330        let mut best_match_position = None;
331        for (i, configuration) in self.language_configurations.iter().enumerate() {
332            if let Some(injection_regex) = &configuration.injection_regex {
333                if let Some(mat) = injection_regex.find(string) {
334                    let length = mat.end() - mat.start();
335                    if length > best_match_length {
336                        best_match_position = Some(i);
337                        best_match_length = length;
338                    }
339                }
340            }
341        }
342
343        if let Some(i) = best_match_position {
344            let configuration = &self.language_configurations[i];
345            let language = self.language_for_id(configuration.language_id)?;
346            Ok(Some((language, configuration)))
347        } else {
348            Ok(None)
349        }
350    }
351
352    fn language_for_id(&self, id: usize) -> Result<Language> {
353        let (path, language, externals) = &self.languages_by_id[id];
354        language
355            .get_or_try_init(|| {
356                let src_path = path.join("src");
357                self.load_language_at_path(CompileConfig::new(
358                    &src_path,
359                    externals.as_deref(),
360                    None,
361                ))
362            })
363            .cloned()
364    }
365
366    pub fn load_language_at_path(&self, mut config: CompileConfig<'_>) -> Result<Language> {
367        let grammar_path = config.src_path.join("grammar.json");
368        config.name = Self::grammar_json_name(&grammar_path)?;
369        self.load_language_at_path_with_name(config)
370    }
371
372    pub fn load_language_at_path_with_name(
373        &self,
374        mut config: CompileConfig<'_>,
375    ) -> Result<Language> {
376        let lib_name = config.name.to_string();
377        let language_fn_name = format!(
378            "tree_sitter_{}",
379            replace_dashes_with_underscores(&config.name)
380        );
381
382        if config.output_path.is_none() {
383            fs::create_dir_all(&self.parser_lib_path)?;
384        }
385
386        let mut recompile = config.output_path.is_some(); // if specified, always recompile
387
388        let output_path = config.output_path.unwrap_or_else(|| {
389            let mut path = self.parser_lib_path.join(lib_name);
390            path.set_extension(env::consts::DLL_EXTENSION);
391            path
392        });
393        config.output_path = Some(output_path.clone());
394
395        let parser_path = config.src_path.join("parser.c");
396        config.scanner_path = self.get_scanner_path(config.src_path);
397
398        let mut paths_to_check = vec![parser_path];
399
400        if let Some(scanner_path) = config.scanner_path.as_ref() {
401            paths_to_check.push(scanner_path.clone());
402        }
403
404        paths_to_check.extend(
405            config
406                .external_files
407                .unwrap_or_default()
408                .iter()
409                .map(|p| config.src_path.join(p)),
410        );
411
412        if !recompile {
413            recompile = needs_recompile(&output_path, &paths_to_check)
414                .with_context(|| "Failed to compare source and binary timestamps")?;
415        }
416
417        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
418            tempfile::tempdir()
419                .unwrap()
420                .path()
421                .join("tree-sitter")
422                .join("lock")
423                .join(format!("{}.lock", config.name))
424        } else {
425            etcetera::choose_base_strategy()?
426                .cache_dir()
427                .join("tree-sitter")
428                .join("lock")
429                .join(format!("{}.lock", config.name))
430        };
431
432        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
433            recompile = false;
434            if lock_file.try_lock_exclusive().is_err() {
435                // if we can't acquire the lock, another process is compiling the parser, wait for
436                // it and don't recompile
437                lock_file.lock_exclusive()?;
438                recompile = false;
439            } else {
440                // if we can acquire the lock, check if the lock file is older than 30 seconds, a
441                // run that was interrupted and left the lock file behind should not block
442                // subsequent runs
443                let time = lock_file.metadata()?.modified()?.elapsed()?.as_secs();
444                if time > 30 {
445                    fs::remove_file(&lock_path)?;
446                    recompile = true;
447                }
448            }
449        }
450
451        if recompile {
452            fs::create_dir_all(lock_path.parent().unwrap()).with_context(|| {
453                format!(
454                    "Failed to create directory {:?}",
455                    lock_path.parent().unwrap()
456                )
457            })?;
458            let lock_file = fs::OpenOptions::new()
459                .create(true)
460                .truncate(true)
461                .write(true)
462                .open(&lock_path)?;
463            lock_file.lock_exclusive()?;
464
465            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;
466
467            if config.scanner_path.is_some() {
468                self.check_external_scanner(&config.name, &output_path)?;
469            }
470        }
471
472        let library = unsafe { Library::new(&output_path) }
473            .with_context(|| format!("Error opening dynamic library {output_path:?}"))?;
474        let language = unsafe {
475            let language_fn = library
476                .get::<Symbol<'_, unsafe extern "C" fn() -> Language>>(language_fn_name.as_bytes())
477                .with_context(|| format!("Failed to load symbol {language_fn_name}"))?;
478            language_fn()
479        };
480        mem::forget(library);
481        Ok(language)
482    }
483
484    fn compile_parser_to_dylib(
485        &self,
486        config: &CompileConfig<'_>,
487        lock_file: &fs::File,
488        lock_path: &Path,
489    ) -> Result<()> {
490        let mut cc_config = cc::Build::new();
491        cc_config
492            .cargo_metadata(false)
493            .cargo_warnings(false)
494            .target(BUILD_TARGET)
495            .host(BUILD_HOST)
496            .file(&config.parser_path)
497            .includes(&config.header_paths)
498            .std("c11");
499
500        if let Some(scanner_path) = config.scanner_path.as_ref() {
501            cc_config.file(scanner_path);
502        }
503
504        cc_config.opt_level(2).extra_warnings(false);
505
506        for flag in config.flags {
507            cc_config.define(flag, None);
508        }
509
510        let compiler = cc_config.get_compiler();
511        let mut command = Command::new(compiler.path());
512        command.args(compiler.args());
513        for (key, value) in compiler.env() {
514            command.env(key, value);
515        }
516
517        let output_path = config.output_path.as_ref().unwrap();
518
519        if compiler.is_like_msvc() {
520            let out = format!("-out:{}", output_path.to_str().unwrap());
521            command.arg("-LD");
522            command.arg("-utf-8");
523            command.args(cc_config.get_files());
524            command.arg("-link").arg(out);
525        } else {
526            command.arg("-Werror=implicit-function-declaration");
527            if cfg!(any(target_os = "macos", target_os = "ios")) {
528                command.arg("-dynamiclib");
529                // TODO: remove when supported
530                command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
531            } else {
532                command.arg("-shared");
533            }
534            command.args(cc_config.get_files());
535            command.arg("-o").arg(output_path);
536        }
537
538        let output = command.output().with_context(|| {
539            format!("Failed to execute the C compiler with the following command:\n{command:?}")
540        })?;
541
542        FileExt::unlock(lock_file)?;
543        fs::remove_file(lock_path)?;
544
545        if output.status.success() {
546            Ok(())
547        } else {
548            Err(anyhow!(
549                "Parser compilation failed.\nStdout: {}\nStderr: {}",
550                String::from_utf8_lossy(&output.stdout),
551                String::from_utf8_lossy(&output.stderr)
552            ))
553        }
554    }
555
556    #[cfg(unix)]
557    fn check_external_scanner(&self, name: &str, library_path: &Path) -> Result<()> {
558        let prefix = if cfg!(any(target_os = "macos", target_os = "ios")) {
559            "_"
560        } else {
561            ""
562        };
563        let mut must_have = vec![
564            format!("{prefix}tree_sitter_{name}_external_scanner_create"),
565            format!("{prefix}tree_sitter_{name}_external_scanner_destroy"),
566            format!("{prefix}tree_sitter_{name}_external_scanner_serialize"),
567            format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"),
568            format!("{prefix}tree_sitter_{name}_external_scanner_scan"),
569        ];
570
571        let command = Command::new("nm")
572            .arg("-W")
573            .arg("-U")
574            .arg(library_path)
575            .output();
576        if let Ok(output) = command {
577            if output.status.success() {
578                let mut found_non_static = false;
579                for line in String::from_utf8_lossy(&output.stdout).lines() {
580                    if line.contains(" T ") {
581                        if let Some(function_name) =
582                            line.split_whitespace().collect::<Vec<_>>().get(2)
583                        {
584                            if !line.contains("tree_sitter_") {
585                                if !found_non_static {
586                                    found_non_static = true;
587                                    // eprintln!("Warning: Found non-static non-tree-sitter functions in the external scannner");
588                                }
589                                // eprintln!("  `{function_name}`");
590                            } else {
591                                must_have.retain(|f| f != function_name);
592                            }
593                        }
594                    }
595                }
596                // if found_non_static {
597                //     eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name");
598                // }
599
600                if !must_have.is_empty() {
601                    let missing = must_have
602                        .iter()
603                        .map(|f| format!("  `{f}`"))
604                        .collect::<Vec<_>>()
605                        .join("\n");
606
607                    return Err(anyhow!(format!(
608                        indoc! {"
609                            Missing required functions in the external scanner, parsing won't work without these!
610
611                            {}
612
613                            You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners
614                        "},
615                        missing,
616                    )));
617                }
618            }
619        }
620
621        Ok(())
622    }
623
    /// Windows stand-in for the external-scanner symbol check: performs no check and
    /// always returns `Ok(())`.
    #[cfg(windows)]
    fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> Result<()> {
        // TODO: there's no nm command on windows, whoever wants to implement this can and should :)

        // The symbols such a check would need to look for:
        // let mut must_have = vec![
        //     format!("tree_sitter_{name}_external_scanner_create"),
        //     format!("tree_sitter_{name}_external_scanner_destroy"),
        //     format!("tree_sitter_{name}_external_scanner_serialize"),
        //     format!("tree_sitter_{name}_external_scanner_deserialize"),
        //     format!("tree_sitter_{name}_external_scanner_scan"),
        // ];

        Ok(())
    }
638
    /// Registers the language configurations found in the grammar directory
    /// `parser_path` and returns the newly added ones.
    ///
    /// Configurations come from `<parser_path>/tree-sitter.json` when it can be read
    /// and parsed. If that adds nothing and `<parser_path>/src/grammar.json` exists, a
    /// minimal configuration is created from the grammar name alone.
    ///
    /// When `set_current_path_config` is true and no "current path" configuration is
    /// recorded yet, the first configuration added from `tree-sitter.json` is recorded.
    ///
    /// # Errors
    ///
    /// Fails if an external file path does not start with `parser_path`, or if the
    /// name cannot be read from `grammar.json` in the fallback case. Failures to read
    /// or parse `tree-sitter.json` are ignored.
    pub fn find_language_configurations_at_path(
        &mut self,
        parser_path: &Path,
        set_current_path_config: bool,
    ) -> Result<&[LanguageConfiguration<'_>]> {
        // Remember where the new configurations start so they can be returned as a slice.
        let initial_language_configuration_count = self.language_configurations.len();

        let ts_json = TreeSitterJSON::from_file(parser_path);
        if let Ok(config) = ts_json {
            // Languages at or after this index were added while processing this file.
            let language_count = self.languages_by_id.len();
            for grammar in config.grammars {
                // Determine the path to the parser directory. This can be specified in
                // the tree-sitter.json, but defaults to the directory containing the
                // tree-sitter.json.
                let language_path = parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));

                // Determine if a previous language configuration in this tree-sitter.json
                // file already uses the same language.
                let mut language_id = None;
                for (id, (path, _, _)) in
                    self.languages_by_id.iter().enumerate().skip(language_count)
                {
                    if language_path == *path {
                        language_id = Some(id);
                    }
                }

                // If not, add a new language path to the list.
                let language_id = if let Some(language_id) = language_id {
                    language_id
                } else {
                    self.languages_by_id.push((
                            language_path,
                            OnceCell::new(),
                            grammar.external_files.clone().into_vec().map(|files| {
                                files.into_iter()
                                    .map(|path| {
                                       let path = parser_path.join(path);
                                        // Reject paths that do not start with parser_path.
                                        // NOTE(review): `Path::starts_with` compares components
                                        // without resolving `..`, so a relative path containing
                                        // `..` still passes this check — confirm that is intended.
                                        if path.starts_with(parser_path) {
                                            Ok(path)
                                        } else {
                                            Err(anyhow!("External file path {path:?} is outside of parser directory {parser_path:?}"))
                                        }
                                    })
                                    .collect::<Result<Vec<_>>>()
                            }).transpose()?,
                        ));
                    self.languages_by_id.len() - 1
                };

                let configuration = LanguageConfiguration {
                    root_path: parser_path.to_path_buf(),
                    language_name: grammar.name,
                    language_id,
                    file_types: grammar.file_types.unwrap_or_default(),
                    _content_regex: Self::regex(grammar.content_regex.as_deref()),
                    first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
                    injection_regex: Self::regex(grammar.injection_regex.as_deref()),
                    injections_filenames: grammar.injections.into_vec(),
                    locals_filenames: grammar.locals.into_vec(),
                    highlights_filenames: grammar.highlights.into_vec(),
                    highlight_names: &self.highlight_names,
                    use_all_highlight_names: self.use_all_highlight_names,
                };

                // Index the configuration (by the position it is about to occupy) under
                // each of its file types and under its first-line regex source.
                for file_type in &configuration.file_types {
                    self.language_configuration_ids_by_file_type
                        .entry(file_type.to_string())
                        .or_default()
                        .push(self.language_configurations.len());
                }
                if let Some(first_line_regex) = &configuration.first_line_regex {
                    self.language_configuration_ids_by_first_line_regex
                        .entry(first_line_regex.to_string())
                        .or_default()
                        .push(self.language_configurations.len());
                }

                // SAFETY: NOTE(review) — the transmute only extends the lifetime of the
                // `highlight_names` reference, which points into the `Box` owned by this
                // same `Loader`. This presumably relies on that box never being replaced
                // or dropped while configurations exist — confirm.
                self.language_configurations.push(unsafe {
                    mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                        configuration,
                    )
                });

                if set_current_path_config && self.language_configuration_in_current_path.is_none()
                {
                    self.language_configuration_in_current_path =
                        Some(self.language_configurations.len() - 1);
                }
            }
        } else if let Err(e) = ts_json {
            // Both arms are currently no-ops: the warning for non-NotFound errors is
            // commented out, so every failure to load tree-sitter.json is silent.
            match e.downcast_ref::<std::io::Error>() {
                // This is noisy, and not really an issue.
                Some(e) if e.kind() == std::io::ErrorKind::NotFound => {}
                _ => {
                    // eprintln!(
                    //     "Warning: Failed to parse {} -- {e}",
                    //     parser_path.join("tree-sitter.json").display()
                    // );
                }
            }
        }

        // If we didn't find any language configurations in the tree-sitter.json file,
        // but there is a grammar.json file, then use the grammar file to form a simple
        // language configuration.
        if self.language_configurations.len() == initial_language_configuration_count
            && parser_path.join("src").join("grammar.json").exists()
        {
            let grammar_path = parser_path.join("src").join("grammar.json");
            let language_name = Self::grammar_json_name(&grammar_path)?;
            let configuration = LanguageConfiguration {
                root_path: parser_path.to_owned(),
                language_name,
                // Index of the language entry pushed at the end of this block.
                language_id: self.languages_by_id.len(),
                file_types: Vec::new(),
                _content_regex: None,
                first_line_regex: None,
                injection_regex: None,
                injections_filenames: None,
                locals_filenames: None,
                highlights_filenames: None,
                highlight_names: &self.highlight_names,
                use_all_highlight_names: self.use_all_highlight_names,
            };
            // SAFETY: NOTE(review) — same lifetime extension of the `highlight_names`
            // reference as for the tree-sitter.json configurations; confirm the box
            // outlives every stored configuration.
            self.language_configurations.push(unsafe {
                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                    configuration,
                )
            });
            self.languages_by_id
                .push((parser_path.to_owned(), OnceCell::new(), None));
        }

        Ok(&self.language_configurations[initial_language_configuration_count..])
    }
776
777    fn regex(pattern: Option<&str>) -> Option<Regex> {
778        pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
779    }
780
781    fn grammar_json_name(grammar_path: &Path) -> Result<String> {
782        let file = fs::File::open(grammar_path).with_context(|| {
783            format!("Failed to open grammar.json at {}", grammar_path.display())
784        })?;
785
786        let first_three_lines = BufReader::new(file)
787            .lines()
788            .take(3)
789            .collect::<Result<Vec<_>, _>>()
790            .with_context(|| {
791                format!(
792                    "Failed to read the first three lines of grammar.json at {}",
793                    grammar_path.display()
794                )
795            })?
796            .join("\n");
797
798        let name = GRAMMAR_NAME_REGEX
799            .captures(&first_three_lines)
800            .and_then(|c| c.get(1))
801            .ok_or_else(|| {
802                anyhow!(
803                    "Failed to parse the language name from grammar.json at {}",
804                    grammar_path.display()
805                )
806            })?;
807
808        Ok(name.as_str().to_string())
809    }
810
811    #[must_use]
812    pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
813        let path = src_path.join("scanner.c");
814        path.exists().then_some(path)
815    }
816}
817
818impl LanguageConfiguration<'_> {
819    pub fn highlight_config(
820        &self,
821        language: Language,
822        paths: Option<&[PathBuf]>,
823    ) -> syntastica_core::Result<Option<&'static mut HighlightConfiguration>> {
824        let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
825            Some(paths) => (
826                Some(
827                    paths
828                        .iter()
829                        .filter(|p| p.ends_with("highlights.scm"))
830                        .cloned()
831                        .collect::<Vec<_>>(),
832                ),
833                Some(
834                    paths
835                        .iter()
836                        .filter(|p| p.ends_with("tags.scm"))
837                        .cloned()
838                        .collect::<Vec<_>>(),
839                ),
840                Some(
841                    paths
842                        .iter()
843                        .filter(|p| p.ends_with("locals.scm"))
844                        .cloned()
845                        .collect::<Vec<_>>(),
846                ),
847            ),
848            None => (None, None, None),
849        };
850        let highlights_query = self
851            .read_queries(
852                if highlights_filenames.is_some() {
853                    highlights_filenames.as_deref()
854                } else {
855                    self.highlights_filenames.as_deref()
856                },
857                "highlights.scm",
858            )
859            .map_err(|err| syntastica_core::Error::Custom(err.to_string()))?;
860        let injections_query = self
861            .read_queries(
862                if injections_filenames.is_some() {
863                    injections_filenames.as_deref()
864                } else {
865                    self.injections_filenames.as_deref()
866                },
867                "injections.scm",
868            )
869            .map_err(|err| syntastica_core::Error::Custom(err.to_string()))?;
870        let locals_query = self
871            .read_queries(
872                if locals_filenames.is_some() {
873                    locals_filenames.as_deref()
874                } else {
875                    self.locals_filenames.as_deref()
876                },
877                "locals.scm",
878            )
879            .map_err(|err| syntastica_core::Error::Custom(err.to_string()))?;
880
881        if highlights_query.is_empty() {
882            Ok(None)
883        } else {
884            let mut result = HighlightConfiguration::new(
885                language,
886                &self.language_name,
887                &highlights_query,
888                &injections_query,
889                &locals_query,
890            )?;
891            let mut all_highlight_names = self.highlight_names.lock().unwrap();
892            if self.use_all_highlight_names {
893                for capture_name in result.query.capture_names() {
894                    if !all_highlight_names.iter().any(|x| x == capture_name) {
895                        all_highlight_names.push((*capture_name).to_string());
896                    }
897                }
898            }
899            result.configure(all_highlight_names.as_slice());
900            drop(all_highlight_names);
901            let result_ref = Box::leak(Box::new(result));
902            Ok(Some(result_ref))
903        }
904    }
905
906    #[allow(clippy::type_complexity)]
907    fn read_queries(&self, paths: Option<&[PathBuf]>, default_path: &str) -> Result<String> {
908        let mut query = String::new();
909        if let Some(paths) = paths {
910            for path in paths {
911                let abs_path = self.root_path.join(path);
912                query += &fs::read_to_string(&abs_path)
913                    .with_context(|| format!("Failed to read query file {path:?}"))?;
914            }
915        } else {
916            // highlights.scm is needed to test highlights, and tags.scm to test tags
917            // if default_path == "highlights.scm" || default_path == "tags.scm" {
918            //     eprintln!(
919            //         indoc! {"
920            //             Warning: you should add a `{}` entry pointing to the highlights path in the `tree-sitter` object in the grammar's tree-sitter.json file.
921            //             See more here: https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths
922            //         "},
923            //         default_path.replace(".scm", "")
924            //     );
925            // }
926            let queries_path = self.root_path.join("queries");
927            let path = queries_path.join(default_path);
928            if path.exists() {
929                query = fs::read_to_string(&path)
930                    .with_context(|| format!("Failed to read query file {path:?}"))?;
931            }
932        }
933
934        Ok(query)
935    }
936}
937
938fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result<bool> {
939    if !lib_path.exists() {
940        return Ok(true);
941    }
942    let lib_mtime =
943        mtime(lib_path).with_context(|| format!("Failed to read mtime of {lib_path:?}"))?;
944    for path in paths_to_check {
945        if mtime(path)? > lib_mtime {
946            return Ok(true);
947        }
948    }
949    Ok(false)
950}
951
/// Returns the last-modification time of the file system entry at `path`.
///
/// # Errors
///
/// Fails if the metadata cannot be read or if it does not provide a
/// modification time.
fn mtime(path: &Path) -> Result<SystemTime> {
    let metadata = fs::metadata(path)?;
    let modified = metadata.modified()?;
    Ok(modified)
}
955
/// Returns a copy of `name` in which every `-` is replaced by `_`; all other
/// characters are kept unchanged.
fn replace_dashes_with_underscores(name: &str) -> String {
    name.chars()
        .map(|ch| if ch == '-' { '_' } else { ch })
        .collect()
}