xtask/
add_lang.rs

1use std::{
2    env,
3    fs::{self, OpenOptions},
4    io::Write,
5    process::Command,
6    time::Duration,
7};
8
9use anyhow::{anyhow, Context, Result};
10use crates_io_api::SyncClient;
11use fancy_regex::Regex;
12use once_cell::sync::Lazy;
13use semver::{Version, VersionReq};
14use toml::Table;
15
16use crate::fetch_queries::fetch_query;
17
18pub fn run() -> Result<()> {
19    let group = env::args()
20        .nth(2)
21        .with_context(|| "missing group for `add-lang` task")?;
22    let name = env::args()
23        .nth(3)
24        .with_context(|| "missing name for `add-lang` task")?;
25    let url = env::args()
26        .nth(4)
27        .with_context(|| "missing url for `add-lang` task")?;
28    let path = env::args().nth(5);
29
30    let rev = get_rev(&url).with_context(|| "unable to fetch latest revision of repository")?;
31
32    let content_url = url_to_content_url(&url, &rev);
33    let path_in_url = match &path {
34        Some(path) => format!("/{path}"),
35        None => String::new(),
36    };
37
38    println!("info: found revision '{rev}'");
39    let external_c = content_url.as_ref().is_some_and(|url| {
40        reqwest::blocking::get(format!("{url}{path_in_url}/src/scanner.c"))
41            .is_ok_and(|response| response.status().is_success())
42    });
43    println!("info: found external C scanner: {external_c}");
44    let external_cpp = content_url.as_ref().is_some_and(|url| {
45        reqwest::blocking::get(format!("{url}{path_in_url}/src/scanner.cc"))
46            .is_ok_and(|response| response.status().is_success())
47    });
48    println!("info: found external C++ scanner: {external_cpp}");
49
50    let package = content_url
51        .as_ref()
52        .and_then(|url| try_get_package(url))
53        .unwrap_or_else(|| format!("tree-sitter-{}", name.replace('_', "-")));
54    println!("info: using package name '{package}'");
55
56    let crates_io = match try_get_crates_io_version(&package) {
57        Some(version) => format!("crates-io = \"{version}\""),
58        None => "# crates-io = \"\"".into(),
59    };
60    println!("info: found crates.io version: '{crates_io}'");
61
62    let mut queries_injections = false;
63    let mut queries_locals = false;
64    fs::create_dir_all(crate::WORKSPACE_DIR.join(format!("queries/{name}")))?;
65    fs::write(
66        crate::WORKSPACE_DIR.join(format!("queries/{name}/highlights.scm")),
67        "",
68    )?;
69    for kind in ["highlights", "injections", "locals"] {
70        let queries = fetch_query(&name, kind)?;
71        if let Some(text) = queries {
72            fs::write(
73                crate::WORKSPACE_DIR.join(format!("queries/{name}/{kind}.scm")),
74                text,
75            )?;
76            if kind == "injections" {
77                queries_injections = true;
78            }
79            if kind == "locals" {
80                queries_locals = true;
81            }
82        }
83    }
84
85    let langs_toml_path = crate::WORKSPACE_DIR.join("syntastica-macros/languages.toml");
86    let langs_toml = fs::read_to_string(&langs_toml_path)?;
87
88    let mut langs = langs_toml
89        .split("\n\n")
90        .map(ToString::to_string)
91        .collect::<Vec<_>>();
92    langs.push(format!(
93        r###"[[languages]]
94name = "{name}"
95group = "{group}"
96file-types = []
97[languages.parser]
98git = {{ url = "{url}", rev = "{rev}"{path} }}
99external-scanner = {{ c = {external_c}, cpp = {external_cpp} }}
100ffi-func = "tree_sitter_{name}"
101rust-const = "LANGUAGE"
102package = "{package}"
103{crates_io}
104[languages.queries]
105nvim-like = true
106injections = {queries_injections}
107locals = {queries_locals}"###,
108        path = match &path {
109            Some(path) => format!(", path = \"{path}\""),
110            None => String::new(),
111        }
112    ));
113    langs.sort_unstable_by_key(|lang| {
114        lang.split_once("name = \"")
115            .unwrap()
116            .1
117            .split_once('"')
118            .unwrap()
119            .0
120            .to_owned()
121    });
122    fs::write(&langs_toml_path, langs.join("\n\n"))?;
123
124    let mut queries_lib = OpenOptions::new()
125        .append(true)
126        .open(crate::WORKSPACE_DIR.join("syntastica-queries/src/lib.rs"))?;
127    write!(
128        queries_lib,
129        r###"
130pub const {name}_HIGHLIGHTS: &str = "";
131pub const {name}_INJECTIONS: &str = "";
132pub const {name}_LOCALS: &str = "";
133pub const {name}_HIGHLIGHTS_CRATES_IO: &str = "";
134pub const {name}_INJECTIONS_CRATES_IO: &str = "";
135pub const {name}_LOCALS_CRATES_IO: &str = "";
136"###,
137        name = name.to_uppercase()
138    )?;
139
140    let mut example_programs_toml = OpenOptions::new()
141        .append(true)
142        .open(crate::WORKSPACE_DIR.join("examples/example_programs.toml"))?;
143    writeln!(example_programs_toml, "\n{name} = '''\n'''")?;
144
145    Ok(())
146}
147
148pub fn get_rev(url: &str) -> Result<String> {
149    Ok(String::from_utf8(
150        Command::new("git")
151            .args(["ls-remote", url])
152            .output()?
153            .stdout,
154    )?
155    .lines()
156    .next()
157    .ok_or_else(|| anyhow!("output is empty"))?
158    .replace("HEAD", "")
159    .trim()
160    .to_owned())
161}
162
163static URL_REGEX: Lazy<Regex> =
164    Lazy::new(|| Regex::new(r"https:\/\/(github|gitlab)\.com\/([^\/]*)\/([^\/?#]*)").unwrap());
165
166pub fn url_to_content_url(url: &str, rev: &str) -> Option<String> {
167    match URL_REGEX.captures(url) {
168        Ok(Some(groups)) => match &groups[1] {
169            "github" => Some(format!(
170                "https://raw.githubusercontent.com/{}/{}/{rev}",
171                &groups[2], &groups[3],
172            )),
173            "gitlab" => Some(format!(
174                "https://gitlab.com/{}/{}/-/raw/{rev}",
175                &groups[2], &groups[3],
176            )),
177            _ => unreachable!("the regex only allows above options"),
178        },
179        _ => None,
180    }
181}
182
183fn try_get_package(content_url: &str) -> Option<String> {
184    let toml_str = reqwest::blocking::get(format!("{content_url}/Cargo.toml"))
185        .ok()?
186        .text()
187        .ok()?;
188    let toml = toml::from_str::<toml::map::Map<String, toml::Value>>(&toml_str).ok()?;
189    Some(
190        toml.get("package")?
191            .as_table()?
192            .get("name")?
193            .as_str()?
194            .to_owned(),
195    )
196}
197
198static CRATES_IO_CLIENT: Lazy<SyncClient> = Lazy::new(|| {
199    SyncClient::new(
200        "syntastica xtask (github.com/RubixDev/syntastica)",
201        Duration::from_millis(1200),
202    )
203    .unwrap()
204});
205
206static TREE_SITTER_VERSION: Lazy<Version> = Lazy::new(|| {
207    Version::parse(
208        toml::from_str::<Table>(
209            &fs::read_to_string(crate::WORKSPACE_DIR.join("Cargo.toml")).unwrap(),
210        )
211        .unwrap()["workspace"]["dependencies"]
212            .get("tree-sitter")
213            .map(|ts_dep| match ts_dep.as_str() {
214                Some(str) => str,
215                None => ts_dep["version"].as_str().unwrap(),
216            })
217            .unwrap(),
218    )
219    .unwrap()
220});
221
222pub fn try_get_crates_io_version(package: &str) -> Option<String> {
223    match CRATES_IO_CLIENT.get_crate(package) {
224        Ok(info) if is_compatible_tree_sitter(package, &info.versions.first()?.num) => {
225            Some(info.versions.first()?.num.clone())
226        }
227        _ => None,
228    }
229}
230
231fn is_compatible_tree_sitter(package: &str, version: &str) -> bool {
232    match CRATES_IO_CLIENT.crate_dependencies(package, version) {
233        Ok(deps) => deps.into_iter().any(|dep| {
234            dep.crate_id == "tree-sitter-language"
235                || (dep.crate_id == "tree-sitter"
236                    && VersionReq::parse(&dep.req)
237                        .is_ok_and(|req| req.matches(&TREE_SITTER_VERSION)))
238        }),
239        Err(_) => false,
240    }
241}