1use std::borrow::Cow;
2
3use heck::ToPascalCase;
4use once_cell::sync::Lazy;
5use proc_macro::TokenStream;
6use quote::{format_ident, quote};
7use quote_use::quote_use;
8use schema::*;
9
10mod schema;
11
12static LANGUAGE_CONFIG: Lazy<LanguageConfig> = Lazy::new(|| {
13 toml::from_str(include_str!("../languages.toml")).expect("invalid `languages.toml`")
14});
15
16#[proc_macro]
17pub fn parsers_git(_: TokenStream) -> TokenStream {
18 let mut dedup_ffi_funcs = LANGUAGE_CONFIG.languages.clone();
19 dedup_ffi_funcs.sort_unstable_by_key(|lang| lang.parser.ffi_func.clone());
20 dedup_ffi_funcs.dedup_by_key(|lang| lang.parser.ffi_func.clone());
21 dedup_ffi_funcs
22 .iter()
23 .map(|lang| {
24 let name = &lang.name;
25 let url = &lang.parser.git.url;
26 let rev = &lang.parser.git.rev;
27 let external_c = lang.parser.external_scanner.c;
28 let external_cpp = lang.parser.external_scanner.cpp;
29 let path = match &lang.parser.git.path {
30 Some(path) => quote_use! { Some(#path) },
31 None => quote_use! { None },
32 };
33 let wasm = lang.wasm;
34 let wasm_unknown = lang.wasm_unknown;
35 let generate = lang.parser.generate;
36 quote! {
37 #[cfg(feature = #name)]
38 compile_parser(#name, #url, #rev, #external_c, #external_cpp, #path, #wasm, #wasm_unknown, #generate)?;
39 }
40 })
41 .collect::<proc_macro2::TokenStream>()
42 .into()
43}
44
45fn not_wasm_cfg(lang: &Language) -> proc_macro2::TokenStream {
46 let raw_wasm_cfg = quote! { target_family = "wasm" };
47 let raw_wasm_unknown_cfg = quote! { all(target_arch = "wasm32", target_vendor = "unknown", target_os = "unknown", target_env = "") };
48 match (
49 lang.wasm,
50 lang.parser.external_scanner.cpp || !lang.wasm_unknown,
51 ) {
52 (false, _) => quote! { , not(#raw_wasm_cfg) },
53 (_, true) => quote! { , not(#raw_wasm_unknown_cfg) },
54 _ => quote! {},
55 }
56}
57
58#[proc_macro]
59pub fn parsers_ffi(_: TokenStream) -> TokenStream {
60 let mut dedup_ffi_funcs = LANGUAGE_CONFIG.languages.clone();
61 dedup_ffi_funcs.sort_unstable_by_key(|lang| lang.parser.ffi_func.clone());
62 dedup_ffi_funcs.dedup_by_key(|lang| lang.parser.ffi_func.clone());
63 let extern_c = dedup_ffi_funcs.iter().map(|lang| {
64 let name_str = &lang.name;
65 let ffi_func = format_ident!("{}", lang.parser.ffi_func);
66 let not_wasm_cfg = not_wasm_cfg(lang);
67 quote! {
68 #[cfg(all(feature = #name_str #not_wasm_cfg))]
69 fn #ffi_func() -> ::syntastica_core::language_set::Language;
70 }
71 });
72 let functions = LANGUAGE_CONFIG.languages.iter().map(|lang| {
73 let feat = lang.group.to_string();
74 let name = format_ident!("{}", lang.name);
75 let name_str = &lang.name;
76 let ffi_func = format_ident!("{}", lang.parser.ffi_func);
77 let doc = format!(
78 "Get the parser for [{}]({}/tree/{}). {}",
79 lang.name,
80 lang.parser.git.url,
81 lang.parser.git.rev,
82 match (
83 lang.wasm,
84 lang.parser.external_scanner.cpp || !lang.wasm_unknown
85 ) {
86 (false, _) => "(not supported on WebAssembly targets)",
87 (_, true) => "(not supported on the `wasm32-unknown-unknown` target)",
88 _ => "",
89 },
90 );
91 let not_wasm_cfg = not_wasm_cfg(lang);
93 quote! {
94 #[cfg(all(any(feature = #feat, feature = #name_str) #not_wasm_cfg))]
95 #[doc = #doc]
96 pub fn #name() -> ::syntastica_core::language_set::Language {
97 #[cfg(not(all(feature = "docs", doc)))]
98 unsafe { #ffi_func() }
99 #[cfg(all(feature = "docs", doc))]
100 ::std::unimplemented!()
101 }
102 }
103 });
104 parsers(
105 "syntastica_parsers_git",
106 functions,
107 |_| true,
108 Some(quote! {
109 #[cfg(not(all(feature = "docs", doc)))]
110 extern "C" {
111 #(#extern_c)*
112 }
113 }),
114 "",
115 )
116}
117
118#[proc_macro]
119pub fn parsers_gitdep(_: TokenStream) -> TokenStream {
120 parsers_rust("syntastica_parsers_gitdep", false, "")
121}
122
123#[proc_macro]
124pub fn parsers_dep(_: TokenStream) -> TokenStream {
125 parsers_rust("syntastica_parsers", true, "_CRATES_IO")
126}
127
128fn parsers_rust(crate_name: &str, crates_io: bool, query_suffix: &str) -> TokenStream {
129 let functions = LANGUAGE_CONFIG.languages.iter().map(|lang| {
130 let feat = lang.group.to_string();
131 let name = format_ident!("{}", lang.name);
132 let name_str = &lang.name;
133 let (doc, body) = match (&lang.parser.rust_const, &lang.parser.rust_func) {
134 (Some(ident), _) if lang.parser.supports(!crates_io) => {
135 let ident = format_ident!("{ident}");
136 let package = format_ident!("{}", lang.parser.package.replace('-', "_"));
137 (
138 format!(
139 "Get the parser for [{}]({}/tree/{}).",
140 lang.name, lang.parser.git.url, lang.parser.git.rev,
141 ),
142 quote! { ::syntastica_core::language_set::Language::new(#package::#ident) }
143 )
144 },
145 (_, Some(func)) if lang.parser.supports(!crates_io) => {
146 let func = format_ident!("{func}");
147 let package = format_ident!("{}", lang.parser.package.replace('-', "_"));
148 (
149 format!(
150 "Get the parser for [{}]({}/tree/{}).",
151 lang.name, lang.parser.git.url, lang.parser.git.rev,
152 ),
153 quote! { #package::#func() }
154 )
155 },
156 _ => (
157 "**This parser is not supported by this parser collection and thus this function will panic!**"
158 .to_owned(),
159 quote! { ::std::unimplemented!() }
160 ),
161 };
162 quote! {
163 #[cfg(any(feature = #feat, feature = #name_str))]
164 #[doc = #doc]
165 pub fn #name() -> ::syntastica_core::language_set::Language {
166 #body
167 }
168 }
169 });
170 parsers(
171 crate_name,
172 functions,
173 |lang| lang.parser.supports(!crates_io),
174 None,
175 query_suffix,
176 )
177}
178
179fn parsers(
180 crate_name: &str,
181 functions: impl Iterator<Item = proc_macro2::TokenStream>,
182 filter: impl Fn(&&Language) -> bool,
183 extra: Option<proc_macro2::TokenStream>,
184 query_suffix: &str,
185) -> TokenStream {
186 let list = LANGUAGE_CONFIG
187 .languages
188 .iter()
189 .filter(&filter)
190 .map(|lang| {
191 let name_str = &lang.name;
192 let variant = format_ident!("{}", lang.name.to_pascal_case());
193 let not_wasm_cfg = not_wasm_cfg(lang);
194 quote! { #[cfg(all(feature = #name_str #not_wasm_cfg))] Lang::#variant }
195 });
196 let names_list = LANGUAGE_CONFIG
197 .languages
198 .iter()
199 .filter(&filter)
200 .map(|lang| {
201 let name_str = &lang.name;
202 let not_wasm_cfg = not_wasm_cfg(lang);
203 quote! { #[cfg(all(feature = #name_str #not_wasm_cfg))] #name_str }
204 });
205 let file_types = LANGUAGE_CONFIG
206 .languages
207 .iter()
208 .filter(&filter)
209 .flat_map(|lang| {
210 let name_str = &lang.name;
211 let variant = format_ident!("{}", lang.name.to_pascal_case());
212 let not_wasm_cfg = not_wasm_cfg(lang);
213 lang.file_types.iter().map(move |ft| {
214 let ft = format_ident!("{ft:?}");
215 quote! {
216 #[cfg(all(feature = #name_str #not_wasm_cfg))]
217 (::syntastica_core::language_set::FileType::#ft, Lang::#variant)
218 }
219 })
220 });
221 let mut langs_sorted_by_group = LANGUAGE_CONFIG.languages.clone();
222 langs_sorted_by_group.sort_by_key(|lang| lang.group);
223 let func_map = langs_sorted_by_group.iter().filter(&filter).map(|lang| {
224 let name_str = &lang.name;
225 let variant = format_ident!("{}", lang.name.to_pascal_case());
226 let not_wasm_cfg = not_wasm_cfg(lang);
227 quote! {
228 #[cfg(all(feature = #name_str #not_wasm_cfg))]
229 {
230 _map.insert(Lang::#variant, _idx);
231 _idx += 1;
232 }
233 }
234 });
235 let funcs = langs_sorted_by_group.iter().filter(&filter).map(|lang| {
236 let name = format_ident!("{}", lang.name);
237 let name_str = &lang.name;
238 let not_wasm_cfg = not_wasm_cfg(lang);
239 quote! { #[cfg(all(feature = #name_str #not_wasm_cfg))] #name }
240 });
241 let queries = langs_sorted_by_group.iter().filter(&filter).map(|lang| {
242 let name_str = &lang.name;
243 let highlights = format_ident!("{}_HIGHLIGHTS{query_suffix}", lang.name.to_uppercase());
244 let injections = format_ident!("{}_INJECTIONS{query_suffix}", lang.name.to_uppercase());
245 let locals = format_ident!("{}_LOCALS{query_suffix}", lang.name.to_uppercase());
246 let not_wasm_cfg = not_wasm_cfg(lang);
247
248 quote! { #[cfg(all(feature = #name_str #not_wasm_cfg))] [
249 ::syntastica_queries::#highlights,
250 ::syntastica_queries::#injections,
251 ::syntastica_queries::#locals,
252 ] }
253 });
254 let lang_enum_example_use = format!("use {crate_name}::{{Lang, LANGUAGES, LANGUAGE_NAMES}};");
255 let lang_enum = LANGUAGE_CONFIG
256 .languages
257 .iter()
258 .filter(&filter)
259 .map(|lang| {
260 let feat = lang.group.to_string();
261 let name_str = &lang.name;
262 let ft_support = if lang.file_types.is_empty() {
263 Cow::Borrowed("supports no file types")
264 } else {
265 format!(
266 "supports these file types: {}",
267 lang.file_types
268 .iter()
269 .map(|ft| format!(
270 "[`{ft}`](::syntastica_core::language_set::FileType::{ft:?})"
271 ))
272 .collect::<Vec<_>>()
273 .join(", ")
274 )
275 .into()
276 };
277 let doc = format!("Provides the [`{name_str}`] language, {ft_support}.");
278 let variant = format_ident!("{}", lang.name.to_pascal_case());
279 let not_wasm_cfg = not_wasm_cfg(lang);
280 quote! {
281 #[doc = #doc]
282 #[cfg(all(any(feature = #feat, feature = #name_str) #not_wasm_cfg))]
283 #variant
284 }
285 });
286 let lang_get_match = LANGUAGE_CONFIG
287 .languages
288 .iter()
289 .filter(&filter)
290 .map(|lang| {
291 let name = format_ident!("{}", lang.name);
292 let name_str = &lang.name;
293 let variant = format_ident!("{}", lang.name.to_pascal_case());
294 let not_wasm_cfg = not_wasm_cfg(lang);
295 quote! {
296 #[cfg(all(feature = #name_str #not_wasm_cfg))]
297 Self::#variant => #name(),
298 }
299 });
300 let lang_set_type = quote! { type Language = Lang; };
301 let cfg_test = quote! { #[cfg(test)] };
302 let lang_tests = LANGUAGE_CONFIG.languages.iter().filter(&filter).map(|lang| {
303 let name = format_ident!("{}", lang.name);
304 let name_str = &lang.name;
305 let variant = format_ident!("{}", lang.name.to_pascal_case());
306 let not_wasm_cfg = not_wasm_cfg(lang);
307 quote! {
308 #[test]
309 #[cfg(all(feature = #name_str #not_wasm_cfg))]
310 fn #name() {
311 assert_eq!(crate::#name(), crate::Lang::#variant.get());
312 assert!(::syntastica_core::language_set::LanguageSet::get_language(&crate::LanguageSetImpl::new(), crate::Lang::#variant).is_ok());
313 }
314 }
315 });
316
317 quote_use! {
318 # use std::{borrow::Cow, collections::HashMap};
319
320 # use syntastica_core::{
321 language_set::{HighlightConfiguration, LanguageSet, Language, FileType, SupportedLanguage},
322 Error, Result,
323 theme::THEME_KEYS,
324 };
325 # use once_cell::sync::{Lazy, OnceCell};
326
327 #extra
328
329 pub const LANGUAGES: &[Lang] = &[#(#list),*];
331 const LANG_COUNT: usize = LANGUAGES.len();
332
333 pub const LANGUAGE_NAMES: &[&str] = &[#(#names_list),*];
335
336 #(#functions)*
337
338 static FILE_TYPE_MAP: Lazy<HashMap<FileType, Lang>>
340 = Lazy::new(|| HashMap::from([#(#file_types),*]));
341
342 static IDX_MAP: Lazy<HashMap<Lang, usize>> = Lazy::new(|| {
344 let mut _map = HashMap::new();
345 let mut _idx = 0;
346 #(#func_map)*
347 _map
348 });
349
350 const QUERIES: &[[&str; 3]] = &[#(#queries),*];
351 const FUNCS: &[fn() -> Language] = &[#(#funcs),*];
352
353 #[doc = #lang_enum_example_use]
380 #[non_exhaustive]
401 #[derive(
402 Debug,
403 Clone,
404 Copy,
405 PartialEq,
406 Eq,
407 Hash,
408 ::strum::Display,
409 ::strum::AsRefStr,
410 ::strum::IntoStaticStr,
411 ::strum::EnumString,
412 )]
413 #[strum(serialize_all = "snake_case", use_phf)]
414 pub enum Lang {
415 #(#lang_enum),*
416 }
417
418 impl Lang {
419 pub fn get(&self) -> Language {
422 match self {
423 #(#lang_get_match)*
424 _ => unreachable!("all variants are matched")
425 }
426 }
427
428 pub fn get_config(&self) -> Result<HighlightConfiguration> {
431 let idx = IDX_MAP[self];
432 let lang = FUNCS[idx]();
433 let mut conf = HighlightConfiguration::new(
434 lang,
435 LANGUAGE_NAMES[idx],
436 QUERIES[idx][0],
437 QUERIES[idx][1],
438 QUERIES[idx][2],
439 )?;
440 conf.configure(THEME_KEYS);
441 Ok(conf)
442 }
443
444 pub fn highlights_query(&self) -> &'static str {
446 let idx = IDX_MAP[self];
447 QUERIES[idx][0]
448 }
449
450 pub fn injections_query(&self) -> &'static str {
452 let idx = IDX_MAP[self];
453 QUERIES[idx][1]
454 }
455
456 pub fn locals_query(&self) -> &'static str {
458 let idx = IDX_MAP[self];
459 QUERIES[idx][2]
460 }
461 }
462
463 impl<S> SupportedLanguage<'_, S> for Lang {
464 fn name(&self) -> Cow<'_, str> {
465 Cow::Borrowed(self.into())
466 }
467
468 fn for_name(name: impl AsRef<str>, _set: &S) -> Result<Self> {
469 <Self as ::std::str::FromStr>::from_str(name.as_ref())
470 .map_err(|_| Error::UnsupportedLanguage(name.as_ref().to_owned()))
471 }
472
473 fn for_file_type(file_type: FileType, _set: &S) -> Option<Self> {
474 FILE_TYPE_MAP
475 .get(&file_type)
476 .map(|name| (*name).into())
477 }
478 }
479
480 pub struct LanguageSetImpl([OnceCell<HighlightConfiguration>; LANG_COUNT]);
487
488 impl LanguageSet<'_> for LanguageSetImpl {
489 #lang_set_type
490
491 fn get_language(&self, language: Self::Language) -> Result<&HighlightConfiguration> {
492 let idx = IDX_MAP[&language];
493 self.0[idx].get_or_try_init(|| language.get_config())
494 }
495 }
496
497 impl LanguageSetImpl {
498 pub fn new() -> Self {
500 #[allow(clippy::declare_interior_mutable_const)]
501 const INIT: OnceCell<HighlightConfiguration> = OnceCell::new();
502 Self([INIT; LANG_COUNT])
503 }
504
505 pub fn preload(&self, languages: &[Lang]) -> Result<()> {
509 for lang in languages {
510 let idx = IDX_MAP[lang];
511 let entry = &self.0[idx];
512 if entry.get().is_none() {
513 drop(entry.set(lang.get_config()?));
514 }
515 }
516 Ok(())
517 }
518
519 pub fn preload_all(&self) -> Result<()> {
523 self.preload(LANGUAGES)
524 }
525 }
526
527 impl Default for LanguageSetImpl {
528 fn default() -> Self {
529 Self::new()
530 }
531 }
532
533 #cfg_test
534 mod tests {
535 #(#lang_tests)*
536 }
537 }
538 .into()
539}
540
541#[proc_macro]
542pub fn queries_test(_: TokenStream) -> TokenStream {
543 LANGUAGE_CONFIG
544 .languages
545 .iter()
546 .map(|lang| {
547 let name = format_ident!("{}", lang.name);
548 let highlights = format_ident!("{}_HIGHLIGHTS", lang.name.to_uppercase());
549 let injections = format_ident!("{}_INJECTIONS", lang.name.to_uppercase());
550 let locals = format_ident!("{}_LOCALS", lang.name.to_uppercase());
551 quote! {
552 #[test]
553 fn #name() {
554 let lang = ::syntastica_parsers_git::#name();
555 validate_query(&lang, ::syntastica_queries::#highlights, "highlights");
556 validate_query(&lang, ::syntastica_queries::#injections, "injections");
557 validate_query(&lang, ::syntastica_queries::#locals, "locals");
558 }
559 }
560 })
561 .collect::<proc_macro2::TokenStream>()
562 .into()
563}
564
565#[proc_macro]
566pub fn queries_test_crates_io(_: TokenStream) -> TokenStream {
567 LANGUAGE_CONFIG
568 .languages
569 .iter()
570 .filter(|lang| lang.parser.supports_dep())
571 .map(|lang| {
572 let name = format_ident!("{}", lang.name);
573 let highlights = format_ident!("{}_HIGHLIGHTS_CRATES_IO", lang.name.to_uppercase());
574 let injections = format_ident!("{}_INJECTIONS_CRATES_IO", lang.name.to_uppercase());
575 let locals = format_ident!("{}_LOCALS_CRATES_IO", lang.name.to_uppercase());
576 quote! {
577 #[test]
578 fn #name() {
579 let lang = ::syntastica_parsers::#name();
580 validate_query(&lang, ::syntastica_queries::#highlights, "highlights");
581 validate_query(&lang, ::syntastica_queries::#injections, "injections");
582 validate_query(&lang, ::syntastica_queries::#locals, "locals");
583 }
584 }
585 })
586 .collect::<proc_macro2::TokenStream>()
587 .into()
588}
589
/// Expands to the definition of the `#[repr(C)]` struct `LangInfo`.
///
/// The struct holds C string pointers for a language's name, file types and queries, the
/// number of file types, and a value of a type named `Language`, which must be in scope at
/// the call site.
#[cfg(feature = "js")]
#[proc_macro]
pub fn js_lang_info(_: TokenStream) -> TokenStream {
    let definition: proc_macro2::TokenStream = quote_use! {
        #use core::ffi::c_char;
        #[repr(C)]
        pub struct LangInfo {
            name: *const c_char,
            // pointer to the first of `file_types_len` C strings
            file_types: *const *const c_char,
            file_types_len: usize,
            language: Language,
            highlights_query: *const c_char,
            injections_query: *const c_char,
            locals_query: *const c_char,
        }
    };
    TokenStream::from(definition)
}
609
/// Expands to the exported `syntastica_lang_<name>` function for a single language.
///
/// The macro input is a string literal naming a language from `languages.toml`; expansion
/// panics if no such language is defined. The generated function allocates a `LangInfo`
/// with `malloc`, fills it with the language's name, file types, parser and queries, and
/// returns the pointer. The query strings are copied into NUL-terminated `malloc` buffers.
/// The expansion contains no matching `free` for these allocations.
///
/// `LangInfo` and `Language` must be in scope at the call site.
#[cfg(feature = "js")]
#[proc_macro]
pub fn js_lang_lib(input: TokenStream) -> TokenStream {
    let lang_name = syn::parse_macro_input!(input as syn::LitStr).value();
    let lang = match LANGUAGE_CONFIG
        .languages
        .iter()
        .find(|lang| lang.name == lang_name)
    {
        Some(lang) => lang,
        None => panic!("language '{lang_name}' is not defined"),
    };

    let export_fn = format_ident!("syntastica_lang_{lang_name}");
    let parser_fn = format_ident!("{}", &lang.parser.ffi_func);
    // Name and file types are interpolated into the expansion as C strings.
    let c_name = std::ffi::CString::new(lang_name.as_str()).unwrap();
    let c_file_types = lang
        .file_types
        .iter()
        .map(|ft| std::ffi::CString::new(ft.as_ref()).unwrap());
    let upper_name = lang_name.to_uppercase();
    let highlights = format_ident!("{}_HIGHLIGHTS", upper_name);
    let injections = format_ident!("{}_INJECTIONS", upper_name);
    let locals = format_ident!("{}_LOCALS", upper_name);

    let expansion: proc_macro2::TokenStream = quote_use! {
        #use core::ffi::{c_char, c_void};

        extern "C" {
            fn malloc(size: usize) -> *mut c_void;
            fn memcpy(dest: *mut c_void, src: *const c_void, count: usize) -> *mut c_void;
            fn #parser_fn() -> Language;
        }

        // Copies `str` into a freshly allocated buffer and appends the NUL terminator.
        fn str_to_cstr(str: &'static str) -> *const c_char {
            let ptr = unsafe { malloc(str.len() + 1) };
            unsafe { memcpy(ptr, str.as_ptr() as *const _, str.len()) };
            unsafe { (ptr as *mut c_char).add(str.len()).write(0) }
            ptr as *const _
        }

        #[no_mangle]
        pub fn #export_fn() -> *mut LangInfo {
            let ptr = unsafe { malloc(::core::mem::size_of::<LangInfo>()) } as *mut LangInfo;
            const NAME: *const c_char = #c_name.as_ptr();
            const FILE_TYPES: &[*const c_char] = &[#(#c_file_types.as_ptr()),*];
            let info = LangInfo {
                name: NAME,
                file_types: FILE_TYPES.as_ptr(),
                file_types_len: FILE_TYPES.len(),
                language: unsafe { #parser_fn() },
                highlights_query: str_to_cstr(::syntastica_queries::#highlights),
                injections_query: str_to_cstr(::syntastica_queries::#injections),
                locals_query: str_to_cstr(::syntastica_queries::#locals),
            };
            unsafe { ptr.write(info) };
            ptr
        }
    };
    TokenStream::from(expansion)
}
667
/// Expands to a single `compile_parser(...)?;` statement for the named language.
///
/// The macro input is a string literal naming a language from `languages.toml`; expansion
/// panics if no such language is defined. The expansion calls a function named
/// `compile_parser` and uses `?`, so both must be valid at the call site.
#[cfg(feature = "js")]
#[proc_macro]
pub fn js_lang_build(input: TokenStream) -> TokenStream {
    let lang_name = syn::parse_macro_input!(input as syn::LitStr).value();
    let lang = match LANGUAGE_CONFIG
        .languages
        .iter()
        .find(|lang| lang.name == lang_name)
    {
        Some(lang) => lang,
        None => panic!("language '{lang_name}' is not defined"),
    };

    let git = &lang.parser.git;
    let (url, rev) = (&git.url, &git.rev);
    let scanner_c = lang.parser.external_scanner.c;
    let scanner_cpp = lang.parser.external_scanner.cpp;
    // The optional sub-path is passed on as an `Option` expression.
    let sub_path = if let Some(sub_path) = &git.path {
        quote_use! { Some(#sub_path) }
    } else {
        quote_use! { None }
    };
    let wasm = lang.wasm;
    let wasm_unknown = lang.wasm_unknown;
    let generate = lang.parser.generate;

    let call = quote! {
        compile_parser(#lang_name, #url, #rev, #scanner_c, #scanner_cpp, #sub_path, #wasm, #wasm_unknown, #generate)?;
    };
    call.into()
}
693}