lua_pattern/lib.rs
1#![doc = include_str!("../README.md")]
2//! ## Usage
3//! - Lua patterns can be parsed to a tree with [`parse`].
4//! - Parsed patterns can be converted to regex strings with [`try_to_regex`].
5//!
6//! For example:
7//! ```
8//! use lua_pattern::{Class, PatternObject};
9//!
10//! let tree = lua_pattern::parse("%l").unwrap();
11//! assert_eq!(tree, [PatternObject::Class(Class::Lowercase)]);
12//! #[cfg(feature = "to-regex")]
13//! assert_eq!(
14//! lua_pattern::try_to_regex(&tree, false, false).unwrap(),
15//! "[a-z]"
16//! );
17//! ```
18#![cfg_attr(
19 feature = "docs",
20 cfg_attr(doc, doc = ::document_features::document_features!(feature_label = r#"<span class="stab portability"><code>{feature}</code></span>"#))
21)]
22#![cfg_attr(all(doc, CHANNEL_NIGHTLY), feature(doc_auto_cfg))]
23#![warn(rust_2018_idioms)]
24#![deny(missing_docs)]
25
26mod error;
27mod lexer;
28mod parser;
29
30#[cfg(feature = "to-regex")]
31mod to_regex;
32
33use std::fmt::{self, Display, Formatter};
34
35pub use error::*;
36use lexer::Lexer;
37use parser::Parser;
38
39#[cfg(feature = "to-regex")]
40pub use to_regex::*;
41
42///////////////
43// Functions //
44///////////////
45
46/// Parse the given input string as a Lua pattern.
47///
48/// # Returns
49/// This function returns a vector of [`PatternObject`]s if parsing was successful, or an [`Error`]
50/// if the pattern could not be parsed.
51///
52/// # Errors
53/// To see the possible errors, have a look at [`Error`].
54pub fn parse(pattern: impl AsRef<str>) -> Result<Pattern> {
55 Parser::parse(Lexer::lex(pattern.as_ref())?)
56}
57
58///////////
59// Types //
60///////////
61
62/// A list of [`PatternObject`]s, representing an entire Lua pattern.
63pub type Pattern = Vec<PatternObject>;
64
65/// A single object of a Lua pattern.
66#[derive(Debug, Clone, PartialEq, Eq)]
67pub enum PatternObject {
68 /// Match any character (`.`).
69 Any,
70 /// Match the start of the string (`^`).
71 Start,
72 /// Match the end of the string (`$`).
73 End,
74
75 /// A sequence of characters to match literally (eg. `Hello, World!`).
76 String(String),
77 /// A [`PatternObject`] followed by a [`Quantifier`] (eg. `a?`, `.*`).
78 Quantifier(Quantifier, Box<PatternObject>),
79 /// An escaped character to match literally (eg. `%%`).
80 Escaped(char),
81 /// A [character class](Class) (eg. `%w`, `%L`).
82 Class(Class),
83 /// A reference to a previous capture group (eg. `%1`).
84 CaptureRef(u8),
85 /// A balanced pattern (eg. `%bxy`). Matches all characters starting at `x` until the
86 /// corresponding `y`.
87 Balanced(char, char),
88 /// A frontier pattern (eg. `%f[a-z]`). Matches if the following character matches the set and
89 /// the previous character does not match the set. The `bool` indicated whether the set is
90 /// inverted.
91 Frontier(bool, Vec<SetPatternObject>),
92
93 /// A capture group with a numeric ID and the contained [`Pattern`] (eg. `(a)`).
94 Capture(u8, Pattern),
95 /// A set of [`SetPatternObject`]s (eg. `[a-z_%u]`, `[^a-z_%u]`), the `bool` specifies whether
96 /// the set is inverted. If the set is _not_ inverted, it matches if any of the contained
97 /// entries matches. Otherwise, it matches if none of the contained entries match.
98 Set(bool, Vec<SetPatternObject>),
99}
100
101/// An entry of a [set](PatternObject::Set).
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub enum SetPatternObject {
104 /// A character to match literally (eg. `a`).
105 Char(char),
106 /// An escaped character to match literally (eg. `%%`, `%]`).
107 Escaped(char),
108 /// A range of characters (eg. `a-z`). Matches if any character in the range matches.
109 Range(char, char),
110 /// A [character class](Class) (eg. `%w`, `%L`).
111 Class(Class),
112}
113
/// A quantifier, specifying the amount of times the leading [`PatternObject`] can occur.
///
/// This is a small `Copy` enum that also implements `Hash`, so it can be used directly as a key
/// in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Quantifier {
    /// Allow zero or more occurrences, taking the longest matching sequence (`*`).
    ZeroOrMore,
    /// Allow one or more occurrences, taking the longest matching sequence (`+`).
    OneOrMore,
    /// Allow zero or more occurrences, taking the shortest matching sequence (`-`).
    ZeroOrMoreLazy,
    /// Allow zero or one occurrences (`?`).
    ZeroOrOne,
}
126
/// A character class, matching any character contained in the class.
///
/// Every class written with a lowercase letter (`%a`, `%d`, ...) has a counterpart written with
/// the corresponding uppercase letter (`%A`, `%D`, ...) that matches the complement.
///
/// This is a small `Copy` enum that also implements `Hash`, so it can be used directly as a key
/// in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Class {
    /// Matches any letter; equivalent to `[a-zA-Z]` (`%a`).
    Letters,
    /// Matches any control character; equivalent to `[\0-\31]` (`%c`).
    Controls,
    /// Matches any digit; equivalent to `[0-9]` (`%d`).
    Digits,
    /// Matches any printable character except space; equivalent to `[\33-\126]` (`%g`).
    Printable,
    /// Matches any lowercase letter; equivalent to `[a-z]` (`%l`).
    Lowercase,
    // NOTE(review): the set documented below omits `:;<=>?@`, which C's `ispunct` (the function
    // Lua's `%p` is based on) also treats as punctuation. Confirm against the regex conversion.
    /// Matches any punctuation character; equivalent to ``[!"#$%&'()*+,-./[\%]^_`{|}~]`` (`%p`).
    Punctuations,
    /// Matches any whitespace character; equivalent to `[ \t\n\v\f\r]` (`%s`).
    Spaces,
    /// Matches any uppercase letter; equivalent to `[A-Z]` (`%u`).
    Uppercase,
    /// Matches any alphanumeric character (digit or letter); equivalent to `[a-zA-Z0-9]` (`%w`).
    Alphanumerics,
    /// Matches any hexadecimal digit; equivalent to `[0-9a-fA-F]` (`%x`).
    Hexadecimals,
    /// Matches the NULL character / `0` byte; equivalent to `\0` (`%z`).
    ZeroByte,

    /// Matches any character, **except** all letters; equivalent to `[^a-zA-Z]` (`%A`).
    NotLetters,
    /// Matches any character, **except** all control characters; equivalent to `[^\0-\31]` (`%C`).
    NotControls,
    /// Matches any character, **except** all digits; equivalent to `[^0-9]` (`%D`).
    NotDigits,
    /// Matches any character, **except** all printable characters, but including space; equivalent
    /// to `[^\33-\126]` (`%G`).
    NotPrintable,
    /// Matches any character, **except** all lowercase letters; equivalent to `[^a-z]` (`%L`).
    NotLowercase,
    /// Matches any character, **except** all punctuation characters; equivalent to
    /// ``[^!"#$%&'()*+,-./[\%]^_`{|}~]`` (`%P`).
    NotPunctuations,
    /// Matches any character, **except** all whitespace characters; equivalent to `[^ \t\n\v\f\r]`
    /// (`%S`).
    NotSpaces,
    /// Matches any character, **except** all uppercase letters; equivalent to `[^A-Z]` (`%U`).
    NotUppercase,
    /// Matches any character, **except** all alphanumeric characters (digits and letters);
    /// equivalent to `[^a-zA-Z0-9]` (`%W`).
    NotAlphanumerics,
    /// Matches any character, **except** all hexadecimal digits; equivalent to `[^0-9a-fA-F]`
    /// (`%X`).
    NotHexadecimals,
    /// Matches any character, **except** the NULL character / `0` byte; equivalent to `[^\0]`
    /// (`%Z`).
    NotZeroByte,
}
182
183/// A token as used by the internal lexer. Exposed to the public API for use in [`Error`]s.
184#[derive(Debug, Clone, Copy, PartialEq, Eq)]
185pub enum Token {
186 /// `^`
187 Start,
188 /// `$`
189 End,
190 /// `.`
191 Any,
192 /// `*`
193 ZeroOrMore,
194 /// `+`
195 OneOrMore,
196 /// `-`
197 ZeroOrMoreLazy,
198 /// `?`
199 ZeroOrOne,
200 /// `^` in a set
201 Inverse,
202
203 /// `(`
204 LParen,
205 /// `)`
206 RParen,
207 /// `[`
208 LBrack,
209 /// `]`
210 RBrack,
211
212 /// A literal character
213 Char(char),
214 /// An escaped character
215 Escaped(char),
216 /// A character class
217 Class(Class),
218 /// A reference to a previous capture group
219 CaptureRef(u8),
220 /// `%b`
221 Balanced(char, char),
222 /// `%f`
223 Frontier,
224
225 /// End of file
226 Eof,
227}
228
229///////////////////////////
230// Trait implementations //
231///////////////////////////
232
233impl TryFrom<Token> for Quantifier {
234 type Error = ();
235
236 fn try_from(value: Token) -> std::result::Result<Self, Self::Error> {
237 match value {
238 Token::ZeroOrMore => Ok(Self::ZeroOrMore),
239 Token::OneOrMore => Ok(Self::OneOrMore),
240 Token::ZeroOrMoreLazy => Ok(Self::ZeroOrMoreLazy),
241 Token::ZeroOrOne => Ok(Self::ZeroOrOne),
242 _ => Err(()),
243 }
244 }
245}
246
247impl TryFrom<char> for Class {
248 type Error = ();
249
250 fn try_from(value: char) -> std::result::Result<Self, Self::Error> {
251 match value {
252 'a' => Ok(Self::Letters),
253 'c' => Ok(Self::Controls),
254 'd' => Ok(Self::Digits),
255 'g' => Ok(Self::Printable),
256 'l' => Ok(Self::Lowercase),
257 'p' => Ok(Self::Punctuations),
258 's' => Ok(Self::Spaces),
259 'u' => Ok(Self::Uppercase),
260 'w' => Ok(Self::Alphanumerics),
261 'x' => Ok(Self::Hexadecimals),
262 'z' => Ok(Self::ZeroByte),
263
264 'A' => Ok(Self::NotLetters),
265 'C' => Ok(Self::NotControls),
266 'D' => Ok(Self::NotDigits),
267 'G' => Ok(Self::NotPrintable),
268 'L' => Ok(Self::NotLowercase),
269 'P' => Ok(Self::NotPunctuations),
270 'S' => Ok(Self::NotSpaces),
271 'U' => Ok(Self::NotUppercase),
272 'W' => Ok(Self::NotAlphanumerics),
273 'X' => Ok(Self::NotHexadecimals),
274 'Z' => Ok(Self::NotZeroByte),
275
276 _ => Err(()),
277 }
278 }
279}
280
281impl Display for Token {
282 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
283 match self {
284 Token::Start => write!(f, "^"),
285 Token::End => write!(f, "$"),
286 Token::Any => write!(f, "."),
287 Token::ZeroOrMore => write!(f, "*"),
288 Token::OneOrMore => write!(f, "+"),
289 Token::ZeroOrMoreLazy => write!(f, "-"),
290 Token::ZeroOrOne => write!(f, "?"),
291 Token::Inverse => write!(f, "^"),
292 Token::LParen => write!(f, "("),
293 Token::RParen => write!(f, ")"),
294 Token::LBrack => write!(f, "["),
295 Token::RBrack => write!(f, "]"),
296 Token::Char(char) => write!(f, "{char}"),
297 Token::Escaped(char) => write!(f, "%{char}"),
298 Token::Class(class) => write!(f, "{class}"),
299 Token::CaptureRef(id) => write!(f, "%{id}"),
300 Token::Balanced(open, close) => write!(f, "%b{open}{close}"),
301 Token::Frontier => write!(f, "%f"),
302 Token::Eof => write!(f, "end of file"),
303 }
304 }
305}
306
307impl Display for Class {
308 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
309 match self {
310 Class::Letters => write!(f, "%a"),
311 Class::Controls => write!(f, "%c"),
312 Class::Digits => write!(f, "%d"),
313 Class::Printable => write!(f, "%g"),
314 Class::Lowercase => write!(f, "%l"),
315 Class::Punctuations => write!(f, "%p"),
316 Class::Spaces => write!(f, "%s"),
317 Class::Uppercase => write!(f, "%u"),
318 Class::Alphanumerics => write!(f, "%w"),
319 Class::Hexadecimals => write!(f, "%x"),
320 Class::ZeroByte => write!(f, "%z"),
321 Class::NotLetters => write!(f, "%A"),
322 Class::NotControls => write!(f, "%C"),
323 Class::NotDigits => write!(f, "%D"),
324 Class::NotPrintable => write!(f, "%G"),
325 Class::NotLowercase => write!(f, "%L"),
326 Class::NotPunctuations => write!(f, "%P"),
327 Class::NotSpaces => write!(f, "%S"),
328 Class::NotUppercase => write!(f, "%U"),
329 Class::NotAlphanumerics => write!(f, "%W"),
330 Class::NotHexadecimals => write!(f, "%X"),
331 Class::NotZeroByte => write!(f, "%Z"),
332 }
333 }
334}