1use std::borrow::Cow;
2
3use crate::{Class, PatternObject, Quantifier, SetPatternObject};
4
5#[derive(Debug, thiserror::Error, PartialEq, Eq)]
6#[allow(clippy::enum_variant_names)]
7pub enum ToRegexError {
10 #[error("the input pattern includes a balanced pattern (eg. `%b{{}}`) which cannot be represented by regex")]
13 BalancedUsed,
14
15 #[error("the input pattern includes a capture backreference, which may not be supported by some regex engines")]
18 CaptureRefUsed,
19
20 #[error("the input pattern includes a frontier pattern (eg. `%f[a-z]`) which cannot be represented by regex")]
23 FrontierUsed,
24}
25
26pub fn try_to_regex(
52 pattern: &[PatternObject],
53 allow_capture_refs: bool,
54 allow_lookaround: bool,
55) -> Result<String, ToRegexError> {
56 from_pattern(pattern, allow_capture_refs, allow_lookaround)
57}
58
59fn from_pattern(
60 pattern: &[PatternObject],
61 allow_capture_refs: bool,
62 allow_lookaround: bool,
63) -> Result<String, ToRegexError> {
64 pattern
65 .iter()
66 .map(|obj| from_pattern_object(obj, allow_capture_refs, allow_lookaround))
67 .collect::<Result<_, _>>()
68}
69
70fn from_pattern_object(
71 object: &PatternObject,
72 allow_capture_refs: bool,
73 allow_lookaround: bool,
74) -> Result<Cow<'static, str>, ToRegexError> {
75 match object {
76 PatternObject::Balanced(_, _) => Err(ToRegexError::BalancedUsed),
77 PatternObject::Frontier(_, _) if !allow_lookaround => Err(ToRegexError::FrontierUsed),
78 PatternObject::CaptureRef(_) if !allow_capture_refs => Err(ToRegexError::CaptureRefUsed),
79
80 PatternObject::Any => Ok("[\\s\\S]".into()),
81 PatternObject::Start => Ok("^".into()),
82 PatternObject::End => Ok("$".into()),
83
84 PatternObject::String(string) => {
85 Ok(string.chars().map(from_char).collect::<String>().into())
86 }
87 PatternObject::Escaped(char) => Ok(from_char(*char).into()),
88
89 PatternObject::Quantifier(quantifier, child) => Ok(format!(
90 "{}{}",
91 from_pattern_object(child, allow_capture_refs, allow_lookaround)?,
92 from_quantifier(quantifier)
93 )
94 .into()),
95 PatternObject::Class(class) => Ok(from_class(class).into()),
96 PatternObject::CaptureRef(id) => Ok(format!("\\{id}").into()),
97 PatternObject::Capture(_, pattern) => Ok(format!(
98 "({})",
99 from_pattern(pattern, allow_capture_refs, allow_lookaround)?
100 )
101 .into()),
102 PatternObject::Set(inverted, set) => Ok(from_set(set, *inverted).into()),
103 PatternObject::Frontier(inverted, set) => Ok(format!(
104 "(?<{}{})(?{}{})",
105 if *inverted { "=" } else { "!" },
106 from_set(set, false),
107 if *inverted { "!" } else { "=" },
108 from_set(set, false),
109 )
110 .into()),
111 }
112}
113
114fn from_quantifier(quantifier: &Quantifier) -> &'static str {
115 match quantifier {
116 Quantifier::ZeroOrMore => "*",
117 Quantifier::OneOrMore => "+",
118 Quantifier::ZeroOrMoreLazy => "*?",
119 Quantifier::ZeroOrOne => "?",
120 }
121}
122
123fn from_class(class: &Class) -> &'static str {
124 match class {
125 Class::Letters => r"[a-zA-Z]",
126 Class::Controls => r"[\0-\31]",
127 Class::Digits => r"[0-9]",
128 Class::Printable => r"[\33-\126]",
129 Class::Lowercase => r"[a-z]",
130 Class::Punctuations => r##"[!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]"##,
131 Class::Spaces => r"[ \t\n\v\f\r]",
132 Class::Uppercase => r"[A-Z]",
133 Class::Alphanumerics => r"[a-zA-Z0-9]",
134 Class::Hexadecimals => r"[0-9a-fA-F]",
135 Class::ZeroByte => r"\0",
136
137 Class::NotLetters => r"[^a-zA-Z]",
138 Class::NotControls => r"[^\0-\31]",
139 Class::NotDigits => r"[^0-9]",
140 Class::NotPrintable => r"[^\33-\126]",
141 Class::NotLowercase => r"[^a-z]",
142 Class::NotPunctuations => r##"[^!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]"##,
143 Class::NotSpaces => r"[^ \t\n\v\f\r]",
144 Class::NotUppercase => r"[^A-Z]",
145 Class::NotAlphanumerics => r"[^a-zA-Z0-9]",
146 Class::NotHexadecimals => r"[^0-9a-fA-F]",
147 Class::NotZeroByte => r"[^\0]",
148 }
149}
150
151fn from_set(set: &[SetPatternObject], inverse: bool) -> String {
152 format!(
153 "[{}{}]",
154 if inverse { "^" } else { "" },
155 set.iter().map(from_set_pattern_object).collect::<String>()
156 )
157}
158
159fn from_set_pattern_object(object: &SetPatternObject) -> Cow<'static, str> {
160 match object {
161 SetPatternObject::Char(char) | SetPatternObject::Escaped(char) => from_char(*char).into(),
162 SetPatternObject::Range(start, end) => {
163 format!("{}-{}", from_char(*start), from_char(*end)).into()
164 }
165 SetPatternObject::Class(class) => from_class(class).into(),
166 }
167}
168
169fn from_char(char: char) -> String {
170 const SPECIAL_CHARS: &str = "\\.()[]{}|*+?^$/";
171
172 match SPECIAL_CHARS.contains(char) {
173 true => format!("\\{char}"),
174 false => char.to_string(),
175 }
176}
177
178#[cfg(test)]
179mod tests {
180 use super::*;
181
182 #[test]
183 fn everything() {
184 let input = vec![
185 PatternObject::Start,
186 PatternObject::String("^chars".to_owned()),
187 PatternObject::Quantifier(
188 Quantifier::OneOrMore,
189 PatternObject::String("q".to_owned()).into(),
190 ),
191 PatternObject::Quantifier(
192 Quantifier::ZeroOrMoreLazy,
193 PatternObject::String("w".to_owned()).into(),
194 ),
195 PatternObject::Quantifier(
196 Quantifier::ZeroOrMore,
197 PatternObject::String("e".to_owned()).into(),
198 ),
199 PatternObject::Quantifier(
200 Quantifier::ZeroOrOne,
201 PatternObject::String("r".to_owned()).into(),
202 ),
203 PatternObject::Any,
204 PatternObject::Escaped('.'),
205 PatternObject::Capture(
206 1,
207 vec![
208 PatternObject::Class(Class::Letters),
209 PatternObject::Class(Class::Controls),
210 PatternObject::Class(Class::Digits),
211 PatternObject::Class(Class::Printable),
212 PatternObject::Class(Class::Lowercase),
213 PatternObject::Class(Class::Punctuations),
214 PatternObject::Class(Class::Spaces),
215 PatternObject::Class(Class::Uppercase),
216 PatternObject::Class(Class::Alphanumerics),
217 PatternObject::Class(Class::Hexadecimals),
218 PatternObject::Class(Class::ZeroByte),
219 PatternObject::Class(Class::NotLetters),
220 ],
221 ),
222 PatternObject::Set(
223 false,
224 vec![
225 SetPatternObject::Char('a'),
226 SetPatternObject::Char('s'),
227 SetPatternObject::Char('d'),
228 ],
229 ),
230 PatternObject::Set(
231 true,
232 vec![
233 SetPatternObject::Char('n'),
234 SetPatternObject::Char('o'),
235 SetPatternObject::Char('t'),
236 ],
237 ),
238 PatternObject::Frontier(
239 true,
240 vec![
241 SetPatternObject::Char('n'),
242 SetPatternObject::Char('o'),
243 SetPatternObject::Char('t'),
244 ],
245 ),
246 PatternObject::CaptureRef(1),
247 PatternObject::Set(
248 false,
249 vec![
250 SetPatternObject::Escaped(']'),
251 SetPatternObject::Range('a', 'z'),
252 ],
253 ),
254 PatternObject::String("$".to_owned()),
255 PatternObject::End,
256 ];
257
258 assert_eq!(try_to_regex(&input, true, true), Ok(r##"^\^charsq+w*?e*r?[\s\S]\.([a-zA-Z][\0-\31][0-9][\33-\126][a-z][!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~][ \t\n\v\f\r][A-Z][a-zA-Z0-9][0-9a-fA-F]\0[^a-zA-Z])[asd][^not](?<=[not])(?![not])\1[\]a-z]\$$"##.to_owned()));
259 }
260}