use std::borrow::Cow;

/// A single lexical token produced by [`lex`].
///
/// Tokens borrow from the source buffer (lifetime `'src`) wherever possible. Only string
/// literals that contain backslash escapes carry owned bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum Token<'src> {
    /// An opening parenthesis, `(`.
    LParen,
    /// A closing parenthesis, `)`.
    RParen,
    /// An opening square bracket, `[`.
    LBrack,
    /// A closing square bracket, `]`.
    RBrack,

    /// The contents of a double-quoted string literal, without the surrounding quotes.
    ///
    /// The bytes are borrowed from the source when the literal contains no backslash escape
    /// and owned when escapes had to be resolved. Equality compares the bytes only, so a
    /// borrowed and an owned string with the same contents are equal.
    String(Cow<'src, [u8]>),
    /// A run of bytes that is not whitespace, a bracket, a parenthesis, a quote, or a comment.
    Atom(&'src [u8]),
    /// A `;` line comment, including the leading `;` but excluding the line break.
    ///
    /// Only available (and only emitted) when the `comments` feature is enabled.
    #[cfg(feature = "comments")]
    Comment(&'src [u8]),
}

/// Bytes that are skipped between tokens; they also terminate an atom.
const WHITESPACE: &[u8] = b" \t\r\n";
/// Bytes that terminate a `;` line comment.
const LINE_BREAKS: &[u8] = b"\r\n";
/// Non-whitespace bytes that terminate an atom because they begin another kind of token.
const NOT_IN_ATOM: &[u8] = b"()[]\"";

24pub(crate) fn lex(input: &[u8]) -> Vec<Token<'_>> {
25 let mut index = 0;
26 let mut tokens = vec![];
27
28 while index < input.len() {
29 match input[index] {
30 byte if WHITESPACE.contains(&byte) => index += 1,
31 b';' => {
32 let _start_index = index;
33 while index < input.len() && !LINE_BREAKS.contains(&input[index]) {
34 index += 1;
35 }
36 #[cfg(feature = "comments")]
37 tokens.push(Token::Comment(&input[_start_index..index]));
38 }
39 b'(' => {
40 tokens.push(Token::LParen);
41 index += 1;
42 }
43 b')' => {
44 tokens.push(Token::RParen);
45 index += 1;
46 }
47 b'[' => {
48 tokens.push(Token::LBrack);
49 index += 1;
50 }
51 b']' => {
52 tokens.push(Token::RBrack);
53 index += 1;
54 }
55 b'"' => tokens.push(lex_string(input, &mut index)),
56 _ => {
57 let start_index = index;
58 while index < input.len()
59 && !WHITESPACE.contains(&input[index])
60 && !NOT_IN_ATOM.contains(&input[index])
61 {
62 index += 1;
63 }
64 tokens.push(Token::Atom(&input[start_index..index]));
65 }
66 }
67 }
68
69 tokens
70}
71
72fn lex_string<'src>(input: &'src [u8], index: &mut usize) -> Token<'src> {
73 *index += 1; let start_index = *index;
75 let mut end_index = input.len();
76 let mut requires_allocation = false;
77 let mut allocated_string = vec![];
78
79 while *index < input.len() {
80 match input[*index] {
81 b'"' => {
82 end_index = *index;
83 *index += 1;
84 break;
85 }
86 b'\\' if *index == input.len().wrapping_sub(1) => {
87 end_index = *index;
88 *index += 1;
89 break;
90 }
91 b'\\' => {
92 if !requires_allocation {
93 allocated_string = input[start_index..*index].to_vec();
94 requires_allocation = true;
95 }
96 allocated_string.push(input[*index + 1]);
97 *index += 2;
98 }
99 _ => {
100 if requires_allocation {
101 allocated_string.push(input[*index]);
102 }
103 *index += 1;
104 }
105 }
106 }
107
108 match requires_allocation {
109 true => Token::String(Cow::Owned(allocated_string)),
110 false => Token::String(Cow::Borrowed(&input[start_index..end_index])),
111 }
112}
113
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` as a string literal whose opening quote is at offset 0 and checks that
    /// the resulting token holds `expected`.
    fn string_test(input: &str, expected: Cow<'static, [u8]>) {
        let mut index = 0;
        assert_eq!(
            lex_string(input.as_bytes(), &mut index),
            Token::String(expected)
        );
    }

    #[test]
    fn strings() {
        // Properly terminated literal.
        string_test(r#""Hello, World!""#, Cow::Borrowed(b"Hello, World!"));
        // Missing closing quote: the literal runs to the end of the input.
        string_test(r#""Hello, World!"#, Cow::Borrowed(b"Hello, World!"));
        // Escaped quote and escaped backslash.
        string_test(r#""\"\\""#, Cow::Borrowed(b"\"\\"));
        // A trailing backslash with nothing to escape is dropped.
        string_test(r#""\"\"#, Cow::Borrowed(b"\""));
        string_test(r#""a\"#, Cow::Borrowed(b"a"));
    }

    #[test]
    fn parens() {
        assert_eq!(
            lex(b"()[]"),
            vec![Token::LParen, Token::RParen, Token::LBrack, Token::RBrack]
        );
    }

    #[test]
    fn atoms() {
        assert_eq!(
            lex(b"abc def"),
            vec![Token::Atom(b"abc"), Token::Atom(b"def")],
        );
    }

    #[test]
    fn mixed() {
        assert_eq!(
            lex(b"(a \"b c\" [d])"),
            vec![
                Token::LParen,
                Token::Atom(b"a"),
                Token::String(Cow::Borrowed(b"b c")),
                Token::LBrack,
                Token::Atom(b"d"),
                Token::RBrack,
                Token::RParen,
            ]
        );
    }

    #[test]
    #[cfg(feature = "comments")]
    fn comments() {
        assert_eq!(
            lex(b"; comment\natom"),
            vec![Token::Comment(b"; comment"), Token::Atom(b"atom")]
        );
    }

    #[test]
    #[cfg(not(feature = "comments"))]
    fn comments() {
        assert_eq!(lex(b"; comment\natom"), vec![Token::Atom(b"atom")]);
    }
}