|
1 /* |
|
2 Copyright (c) 2013-2014, Santeri Piippo |
|
3 All rights reserved. |
|
4 |
|
5 Redistribution and use in source and binary forms, with or without |
|
6 modification, are permitted provided that the following conditions are met: |
|
7 |
|
8 * Redistributions of source code must retain the above copyright |
|
9 notice, this list of conditions and the following disclaimer. |
|
10 |
|
11 * Redistributions in binary form must reproduce the above copyright |
|
12 notice, this list of conditions and the following disclaimer in the |
|
13 documentation and/or other materials provided with the distribution. |
|
14 |
|
15 * Neither the name of the <organization> nor the |
|
16 names of its contributors may be used to endorse or promote products |
|
17 derived from this software without specific prior written permission. |
|
18 |
|
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
|
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
22 DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY |
|
23 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
|
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
26 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29 */ |
|
30 |
|
31 #include <cstdio> |
|
32 #include <cstdlib> |
|
33 #include <cassert> |
|
34 #include <cstring> |
|
35 #include <string> |
|
36 #include "lexer_scanner.h" |
|
37 |
|
38 static const string g_token_strings[] = |
|
39 { |
|
40 "==", |
|
41 "[]", |
|
42 "+=", |
|
43 "-=", |
|
44 "*=", |
|
45 "/=", |
|
46 "%=", |
|
47 "'", |
|
48 "$", |
|
49 "(", |
|
50 ")", |
|
51 "[", |
|
52 "]", |
|
53 "{", |
|
54 "}", |
|
55 "=", |
|
56 "+", |
|
57 "-", |
|
58 "*", |
|
59 "/", |
|
60 "%", |
|
61 ",", |
|
62 "<", |
|
63 ">", |
|
64 ".", |
|
65 ":", |
|
66 ";", |
|
67 "#", |
|
68 "!", |
|
69 "->", |
|
70 "bool", |
|
71 "break", |
|
72 "case", |
|
73 "continue", |
|
74 "const", |
|
75 "default", |
|
76 "do", |
|
77 "else", |
|
78 "event", |
|
79 "for", |
|
80 "goto", |
|
81 "if", |
|
82 "int", |
|
83 "mainloop", |
|
84 "onenter", |
|
85 "onexit", |
|
86 "state", |
|
87 "switch", |
|
88 "str" |
|
89 "void", |
|
90 "while", |
|
91 "enum", |
|
92 "func", |
|
93 "return", |
|
94 }; |
|
95 |
|
96 static_assert (countof (g_token_strings) == (int) last_named_token + 1, |
|
97 "Count of g_token_strings is not the same as the amount of named token identifiers."); |
|
98 |
|
99 // ============================================================================= |
|
100 // |
|
101 lexer_scanner::lexer_scanner (FILE* fp) : |
|
102 m_line (1) |
|
103 { |
|
104 long fsize, bytes; |
|
105 |
|
106 fseek (fp, 0l, SEEK_END); |
|
107 fsize = ftell (fp); |
|
108 rewind (fp); |
|
109 m_data = new char[fsize]; |
|
110 m_ptr = m_line_break_pos = &m_data[0]; |
|
111 bytes = fread (m_data, 1, fsize, fp); |
|
112 assert (bytes >= fsize); |
|
113 } |
|
114 |
|
115 // ============================================================================= |
|
116 // |
|
117 lexer_scanner::~lexer_scanner() |
|
118 { |
|
119 delete m_data; |
|
120 } |
|
121 |
|
122 // ============================================================================= |
|
123 // |
|
124 bool lexer_scanner::check_string (const char* c, int flags) |
|
125 { |
|
126 bool r = strncmp (m_ptr, c, strlen (c)) == 0; |
|
127 |
|
128 // There is to be whitespace after words |
|
129 if (r && (flags & f_check_word) && !isspace (m_ptr[strlen (c)])) |
|
130 r = false; |
|
131 |
|
132 // Advance the cursor unless we want to just peek |
|
133 if (r && ! (flags & f_check_peek)) |
|
134 m_ptr += strlen (c); |
|
135 |
|
136 return r; |
|
137 } |
|
138 |
|
139 // ============================================================================= |
|
140 // |
|
141 bool lexer_scanner::get_next_token() |
|
142 { |
|
143 m_token_text = ""; |
|
144 |
|
145 while (isspace (*m_ptr) == true) |
|
146 { |
|
147 if (*m_ptr == '\n') |
|
148 { |
|
149 m_line++; |
|
150 m_line_break_pos = m_ptr; |
|
151 } |
|
152 |
|
153 m_ptr++; |
|
154 } |
|
155 |
|
156 if (*m_ptr == '\0') |
|
157 return false; |
|
158 |
|
159 // Check tokens |
|
160 for (int i = 0; i < (int) (sizeof g_token_strings / sizeof * g_token_strings); ++i) |
|
161 { |
|
162 if (check_string (g_token_strings[i], f_check_word)) |
|
163 { |
|
164 m_token_text = g_token_strings[i]; |
|
165 m_e_token = (e_token) i; |
|
166 return true; |
|
167 } |
|
168 } |
|
169 |
|
170 // Check and parse string |
|
171 if (*m_ptr == '\"') |
|
172 { |
|
173 m_ptr++; |
|
174 |
|
175 while (*m_ptr != '\"') |
|
176 { |
|
177 if (!*m_ptr) |
|
178 return false; |
|
179 |
|
180 if (check_string ("\\n")) |
|
181 { |
|
182 m_token_text += '\n'; |
|
183 continue; |
|
184 } |
|
185 elif (check_string ("\\t")) |
|
186 { |
|
187 m_token_text += '\t'; |
|
188 continue; |
|
189 } |
|
190 elif (check_string ("\\\"")) |
|
191 { |
|
192 m_token_text += '"'; |
|
193 continue; |
|
194 } |
|
195 |
|
196 m_token_text += *m_ptr++; |
|
197 } |
|
198 |
|
199 m_e_token = tk_string; |
|
200 m_ptr++; // skip the final quote |
|
201 return true; |
|
202 } |
|
203 |
|
204 m_e_token = tk_symbol; |
|
205 |
|
206 if (isdigit (*m_ptr)) |
|
207 { |
|
208 while (isdigit (*m_ptr)) |
|
209 m_token_text += *m_ptr++; |
|
210 |
|
211 m_e_token = tk_number; |
|
212 return true; |
|
213 } |
|
214 |
|
215 if (is_symbol_char (*m_ptr)) |
|
216 { |
|
217 while (m_ptr != '\0') |
|
218 { |
|
219 if (!is_symbol_char (*m_ptr)) |
|
220 break; |
|
221 |
|
222 bool stop_here = false; |
|
223 |
|
224 for (string i : g_token_strings) |
|
225 { |
|
226 if (check_string (i, f_check_peek | f_check_word)) |
|
227 { |
|
228 stop_here = true; |
|
229 break; |
|
230 } |
|
231 } |
|
232 |
|
233 if (stop_here) |
|
234 break; |
|
235 |
|
236 m_token_text += *m_ptr++; |
|
237 } |
|
238 |
|
239 return true; |
|
240 } |
|
241 |
|
242 return false; |
|
243 } |
|
244 |
|
245 string lexer_scanner::get_token_string (e_token a) |
|
246 { |
|
247 assert ((int) a <= (int) last_named_token); |
|
248 return g_token_strings[a]; |
|
249 } |