|
1 /* |
|
2 Copyright 2012-2014 Santeri Piippo |
|
3 All rights reserved. |
|
4 |
|
5 Redistribution and use in source and binary forms, with or without |
|
6 modification, are permitted provided that the following conditions |
|
7 are met: |
|
8 |
|
9 1. Redistributions of source code must retain the above copyright |
|
10 notice, this list of conditions and the following disclaimer. |
|
11 2. Redistributions in binary form must reproduce the above copyright |
|
12 notice, this list of conditions and the following disclaimer in the |
|
13 documentation and/or other materials provided with the distribution. |
|
14 3. The name of the author may not be used to endorse or promote products |
|
15 derived from this software without specific prior written permission. |
|
16 |
|
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
|
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
|
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
|
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
|
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
|
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 |
|
29 #include <cstdio> |
|
30 #include <cstdlib> |
|
31 #include <cassert> |
|
32 #include <cstring> |
|
33 #include <string> |
|
34 #include "lexerScanner.h" |
|
35 #include "lexer.h" |
|
36 |
|
37 static const String gTokenStrings[] = |
|
38 { |
|
39 "<<=", |
|
40 ">>=", |
|
41 "==", |
|
42 "!=", |
|
43 "+=", |
|
44 "-=", |
|
45 "*=", |
|
46 "/=", |
|
47 "%=", |
|
48 "<<", |
|
49 ">>", |
|
50 ">=", |
|
51 "<=", |
|
52 "&&", |
|
53 "||", |
|
54 "++", |
|
55 "--", |
|
56 "'", |
|
57 "$", |
|
58 "(", |
|
59 ")", |
|
60 "[", |
|
61 "]", |
|
62 "{", |
|
63 "}", |
|
64 "=", |
|
65 "+", |
|
66 "-", |
|
67 "*", |
|
68 "/", |
|
69 "%", |
|
70 ",", |
|
71 "<", |
|
72 ">", |
|
73 ".", |
|
74 ":", |
|
75 ";", |
|
76 "#", |
|
77 "!", |
|
78 "&", |
|
79 "|", |
|
80 "^", |
|
81 "?", |
|
82 "->", |
|
83 "bool", |
|
84 "break", |
|
85 "case", |
|
86 "continue", |
|
87 "const", |
|
88 "constexpr", |
|
89 "default", |
|
90 "do", |
|
91 "else", |
|
92 "event", |
|
93 "eventdef", |
|
94 "for", |
|
95 "funcdef", |
|
96 "if", |
|
97 "int", |
|
98 "mainloop", |
|
99 "onenter", |
|
100 "onexit", |
|
101 "state", |
|
102 "switch", |
|
103 "str", |
|
104 "using", |
|
105 "var", |
|
106 "void", |
|
107 "while", |
|
108 "true", |
|
109 "false", |
|
110 "enum", |
|
111 "func", |
|
112 "return", |
|
113 }; |
|
114 |
|
115 static_assert (countof (gTokenStrings) == (int)gLastNamedToken + 1, |
|
116 "Count of gTokenStrings is not the same as the amount of named token identifiers."); |
|
117 |
|
118 // ============================================================================= |
|
119 // |
|
120 LexerScanner::LexerScanner (FILE* fp) : |
|
121 m_line (1) |
|
122 { |
|
123 long fsize, bytes; |
|
124 |
|
125 fseek (fp, 0l, SEEK_END); |
|
126 fsize = ftell (fp); |
|
127 rewind (fp); |
|
128 m_data = new char[fsize]; |
|
129 m_position = m_lineBreakPosition = &m_data[0]; |
|
130 bytes = fread (m_data, 1, fsize, fp); |
|
131 assert (bytes >= fsize); |
|
132 } |
|
133 |
|
134 // ============================================================================= |
|
135 // |
|
136 LexerScanner::~LexerScanner() |
|
137 { |
|
138 delete m_data; |
|
139 } |
|
140 |
|
141 // ============================================================================= |
|
142 // |
|
143 bool LexerScanner::checkString (const char* c, int flags) |
|
144 { |
|
145 bool r = strncmp (m_position, c, strlen (c)) == 0; |
|
146 |
|
147 // There is to be a non-symbol character after words |
|
148 if (r && (flags & FCheckWord) && isSymbolChar (m_position[strlen (c)], true)) |
|
149 r = false; |
|
150 |
|
151 // Advance the cursor unless we want to just peek |
|
152 if (r && !(flags & FCheckPeek)) |
|
153 m_position += strlen (c); |
|
154 |
|
155 return r; |
|
156 } |
|
157 |
|
158 // ============================================================================= |
|
159 // |
|
160 bool LexerScanner::getNextToken() |
|
161 { |
|
162 m_tokenText = ""; |
|
163 |
|
164 while (isspace (*m_position)) |
|
165 skip(); |
|
166 |
|
167 // Check for comments |
|
168 if (strncmp (m_position, "//", 2) == 0) |
|
169 { |
|
170 m_position += 2; |
|
171 |
|
172 while (*m_position != '\n') |
|
173 skip(); |
|
174 |
|
175 return getNextToken(); |
|
176 } |
|
177 elif (strncmp (m_position, "/*", 2) == 0) |
|
178 { |
|
179 skip (2); // skip the start symbols |
|
180 |
|
181 while (strncmp (m_position, "*/", 2) != 0) |
|
182 skip(); |
|
183 |
|
184 skip (2); // skip the end symbols |
|
185 return getNextToken(); |
|
186 } |
|
187 |
|
188 if (*m_position == '\0') |
|
189 return false; |
|
190 |
|
191 // Check tokens |
|
192 for (int i = 0; i < countof (gTokenStrings); ++i) |
|
193 { |
|
194 int flags = 0; |
|
195 |
|
196 if (i >= gFirstNamedToken) |
|
197 flags |= FCheckWord; |
|
198 |
|
199 if (checkString (gTokenStrings[i], flags)) |
|
200 { |
|
201 m_tokenText = gTokenStrings[i]; |
|
202 m_tokenType = (ETokenType) i; |
|
203 return true; |
|
204 } |
|
205 } |
|
206 |
|
207 // Check and parse string |
|
208 if (*m_position == '\"') |
|
209 { |
|
210 m_position++; |
|
211 |
|
212 while (*m_position != '\"') |
|
213 { |
|
214 if (!*m_position) |
|
215 error ("unterminated string"); |
|
216 |
|
217 if (checkString ("\\n")) |
|
218 { |
|
219 m_tokenText += '\n'; |
|
220 continue; |
|
221 } |
|
222 elif (checkString ("\\t")) |
|
223 { |
|
224 m_tokenText += '\t'; |
|
225 continue; |
|
226 } |
|
227 elif (checkString ("\\\"")) |
|
228 { |
|
229 m_tokenText += '"'; |
|
230 continue; |
|
231 } |
|
232 |
|
233 m_tokenText += *m_position++; |
|
234 } |
|
235 |
|
236 m_tokenType =TK_String; |
|
237 skip(); // skip the final quote |
|
238 return true; |
|
239 } |
|
240 |
|
241 if (isdigit (*m_position)) |
|
242 { |
|
243 while (isdigit (*m_position)) |
|
244 m_tokenText += *m_position++; |
|
245 |
|
246 m_tokenType =TK_Number; |
|
247 return true; |
|
248 } |
|
249 |
|
250 if (isSymbolChar (*m_position, false)) |
|
251 { |
|
252 m_tokenType =TK_Symbol; |
|
253 |
|
254 do |
|
255 { |
|
256 if (!isSymbolChar (*m_position, true)) |
|
257 break; |
|
258 |
|
259 m_tokenText += *m_position++; |
|
260 } while (*m_position != '\0'); |
|
261 |
|
262 return true; |
|
263 } |
|
264 |
|
265 error ("unknown character \"%1\"", *m_position); |
|
266 return false; |
|
267 } |
|
268 |
|
269 // ============================================================================= |
|
270 // |
|
271 void LexerScanner::skip() |
|
272 { |
|
273 if (*m_position == '\n') |
|
274 { |
|
275 m_line++; |
|
276 m_lineBreakPosition = m_position; |
|
277 } |
|
278 |
|
279 m_position++; |
|
280 } |
|
281 |
|
282 // ============================================================================= |
|
283 // |
|
284 void LexerScanner::skip (int chars) |
|
285 { |
|
286 for (int i = 0; i < chars; ++i) |
|
287 skip(); |
|
288 } |
|
289 |
|
290 // ============================================================================= |
|
291 // |
|
292 String LexerScanner::getTokenString (ETokenType a) |
|
293 { |
|
294 assert ((int) a <= gLastNamedToken); |
|
295 return gTokenStrings[a]; |
|
296 } |
|
297 |
|
298 // ============================================================================= |
|
299 // |
|
300 String LexerScanner::readLine() |
|
301 { |
|
302 String line; |
|
303 |
|
304 while (*m_position != '\n') |
|
305 line += *(m_position++); |
|
306 |
|
307 return line; |
|
308 } |