1 /* |
|
2 * botc source code |
|
3 * Copyright (C) 2012 Santeri `azimuth` Piippo |
|
4 * All rights reserved. |
|
5 * |
|
6 * Redistribution and use in source and binary forms, with or without |
|
7 * modification, are permitted provided that the following conditions are met: |
|
8 * |
|
9 * 1. Redistributions of source code must retain the above copyright notice, |
|
10 * this list of conditions and the following disclaimer. |
|
11 * 2. Redistributions in binary form must reproduce the above copyright notice, |
|
12 * this list of conditions and the following disclaimer in the documentation |
|
13 * and/or other materials provided with the distribution. |
|
14 * 3. Neither the name of the developer nor the names of its contributors may |
|
15 * be used to endorse or promote products derived from this software without |
|
16 * specific prior written permission. |
|
17 * 4. Redistributions in any form must be accompanied by information on how to |
|
18 * obtain complete source code for the software and any accompanying |
|
19 * software that uses the software. The source code must either be included |
|
20 * in the distribution or be available for no more than the cost of |
|
21 * distribution plus a nominal fee, and must be freely redistributable |
|
22 * under reasonable conditions. For an executable file, complete source |
|
23 * code means the source code for all modules it contains. It does not |
|
24 * include source code for modules or files that typically accompany the |
|
25 * major components of the operating system on which the executable file |
|
26 * runs. |
|
27 * |
|
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
29 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
|
32 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
38 * POSSIBILITY OF SUCH DAMAGE. |
|
39 */ |
|
40 |
|
41 #include <stdio.h> |
|
42 #include <stdlib.h> |
|
43 #include "string.h" |
|
44 #include "str.h" |
|
45 #include "common.h" |
|
46 #include "scanner.h" |
|
47 #include "stdarg.h" |
|
48 |
|
// Snapshot helpers for the scanner's line/column bookkeeping.
//
// STORE_POSITION declares three const locals in the enclosing scope that
// capture bAtNewLine and the current file's line and column counters;
// RESTORE_POSITION writes those locals back. Both must be expanded in the
// same scope. Each expansion already ends in a semicolon, so call sites
// write the macro name on a line of its own without one.
#define STORE_POSITION \
    const bool bStoredAtNewLine = bAtNewLine; \
    const ulong ulStoredLineNumber = ulaLineNumber[fc]; \
    const ulong ulStoredCurChar = ulaCurChar[fc];

#define RESTORE_POSITION \
    bAtNewLine = bStoredAtNewLine; \
    ulaLineNumber[fc] = ulStoredLineNumber; \
    ulaCurChar[fc] = ulStoredCurChar;
|
58 |
|
59 // ============================================================================ |
|
60 Scanner::Scanner (str path) { |
|
61 token = ""; |
|
62 zPrevToken = ""; |
|
63 lPrevPos = 0; |
|
64 fc = -1; |
|
65 |
|
66 for (unsigned int u = 0; u < MAX_FILESTACK; u++) |
|
67 fp[u] = NULL; |
|
68 |
|
69 OpenFile (path); |
|
70 dCommentMode = 0; |
|
71 } |
|
72 |
|
73 // ============================================================================ |
|
74 Scanner::~Scanner () { |
|
75 // If comment mode is 2 by the time the file ended, the |
|
76 // comment was left unterminated. 1 is no problem, since |
|
77 // it's terminated by newlines anyway. |
|
78 if (dCommentMode == 2) |
|
79 ParserError ("unterminated `/*`-style comment"); |
|
80 |
|
81 for (unsigned int u = 0; u < MAX_FILESTACK; u++) { |
|
82 if (fp[u]) { |
|
83 ParserWarning ("file idx %u remained open after parsing", u); |
|
84 CloseFile (u); |
|
85 } |
|
86 } |
|
87 } |
|
88 |
|
89 // ============================================================================ |
|
90 // Opens a file and pushes its pointer to stack |
|
91 void Scanner::OpenFile (str path) { |
|
92 if (fc+1 >= MAX_FILESTACK) |
|
93 ParserError ("supposed to open file `%s` but file stack is full! do you have recursive `#include` directives?", |
|
94 path.chars()); |
|
95 |
|
96 // Save the position first. |
|
97 if (fc != -1) { |
|
98 laSavedPos[fc] = ftell (fp[fc]); |
|
99 } |
|
100 |
|
101 fc++; |
|
102 |
|
103 fp[fc] = fopen (path.chars(), "r"); |
|
104 if (!fp[fc]) { |
|
105 ParserError ("couldn't open %s for reading!\n", path.chars ()); |
|
106 exit (1); |
|
107 } |
|
108 |
|
109 fseek (fp[fc], 0, SEEK_SET); |
|
110 saFilePath[fc] = path.chars(); |
|
111 ulaLineNumber[fc] = 1; |
|
112 ulaCurChar[fc] = 1; |
|
113 ulaPosition[fc] = 0; |
|
114 bAtNewLine = 0; |
|
115 } |
|
116 |
|
117 // ============================================================================ |
|
118 // Closes the current file |
|
119 void Scanner::CloseFile (unsigned int u) { |
|
120 if (u >= MAX_FILESTACK) |
|
121 u = fc; |
|
122 |
|
123 if (!fp[u]) |
|
124 return; |
|
125 |
|
126 fclose (fp[u]); |
|
127 fp[u] = NULL; |
|
128 fc--; |
|
129 |
|
130 if (fc != -1) |
|
131 fseek (fp[fc], laSavedPos[fc], SEEK_SET); |
|
132 } |
|
133 |
|
134 // ============================================================================ |
|
135 char Scanner::ReadChar () { |
|
136 if (feof (fp[fc])) |
|
137 return 0; |
|
138 |
|
139 char c; |
|
140 if (!fread (&c, 1, 1, fp[fc])) |
|
141 return 0; |
|
142 |
|
143 // We're at a newline, thus next char read will begin the next line |
|
144 if (bAtNewLine) { |
|
145 bAtNewLine = false; |
|
146 ulaLineNumber[fc]++; |
|
147 ulaCurChar[fc] = 0; // gets incremented to 1 |
|
148 } |
|
149 |
|
150 if (c == '\n') |
|
151 bAtNewLine = true; |
|
152 |
|
153 ulaCurChar[fc]++; |
|
154 return c; |
|
155 } |
|
156 |
|
157 // ============================================================================ |
|
158 // Peeks the next character |
|
159 char Scanner::PeekChar (int offset) { |
|
160 // Store current position |
|
161 long curpos = ftell (fp[fc]); |
|
162 STORE_POSITION |
|
163 |
|
164 // Forward by offset |
|
165 fseek (fp[fc], offset, SEEK_CUR); |
|
166 |
|
167 // Read the character |
|
168 char* c = (char*)malloc (sizeof (char)); |
|
169 |
|
170 if (!fread (c, sizeof (char), 1, fp[fc])) { |
|
171 fseek (fp[fc], curpos, SEEK_SET); |
|
172 return 0; |
|
173 } |
|
174 |
|
175 // Rewind back |
|
176 fseek (fp[fc], curpos, SEEK_SET); |
|
177 RESTORE_POSITION |
|
178 |
|
179 return c[0]; |
|
180 } |
|
181 |
|
182 // ============================================================================ |
|
183 // Read a token from the file buffer. Returns true if token was found, false if not. |
|
184 bool Scanner::Next (bool peek) { |
|
185 lPrevPos = ftell (fp[fc]); |
|
186 str tmp = ""; |
|
187 |
|
188 while (1) { |
|
189 // Check end-of-file |
|
190 if (feof (fp[fc])) { |
|
191 // If we're just peeking, we shouldn't |
|
192 // actually close anything.. |
|
193 if (peek) |
|
194 break; |
|
195 |
|
196 CloseFile (); |
|
197 if (fc == -1) |
|
198 break; |
|
199 } |
|
200 |
|
201 // Check if the next token possibly starts a comment. |
|
202 if (PeekChar () == '/' && !tmp.len()) { |
|
203 char c2 = PeekChar (1); |
|
204 // C++-style comment |
|
205 if (c2 == '/') |
|
206 dCommentMode = 1; |
|
207 else if (c2 == '*') |
|
208 dCommentMode = 2; |
|
209 |
|
210 // We don't need to actually read in the |
|
211 // comment characters, since they will get |
|
212 // ignored due to comment mode anyway. |
|
213 } |
|
214 |
|
215 c = ReadChar (); |
|
216 |
|
217 // If this is a comment we're reading, check if this character |
|
218 // gets the comment terminated, otherwise ignore it. |
|
219 if (dCommentMode > 0) { |
|
220 if (dCommentMode == 1 && c == '\n') { |
|
221 // C++-style comments are terminated by a newline |
|
222 dCommentMode = 0; |
|
223 continue; |
|
224 } else if (dCommentMode == 2 && c == '*') { |
|
225 // C-style comments are terminated by a `*/` |
|
226 if (PeekChar() == '/') { |
|
227 dCommentMode = 0; |
|
228 ReadChar (); |
|
229 } |
|
230 } |
|
231 |
|
232 // Otherwise, ignore it. |
|
233 continue; |
|
234 } |
|
235 |
|
236 // Non-alphanumber characters (sans underscore) break the word too. |
|
237 // If there was prior data, the delimeter pushes the cursor back so |
|
238 // that the next character will be the same delimeter. If there isn't, |
|
239 // the delimeter itself is included (and thus becomes a token itself.) |
|
240 if ((c >= 33 && c <= 47) || |
|
241 (c >= 58 && c <= 64) || |
|
242 (c >= 91 && c <= 96 && c != '_') || |
|
243 (c >= 123 && c <= 126)) { |
|
244 if (tmp.len()) |
|
245 fseek (fp[fc], ftell (fp[fc]) - 1, SEEK_SET); |
|
246 else |
|
247 tmp += c; |
|
248 break; |
|
249 } |
|
250 |
|
251 if (c <= 32 || c >= 127) { |
|
252 // Don't break if we haven't gathered anything yet. |
|
253 if (tmp.len()) |
|
254 break; |
|
255 } else { |
|
256 tmp += c; |
|
257 } |
|
258 } |
|
259 |
|
260 // If we got nothing here, read failed. This should |
|
261 // only happen in the case of EOF. |
|
262 if (!tmp.len()) { |
|
263 token = ""; |
|
264 return false; |
|
265 } |
|
266 |
|
267 ulaPosition[fc]++; |
|
268 zPrevToken = token; |
|
269 token = tmp; |
|
270 return true; |
|
271 } |
|
272 |
|
273 // ============================================================================ |
|
274 // Returns the next token without advancing the cursor. |
|
275 str Scanner::PeekNext (int offset) { |
|
276 // Store current information |
|
277 str storedtoken = token; |
|
278 int cpos = ftell (fp[fc]); |
|
279 STORE_POSITION |
|
280 |
|
281 // Advance on the token. |
|
282 while (offset >= 0) { |
|
283 if (!Next (true)) |
|
284 return ""; |
|
285 offset--; |
|
286 } |
|
287 |
|
288 str tmp = token; |
|
289 |
|
290 // Restore position |
|
291 fseek (fp[fc], cpos, SEEK_SET); |
|
292 ulaPosition[fc]--; |
|
293 token = storedtoken; |
|
294 RESTORE_POSITION |
|
295 return tmp; |
|
296 } |
|
297 |
|
298 // ============================================================================ |
|
299 void Scanner::Seek (unsigned int n, int origin) { |
|
300 switch (origin) { |
|
301 case SEEK_SET: |
|
302 fseek (fp[fc], 0, SEEK_SET); |
|
303 ulaPosition[fc] = 0; |
|
304 break; |
|
305 case SEEK_CUR: |
|
306 break; |
|
307 case SEEK_END: |
|
308 printf ("ScriptReader::Seek: SEEK_END not yet supported.\n"); |
|
309 break; |
|
310 } |
|
311 |
|
312 for (unsigned int i = 0; i < n+1; i++) |
|
313 Next(); |
|
314 } |
|
315 |
|
316 // ============================================================================ |
|
317 void Scanner::MustNext (const char* c) { |
|
318 if (!Next()) { |
|
319 if (strlen (c)) |
|
320 ParserError ("expected `%s`, reached end of file instead\n", c); |
|
321 else |
|
322 ParserError ("expected a token, reached end of file instead\n"); |
|
323 } |
|
324 |
|
325 if (strlen (c)) |
|
326 MustThis (c); |
|
327 } |
|
328 |
|
329 // ============================================================================ |
|
330 void Scanner::MustThis (const char* c) { |
|
331 if (token.compare (c) != 0) |
|
332 ParserError ("expected `%s`, got `%s` instead", c, token.chars()); |
|
333 } |
|
334 |
|
335 // ============================================================================ |
|
336 void Scanner::ParserError (const char* message, ...) { |
|
337 PERFORM_FORMAT (message, outmessage); |
|
338 ParserMessage ("\nError: ", outmessage); |
|
339 exit (1); |
|
340 } |
|
341 |
|
342 // ============================================================================ |
|
343 void Scanner::ParserWarning (const char* message, ...) { |
|
344 PERFORM_FORMAT (message, outmessage); |
|
345 ParserMessage ("Warning: ", outmessage); |
|
346 } |
|
347 |
|
348 // ============================================================================ |
|
349 void Scanner::ParserMessage (const char* header, char* message) { |
|
350 if (fc >= 0 && fc < MAX_FILESTACK) |
|
351 fprintf (stderr, "%s%s:%lu:%lu: %s\n", |
|
352 header, saFilePath[fc], ulaLineNumber[fc], ulaCurChar[fc], message); |
|
353 else |
|
354 fprintf (stderr, "%s%s\n", header, message); |
|
355 } |
|
356 |
|
357 // ============================================================================ |
|
358 // if gotquote == 1, the current token already holds the quotation mark. |
|
359 void Scanner::MustString (bool gotquote) { |
|
360 if (gotquote) |
|
361 MustThis ("\""); |
|
362 else |
|
363 MustNext ("\""); |
|
364 |
|
365 str string; |
|
366 // Keep reading characters until we find a terminating quote. |
|
367 while (1) { |
|
368 // can't end here! |
|
369 if (feof (fp[fc])) |
|
370 ParserError ("unterminated string"); |
|
371 |
|
372 char c = ReadChar (); |
|
373 if (c == '"') |
|
374 break; |
|
375 |
|
376 string += c; |
|
377 } |
|
378 |
|
379 token = string; |
|
380 } |
|
381 |
|
382 // ============================================================================ |
|
383 void Scanner::MustNumber (bool fromthis) { |
|
384 if (!fromthis) |
|
385 MustNext (); |
|
386 |
|
387 str num = token; |
|
388 if (!num.compare ("-")) { |
|
389 MustNext (); |
|
390 num += token; |
|
391 } |
|
392 |
|
393 // "true" and "false" are valid numbers |
|
394 if (!token.icompare ("true")) |
|
395 token = "1"; |
|
396 else if (!token.icompare ("false")) |
|
397 token = "0"; |
|
398 else { |
|
399 if (!token.isnumber()) |
|
400 ParserError ("expected a number, got `%s`", num.chars()); |
|
401 |
|
402 str check; |
|
403 check.appendformat ("%d", atoi (num.chars ())); |
|
404 if (token.compare (check) != 0) |
|
405 ParserWarning ("integer too large: %s -> %s", num.chars(), check.chars()); |
|
406 |
|
407 token = num; |
|
408 } |
|
409 } |
|