Fri, 15 Mar 2013 20:11:18 +0200
Initial commit
0 | 1 | /* |
2 | * botc source code | |
3 | * Copyright (C) 2012 Santeri `azimuth` Piippo | |
4 | * All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions are met: | |
8 | * | |
9 | * 1. Redistributions of source code must retain the above copyright notice, | |
10 | * this list of conditions and the following disclaimer. | |
11 | * 2. Redistributions in binary form must reproduce the above copyright notice, | |
12 | * this list of conditions and the following disclaimer in the documentation | |
13 | * and/or other materials provided with the distribution. | |
14 | * 3. Neither the name of the developer nor the names of its contributors may | |
15 | * be used to endorse or promote products derived from this software without | |
16 | * specific prior written permission. | |
17 | * 4. Redistributions in any form must be accompanied by information on how to | |
18 | * obtain complete source code for the software and any accompanying | |
19 | * software that uses the software. The source code must either be included | |
20 | * in the distribution or be available for no more than the cost of | |
21 | * distribution plus a nominal fee, and must be freely redistributable | |
22 | * under reasonable conditions. For an executable file, complete source | |
23 | * code means the source code for all modules it contains. It does not | |
24 | * include source code for modules or files that typically accompany the | |
25 | * major components of the operating system on which the executable file | |
26 | * runs. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
29 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | |
32 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
33 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
34 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
35 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
36 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
37 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
38 | * POSSIBILITY OF SUCH DAMAGE. | |
39 | */ | |
40 | ||
41 | #include <stdio.h> | |
42 | #include <stdlib.h> | |
43 | #include "string.h" | |
44 | #include "str.h" | |
45 | #include "common.h" | |
46 | #include "scanner.h" | |
47 | #include "stdarg.h" | |
48 | ||
// Snapshots the scanner's line-tracking state -- bAtNewLine plus the line
// number and column counter of the current file `fc` -- into const locals
// declared in the enclosing scope. Because it declares variables it can be
// used at most once per scope, and RESTORE_POSITION must be used in that same
// scope to write the values back.
#define STORE_POSITION \
	const bool _atnewline = bAtNewLine; \
	const ulong ulStoredLineNumber = ulaLineNumber[fc]; \
	const ulong ulStoredCurChar = ulaCurChar[fc];
53 | ||
// Writes the values captured by STORE_POSITION back into bAtNewLine and into
// the line number / column counter of the current file `fc`. Only valid in a
// scope where STORE_POSITION has been expanded earlier.
#define RESTORE_POSITION \
	bAtNewLine = _atnewline; \
	ulaLineNumber[fc] = ulStoredLineNumber; \
	ulaCurChar[fc] = ulStoredCurChar;
58 | ||
59 | // ============================================================================ | |
60 | Scanner::Scanner (str path) { | |
61 | token = ""; | |
62 | zPrevToken = ""; | |
63 | lPrevPos = 0; | |
64 | fc = -1; | |
65 | ||
66 | for (unsigned int u = 0; u < MAX_FILESTACK; u++) | |
67 | fp[u] = NULL; | |
68 | ||
69 | OpenFile (path); | |
70 | dCommentMode = 0; | |
71 | } | |
72 | ||
73 | // ============================================================================ | |
74 | Scanner::~Scanner () { | |
75 | // If comment mode is 2 by the time the file ended, the | |
76 | // comment was left unterminated. 1 is no problem, since | |
77 | // it's terminated by newlines anyway. | |
78 | if (dCommentMode == 2) | |
79 | ParserError ("unterminated `/*`-style comment"); | |
80 | ||
81 | for (unsigned int u = 0; u < MAX_FILESTACK; u++) { | |
82 | if (fp[u]) { | |
83 | ParserWarning ("file idx %u remained open after parsing", u); | |
84 | CloseFile (u); | |
85 | } | |
86 | } | |
87 | } | |
88 | ||
89 | // ============================================================================ | |
90 | // Opens a file and pushes its pointer to stack | |
91 | void Scanner::OpenFile (str path) { | |
92 | if (fc+1 >= MAX_FILESTACK) | |
93 | ParserError ("supposed to open file `%s` but file stack is full! do you have recursive `#include` directives?", | |
94 | path.chars()); | |
95 | ||
96 | // Save the position first. | |
97 | if (fc != -1) { | |
98 | laSavedPos[fc] = ftell (fp[fc]); | |
99 | } | |
100 | ||
101 | fc++; | |
102 | ||
103 | fp[fc] = fopen (path.chars(), "r"); | |
104 | if (!fp[fc]) { | |
105 | ParserError ("couldn't open %s for reading!\n", path.chars ()); | |
106 | exit (1); | |
107 | } | |
108 | ||
109 | fseek (fp[fc], 0, SEEK_SET); | |
110 | saFilePath[fc] = path.chars(); | |
111 | ulaLineNumber[fc] = 1; | |
112 | ulaCurChar[fc] = 1; | |
113 | ulaPosition[fc] = 0; | |
114 | bAtNewLine = 0; | |
115 | } | |
116 | ||
117 | // ============================================================================ | |
118 | // Closes the current file | |
119 | void Scanner::CloseFile (unsigned int u) { | |
120 | if (u >= MAX_FILESTACK) | |
121 | u = fc; | |
122 | ||
123 | if (!fp[u]) | |
124 | return; | |
125 | ||
126 | fclose (fp[u]); | |
127 | fp[u] = NULL; | |
128 | fc--; | |
129 | ||
130 | if (fc != -1) | |
131 | fseek (fp[fc], laSavedPos[fc], SEEK_SET); | |
132 | } | |
133 | ||
134 | // ============================================================================ | |
135 | char Scanner::ReadChar () { | |
136 | if (feof (fp[fc])) | |
137 | return 0; | |
138 | ||
139 | char c; | |
140 | if (!fread (&c, 1, 1, fp[fc])) | |
141 | return 0; | |
142 | ||
143 | // We're at a newline, thus next char read will begin the next line | |
144 | if (bAtNewLine) { | |
145 | bAtNewLine = false; | |
146 | ulaLineNumber[fc]++; | |
147 | ulaCurChar[fc] = 0; // gets incremented to 1 | |
148 | } | |
149 | ||
150 | if (c == '\n') | |
151 | bAtNewLine = true; | |
152 | ||
153 | ulaCurChar[fc]++; | |
154 | return c; | |
155 | } | |
156 | ||
157 | // ============================================================================ | |
158 | // Peeks the next character | |
159 | char Scanner::PeekChar (int offset) { | |
160 | // Store current position | |
161 | long curpos = ftell (fp[fc]); | |
162 | STORE_POSITION | |
163 | ||
164 | // Forward by offset | |
165 | fseek (fp[fc], offset, SEEK_CUR); | |
166 | ||
167 | // Read the character | |
168 | char* c = (char*)malloc (sizeof (char)); | |
169 | ||
170 | if (!fread (c, sizeof (char), 1, fp[fc])) { | |
171 | fseek (fp[fc], curpos, SEEK_SET); | |
172 | return 0; | |
173 | } | |
174 | ||
175 | // Rewind back | |
176 | fseek (fp[fc], curpos, SEEK_SET); | |
177 | RESTORE_POSITION | |
178 | ||
179 | return c[0]; | |
180 | } | |
181 | ||
182 | // ============================================================================ | |
183 | // Read a token from the file buffer. Returns true if token was found, false if not. | |
184 | bool Scanner::Next (bool peek) { | |
185 | lPrevPos = ftell (fp[fc]); | |
186 | str tmp = ""; | |
187 | ||
188 | while (1) { | |
189 | // Check end-of-file | |
190 | if (feof (fp[fc])) { | |
191 | // If we're just peeking, we shouldn't | |
192 | // actually close anything.. | |
193 | if (peek) | |
194 | break; | |
195 | ||
196 | CloseFile (); | |
197 | if (fc == -1) | |
198 | break; | |
199 | } | |
200 | ||
201 | // Check if the next token possibly starts a comment. | |
202 | if (PeekChar () == '/' && !tmp.len()) { | |
203 | char c2 = PeekChar (1); | |
204 | // C++-style comment | |
205 | if (c2 == '/') | |
206 | dCommentMode = 1; | |
207 | else if (c2 == '*') | |
208 | dCommentMode = 2; | |
209 | ||
210 | // We don't need to actually read in the | |
211 | // comment characters, since they will get | |
212 | // ignored due to comment mode anyway. | |
213 | } | |
214 | ||
215 | c = ReadChar (); | |
216 | ||
217 | // If this is a comment we're reading, check if this character | |
218 | // gets the comment terminated, otherwise ignore it. | |
219 | if (dCommentMode > 0) { | |
220 | if (dCommentMode == 1 && c == '\n') { | |
221 | // C++-style comments are terminated by a newline | |
222 | dCommentMode = 0; | |
223 | continue; | |
224 | } else if (dCommentMode == 2 && c == '*') { | |
225 | // C-style comments are terminated by a `*/` | |
226 | if (PeekChar() == '/') { | |
227 | dCommentMode = 0; | |
228 | ReadChar (); | |
229 | } | |
230 | } | |
231 | ||
232 | // Otherwise, ignore it. | |
233 | continue; | |
234 | } | |
235 | ||
236 | // Non-alphanumber characters (sans underscore) break the word too. | |
237 | // If there was prior data, the delimeter pushes the cursor back so | |
238 | // that the next character will be the same delimeter. If there isn't, | |
239 | // the delimeter itself is included (and thus becomes a token itself.) | |
240 | if ((c >= 33 && c <= 47) || | |
241 | (c >= 58 && c <= 64) || | |
242 | (c >= 91 && c <= 96 && c != '_') || | |
243 | (c >= 123 && c <= 126)) { | |
244 | if (tmp.len()) | |
245 | fseek (fp[fc], ftell (fp[fc]) - 1, SEEK_SET); | |
246 | else | |
247 | tmp += c; | |
248 | break; | |
249 | } | |
250 | ||
251 | if (c <= 32 || c >= 127) { | |
252 | // Don't break if we haven't gathered anything yet. | |
253 | if (tmp.len()) | |
254 | break; | |
255 | } else { | |
256 | tmp += c; | |
257 | } | |
258 | } | |
259 | ||
260 | // If we got nothing here, read failed. This should | |
261 | // only happen in the case of EOF. | |
262 | if (!tmp.len()) { | |
263 | token = ""; | |
264 | return false; | |
265 | } | |
266 | ||
267 | ulaPosition[fc]++; | |
268 | zPrevToken = token; | |
269 | token = tmp; | |
270 | return true; | |
271 | } | |
272 | ||
273 | // ============================================================================ | |
274 | // Returns the next token without advancing the cursor. | |
275 | str Scanner::PeekNext (int offset) { | |
276 | // Store current information | |
277 | str storedtoken = token; | |
278 | int cpos = ftell (fp[fc]); | |
279 | STORE_POSITION | |
280 | ||
281 | // Advance on the token. | |
282 | while (offset >= 0) { | |
283 | if (!Next (true)) | |
284 | return ""; | |
285 | offset--; | |
286 | } | |
287 | ||
288 | str tmp = token; | |
289 | ||
290 | // Restore position | |
291 | fseek (fp[fc], cpos, SEEK_SET); | |
292 | ulaPosition[fc]--; | |
293 | token = storedtoken; | |
294 | RESTORE_POSITION | |
295 | return tmp; | |
296 | } | |
297 | ||
298 | // ============================================================================ | |
299 | void Scanner::Seek (unsigned int n, int origin) { | |
300 | switch (origin) { | |
301 | case SEEK_SET: | |
302 | fseek (fp[fc], 0, SEEK_SET); | |
303 | ulaPosition[fc] = 0; | |
304 | break; | |
305 | case SEEK_CUR: | |
306 | break; | |
307 | case SEEK_END: | |
308 | printf ("ScriptReader::Seek: SEEK_END not yet supported.\n"); | |
309 | break; | |
310 | } | |
311 | ||
312 | for (unsigned int i = 0; i < n+1; i++) | |
313 | Next(); | |
314 | } | |
315 | ||
316 | // ============================================================================ | |
317 | void Scanner::MustNext (const char* c) { | |
318 | if (!Next()) { | |
319 | if (strlen (c)) | |
320 | ParserError ("expected `%s`, reached end of file instead\n", c); | |
321 | else | |
322 | ParserError ("expected a token, reached end of file instead\n"); | |
323 | } | |
324 | ||
325 | if (strlen (c)) | |
326 | MustThis (c); | |
327 | } | |
328 | ||
329 | // ============================================================================ | |
330 | void Scanner::MustThis (const char* c) { | |
331 | if (token.compare (c) != 0) | |
332 | ParserError ("expected `%s`, got `%s` instead", c, token.chars()); | |
333 | } | |
334 | ||
335 | // ============================================================================ | |
// Reports a fatal parse error and terminates the program with exit status 1.
// `message` is a printf-style format string followed by its arguments.
void Scanner::ParserError (const char* message, ...) {
	// PERFORM_FORMAT is defined outside this file; presumably it expands the
	// variadic arguments into a buffer named `outmessage` -- TODO confirm.
	PERFORM_FORMAT (message, outmessage);
	ParserMessage ("\nError: ", outmessage);
	exit (1);
}
341 | ||
342 | // ============================================================================ | |
// Reports a non-fatal parse warning; execution continues afterwards.
// `message` is a printf-style format string followed by its arguments.
void Scanner::ParserWarning (const char* message, ...) {
	// PERFORM_FORMAT is defined outside this file; presumably it expands the
	// variadic arguments into a buffer named `outmessage` -- TODO confirm
	// (including whether that buffer needs to be released).
	PERFORM_FORMAT (message, outmessage);
	ParserMessage ("Warning: ", outmessage);
}
347 | ||
348 | // ============================================================================ | |
349 | void Scanner::ParserMessage (const char* header, char* message) { | |
350 | if (fc >= 0 && fc < MAX_FILESTACK) | |
351 | fprintf (stderr, "%s%s:%lu:%lu: %s\n", | |
352 | header, saFilePath[fc], ulaLineNumber[fc], ulaCurChar[fc], message); | |
353 | else | |
354 | fprintf (stderr, "%s%s\n", header, message); | |
355 | } | |
356 | ||
357 | // ============================================================================ | |
358 | // if gotquote == 1, the current token already holds the quotation mark. | |
359 | void Scanner::MustString (bool gotquote) { | |
360 | if (gotquote) | |
361 | MustThis ("\""); | |
362 | else | |
363 | MustNext ("\""); | |
364 | ||
365 | str string; | |
366 | // Keep reading characters until we find a terminating quote. | |
367 | while (1) { | |
368 | // can't end here! | |
369 | if (feof (fp[fc])) | |
370 | ParserError ("unterminated string"); | |
371 | ||
372 | char c = ReadChar (); | |
373 | if (c == '"') | |
374 | break; | |
375 | ||
376 | string += c; | |
377 | } | |
378 | ||
379 | token = string; | |
380 | } | |
381 | ||
382 | // ============================================================================ | |
383 | void Scanner::MustNumber (bool fromthis) { | |
384 | if (!fromthis) | |
385 | MustNext (); | |
386 | ||
387 | str num = token; | |
388 | if (!num.compare ("-")) { | |
389 | MustNext (); | |
390 | num += token; | |
391 | } | |
392 | ||
393 | // "true" and "false" are valid numbers | |
394 | if (!token.icompare ("true")) | |
395 | token = "1"; | |
396 | else if (!token.icompare ("false")) | |
397 | token = "0"; | |
398 | else { | |
399 | if (!token.isnumber()) | |
400 | ParserError ("expected a number, got `%s`", num.chars()); | |
401 | ||
402 | str check; | |
403 | check.appendformat ("%d", atoi (num.chars ())); | |
404 | if (token.compare (check) != 0) | |
405 | ParserWarning ("integer too large: %s -> %s", num.chars(), check.chars()); | |
406 | ||
407 | token = num; | |
408 | } | |
409 | } |