scanner.cpp

changeset 25
c74bb88f537d
parent 24
d2d4d0154338
child 26
83184d9407c7
equal deleted inserted replaced
24:d2d4d0154338 25:c74bb88f537d
1 /*
2 * botc source code
3 * Copyright (C) 2012 Santeri `azimuth` Piippo
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 * 3. Neither the name of the developer nor the names of its contributors may
15 * be used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 * 4. Redistributions in any form must be accompanied by information on how to
18 * obtain complete source code for the software and any accompanying
19 * software that uses the software. The source code must either be included
20 * in the distribution or be available for no more than the cost of
21 * distribution plus a nominal fee, and must be freely redistributable
22 * under reasonable conditions. For an executable file, complete source
23 * code means the source code for all modules it contains. It does not
24 * include source code for modules or files that typically accompany the
25 * major components of the operating system on which the executable file
26 * runs.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
32 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include "string.h"
44 #include "str.h"
45 #include "common.h"
46 #include "scanner.h"
47 #include "stdarg.h"
48
// Snapshots the line-tracking state of the current file (the "at newline"
// flag, the line number and the character index) into constants declared in
// the enclosing scope. Use at most once per scope, paired with
// RESTORE_POSITION.
#define STORE_POSITION \
	const bool bStoredAtNewLine = bAtNewLine; \
	const ulong ulStoredLineNumber = ulaLineNumber[fc]; \
	const ulong ulStoredCurChar = ulaCurChar[fc];

// Writes the values captured by STORE_POSITION back into the scanner.
#define RESTORE_POSITION \
	bAtNewLine = bStoredAtNewLine; \
	ulaLineNumber[fc] = ulStoredLineNumber; \
	ulaCurChar[fc] = ulStoredCurChar;
58
59 // ============================================================================
60 Scanner::Scanner (str path) {
61 token = "";
62 zPrevToken = "";
63 lPrevPos = 0;
64 fc = -1;
65
66 for (unsigned int u = 0; u < MAX_FILESTACK; u++)
67 fp[u] = NULL;
68
69 OpenFile (path);
70 dCommentMode = 0;
71 }
72
73 // ============================================================================
74 Scanner::~Scanner () {
75 // If comment mode is 2 by the time the file ended, the
76 // comment was left unterminated. 1 is no problem, since
77 // it's terminated by newlines anyway.
78 if (dCommentMode == 2)
79 ParserError ("unterminated `/*`-style comment");
80
81 for (unsigned int u = 0; u < MAX_FILESTACK; u++) {
82 if (fp[u]) {
83 ParserWarning ("file idx %u remained open after parsing", u);
84 CloseFile (u);
85 }
86 }
87 }
88
89 // ============================================================================
90 // Opens a file and pushes its pointer to stack
91 void Scanner::OpenFile (str path) {
92 if (fc+1 >= MAX_FILESTACK)
93 ParserError ("supposed to open file `%s` but file stack is full! do you have recursive `#include` directives?",
94 path.chars());
95
96 // Save the position first.
97 if (fc != -1) {
98 laSavedPos[fc] = ftell (fp[fc]);
99 }
100
101 fc++;
102
103 fp[fc] = fopen (path.chars(), "r");
104 if (!fp[fc]) {
105 ParserError ("couldn't open %s for reading!\n", path.chars ());
106 exit (1);
107 }
108
109 fseek (fp[fc], 0, SEEK_SET);
110 saFilePath[fc] = path.chars();
111 ulaLineNumber[fc] = 1;
112 ulaCurChar[fc] = 1;
113 ulaPosition[fc] = 0;
114 bAtNewLine = 0;
115 }
116
117 // ============================================================================
118 // Closes the current file
119 void Scanner::CloseFile (unsigned int u) {
120 if (u >= MAX_FILESTACK)
121 u = fc;
122
123 if (!fp[u])
124 return;
125
126 fclose (fp[u]);
127 fp[u] = NULL;
128 fc--;
129
130 if (fc != -1)
131 fseek (fp[fc], laSavedPos[fc], SEEK_SET);
132 }
133
134 // ============================================================================
135 char Scanner::ReadChar () {
136 if (feof (fp[fc]))
137 return 0;
138
139 char c;
140 if (!fread (&c, 1, 1, fp[fc]))
141 return 0;
142
143 // We're at a newline, thus next char read will begin the next line
144 if (bAtNewLine) {
145 bAtNewLine = false;
146 ulaLineNumber[fc]++;
147 ulaCurChar[fc] = 0; // gets incremented to 1
148 }
149
150 if (c == '\n')
151 bAtNewLine = true;
152
153 ulaCurChar[fc]++;
154 return c;
155 }
156
157 // ============================================================================
158 // Peeks the next character
159 char Scanner::PeekChar (int offset) {
160 // Store current position
161 long curpos = ftell (fp[fc]);
162 STORE_POSITION
163
164 // Forward by offset
165 fseek (fp[fc], offset, SEEK_CUR);
166
167 // Read the character
168 char* c = (char*)malloc (sizeof (char));
169
170 if (!fread (c, sizeof (char), 1, fp[fc])) {
171 fseek (fp[fc], curpos, SEEK_SET);
172 return 0;
173 }
174
175 // Rewind back
176 fseek (fp[fc], curpos, SEEK_SET);
177 RESTORE_POSITION
178
179 return c[0];
180 }
181
182 // ============================================================================
183 // Read a token from the file buffer. Returns true if token was found, false if not.
184 bool Scanner::Next (bool peek) {
185 lPrevPos = ftell (fp[fc]);
186 str tmp = "";
187
188 while (1) {
189 // Check end-of-file
190 if (feof (fp[fc])) {
191 // If we're just peeking, we shouldn't
192 // actually close anything..
193 if (peek)
194 break;
195
196 CloseFile ();
197 if (fc == -1)
198 break;
199 }
200
201 // Check if the next token possibly starts a comment.
202 if (PeekChar () == '/' && !tmp.len()) {
203 char c2 = PeekChar (1);
204 // C++-style comment
205 if (c2 == '/')
206 dCommentMode = 1;
207 else if (c2 == '*')
208 dCommentMode = 2;
209
210 // We don't need to actually read in the
211 // comment characters, since they will get
212 // ignored due to comment mode anyway.
213 }
214
215 c = ReadChar ();
216
217 // If this is a comment we're reading, check if this character
218 // gets the comment terminated, otherwise ignore it.
219 if (dCommentMode > 0) {
220 if (dCommentMode == 1 && c == '\n') {
221 // C++-style comments are terminated by a newline
222 dCommentMode = 0;
223 continue;
224 } else if (dCommentMode == 2 && c == '*') {
225 // C-style comments are terminated by a `*/`
226 if (PeekChar() == '/') {
227 dCommentMode = 0;
228 ReadChar ();
229 }
230 }
231
232 // Otherwise, ignore it.
233 continue;
234 }
235
236 // Non-alphanumber characters (sans underscore) break the word too.
237 // If there was prior data, the delimeter pushes the cursor back so
238 // that the next character will be the same delimeter. If there isn't,
239 // the delimeter itself is included (and thus becomes a token itself.)
240 if ((c >= 33 && c <= 47) ||
241 (c >= 58 && c <= 64) ||
242 (c >= 91 && c <= 96 && c != '_') ||
243 (c >= 123 && c <= 126)) {
244 if (tmp.len())
245 fseek (fp[fc], ftell (fp[fc]) - 1, SEEK_SET);
246 else
247 tmp += c;
248 break;
249 }
250
251 if (c <= 32 || c >= 127) {
252 // Don't break if we haven't gathered anything yet.
253 if (tmp.len())
254 break;
255 } else {
256 tmp += c;
257 }
258 }
259
260 // If we got nothing here, read failed. This should
261 // only happen in the case of EOF.
262 if (!tmp.len()) {
263 token = "";
264 return false;
265 }
266
267 ulaPosition[fc]++;
268 zPrevToken = token;
269 token = tmp;
270 return true;
271 }
272
273 // ============================================================================
274 // Returns the next token without advancing the cursor.
275 str Scanner::PeekNext (int offset) {
276 // Store current information
277 str storedtoken = token;
278 int cpos = ftell (fp[fc]);
279 STORE_POSITION
280
281 // Advance on the token.
282 while (offset >= 0) {
283 if (!Next (true))
284 return "";
285 offset--;
286 }
287
288 str tmp = token;
289
290 // Restore position
291 fseek (fp[fc], cpos, SEEK_SET);
292 ulaPosition[fc]--;
293 token = storedtoken;
294 RESTORE_POSITION
295 return tmp;
296 }
297
298 // ============================================================================
299 void Scanner::Seek (unsigned int n, int origin) {
300 switch (origin) {
301 case SEEK_SET:
302 fseek (fp[fc], 0, SEEK_SET);
303 ulaPosition[fc] = 0;
304 break;
305 case SEEK_CUR:
306 break;
307 case SEEK_END:
308 printf ("ScriptReader::Seek: SEEK_END not yet supported.\n");
309 break;
310 }
311
312 for (unsigned int i = 0; i < n+1; i++)
313 Next();
314 }
315
316 // ============================================================================
317 void Scanner::MustNext (const char* c) {
318 if (!Next()) {
319 if (strlen (c))
320 ParserError ("expected `%s`, reached end of file instead\n", c);
321 else
322 ParserError ("expected a token, reached end of file instead\n");
323 }
324
325 if (strlen (c))
326 MustThis (c);
327 }
328
329 // ============================================================================
330 void Scanner::MustThis (const char* c) {
331 if (token.compare (c) != 0)
332 ParserError ("expected `%s`, got `%s` instead", c, token.chars());
333 }
334
335 // ============================================================================
336 void Scanner::ParserError (const char* message, ...) {
337 PERFORM_FORMAT (message, outmessage);
338 ParserMessage ("\nError: ", outmessage);
339 exit (1);
340 }
341
342 // ============================================================================
343 void Scanner::ParserWarning (const char* message, ...) {
344 PERFORM_FORMAT (message, outmessage);
345 ParserMessage ("Warning: ", outmessage);
346 }
347
348 // ============================================================================
349 void Scanner::ParserMessage (const char* header, char* message) {
350 if (fc >= 0 && fc < MAX_FILESTACK)
351 fprintf (stderr, "%s%s:%lu:%lu: %s\n",
352 header, saFilePath[fc], ulaLineNumber[fc], ulaCurChar[fc], message);
353 else
354 fprintf (stderr, "%s%s\n", header, message);
355 }
356
357 // ============================================================================
358 // if gotquote == 1, the current token already holds the quotation mark.
359 void Scanner::MustString (bool gotquote) {
360 if (gotquote)
361 MustThis ("\"");
362 else
363 MustNext ("\"");
364
365 str string;
366 // Keep reading characters until we find a terminating quote.
367 while (1) {
368 // can't end here!
369 if (feof (fp[fc]))
370 ParserError ("unterminated string");
371
372 char c = ReadChar ();
373 if (c == '"')
374 break;
375
376 string += c;
377 }
378
379 token = string;
380 }
381
382 // ============================================================================
383 void Scanner::MustNumber (bool fromthis) {
384 if (!fromthis)
385 MustNext ();
386
387 str num = token;
388 if (!num.compare ("-")) {
389 MustNext ();
390 num += token;
391 }
392
393 // "true" and "false" are valid numbers
394 if (!token.icompare ("true"))
395 token = "1";
396 else if (!token.icompare ("false"))
397 token = "0";
398 else {
399 if (!token.isnumber())
400 ParserError ("expected a number, got `%s`", num.chars());
401
402 str check;
403 check.appendformat ("%d", atoi (num.chars ()));
404 if (token.compare (check) != 0)
405 ParserWarning ("integer too large: %s -> %s", num.chars(), check.chars());
406
407 token = num;
408 }
409 }

mercurial