src/lexerScanner.cpp

changeset 119
bdf8d46c145f
child 126
c5858c0cd476
equal deleted inserted replaced
118:e3361cf7cbf4 119:bdf8d46c145f
1 /*
2 Copyright 2012-2014 Santeri Piippo
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15 derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <cstdio>
30 #include <cstdlib>
31 #include <cassert>
32 #include <cstring>
33 #include <string>
34 #include "lexerScanner.h"
35 #include "lexer.h"
36
37 static const String gTokenStrings[] =
38 {
39 "<<=",
40 ">>=",
41 "==",
42 "!=",
43 "+=",
44 "-=",
45 "*=",
46 "/=",
47 "%=",
48 "<<",
49 ">>",
50 ">=",
51 "<=",
52 "&&",
53 "||",
54 "++",
55 "--",
56 "'",
57 "$",
58 "(",
59 ")",
60 "[",
61 "]",
62 "{",
63 "}",
64 "=",
65 "+",
66 "-",
67 "*",
68 "/",
69 "%",
70 ",",
71 "<",
72 ">",
73 ".",
74 ":",
75 ";",
76 "#",
77 "!",
78 "&",
79 "|",
80 "^",
81 "?",
82 "->",
83 "bool",
84 "break",
85 "case",
86 "continue",
87 "const",
88 "constexpr",
89 "default",
90 "do",
91 "else",
92 "event",
93 "eventdef",
94 "for",
95 "funcdef",
96 "if",
97 "int",
98 "mainloop",
99 "onenter",
100 "onexit",
101 "state",
102 "switch",
103 "str",
104 "using",
105 "var",
106 "void",
107 "while",
108 "true",
109 "false",
110 "enum",
111 "func",
112 "return",
113 };
114
115 static_assert (countof (gTokenStrings) == (int)gLastNamedToken + 1,
116 "Count of gTokenStrings is not the same as the amount of named token identifiers.");
117
118 // =============================================================================
119 //
120 LexerScanner::LexerScanner (FILE* fp) :
121 m_line (1)
122 {
123 long fsize, bytes;
124
125 fseek (fp, 0l, SEEK_END);
126 fsize = ftell (fp);
127 rewind (fp);
128 m_data = new char[fsize];
129 m_position = m_lineBreakPosition = &m_data[0];
130 bytes = fread (m_data, 1, fsize, fp);
131 assert (bytes >= fsize);
132 }
133
134 // =============================================================================
135 //
136 LexerScanner::~LexerScanner()
137 {
138 delete m_data;
139 }
140
141 // =============================================================================
142 //
143 bool LexerScanner::checkString (const char* c, int flags)
144 {
145 bool r = strncmp (m_position, c, strlen (c)) == 0;
146
147 // There is to be a non-symbol character after words
148 if (r && (flags & FCheckWord) && isSymbolChar (m_position[strlen (c)], true))
149 r = false;
150
151 // Advance the cursor unless we want to just peek
152 if (r && !(flags & FCheckPeek))
153 m_position += strlen (c);
154
155 return r;
156 }
157
158 // =============================================================================
159 //
160 bool LexerScanner::getNextToken()
161 {
162 m_tokenText = "";
163
164 while (isspace (*m_position))
165 skip();
166
167 // Check for comments
168 if (strncmp (m_position, "//", 2) == 0)
169 {
170 m_position += 2;
171
172 while (*m_position != '\n')
173 skip();
174
175 return getNextToken();
176 }
177 elif (strncmp (m_position, "/*", 2) == 0)
178 {
179 skip (2); // skip the start symbols
180
181 while (strncmp (m_position, "*/", 2) != 0)
182 skip();
183
184 skip (2); // skip the end symbols
185 return getNextToken();
186 }
187
188 if (*m_position == '\0')
189 return false;
190
191 // Check tokens
192 for (int i = 0; i < countof (gTokenStrings); ++i)
193 {
194 int flags = 0;
195
196 if (i >= gFirstNamedToken)
197 flags |= FCheckWord;
198
199 if (checkString (gTokenStrings[i], flags))
200 {
201 m_tokenText = gTokenStrings[i];
202 m_tokenType = (ETokenType) i;
203 return true;
204 }
205 }
206
207 // Check and parse string
208 if (*m_position == '\"')
209 {
210 m_position++;
211
212 while (*m_position != '\"')
213 {
214 if (!*m_position)
215 error ("unterminated string");
216
217 if (checkString ("\\n"))
218 {
219 m_tokenText += '\n';
220 continue;
221 }
222 elif (checkString ("\\t"))
223 {
224 m_tokenText += '\t';
225 continue;
226 }
227 elif (checkString ("\\\""))
228 {
229 m_tokenText += '"';
230 continue;
231 }
232
233 m_tokenText += *m_position++;
234 }
235
236 m_tokenType =TK_String;
237 skip(); // skip the final quote
238 return true;
239 }
240
241 if (isdigit (*m_position))
242 {
243 while (isdigit (*m_position))
244 m_tokenText += *m_position++;
245
246 m_tokenType =TK_Number;
247 return true;
248 }
249
250 if (isSymbolChar (*m_position, false))
251 {
252 m_tokenType =TK_Symbol;
253
254 do
255 {
256 if (!isSymbolChar (*m_position, true))
257 break;
258
259 m_tokenText += *m_position++;
260 } while (*m_position != '\0');
261
262 return true;
263 }
264
265 error ("unknown character \"%1\"", *m_position);
266 return false;
267 }
268
269 // =============================================================================
270 //
271 void LexerScanner::skip()
272 {
273 if (*m_position == '\n')
274 {
275 m_line++;
276 m_lineBreakPosition = m_position;
277 }
278
279 m_position++;
280 }
281
282 // =============================================================================
283 //
284 void LexerScanner::skip (int chars)
285 {
286 for (int i = 0; i < chars; ++i)
287 skip();
288 }
289
290 // =============================================================================
291 //
292 String LexerScanner::getTokenString (ETokenType a)
293 {
294 assert ((int) a <= gLastNamedToken);
295 return gTokenStrings[a];
296 }
297
298 // =============================================================================
299 //
300 String LexerScanner::readLine()
301 {
302 String line;
303
304 while (*m_position != '\n')
305 line += *(m_position++);
306
307 return line;
308 }

mercurial