src/LexerScanner.cc

changeset 88
5def6ff8b466
child 91
427eb377d53e
equal deleted inserted replaced
87:8f65914e7046 88:5def6ff8b466
1 /*
2 Copyright 2012-2014 Santeri Piippo
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15 derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <cstdio>
30 #include <cstdlib>
31 #include <cassert>
32 #include <cstring>
33 #include <string>
34 #include "LexerScanner.h"
35 #include "Lexer.h"
36
37 static const String gTokenStrings[] =
38 {
39 "==",
40 "[]",
41 "+=",
42 "-=",
43 "*=",
44 "/=",
45 "%=",
46 "'",
47 "$",
48 "(",
49 ")",
50 "[",
51 "]",
52 "{",
53 "}",
54 "=",
55 "+",
56 "-",
57 "*",
58 "/",
59 "%",
60 ",",
61 "<",
62 ">",
63 ".",
64 ":",
65 ";",
66 "#",
67 "!",
68 "->",
69 "bool",
70 "break",
71 "case",
72 "continue",
73 "const",
74 "default",
75 "do",
76 "else",
77 "event",
78 "eventdef",
79 "for",
80 "funcdef",
81 "goto",
82 "if",
83 "int",
84 "mainloop",
85 "onenter",
86 "onexit",
87 "state",
88 "switch",
89 "str",
90 "void",
91 "while",
92 "enum",
93 "func",
94 "return",
95 };
96
97 static_assert (countof (gTokenStrings) == (int) tkLastNamedToken + 1,
98 "Count of gTokenStrings is not the same as the amount of named token identifiers.");
99
100 // =============================================================================
101 //
102 LexerScanner::LexerScanner (FILE* fp) :
103 mLine (1)
104 {
105 long fsize, bytes;
106
107 fseek (fp, 0l, SEEK_END);
108 fsize = ftell (fp);
109 rewind (fp);
110 mData = new char[fsize];
111 mPosition = mLineBreakPosition = &mData[0];
112 bytes = fread (mData, 1, fsize, fp);
113 assert (bytes >= fsize);
114 }
115
116 // =============================================================================
117 //
118 LexerScanner::~LexerScanner()
119 {
120 delete mData;
121 }
122
123 // =============================================================================
124 //
125 bool LexerScanner::CheckString (const char* c, int flags)
126 {
127 bool r = strncmp (mPosition, c, strlen (c)) == 0;
128
129 // There is to be a non-symbol character after words
130 if (r && (flags & FCheckWord) && IsSymbolChar (mPosition[strlen (c)], true))
131 r = false;
132
133 // Advance the cursor unless we want to just peek
134 if (r && !(flags & FCheckPeek))
135 mPosition += strlen (c);
136
137 return r;
138 }
139
140 // =============================================================================
141 //
142 bool LexerScanner::GetNextToken()
143 {
144 mTokenText = "";
145
146 while (isspace (*mPosition))
147 Skip();
148
149 // Check for comments
150 if (strncmp (mPosition, "//", 2) == 0)
151 {
152 mPosition += 2;
153
154 while (*mPosition != '\n')
155 Skip();
156
157 return GetNextToken();
158 }
159 elif (strncmp (mPosition, "/*", 2) == 0)
160 {
161 Skip (2); // skip the start symbols
162
163 while (strncmp (mPosition, "*/", 2) != 0)
164 Skip();
165
166 Skip (2); // skip the end symbols
167 return GetNextToken();
168 }
169
170 if (*mPosition == '\0')
171 return false;
172
173 // Check tokens
174 for (int i = 0; i < countof (gTokenStrings); ++i)
175 {
176 int flags = 0;
177
178 if (i >= tkFirstNamedToken)
179 flags |= FCheckWord;
180
181 if (CheckString (gTokenStrings[i], flags))
182 {
183 mTokenText = gTokenStrings[i];
184 mTokenType = (EToken) i;
185 return true;
186 }
187 }
188
189 // Check and parse string
190 if (*mPosition == '\"')
191 {
192 mPosition++;
193
194 while (*mPosition != '\"')
195 {
196 if (!*mPosition)
197 Error ("unterminated string");
198
199 if (CheckString ("\\n"))
200 {
201 mTokenText += '\n';
202 continue;
203 }
204 elif (CheckString ("\\t"))
205 {
206 mTokenText += '\t';
207 continue;
208 }
209 elif (CheckString ("\\\""))
210 {
211 mTokenText += '"';
212 continue;
213 }
214
215 mTokenText += *mPosition++;
216 }
217
218 mTokenType = tkString;
219 Skip(); // skip the final quote
220 return true;
221 }
222
223 if (isdigit (*mPosition))
224 {
225 while (isdigit (*mPosition))
226 mTokenText += *mPosition++;
227
228 mTokenType = tkNumber;
229 return true;
230 }
231
232 if (IsSymbolChar (*mPosition, false))
233 {
234 mTokenType = tkSymbol;
235
236 do
237 {
238 if (!IsSymbolChar (*mPosition, true))
239 break;
240
241 mTokenText += *mPosition++;
242 } while (*mPosition != '\0');
243
244 return true;
245 }
246
247 Error ("unknown character \"%1\"", *mPosition);
248 return false;
249 }
250
251 // =============================================================================
252 //
253 void LexerScanner::Skip()
254 {
255 if (*mPosition == '\n')
256 {
257 mLine++;
258 mLineBreakPosition = mPosition;
259 }
260
261 mPosition++;
262 }
263
264 // =============================================================================
265 //
266 void LexerScanner::Skip (int chars)
267 {
268 for (int i = 0; i < chars; ++i)
269 Skip();
270 }
271
272 // =============================================================================
273 //
274 String LexerScanner::GetTokenString (EToken a)
275 {
276 assert ((int) a <= tkLastNamedToken);
277 return gTokenStrings[a];
278 }
279
280 // =============================================================================
281 //
282 String LexerScanner::ReadLine()
283 {
284 String line;
285
286 while (*mPosition != '\n')
287 line += *(mPosition++);
288
289 return line;
290 }

mercurial