|
1 /* |
|
2 Copyright 2012-2014 Santeri Piippo |
|
3 All rights reserved. |
|
4 |
|
5 Redistribution and use in source and binary forms, with or without |
|
6 modification, are permitted provided that the following conditions |
|
7 are met: |
|
8 |
|
9 1. Redistributions of source code must retain the above copyright |
|
10 notice, this list of conditions and the following disclaimer. |
|
11 2. Redistributions in binary form must reproduce the above copyright |
|
12 notice, this list of conditions and the following disclaimer in the |
|
13 documentation and/or other materials provided with the distribution. |
|
14 3. The name of the author may not be used to endorse or promote products |
|
15 derived from this software without specific prior written permission. |
|
16 |
|
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
|
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
|
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
|
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
|
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
|
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 |
|
29 #include <cstdio> |
|
30 #include <cstdlib> |
|
31 #include <cassert> |
|
32 #include <cstring> |
|
33 #include <string> |
|
34 #include "LexerScanner.h" |
|
35 #include "Lexer.h" |
|
36 |
|
37 static const String gTokenStrings[] = |
|
38 { |
|
39 "==", |
|
40 "[]", |
|
41 "+=", |
|
42 "-=", |
|
43 "*=", |
|
44 "/=", |
|
45 "%=", |
|
46 "'", |
|
47 "$", |
|
48 "(", |
|
49 ")", |
|
50 "[", |
|
51 "]", |
|
52 "{", |
|
53 "}", |
|
54 "=", |
|
55 "+", |
|
56 "-", |
|
57 "*", |
|
58 "/", |
|
59 "%", |
|
60 ",", |
|
61 "<", |
|
62 ">", |
|
63 ".", |
|
64 ":", |
|
65 ";", |
|
66 "#", |
|
67 "!", |
|
68 "->", |
|
69 "bool", |
|
70 "break", |
|
71 "case", |
|
72 "continue", |
|
73 "const", |
|
74 "default", |
|
75 "do", |
|
76 "else", |
|
77 "event", |
|
78 "eventdef", |
|
79 "for", |
|
80 "funcdef", |
|
81 "goto", |
|
82 "if", |
|
83 "int", |
|
84 "mainloop", |
|
85 "onenter", |
|
86 "onexit", |
|
87 "state", |
|
88 "switch", |
|
89 "str", |
|
90 "void", |
|
91 "while", |
|
92 "enum", |
|
93 "func", |
|
94 "return", |
|
95 }; |
|
96 |
|
97 static_assert (countof (gTokenStrings) == (int) tkLastNamedToken + 1, |
|
98 "Count of gTokenStrings is not the same as the amount of named token identifiers."); |
|
99 |
|
100 // ============================================================================= |
|
101 // |
|
102 LexerScanner::LexerScanner (FILE* fp) : |
|
103 mLine (1) |
|
104 { |
|
105 long fsize, bytes; |
|
106 |
|
107 fseek (fp, 0l, SEEK_END); |
|
108 fsize = ftell (fp); |
|
109 rewind (fp); |
|
110 mData = new char[fsize]; |
|
111 mPosition = mLineBreakPosition = &mData[0]; |
|
112 bytes = fread (mData, 1, fsize, fp); |
|
113 assert (bytes >= fsize); |
|
114 } |
|
115 |
|
116 // ============================================================================= |
|
117 // |
|
118 LexerScanner::~LexerScanner() |
|
119 { |
|
120 delete mData; |
|
121 } |
|
122 |
|
123 // ============================================================================= |
|
124 // |
|
125 bool LexerScanner::CheckString (const char* c, int flags) |
|
126 { |
|
127 bool r = strncmp (mPosition, c, strlen (c)) == 0; |
|
128 |
|
129 // There is to be a non-symbol character after words |
|
130 if (r && (flags & FCheckWord) && IsSymbolChar (mPosition[strlen (c)], true)) |
|
131 r = false; |
|
132 |
|
133 // Advance the cursor unless we want to just peek |
|
134 if (r && !(flags & FCheckPeek)) |
|
135 mPosition += strlen (c); |
|
136 |
|
137 return r; |
|
138 } |
|
139 |
|
140 // ============================================================================= |
|
141 // |
|
142 bool LexerScanner::GetNextToken() |
|
143 { |
|
144 mTokenText = ""; |
|
145 |
|
146 while (isspace (*mPosition)) |
|
147 Skip(); |
|
148 |
|
149 // Check for comments |
|
150 if (strncmp (mPosition, "//", 2) == 0) |
|
151 { |
|
152 mPosition += 2; |
|
153 |
|
154 while (*mPosition != '\n') |
|
155 Skip(); |
|
156 |
|
157 return GetNextToken(); |
|
158 } |
|
159 elif (strncmp (mPosition, "/*", 2) == 0) |
|
160 { |
|
161 Skip (2); // skip the start symbols |
|
162 |
|
163 while (strncmp (mPosition, "*/", 2) != 0) |
|
164 Skip(); |
|
165 |
|
166 Skip (2); // skip the end symbols |
|
167 return GetNextToken(); |
|
168 } |
|
169 |
|
170 if (*mPosition == '\0') |
|
171 return false; |
|
172 |
|
173 // Check tokens |
|
174 for (int i = 0; i < countof (gTokenStrings); ++i) |
|
175 { |
|
176 int flags = 0; |
|
177 |
|
178 if (i >= tkFirstNamedToken) |
|
179 flags |= FCheckWord; |
|
180 |
|
181 if (CheckString (gTokenStrings[i], flags)) |
|
182 { |
|
183 mTokenText = gTokenStrings[i]; |
|
184 mTokenType = (EToken) i; |
|
185 return true; |
|
186 } |
|
187 } |
|
188 |
|
189 // Check and parse string |
|
190 if (*mPosition == '\"') |
|
191 { |
|
192 mPosition++; |
|
193 |
|
194 while (*mPosition != '\"') |
|
195 { |
|
196 if (!*mPosition) |
|
197 Error ("unterminated string"); |
|
198 |
|
199 if (CheckString ("\\n")) |
|
200 { |
|
201 mTokenText += '\n'; |
|
202 continue; |
|
203 } |
|
204 elif (CheckString ("\\t")) |
|
205 { |
|
206 mTokenText += '\t'; |
|
207 continue; |
|
208 } |
|
209 elif (CheckString ("\\\"")) |
|
210 { |
|
211 mTokenText += '"'; |
|
212 continue; |
|
213 } |
|
214 |
|
215 mTokenText += *mPosition++; |
|
216 } |
|
217 |
|
218 mTokenType = tkString; |
|
219 Skip(); // skip the final quote |
|
220 return true; |
|
221 } |
|
222 |
|
223 if (isdigit (*mPosition)) |
|
224 { |
|
225 while (isdigit (*mPosition)) |
|
226 mTokenText += *mPosition++; |
|
227 |
|
228 mTokenType = tkNumber; |
|
229 return true; |
|
230 } |
|
231 |
|
232 if (IsSymbolChar (*mPosition, false)) |
|
233 { |
|
234 mTokenType = tkSymbol; |
|
235 |
|
236 do |
|
237 { |
|
238 if (!IsSymbolChar (*mPosition, true)) |
|
239 break; |
|
240 |
|
241 mTokenText += *mPosition++; |
|
242 } while (*mPosition != '\0'); |
|
243 |
|
244 return true; |
|
245 } |
|
246 |
|
247 Error ("unknown character \"%1\"", *mPosition); |
|
248 return false; |
|
249 } |
|
250 |
|
251 // ============================================================================= |
|
252 // |
|
253 void LexerScanner::Skip() |
|
254 { |
|
255 if (*mPosition == '\n') |
|
256 { |
|
257 mLine++; |
|
258 mLineBreakPosition = mPosition; |
|
259 } |
|
260 |
|
261 mPosition++; |
|
262 } |
|
263 |
|
264 // ============================================================================= |
|
265 // |
|
266 void LexerScanner::Skip (int chars) |
|
267 { |
|
268 for (int i = 0; i < chars; ++i) |
|
269 Skip(); |
|
270 } |
|
271 |
|
272 // ============================================================================= |
|
273 // |
|
274 String LexerScanner::GetTokenString (EToken a) |
|
275 { |
|
276 assert ((int) a <= tkLastNamedToken); |
|
277 return gTokenStrings[a]; |
|
278 } |
|
279 |
|
280 // ============================================================================= |
|
281 // |
|
282 String LexerScanner::ReadLine() |
|
283 { |
|
284 String line; |
|
285 |
|
286 while (*mPosition != '\n') |
|
287 line += *(mPosition++); |
|
288 |
|
289 return line; |
|
290 } |