34 "@", |
62 "@", |
35 "#", |
63 "#", |
36 "~", |
64 "~", |
37 "`", |
65 "`", |
38 "%", |
66 "%", |
|
67 "<variable>", |
39 "<string>", |
68 "<string>", |
40 "<symbol>", |
69 "<symbol>", |
41 "<number>", |
70 "<number>", |
42 "<any>", |
71 "<any>", |
43 }; |
72 }; |
44 |
73 |
|
74 // |
|
75 // ------------------------------------------------------------------------------------------------- |
|
76 // |
45 Script::Parser::Parser(QString text) : |
77 Script::Parser::Parser(QString text) : |
46 m_script(text) {} |
78 m_script(text) {} |
47 |
79 |
|
80 // |
|
81 // ------------------------------------------------------------------------------------------------- |
|
82 // |
|
83 Script::Parser::~Parser() {} |
|
84 |
|
85 // |
|
86 // ------------------------------------------------------------------------------------------------- |
|
87 // |
48 void Script::Parser::parse() |
88 void Script::Parser::parse() |
|
89 { |
|
90 preprocess(); |
|
91 m_state.reset(); |
|
92 |
|
93 while (next (TOK_Any)) |
|
94 { |
|
95 print ("token: %1 (%2)\n", state().token.text, TokenNames[state().token.type]); |
|
96 } |
|
97 } |
|
98 |
|
99 // |
|
100 // ------------------------------------------------------------------------------------------------- |
|
101 // |
|
102 void Script::Parser::preprocess() |
49 { |
103 { |
50 bool inString = false; |
104 bool inString = false; |
51 bool inComment = false; |
105 bool inComment = false; |
52 bool inBackslash = false; |
106 bool inBackslash = false; |
53 int ln = 1; |
107 int ln = 1; |
54 int pos = 0; |
108 int pos = 0; |
55 |
109 |
56 // Preprocess |
110 // Preprocess |
57 for (QChar qch : text) |
111 for (QChar qch : m_script) |
58 { |
112 { |
59 char ch = qch.toAscii(); |
113 char ch = qch.toAscii(); |
60 |
114 |
61 if (not inComment && not inString && ch == '\0') |
115 if (not inComment and not inString and ch == '\0') |
62 scriptError ("bad character %s in script text on line %d", qch, ln); |
116 scriptError ("bad character %1 in script text on line %2", qch, ln); |
63 |
117 |
64 if (ch == '\\') |
118 if (not inString) |
65 { |
119 { |
66 inBackslash = true; |
120 if (ch == '\\') |
67 continue; |
|
68 } |
|
69 |
|
70 if (inBackslash) |
|
71 { |
|
72 if (inString) |
|
73 { |
121 { |
74 switch (ch) |
122 inBackslash = true; |
75 { |
|
76 case 'n': data << '\n'; break; |
|
77 case 't': data << '\t'; break; |
|
78 case 'b': data << '\b'; break; |
|
79 case '\\': data << '\\'; break; |
|
80 default: scriptError ("misplaced backslash on line %d", ln); |
|
81 } |
|
82 |
|
83 ++pos; |
|
84 inBackslash == false; |
|
85 continue; |
123 continue; |
86 } |
124 } |
87 else if (ch != '\n') |
125 |
88 { |
126 if (inBackslash and ch != '\n') |
89 scriptError ("misplaced backslash on line %d", ln); |
127 scriptError ("misplaced backslash on line %1", ln); |
90 } |
128 } |
91 } |
129 |
|
130 if (ch == '"') |
|
131 inString ^= 1; |
92 |
132 |
93 if (ch == '\n') |
133 if (ch == '\n') |
94 { |
134 { |
95 if (inString) |
135 if (inString) |
96 scriptError ("unterminated string on line %d", ln); |
136 scriptError ("unterminated string on line %1", ln); |
97 |
137 |
98 if (not inBackslash) |
138 if (not inBackslash) |
99 { |
139 { |
100 m_data << ';'; |
140 m_data.append (';'); |
101 ++pos; |
141 m_data.append ('\n'); |
|
142 pos += 2; |
102 inComment = false; |
143 inComment = false; |
103 m_lineEndings << pos; |
144 m_lineEndings << pos; |
104 ++ln; |
145 ++ln; |
105 } |
146 } |
106 else |
147 else |
107 { |
|
108 inBackslash = false; |
148 inBackslash = false; |
109 } |
149 |
110 continue; |
150 continue; |
111 } |
151 } |
112 |
152 |
113 if (ch == '#' && not inString) |
153 if (ch == '#' and not inString) |
114 { |
154 { |
115 inComment = true; |
155 inComment = true; |
116 continue; |
156 continue; |
117 } |
157 } |
118 |
158 |
119 m_data << ch; |
159 if (not inComment) |
120 ++pos; |
160 { |
121 } |
161 m_data.append (ch); |
122 |
162 ++pos; |
123 m_position.reset(); |
163 } |
124 } |
164 } |
125 |
165 } |
126 bool Script::Parser::next(TokenType desiredType) |
166 |
127 { |
167 // |
128 SavedPosition oldpos = position(); |
168 // ------------------------------------------------------------------------------------------------- |
|
169 // |
|
namespace Script
{
    // Exception type used to signal that the end of the input was reached while more characters
    // were expected.
    class UnexpectedEOF : public std::exception
    {
    public:
        // Returns a fixed, human-readable description of the error.
        const char* what() const noexcept override
        {
            return "unexpected EOF";
        }
    };
}
|
180 |
|
181 // |
|
182 // ------------------------------------------------------------------------------------------------- |
|
183 // |
|
184 char Script::Parser::read() |
|
185 { |
|
186 if (m_state.position >= m_data.length()) |
|
187 throw UnexpectedEOF(); |
|
188 |
|
189 char ch = m_data[m_state.position]; |
|
190 m_state.position++; |
|
191 |
|
192 if (m_state.position == m_lineEndings[m_state.lineNumber]) |
|
193 m_state.lineNumber++; |
|
194 |
|
195 return ch; |
|
196 } |
|
197 |
|
198 // |
|
199 // ------------------------------------------------------------------------------------------------- |
|
200 // |
|
201 void Script::Parser::unread() |
|
202 { |
|
203 if (m_state.position <= 0) |
|
204 return; |
|
205 |
|
206 if (m_state.lineNumber > 0 |
|
207 and m_state.position == m_lineEndings[m_state.lineNumber - 1]) |
|
208 { |
|
209 m_state.lineNumber--; |
|
210 } |
|
211 |
|
212 m_state.position--; |
|
213 } |
|
214 |
|
215 // |
|
216 // ------------------------------------------------------------------------------------------------- |
|
217 // |
|
// Takes a hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F') and returns its numerical
// value in the range 0..15. It is assumed that isxdigit(xd) is true (if not, the result is
// meaningless).
//
int parseXDigit (char xd)
{
    // Letters stand for the values 10..15, hence the offset of 10.
    if (xd >= 'a')
        return xd - 'a' + 10;

    if (xd >= 'A')
        return xd - 'A' + 10;

    return xd - '0';
}
|
231 |
|
232 // |
|
233 // ------------------------------------------------------------------------------------------------- |
|
234 // |
|
235 bool Script::Parser::next (TokenType desiredType) |
|
236 { |
|
237 SavedState oldpos = state(); |
|
238 Token oldtoken = m_state.token; |
|
239 |
|
240 if (not getNextToken()) |
|
241 return false; |
|
242 |
|
243 if (desiredType != TOK_Any and m_state.token.type != desiredType) |
|
244 { |
|
245 // Did not find the token we wanted, revert back |
|
246 m_rejectedToken = m_state.token; |
|
247 m_state.token = oldtoken; |
|
248 setState (oldpos); |
|
249 return false; |
|
250 } |
|
251 |
|
252 return true; |
|
253 } |
|
254 |
|
255 // |
|
256 // ------------------------------------------------------------------------------------------------- |
|
257 // |
|
258 bool Script::Parser::getNextToken() |
|
259 { |
|
260 try |
|
261 { |
|
262 m_state.token.text.clear(); |
|
263 m_state.token.number = 0; |
|
264 m_state.token.real = 0.0; |
|
265 skipSpace(); |
|
266 |
|
267 const char* data = m_data.constData() + m_state.position; |
|
268 |
|
269 // Does this character start one of our tokens? |
|
270 for (int tt = 0; tt <= LastNamedToken; ++tt) |
|
271 { |
|
272 if (strncmp (data, TokenNames[tt], strlen (TokenNames[tt])) != 0) |
|
273 continue; |
|
274 |
|
275 m_state.position += strlen (TokenNames[tt]); |
|
276 m_state.token.text = QString::fromAscii (TokenNames[tt]); |
|
277 m_state.token.type = TokenType (tt); |
|
278 return true; |
|
279 } |
|
280 |
|
281 // Check for number |
|
282 if (parseNumber()) |
|
283 return true; |
|
284 |
|
285 // Check for string |
|
286 if (*data == '"') |
|
287 { |
|
288 read(); |
|
289 parseString(); |
|
290 return true; |
|
291 } |
|
292 |
|
293 // Check for variable |
|
294 if (*data == '$') |
|
295 { |
|
296 read(); |
|
297 m_state.token.text = parseIdentifier(); |
|
298 m_state.token.type = TOK_Variable; |
|
299 return true; |
|
300 } |
|
301 |
|
302 // Must be a symbol of some sort then |
|
303 m_state.token.text = parseIdentifier(); |
|
304 m_state.token.type = TOK_Symbol; |
|
305 } |
|
306 catch (UnexpectedEOF) |
|
307 { |
|
308 return false; |
|
309 } |
|
310 |
|
311 return true; |
|
312 } |
|
313 |
|
314 // |
|
315 // ------------------------------------------------------------------------------------------------- |
|
316 // |
|
317 bool Script::Parser::parseNumber() |
|
318 { |
|
319 SavedState pos = state(); |
|
320 char ch = read(); |
|
321 unread(); |
|
322 QString numberString; |
|
323 |
|
324 if (not isdigit (ch) and ch != '.') |
|
325 { |
|
326 setState (pos); |
|
327 return false; |
|
328 } |
|
329 |
|
330 int base = 10; |
|
331 bool gotDot = false; |
|
332 |
|
333 if (tryMatch ("0x", false)) |
|
334 base = 16; |
|
335 elif (tryMatch ("0b", false)) |
|
336 base = 2; |
|
337 |
|
338 int (*checkFunc)(int) = base == 16 ? isxdigit : isdigit; |
|
339 |
|
340 for (int n = 0; not isspace (ch = read()); ++n) |
|
341 { |
|
342 if (n == 0 && ch == '0') |
|
343 base = 8; |
|
344 |
|
345 if (ch == '.') |
|
346 { |
|
347 if (gotDot) |
|
348 scriptError ("multiple dots in numeric literal"); |
|
349 |
|
350 // If reading numbers like 0.1234 where the first number is zero, the parser |
|
351 // will initially think the number is octal so we must take that into account here. |
|
352 // Note that even if you have numbers like 05.612, it will still be decimal. |
|
353 if (base != 10 and base != 8) |
|
354 scriptError ("real number constant must be decimal"); |
|
355 |
|
356 base = 10; |
|
357 gotDot = true; |
|
358 } |
|
359 else if (checkFunc (ch)) |
|
360 { |
|
361 if (base <= 10 and (ch - '0') >= base) |
|
362 scriptError ("bad base-%1 numeric literal", base); |
|
363 |
|
364 numberString += ch; |
|
365 } |
|
366 else if (isalpha (ch)) |
|
367 scriptError ("invalid digit %1 in literal", ch); |
|
368 else |
|
369 break; |
|
370 } |
|
371 |
|
372 unread(); |
|
373 bool ok; |
|
374 |
|
375 if (gotDot) |
|
376 { |
|
377 // Floating point number |
|
378 m_state.token.real = numberString.toFloat (&ok); |
|
379 m_state.token.number = m_state.token.real; |
|
380 } |
|
381 else |
|
382 { |
|
383 // Integral number |
|
384 m_state.token.number = numberString.toInt (&ok, base); |
|
385 m_state.token.real = m_state.token.number; |
|
386 } |
|
387 |
|
388 if (ok == false) |
|
389 scriptError ("invalid numeric literal '%1'", numberString); |
|
390 |
|
391 m_state.token.text = numberString; |
|
392 m_state.token.type = TOK_Number; |
|
393 |
|
394 return true; |
|
395 } |
|
396 |
|
397 // |
|
398 // ------------------------------------------------------------------------------------------------- |
|
399 // |
|
400 void Script::Parser::scriptError (QString text) |
|
401 { |
|
402 throw ParseError (text); |
|
403 } |
|
404 |
|
405 // |
|
406 // ------------------------------------------------------------------------------------------------- |
|
407 // |
|
408 // Checks whether the parser is at the beginning of the given string in the code. The string is |
|
409 // expected not to have newlines. If true, the parser jumps over the text. |
|
410 // |
|
411 bool Script::Parser::tryMatch (const char* text, bool caseSensitive) |
|
412 { |
|
413 assert (strstr (text, "\n") == NULL); |
|
414 const char* data = m_data.constData() + m_state.position; |
|
415 int (*func) (const char*, const char*) = caseSensitive ? &strcmp : &strcasecmp; |
|
416 |
|
417 if ((*func) (data, text) == 0) |
|
418 { |
|
419 m_state.position += strlen (text); |
|
420 return true; |
|
421 } |
|
422 |
129 return false; |
423 return false; |
130 } |
424 } |
131 |
425 |
132 void Script::Parser::mustGetNext(TokenType desiredType) |
426 // |
133 { |
427 // ------------------------------------------------------------------------------------------------- |
134 |
428 // |
135 } |
429 QString Script::Parser::parseEscapeSequence() |
136 |
430 { |
137 bool Script::Parser::peekNext(Token& tok) |
431 char ch = read(); |
138 { |
432 QString result; |
|
433 |
|
434 switch (ch) |
|
435 { |
|
436 case '"': |
|
437 result += "\""; |
|
438 break; |
|
439 |
|
440 case 'n': |
|
441 result += "\n"; |
|
442 break; |
|
443 |
|
444 case 't': |
|
445 result += "\t"; |
|
446 break; |
|
447 |
|
448 case '\\': |
|
449 result += "\\"; |
|
450 break; |
|
451 |
|
452 case 'x': |
|
453 case 'X': |
|
454 { |
|
455 char n1 = read(); |
|
456 char n2 = read(); |
|
457 |
|
458 if (not isxdigit(n1) or not isxdigit(n2)) |
|
459 scriptError ("bad hexa-decimal character \\x%1%2", n1, n2); |
|
460 |
|
461 unsigned char num = parseXDigit(n1) * 16 + parseXDigit(n2); |
|
462 result += char (num); |
|
463 } |
|
464 break; |
|
465 |
|
466 default: |
|
467 scriptError ("unknown escape sequence \\%1", ch); |
|
468 } |
|
469 |
|
470 return result; |
|
471 } |
|
472 |
|
473 // |
|
474 // ------------------------------------------------------------------------------------------------- |
|
475 // |
|
476 void Script::Parser::parseString() |
|
477 { |
|
478 m_state.token.type = TOK_String; |
|
479 m_state.token.text.clear(); |
|
480 |
|
481 try |
|
482 { |
|
483 char ch; |
|
484 |
|
485 while ((ch = read()) != '"') |
|
486 { |
|
487 if (ch == '\\') |
|
488 m_state.token.text += parseEscapeSequence(); |
|
489 else |
|
490 m_state.token.text += ch; |
|
491 } |
|
492 } |
|
493 catch (UnexpectedEOF) |
|
494 { |
|
495 scriptError ("unterminated string"); |
|
496 } |
|
497 } |
|
498 |
|
499 // |
|
500 // ------------------------------------------------------------------------------------------------- |
|
501 // |
|
502 void Script::Parser::skipSpace() |
|
503 { |
|
504 while (isspace (read())) |
|
505 ; |
|
506 |
|
507 unread(); |
|
508 } |
|
509 |
|
510 // |
|
511 // ------------------------------------------------------------------------------------------------- |
|
512 // |
|
513 void Script::Parser::mustGetNext (TokenType desiredType) |
|
514 { |
|
515 if (not next (desiredType)) |
|
516 { |
|
517 scriptError ("Expected %1, got %2", |
|
518 TokenNames[m_rejectedToken.type], |
|
519 TokenNames[desiredType]); |
|
520 } |
|
521 } |
|
522 |
|
523 // |
|
524 // ------------------------------------------------------------------------------------------------- |
|
525 // |
|
526 bool Script::Parser::peekNext (Token& tok) |
|
527 { |
|
528 SavedState pos = state(); |
|
529 |
|
530 if (next (TOK_Any)) |
|
531 { |
|
532 tok = m_state.token; |
|
533 setState (pos); |
|
534 return true; |
|
535 } |
|
536 |
139 return false; |
537 return false; |
140 } |
538 } |
141 |
539 |
142 const Script::SavedPosition& Script::Parser::position() const |
540 // |
143 { |
541 // ------------------------------------------------------------------------------------------------- |
144 return m_position; |
542 // |
145 } |
543 const Script::SavedState& Script::Parser::state() const |
146 |
544 { |
147 void Script::Parser::setPosition(const SavedPosition& pos) |
545 return m_state; |
148 { |
546 } |
149 m_position = pos; |
547 |
150 } |
548 // |
|
549 // ------------------------------------------------------------------------------------------------- |
|
550 // |
|
551 void Script::Parser::setState (const SavedState& pos) |
|
552 { |
|
553 m_state = pos; |
|
554 } |
|
555 |
|
556 // |
|
557 // ------------------------------------------------------------------------------------------------- |
|
558 // |
|
559 QString Script::Parser::parseIdentifier() |
|
560 { |
|
561 char ch; |
|
562 QString identifier; |
|
563 |
|
564 while (not isspace (ch = read())) |
|
565 { |
|
566 if (isalnum (ch) == false and ch != '_') |
|
567 break; |
|
568 |
|
569 identifier += QChar::fromAscii (ch); |
|
570 } |
|
571 |
|
572 unread(); |
|
573 return identifier; |
|
574 } |