| 34 "@", |
62 "@", |
| 35 "#", |
63 "#", |
| 36 "~", |
64 "~", |
| 37 "`", |
65 "`", |
| 38 "%", |
66 "%", |
| |
67 "<variable>", |
| 39 "<string>", |
68 "<string>", |
| 40 "<symbol>", |
69 "<symbol>", |
| 41 "<number>", |
70 "<number>", |
| 42 "<any>", |
71 "<any>", |
| 43 }; |
72 }; |
| 44 |
73 |
| |
74 // |
| |
75 // ------------------------------------------------------------------------------------------------- |
| |
76 // |
| 45 Script::Parser::Parser(QString text) : |
77 Script::Parser::Parser(QString text) : |
| 46 m_script(text) {} |
78 m_script(text) {} |
| 47 |
79 |
| |
80 // |
| |
81 // ------------------------------------------------------------------------------------------------- |
| |
82 // |
| |
83 Script::Parser::~Parser() {} |
| |
84 |
| |
85 // |
| |
86 // ------------------------------------------------------------------------------------------------- |
| |
87 // |
| 48 void Script::Parser::parse() |
88 void Script::Parser::parse() |
| |
89 { |
| |
90 preprocess(); |
| |
91 m_state.reset(); |
| |
92 |
| |
93 while (next (TOK_Any)) |
| |
94 { |
| |
95 print ("token: %1 (%2)\n", state().token.text, TokenNames[state().token.type]); |
| |
96 } |
| |
97 } |
| |
98 |
| |
99 // |
| |
100 // ------------------------------------------------------------------------------------------------- |
| |
101 // |
| |
102 void Script::Parser::preprocess() |
| 49 { |
103 { |
| 50 bool inString = false; |
104 bool inString = false; |
| 51 bool inComment = false; |
105 bool inComment = false; |
| 52 bool inBackslash = false; |
106 bool inBackslash = false; |
| 53 int ln = 1; |
107 int ln = 1; |
| 54 int pos = 0; |
108 int pos = 0; |
| 55 |
109 |
| 56 // Preprocess |
110 // Preprocess |
| 57 for (QChar qch : text) |
111 for (QChar qch : m_script) |
| 58 { |
112 { |
| 59 char ch = qch.toAscii(); |
113 char ch = qch.toAscii(); |
| 60 |
114 |
| 61 if (not inComment && not inString && ch == '\0') |
115 if (not inComment and not inString and ch == '\0') |
| 62 scriptError ("bad character %s in script text on line %d", qch, ln); |
116 scriptError ("bad character %1 in script text on line %2", qch, ln); |
| 63 |
117 |
| 64 if (ch == '\\') |
118 if (not inString) |
| 65 { |
119 { |
| 66 inBackslash = true; |
120 if (ch == '\\') |
| 67 continue; |
|
| 68 } |
|
| 69 |
|
| 70 if (inBackslash) |
|
| 71 { |
|
| 72 if (inString) |
|
| 73 { |
121 { |
| 74 switch (ch) |
122 inBackslash = true; |
| 75 { |
|
| 76 case 'n': data << '\n'; break; |
|
| 77 case 't': data << '\t'; break; |
|
| 78 case 'b': data << '\b'; break; |
|
| 79 case '\\': data << '\\'; break; |
|
| 80 default: scriptError ("misplaced backslash on line %d", ln); |
|
| 81 } |
|
| 82 |
|
| 83 ++pos; |
|
| 84 inBackslash == false; |
|
| 85 continue; |
123 continue; |
| 86 } |
124 } |
| 87 else if (ch != '\n') |
125 |
| 88 { |
126 if (inBackslash and ch != '\n') |
| 89 scriptError ("misplaced backslash on line %d", ln); |
127 scriptError ("misplaced backslash on line %1", ln); |
| 90 } |
128 } |
| 91 } |
129 |
| |
130 if (ch == '"') |
| |
131 inString ^= 1; |
| 92 |
132 |
| 93 if (ch == '\n') |
133 if (ch == '\n') |
| 94 { |
134 { |
| 95 if (inString) |
135 if (inString) |
| 96 scriptError ("unterminated string on line %d", ln); |
136 scriptError ("unterminated string on line %1", ln); |
| 97 |
137 |
| 98 if (not inBackslash) |
138 if (not inBackslash) |
| 99 { |
139 { |
| 100 m_data << ';'; |
140 m_data.append (';'); |
| 101 ++pos; |
141 m_data.append ('\n'); |
| |
142 pos += 2; |
| 102 inComment = false; |
143 inComment = false; |
| 103 m_lineEndings << pos; |
144 m_lineEndings << pos; |
| 104 ++ln; |
145 ++ln; |
| 105 } |
146 } |
| 106 else |
147 else |
| 107 { |
|
| 108 inBackslash = false; |
148 inBackslash = false; |
| 109 } |
149 |
| 110 continue; |
150 continue; |
| 111 } |
151 } |
| 112 |
152 |
| 113 if (ch == '#' && not inString) |
153 if (ch == '#' and not inString) |
| 114 { |
154 { |
| 115 inComment = true; |
155 inComment = true; |
| 116 continue; |
156 continue; |
| 117 } |
157 } |
| 118 |
158 |
| 119 m_data << ch; |
159 if (not inComment) |
| 120 ++pos; |
160 { |
| 121 } |
161 m_data.append (ch); |
| 122 |
162 ++pos; |
| 123 m_position.reset(); |
163 } |
| 124 } |
164 } |
| 125 |
165 } |
| 126 bool Script::Parser::next(TokenType desiredType) |
166 |
| 127 { |
167 // |
| 128 SavedPosition oldpos = position(); |
168 // ------------------------------------------------------------------------------------------------- |
| |
169 // |
| |
namespace Script
{
	// Exception type used to signal that the end of the script data was reached while more
	// input was still expected.
	class UnexpectedEOF : public std::exception
	{
	public:
		// Returns a fixed description of the error.
		const char* what() const noexcept override
		{
			return "unexpected EOF";
		}
	};
}
| |
180 |
| |
181 // |
| |
182 // ------------------------------------------------------------------------------------------------- |
| |
183 // |
| |
184 char Script::Parser::read() |
| |
185 { |
| |
186 if (m_state.position >= m_data.length()) |
| |
187 throw UnexpectedEOF(); |
| |
188 |
| |
189 char ch = m_data[m_state.position]; |
| |
190 m_state.position++; |
| |
191 |
| |
192 if (m_state.position == m_lineEndings[m_state.lineNumber]) |
| |
193 m_state.lineNumber++; |
| |
194 |
| |
195 return ch; |
| |
196 } |
| |
197 |
| |
198 // |
| |
199 // ------------------------------------------------------------------------------------------------- |
| |
200 // |
| |
201 void Script::Parser::unread() |
| |
202 { |
| |
203 if (m_state.position <= 0) |
| |
204 return; |
| |
205 |
| |
206 if (m_state.lineNumber > 0 |
| |
207 and m_state.position == m_lineEndings[m_state.lineNumber - 1]) |
| |
208 { |
| |
209 m_state.lineNumber--; |
| |
210 } |
| |
211 |
| |
212 m_state.position--; |
| |
213 } |
| |
214 |
| |
215 // |
| |
216 // ------------------------------------------------------------------------------------------------- |
| |
217 // |
| |
// Takes a hexadecimal character and returns its numerical value (0-15). It is assumed that
// isxdigit(xd) is true (if not, result is undefined).
//
int parseXDigit (char xd)
{
	// Letters stand for the values 10..15, so the offset from 'a'/'A' needs 10 added to it.
	if (xd >= 'a')
		return xd - 'a' + 10;

	if (xd >= 'A')
		return xd - 'A' + 10;

	return xd - '0';
}
| |
231 |
| |
232 // |
| |
233 // ------------------------------------------------------------------------------------------------- |
| |
234 // |
| |
235 bool Script::Parser::next (TokenType desiredType) |
| |
236 { |
| |
237 SavedState oldpos = state(); |
| |
238 Token oldtoken = m_state.token; |
| |
239 |
| |
240 if (not getNextToken()) |
| |
241 return false; |
| |
242 |
| |
243 if (desiredType != TOK_Any and m_state.token.type != desiredType) |
| |
244 { |
| |
245 // Did not find the token we wanted, revert back |
| |
246 m_rejectedToken = m_state.token; |
| |
247 m_state.token = oldtoken; |
| |
248 setState (oldpos); |
| |
249 return false; |
| |
250 } |
| |
251 |
| |
252 return true; |
| |
253 } |
| |
254 |
| |
255 // |
| |
256 // ------------------------------------------------------------------------------------------------- |
| |
257 // |
| |
258 bool Script::Parser::getNextToken() |
| |
259 { |
| |
260 try |
| |
261 { |
| |
262 m_state.token.text.clear(); |
| |
263 m_state.token.number = 0; |
| |
264 m_state.token.real = 0.0; |
| |
265 skipSpace(); |
| |
266 |
| |
267 const char* data = m_data.constData() + m_state.position; |
| |
268 |
| |
269 // Does this character start one of our tokens? |
| |
270 for (int tt = 0; tt <= LastNamedToken; ++tt) |
| |
271 { |
| |
272 if (strncmp (data, TokenNames[tt], strlen (TokenNames[tt])) != 0) |
| |
273 continue; |
| |
274 |
| |
275 m_state.position += strlen (TokenNames[tt]); |
| |
276 m_state.token.text = QString::fromAscii (TokenNames[tt]); |
| |
277 m_state.token.type = TokenType (tt); |
| |
278 return true; |
| |
279 } |
| |
280 |
| |
281 // Check for number |
| |
282 if (parseNumber()) |
| |
283 return true; |
| |
284 |
| |
285 // Check for string |
| |
286 if (*data == '"') |
| |
287 { |
| |
288 read(); |
| |
289 parseString(); |
| |
290 return true; |
| |
291 } |
| |
292 |
| |
293 // Check for variable |
| |
294 if (*data == '$') |
| |
295 { |
| |
296 read(); |
| |
297 m_state.token.text = parseIdentifier(); |
| |
298 m_state.token.type = TOK_Variable; |
| |
299 return true; |
| |
300 } |
| |
301 |
| |
302 // Must be a symbol of some sort then |
| |
303 m_state.token.text = parseIdentifier(); |
| |
304 m_state.token.type = TOK_Symbol; |
| |
305 } |
| |
306 catch (UnexpectedEOF) |
| |
307 { |
| |
308 return false; |
| |
309 } |
| |
310 |
| |
311 return true; |
| |
312 } |
| |
313 |
| |
314 // |
| |
315 // ------------------------------------------------------------------------------------------------- |
| |
316 // |
| |
317 bool Script::Parser::parseNumber() |
| |
318 { |
| |
319 SavedState pos = state(); |
| |
320 char ch = read(); |
| |
321 unread(); |
| |
322 QString numberString; |
| |
323 |
| |
324 if (not isdigit (ch) and ch != '.') |
| |
325 { |
| |
326 setState (pos); |
| |
327 return false; |
| |
328 } |
| |
329 |
| |
330 int base = 10; |
| |
331 bool gotDot = false; |
| |
332 |
| |
333 if (tryMatch ("0x", false)) |
| |
334 base = 16; |
| |
335 elif (tryMatch ("0b", false)) |
| |
336 base = 2; |
| |
337 |
| |
338 int (*checkFunc)(int) = base == 16 ? isxdigit : isdigit; |
| |
339 |
| |
340 for (int n = 0; not isspace (ch = read()); ++n) |
| |
341 { |
| |
342 if (n == 0 && ch == '0') |
| |
343 base = 8; |
| |
344 |
| |
345 if (ch == '.') |
| |
346 { |
| |
347 if (gotDot) |
| |
348 scriptError ("multiple dots in numeric literal"); |
| |
349 |
| |
350 // If reading numbers like 0.1234 where the first number is zero, the parser |
| |
351 // will initially think the number is octal so we must take that into account here. |
| |
352 // Note that even if you have numbers like 05.612, it will still be decimal. |
| |
353 if (base != 10 and base != 8) |
| |
354 scriptError ("real number constant must be decimal"); |
| |
355 |
| |
356 base = 10; |
| |
357 gotDot = true; |
| |
358 } |
| |
359 else if (checkFunc (ch)) |
| |
360 { |
| |
361 if (base <= 10 and (ch - '0') >= base) |
| |
362 scriptError ("bad base-%1 numeric literal", base); |
| |
363 |
| |
364 numberString += ch; |
| |
365 } |
| |
366 else if (isalpha (ch)) |
| |
367 scriptError ("invalid digit %1 in literal", ch); |
| |
368 else |
| |
369 break; |
| |
370 } |
| |
371 |
| |
372 unread(); |
| |
373 bool ok; |
| |
374 |
| |
375 if (gotDot) |
| |
376 { |
| |
377 // Floating point number |
| |
378 m_state.token.real = numberString.toFloat (&ok); |
| |
379 m_state.token.number = m_state.token.real; |
| |
380 } |
| |
381 else |
| |
382 { |
| |
383 // Integral number |
| |
384 m_state.token.number = numberString.toInt (&ok, base); |
| |
385 m_state.token.real = m_state.token.number; |
| |
386 } |
| |
387 |
| |
388 if (ok == false) |
| |
389 scriptError ("invalid numeric literal '%1'", numberString); |
| |
390 |
| |
391 m_state.token.text = numberString; |
| |
392 m_state.token.type = TOK_Number; |
| |
393 |
| |
394 return true; |
| |
395 } |
| |
396 |
| |
397 // |
| |
398 // ------------------------------------------------------------------------------------------------- |
| |
399 // |
| |
400 void Script::Parser::scriptError (QString text) |
| |
401 { |
| |
402 throw ParseError (text); |
| |
403 } |
| |
404 |
| |
405 // |
| |
406 // ------------------------------------------------------------------------------------------------- |
| |
407 // |
| |
408 // Checks whether the parser is at the beginning of the given string in the code. The string is |
| |
409 // expected not to have newlines. If true, the parser jumps over the text. |
| |
410 // |
| |
411 bool Script::Parser::tryMatch (const char* text, bool caseSensitive) |
| |
412 { |
| |
413 assert (strstr (text, "\n") == NULL); |
| |
414 const char* data = m_data.constData() + m_state.position; |
| |
415 int (*func) (const char*, const char*) = caseSensitive ? &strcmp : &strcasecmp; |
| |
416 |
| |
417 if ((*func) (data, text) == 0) |
| |
418 { |
| |
419 m_state.position += strlen (text); |
| |
420 return true; |
| |
421 } |
| |
422 |
| 129 return false; |
423 return false; |
| 130 } |
424 } |
| 131 |
425 |
| 132 void Script::Parser::mustGetNext(TokenType desiredType) |
426 // |
| 133 { |
427 // ------------------------------------------------------------------------------------------------- |
| 134 |
428 // |
| 135 } |
429 QString Script::Parser::parseEscapeSequence() |
| 136 |
430 { |
| 137 bool Script::Parser::peekNext(Token& tok) |
431 char ch = read(); |
| 138 { |
432 QString result; |
| |
433 |
| |
434 switch (ch) |
| |
435 { |
| |
436 case '"': |
| |
437 result += "\""; |
| |
438 break; |
| |
439 |
| |
440 case 'n': |
| |
441 result += "\n"; |
| |
442 break; |
| |
443 |
| |
444 case 't': |
| |
445 result += "\t"; |
| |
446 break; |
| |
447 |
| |
448 case '\\': |
| |
449 result += "\\"; |
| |
450 break; |
| |
451 |
| |
452 case 'x': |
| |
453 case 'X': |
| |
454 { |
| |
455 char n1 = read(); |
| |
456 char n2 = read(); |
| |
457 |
| |
458 if (not isxdigit(n1) or not isxdigit(n2)) |
| |
459 scriptError ("bad hexa-decimal character \\x%1%2", n1, n2); |
| |
460 |
| |
461 unsigned char num = parseXDigit(n1) * 16 + parseXDigit(n2); |
| |
462 result += char (num); |
| |
463 } |
| |
464 break; |
| |
465 |
| |
466 default: |
| |
467 scriptError ("unknown escape sequence \\%1", ch); |
| |
468 } |
| |
469 |
| |
470 return result; |
| |
471 } |
| |
472 |
| |
473 // |
| |
474 // ------------------------------------------------------------------------------------------------- |
| |
475 // |
| |
476 void Script::Parser::parseString() |
| |
477 { |
| |
478 m_state.token.type = TOK_String; |
| |
479 m_state.token.text.clear(); |
| |
480 |
| |
481 try |
| |
482 { |
| |
483 char ch; |
| |
484 |
| |
485 while ((ch = read()) != '"') |
| |
486 { |
| |
487 if (ch == '\\') |
| |
488 m_state.token.text += parseEscapeSequence(); |
| |
489 else |
| |
490 m_state.token.text += ch; |
| |
491 } |
| |
492 } |
| |
493 catch (UnexpectedEOF) |
| |
494 { |
| |
495 scriptError ("unterminated string"); |
| |
496 } |
| |
497 } |
| |
498 |
| |
499 // |
| |
500 // ------------------------------------------------------------------------------------------------- |
| |
501 // |
| |
502 void Script::Parser::skipSpace() |
| |
503 { |
| |
504 while (isspace (read())) |
| |
505 ; |
| |
506 |
| |
507 unread(); |
| |
508 } |
| |
509 |
| |
510 // |
| |
511 // ------------------------------------------------------------------------------------------------- |
| |
512 // |
| |
513 void Script::Parser::mustGetNext (TokenType desiredType) |
| |
514 { |
| |
515 if (not next (desiredType)) |
| |
516 { |
| |
517 scriptError ("Expected %1, got %2", |
| |
518 TokenNames[m_rejectedToken.type], |
| |
519 TokenNames[desiredType]); |
| |
520 } |
| |
521 } |
| |
522 |
| |
523 // |
| |
524 // ------------------------------------------------------------------------------------------------- |
| |
525 // |
| |
526 bool Script::Parser::peekNext (Token& tok) |
| |
527 { |
| |
528 SavedState pos = state(); |
| |
529 |
| |
530 if (next (TOK_Any)) |
| |
531 { |
| |
532 tok = m_state.token; |
| |
533 setState (pos); |
| |
534 return true; |
| |
535 } |
| |
536 |
| 139 return false; |
537 return false; |
| 140 } |
538 } |
| 141 |
539 |
| 142 const Script::SavedPosition& Script::Parser::position() const |
540 // |
| 143 { |
541 // ------------------------------------------------------------------------------------------------- |
| 144 return m_position; |
542 // |
| 145 } |
543 const Script::SavedState& Script::Parser::state() const |
| 146 |
544 { |
| 147 void Script::Parser::setPosition(const SavedPosition& pos) |
545 return m_state; |
| 148 { |
546 } |
| 149 m_position = pos; |
547 |
| 150 } |
548 // |
| |
549 // ------------------------------------------------------------------------------------------------- |
| |
550 // |
| |
551 void Script::Parser::setState (const SavedState& pos) |
| |
552 { |
| |
553 m_state = pos; |
| |
554 } |
| |
555 |
| |
556 // |
| |
557 // ------------------------------------------------------------------------------------------------- |
| |
558 // |
| |
559 QString Script::Parser::parseIdentifier() |
| |
560 { |
| |
561 char ch; |
| |
562 QString identifier; |
| |
563 |
| |
564 while (not isspace (ch = read())) |
| |
565 { |
| |
566 if (isalnum (ch) == false and ch != '_') |
| |
567 break; |
| |
568 |
| |
569 identifier += QChar::fromAscii (ch); |
| |
570 } |
| |
571 |
| |
572 unread(); |
| |
573 return identifier; |
| |
574 } |