src/script/parser.cpp

branch
scripting
changeset 923
e15a577a0bfe
parent 922
81887a77baa0
child 924
d1ac217c9165
equal deleted inserted replaced
922:81887a77baa0 923:e15a577a0bfe
1 /*
2 * LDForge: LDraw parts authoring CAD
3 * Copyright (C) 2013 - 2015 Teemu Piippo
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
1 #include "parser.h" 19 #include "parser.h"
2 20
3 static const char* TokenNames[] = 21 static const char* TokenNames[] =
4 { 22 {
23 "if",
24 "then",
25 "else",
26 "endif",
27 "endmacro",
28 "macro",
29 "for",
30 "while",
31 "done",
32 "do",
5 "==", 33 "==",
6 "<=", 34 "<=",
7 ">=", 35 ">=",
8 "&&", 36 "&&",
9 "||", 37 "||",
10 "$", 38 "!=",
11 ":", 39 ":",
12 ";", 40 ";",
13 ".", 41 ".",
14 ",", 42 ",",
15 "=", 43 "=",
34 "@", 62 "@",
35 "#", 63 "#",
36 "~", 64 "~",
37 "`", 65 "`",
38 "%", 66 "%",
67 "<variable>",
39 "<string>", 68 "<string>",
40 "<symbol>", 69 "<symbol>",
41 "<number>", 70 "<number>",
42 "<any>", 71 "<any>",
43 }; 72 };
44 73
74 //
75 // -------------------------------------------------------------------------------------------------
76 //
45 Script::Parser::Parser(QString text) : 77 Script::Parser::Parser(QString text) :
46 m_script(text) {} 78 m_script(text) {}
47 79
80 //
81 // -------------------------------------------------------------------------------------------------
82 //
83 Script::Parser::~Parser() {}
84
85 //
86 // -------------------------------------------------------------------------------------------------
87 //
48 void Script::Parser::parse() 88 void Script::Parser::parse()
89 {
90 preprocess();
91 m_state.reset();
92
93 while (next (TOK_Any))
94 {
95 print ("token: %1 (%2)\n", state().token.text, TokenNames[state().token.type]);
96 }
97 }
98
99 //
100 // -------------------------------------------------------------------------------------------------
101 //
102 void Script::Parser::preprocess()
49 { 103 {
50 bool inString = false; 104 bool inString = false;
51 bool inComment = false; 105 bool inComment = false;
52 bool inBackslash = false; 106 bool inBackslash = false;
53 int ln = 1; 107 int ln = 1;
54 int pos = 0; 108 int pos = 0;
55 109
56 // Preprocess 110 // Preprocess
57 for (QChar qch : text) 111 for (QChar qch : m_script)
58 { 112 {
59 char ch = qch.toAscii(); 113 char ch = qch.toAscii();
60 114
61 if (not inComment && not inString && ch == '\0') 115 if (not inComment and not inString and ch == '\0')
62 scriptError ("bad character %s in script text on line %d", qch, ln); 116 scriptError ("bad character %1 in script text on line %2", qch, ln);
63 117
64 if (ch == '\\') 118 if (not inString)
65 { 119 {
66 inBackslash = true; 120 if (ch == '\\')
67 continue;
68 }
69
70 if (inBackslash)
71 {
72 if (inString)
73 { 121 {
74 switch (ch) 122 inBackslash = true;
75 {
76 case 'n': data << '\n'; break;
77 case 't': data << '\t'; break;
78 case 'b': data << '\b'; break;
79 case '\\': data << '\\'; break;
80 default: scriptError ("misplaced backslash on line %d", ln);
81 }
82
83 ++pos;
84 inBackslash == false;
85 continue; 123 continue;
86 } 124 }
87 else if (ch != '\n') 125
88 { 126 if (inBackslash and ch != '\n')
89 scriptError ("misplaced backslash on line %d", ln); 127 scriptError ("misplaced backslash on line %1", ln);
90 } 128 }
91 } 129
130 if (ch == '"')
131 inString ^= 1;
92 132
93 if (ch == '\n') 133 if (ch == '\n')
94 { 134 {
95 if (inString) 135 if (inString)
96 scriptError ("unterminated string on line %d", ln); 136 scriptError ("unterminated string on line %1", ln);
97 137
98 if (not inBackslash) 138 if (not inBackslash)
99 { 139 {
100 m_data << ';'; 140 m_data.append (';');
101 ++pos; 141 m_data.append ('\n');
142 pos += 2;
102 inComment = false; 143 inComment = false;
103 m_lineEndings << pos; 144 m_lineEndings << pos;
104 ++ln; 145 ++ln;
105 } 146 }
106 else 147 else
107 {
108 inBackslash = false; 148 inBackslash = false;
109 } 149
110 continue; 150 continue;
111 } 151 }
112 152
113 if (ch == '#' && not inString) 153 if (ch == '#' and not inString)
114 { 154 {
115 inComment = true; 155 inComment = true;
116 continue; 156 continue;
117 } 157 }
118 158
119 m_data << ch; 159 if (not inComment)
120 ++pos; 160 {
121 } 161 m_data.append (ch);
122 162 ++pos;
123 m_position.reset(); 163 }
124 } 164 }
125 165 }
126 bool Script::Parser::next(TokenType desiredType) 166
127 { 167 //
128 SavedPosition oldpos = position(); 168 // -------------------------------------------------------------------------------------------------
169 //
namespace Script
{
	// Thrown by the parser's read routine when the end of the script data is reached.
	class UnexpectedEOF : public std::exception
	{
	public:
		// Returns a fixed, statically allocated description of the error.
		const char* what() const noexcept override
		{
			return "unexpected EOF";
		}
	};
}
180
181 //
182 // -------------------------------------------------------------------------------------------------
183 //
184 char Script::Parser::read()
185 {
186 if (m_state.position >= m_data.length())
187 throw UnexpectedEOF();
188
189 char ch = m_data[m_state.position];
190 m_state.position++;
191
192 if (m_state.position == m_lineEndings[m_state.lineNumber])
193 m_state.lineNumber++;
194
195 return ch;
196 }
197
198 //
199 // -------------------------------------------------------------------------------------------------
200 //
201 void Script::Parser::unread()
202 {
203 if (m_state.position <= 0)
204 return;
205
206 if (m_state.lineNumber > 0
207 and m_state.position == m_lineEndings[m_state.lineNumber - 1])
208 {
209 m_state.lineNumber--;
210 }
211
212 m_state.position--;
213 }
214
215 //
216 // -------------------------------------------------------------------------------------------------
217 //
// Takes a hexadecimal character and returns its numerical value (0 to 15). It is assumed that
// isxdigit(xd) is true (if not, result is undefined).
//
int parseXDigit (char xd)
{
	// The letters a-f / A-F stand for the values 10-15, hence the offset of 10.
	if (xd >= 'a')
		return xd - 'a' + 10;

	if (xd >= 'A')
		return xd - 'A' + 10;

	return xd - '0';
}
231
232 //
233 // -------------------------------------------------------------------------------------------------
234 //
235 bool Script::Parser::next (TokenType desiredType)
236 {
237 SavedState oldpos = state();
238 Token oldtoken = m_state.token;
239
240 if (not getNextToken())
241 return false;
242
243 if (desiredType != TOK_Any and m_state.token.type != desiredType)
244 {
245 // Did not find the token we wanted, revert back
246 m_rejectedToken = m_state.token;
247 m_state.token = oldtoken;
248 setState (oldpos);
249 return false;
250 }
251
252 return true;
253 }
254
255 //
256 // -------------------------------------------------------------------------------------------------
257 //
258 bool Script::Parser::getNextToken()
259 {
260 try
261 {
262 m_state.token.text.clear();
263 m_state.token.number = 0;
264 m_state.token.real = 0.0;
265 skipSpace();
266
267 const char* data = m_data.constData() + m_state.position;
268
269 // Does this character start one of our tokens?
270 for (int tt = 0; tt <= LastNamedToken; ++tt)
271 {
272 if (strncmp (data, TokenNames[tt], strlen (TokenNames[tt])) != 0)
273 continue;
274
275 m_state.position += strlen (TokenNames[tt]);
276 m_state.token.text = QString::fromAscii (TokenNames[tt]);
277 m_state.token.type = TokenType (tt);
278 return true;
279 }
280
281 // Check for number
282 if (parseNumber())
283 return true;
284
285 // Check for string
286 if (*data == '"')
287 {
288 read();
289 parseString();
290 return true;
291 }
292
293 // Check for variable
294 if (*data == '$')
295 {
296 read();
297 m_state.token.text = parseIdentifier();
298 m_state.token.type = TOK_Variable;
299 return true;
300 }
301
302 // Must be a symbol of some sort then
303 m_state.token.text = parseIdentifier();
304 m_state.token.type = TOK_Symbol;
305 }
306 catch (UnexpectedEOF)
307 {
308 return false;
309 }
310
311 return true;
312 }
313
314 //
315 // -------------------------------------------------------------------------------------------------
316 //
317 bool Script::Parser::parseNumber()
318 {
319 SavedState pos = state();
320 char ch = read();
321 unread();
322 QString numberString;
323
324 if (not isdigit (ch) and ch != '.')
325 {
326 setState (pos);
327 return false;
328 }
329
330 int base = 10;
331 bool gotDot = false;
332
333 if (tryMatch ("0x", false))
334 base = 16;
335 elif (tryMatch ("0b", false))
336 base = 2;
337
338 int (*checkFunc)(int) = base == 16 ? isxdigit : isdigit;
339
340 for (int n = 0; not isspace (ch = read()); ++n)
341 {
342 if (n == 0 && ch == '0')
343 base = 8;
344
345 if (ch == '.')
346 {
347 if (gotDot)
348 scriptError ("multiple dots in numeric literal");
349
350 // If reading numbers like 0.1234 where the first number is zero, the parser
351 // will initially think the number is octal so we must take that into account here.
352 // Note that even if you have numbers like 05.612, it will still be decimal.
353 if (base != 10 and base != 8)
354 scriptError ("real number constant must be decimal");
355
356 base = 10;
357 gotDot = true;
358 }
359 else if (checkFunc (ch))
360 {
361 if (base <= 10 and (ch - '0') >= base)
362 scriptError ("bad base-%1 numeric literal", base);
363
364 numberString += ch;
365 }
366 else if (isalpha (ch))
367 scriptError ("invalid digit %1 in literal", ch);
368 else
369 break;
370 }
371
372 unread();
373 bool ok;
374
375 if (gotDot)
376 {
377 // Floating point number
378 m_state.token.real = numberString.toFloat (&ok);
379 m_state.token.number = m_state.token.real;
380 }
381 else
382 {
383 // Integral number
384 m_state.token.number = numberString.toInt (&ok, base);
385 m_state.token.real = m_state.token.number;
386 }
387
388 if (ok == false)
389 scriptError ("invalid numeric literal '%1'", numberString);
390
391 m_state.token.text = numberString;
392 m_state.token.type = TOK_Number;
393
394 return true;
395 }
396
397 //
398 // -------------------------------------------------------------------------------------------------
399 //
400 void Script::Parser::scriptError (QString text)
401 {
402 throw ParseError (text);
403 }
404
405 //
406 // -------------------------------------------------------------------------------------------------
407 //
408 // Checks whether the parser is at the beginning of the given string in the code. The string is
409 // expected not to have newlines. If true, the parser jumps over the text.
410 //
411 bool Script::Parser::tryMatch (const char* text, bool caseSensitive)
412 {
413 assert (strstr (text, "\n") == NULL);
414 const char* data = m_data.constData() + m_state.position;
415 int (*func) (const char*, const char*) = caseSensitive ? &strcmp : &strcasecmp;
416
417 if ((*func) (data, text) == 0)
418 {
419 m_state.position += strlen (text);
420 return true;
421 }
422
129 return false; 423 return false;
130 } 424 }
131 425
132 void Script::Parser::mustGetNext(TokenType desiredType) 426 //
133 { 427 // -------------------------------------------------------------------------------------------------
134 428 //
135 } 429 QString Script::Parser::parseEscapeSequence()
136 430 {
137 bool Script::Parser::peekNext(Token& tok) 431 char ch = read();
138 { 432 QString result;
433
434 switch (ch)
435 {
436 case '"':
437 result += "\"";
438 break;
439
440 case 'n':
441 result += "\n";
442 break;
443
444 case 't':
445 result += "\t";
446 break;
447
448 case '\\':
449 result += "\\";
450 break;
451
452 case 'x':
453 case 'X':
454 {
455 char n1 = read();
456 char n2 = read();
457
458 if (not isxdigit(n1) or not isxdigit(n2))
459 scriptError ("bad hexa-decimal character \\x%1%2", n1, n2);
460
461 unsigned char num = parseXDigit(n1) * 16 + parseXDigit(n2);
462 result += char (num);
463 }
464 break;
465
466 default:
467 scriptError ("unknown escape sequence \\%1", ch);
468 }
469
470 return result;
471 }
472
473 //
474 // -------------------------------------------------------------------------------------------------
475 //
476 void Script::Parser::parseString()
477 {
478 m_state.token.type = TOK_String;
479 m_state.token.text.clear();
480
481 try
482 {
483 char ch;
484
485 while ((ch = read()) != '"')
486 {
487 if (ch == '\\')
488 m_state.token.text += parseEscapeSequence();
489 else
490 m_state.token.text += ch;
491 }
492 }
493 catch (UnexpectedEOF)
494 {
495 scriptError ("unterminated string");
496 }
497 }
498
499 //
500 // -------------------------------------------------------------------------------------------------
501 //
502 void Script::Parser::skipSpace()
503 {
504 while (isspace (read()))
505 ;
506
507 unread();
508 }
509
510 //
511 // -------------------------------------------------------------------------------------------------
512 //
513 void Script::Parser::mustGetNext (TokenType desiredType)
514 {
515 if (not next (desiredType))
516 {
517 scriptError ("Expected %1, got %2",
518 TokenNames[m_rejectedToken.type],
519 TokenNames[desiredType]);
520 }
521 }
522
523 //
524 // -------------------------------------------------------------------------------------------------
525 //
526 bool Script::Parser::peekNext (Token& tok)
527 {
528 SavedState pos = state();
529
530 if (next (TOK_Any))
531 {
532 tok = m_state.token;
533 setState (pos);
534 return true;
535 }
536
139 return false; 537 return false;
140 } 538 }
141 539
142 const Script::SavedPosition& Script::Parser::position() const 540 //
143 { 541 // -------------------------------------------------------------------------------------------------
144 return m_position; 542 //
145 } 543 const Script::SavedState& Script::Parser::state() const
146 544 {
147 void Script::Parser::setPosition(const SavedPosition& pos) 545 return m_state;
148 { 546 }
149 m_position = pos; 547
150 } 548 //
549 // -------------------------------------------------------------------------------------------------
550 //
551 void Script::Parser::setState (const SavedState& pos)
552 {
553 m_state = pos;
554 }
555
556 //
557 // -------------------------------------------------------------------------------------------------
558 //
559 QString Script::Parser::parseIdentifier()
560 {
561 char ch;
562 QString identifier;
563
564 while (not isspace (ch = read()))
565 {
566 if (isalnum (ch) == false and ch != '_')
567 break;
568
569 identifier += QChar::fromAscii (ch);
570 }
571
572 unread();
573 return identifier;
574 }

mercurial