diff -r 81887a77baa0 -r e15a577a0bfe src/script/parser.cpp
--- a/src/script/parser.cpp	Mon Jan 26 12:46:58 2015 +0200
+++ b/src/script/parser.cpp	Tue Feb 03 04:03:19 2015 +0200
@@ -1,13 +1,41 @@
+/*
+ * LDForge: LDraw parts authoring CAD
+ * Copyright (C) 2013 - 2015 Teemu Piippo
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
 #include "parser.h"
 
 static const char* TokenNames[] =
 {
+	"if",
+	"then",
+	"else",
+	"endif",
+	"endmacro",
+	"macro",
+	"for",
+	"while",
+	"done",
+	"do",
 	"==",
 	"<=",
 	">=",
 	"&&",
 	"||",
-	"$",
+	"!=",
 	":",
 	";",
 	".",
@@ -36,17 +64,51 @@
 	"~",
 	"`",
 	"%",
+	"",
 	"",
 	"",
 	"",
 	"",
 };
 
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Creates a parser for the given script text. Nothing is processed until parse() is called.
+//
 Script::Parser::Parser(QString text) :
 	m_script(text) {}
 
+//
+// -------------------------------------------------------------------------------------------------
+//
+Script::Parser::~Parser() {}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Preprocesses the script and then reads it token by token, printing each token as it goes.
+//
 void Script::Parser::parse()
 {
+	preprocess();
+	m_state.reset();
+
+	while (next (TOK_Any))
+	{
+		print ("token: %1 (%2)\n", state().token.text, TokenNames[state().token.type]);
+	}
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Copies the script text into m_data with comments and backslash line continuations removed.
+// Each line is terminated with ";\n" and the position right after every such terminator is
+// stored in m_lineEndings.
+//
+void Script::Parser::preprocess()
+{
 	bool inString = false;
 	bool inComment = false;
 	bool inBackslash = false;
@@ -54,97 +116,543 @@
 	int pos = 0;
+	bool inEscape = false; // inside a string, right after a backslash
 
 	// Preprocess
-	for (QChar qch : text)
+	for (QChar qch : m_script)
 	{
 		char ch = qch.toAscii();
 
-		if (not inComment && not inString && ch == '\0')
-			scriptError ("bad character %s in script text on line %d", qch, ln);
+		if (not inComment and not inString and ch == '\0')
+			scriptError ("bad character %1 in script text on line %2", qch, ln);
 
-		if (ch == '\\')
+		if (not inString)
 		{
-			inBackslash = true;
-			continue;
+			if (ch == '\\')
+			{
+				inBackslash = true;
+				continue;
+			}
+
+			if (inBackslash and ch != '\n')
+				scriptError ("misplaced backslash on line %1", ln);
 		}
 
-		if (inBackslash)
-		{
-			if (inString)
-			{
-				switch (ch)
-				{
-					case 'n': data << '\n'; break;
-					case 't': data << '\t'; break;
-					case 'b': data << '\b'; break;
-					case '\\': data << '\\'; break;
-					default: scriptError ("misplaced backslash on line %d", ln);
-				}
-
-				++pos;
-				inBackslash == false;
-				continue;
-			}
-			else if (ch != '\n')
-			{
-				scriptError ("misplaced backslash on line %d", ln);
-			}
-		}
+		// Quotes inside comments do not delimit strings, and a backslash-escaped character
+		// inside a string (such as \") never terminates it.
+		if (not inComment)
+		{
+			if (inString and inEscape)
+				inEscape = false;
+			else if (inString and ch == '\\')
+				inEscape = true;
+			else if (ch == '"')
+				inString = not inString;
+		}
 
 		if (ch == '\n')
 		{
 			if (inString)
-				scriptError ("unterminated string on line %d", ln);
+				scriptError ("unterminated string on line %1", ln);
 
 			if (not inBackslash)
 			{
-				m_data << ';';
-				++pos;
+				m_data.append (';');
+				m_data.append ('\n');
+				pos += 2;
 				inComment = false;
 				m_lineEndings << pos;
 				++ln;
 			}
 			else
-			{
 				inBackslash = false;
-			}
+
 			continue;
 		}
 
-		if (ch == '#' && not inString)
+		if (ch == '#' and not inString)
 		{
 			inComment = true;
 			continue;
 		}
 
-		m_data << ch;
-		++pos;
+		if (not inComment)
+		{
+			m_data.append (ch);
+			++pos;
+		}
+	}
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Thrown by Script::Parser::read() when the script data runs out.
+//
+namespace Script
+{
+	class UnexpectedEOF : public std::exception
+	{
+		const char* what() const throw()
+		{
+			return "unexpected EOF";
+		}
+	};
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Returns the next character of the preprocessed script and advances the read position, bumping
+// the line number when a recorded line ending is reached. Throws UnexpectedEOF at end of data.
+//
+char Script::Parser::read()
+{
+	if (m_state.position >= m_data.length())
+		throw UnexpectedEOF();
+
+	char ch = m_data[m_state.position];
+	m_state.position++;
+
+	// m_lineEndings has no entry for whatever follows the final line ending, so the line number
+	// must be checked before it is used as an index.
+	if (m_state.lineNumber < m_lineEndings.size()
+		and m_state.position == m_lineEndings[m_state.lineNumber])
+	{
+		m_state.lineNumber++;
+	}
+
+	return ch;
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Moves the read position back by one character, also stepping the line number back when the
+// position was at a recorded line ending. Does nothing at the beginning of the data.
+//
+void Script::Parser::unread()
+{
+	if (m_state.position <= 0)
+		return;
+
+	if (m_state.lineNumber > 0
+		and m_state.position == m_lineEndings[m_state.lineNumber - 1])
+	{
+		m_state.lineNumber--;
+	}
+
+	m_state.position--;
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Takes a hexadecimal character and returns its numerical value. It is assumed that isxdigit(xd)
+// is true (if not, result is undefined).
+//
+int parseXDigit (char xd)
+{
+	if (xd >= 'a')
+		return xd - 'a' + 10;
+
+	if (xd >= 'A')
+		return xd - 'A' + 10;
+
+	return xd - '0';
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads the next token from the script. If desiredType is not TOK_Any and the token found is of
+// another type, the token is stored as the rejected token, the previous state is restored and
+// false is returned. Also returns false if no token could be read.
+//
+bool Script::Parser::next (TokenType desiredType)
+{
+	SavedState oldpos = state();
+	Token oldtoken = m_state.token;
+
+	if (not getNextToken())
+		return false;
+
+	if (desiredType != TOK_Any and m_state.token.type != desiredType)
+	{
+		// Did not find the token we wanted, revert back
+		m_rejectedToken = m_state.token;
+		m_state.token = oldtoken;
+		setState (oldpos);
+		return false;
 	}
 
-	m_position.reset();
+	return true;
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads the next token into the parser state. Named tokens are tried first, followed by numbers,
+// strings, $-prefixed variables and finally plain symbols. Returns false if the end of the script
+// was reached before a token could be read.
+//
+bool Script::Parser::getNextToken()
+{
+	try
+	{
+		m_state.token.text.clear();
+		m_state.token.number = 0;
+		m_state.token.real = 0.0;
+		skipSpace();
+
+		const char* data = m_data.constData() + m_state.position;
+
+		// Does this character start one of our tokens?
+		for (int tt = 0; tt <= LastNamedToken; ++tt)
+		{
+			const size_t length = strlen (TokenNames[tt]);
+
+			if (strncmp (data, TokenNames[tt], length) != 0)
+				continue;
+
+			// A keyword is not a match when it is merely the beginning of a longer identifier:
+			// "format" must not be read as "for" followed by "mat".
+			if (isalpha (TokenNames[tt][0]) and (isalnum (data[length]) or data[length] == '_'))
+				continue;
+
+			m_state.position += length;
+			m_state.token.text = QString::fromAscii (TokenNames[tt]);
+			m_state.token.type = TokenType (tt);
+			return true;
+		}
+
+		// Check for number
+		if (parseNumber())
+			return true;
+
+		// Check for string
+		if (*data == '"')
+		{
+			read();
+			parseString();
+			return true;
+		}
+
+		// Check for variable
+		if (*data == '$')
+		{
+			read();
+			m_state.token.text = parseIdentifier();
+			m_state.token.type = TOK_Variable;
+			return true;
+		}
+
+		// Must be a symbol of some sort then
+		m_state.token.text = parseIdentifier();
+		m_state.token.type = TOK_Symbol;
+
+		// An empty symbol means that no input was consumed. Fail here instead of handing out
+		// the same empty token over and over again.
+		if (m_state.token.text.isEmpty())
+			scriptError ("unexpected character '%1'", *data);
+	}
+	catch (const UnexpectedEOF&)
+	{
+		return false;
+	}
+
+	return true;
 }
 
-bool Script::Parser::next(TokenType desiredType)
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Tries to read a numeric literal at the current position. Returns false, leaving the position
+// untouched, if the data begins with neither a digit nor a dot. Otherwise fills in the token's
+// text, number and real fields and returns true; malformed literals raise a script error.
+//
+bool Script::Parser::parseNumber()
 {
-	SavedPosition oldpos = position();
+	SavedState pos = state();
+	char ch = read();
+	unread();
+	QString numberString;
+
+	if (not isdigit (ch) and ch != '.')
+	{
+		setState (pos);
+		return false;
+	}
+
+	int base = 10;
+	bool gotDot = false;
+
+	if (tryMatch ("0x", false))
+		base = 16;
+	elif (tryMatch ("0b", false))
+		base = 2;
+
+	int (*checkFunc)(int) = base == 16 ? isxdigit : isdigit;
+
+	for (int n = 0; not isspace (ch = read()); ++n)
+	{
+		// A leading zero means octal, unless a 0x or 0b prefix already fixed the base.
+		if (n == 0 and ch == '0' and base == 10)
+			base = 8;
+
+		if (ch == '.')
+		{
+			if (gotDot)
+				scriptError ("multiple dots in numeric literal");
+
+			// If reading numbers like 0.1234 where the first number is zero, the parser
+			// will initially think the number is octal so we must take that into account here.
+			// Note that even if you have numbers like 05.612, it will still be decimal.
+			if (base != 10 and base != 8)
+				scriptError ("real number constant must be decimal");
+
+			base = 10;
+			gotDot = true;
+			numberString += ch; // keep the decimal point so that toFloat() gets to see it
+		}
+		else if (checkFunc (ch))
+		{
+			if (base <= 10 and (ch - '0') >= base)
+				scriptError ("bad base-%1 numeric literal", base);
+
+			numberString += ch;
+		}
+		else if (isalpha (ch))
+			scriptError ("invalid digit %1 in literal", ch);
+		else
+			break;
+	}
+
+	unread();
+	bool ok;
+
+	if (gotDot)
+	{
+		// Floating point number
+		m_state.token.real = numberString.toFloat (&ok);
+		m_state.token.number = m_state.token.real;
+	}
+	else
+	{
+		// Integral number
+		m_state.token.number = numberString.toInt (&ok, base);
+		m_state.token.real = m_state.token.number;
+	}
+
+	if (ok == false)
+		scriptError ("invalid numeric literal '%1'", numberString);
+
+	m_state.token.text = numberString;
+	m_state.token.type = TOK_Number;
+
+	return true;
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reports a script error by throwing a ParseError that carries the given message.
+//
+void Script::Parser::scriptError (QString text)
+{
+	throw ParseError (text);
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Checks whether the parser is at the beginning of the given string in the code. The string is
+// expected not to have newlines. If true, the parser jumps over the text.
+//
+bool Script::Parser::tryMatch (const char* text, bool caseSensitive)
+{
+	assert (strstr (text, "\n") == NULL);
+	const char* data = m_data.constData() + m_state.position;
+	const size_t length = strlen (text);
+	int (*func) (const char*, const char*, size_t) = caseSensitive ? &strncmp : &strncasecmp;
+
+	// Compare only as many characters as the text has: the data continues past the match.
+	if ((*func) (data, text, length) == 0)
+	{
+		m_state.position += length;
+		return true;
+	}
+
 	return false;
 }
 
-void Script::Parser::mustGetNext(TokenType desiredType)
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads the remainder of an escape sequence, the backslash having been read already, and returns
+// the text it stands for. Raises a script error on unknown or malformed sequences.
+//
+QString Script::Parser::parseEscapeSequence()
 {
+	char ch = read();
+	QString result;
 
+	switch (ch)
+	{
+		case '"':
+			result += "\"";
+			break;
+
+		case 'n':
+			result += "\n";
+			break;
+
+		case 't':
+			result += "\t";
+			break;
+
+		case '\\':
+			result += "\\";
+			break;
+
+		case 'x':
+		case 'X':
+			{
+				char n1 = read();
+				char n2 = read();
+
+				if (not isxdigit(n1) or not isxdigit(n2))
+					scriptError ("bad hexa-decimal character \\x%1%2", n1, n2);
+
+				unsigned char num = parseXDigit(n1) * 16 + parseXDigit(n2);
+				result += char (num);
+			}
+			break;
+
+		default:
+			scriptError ("unknown escape sequence \\%1", ch);
+	}
+
+	return result;
 }
 
-bool Script::Parser::peekNext(Token& tok)
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads a string literal up to its closing quote, the opening quote having been read already,
+// and stores the unescaped contents as a TOK_String token.
+//
+void Script::Parser::parseString()
+{
+	m_state.token.type = TOK_String;
+	m_state.token.text.clear();
+
+	try
+	{
+		char ch;
+
+		while ((ch = read()) != '"')
+		{
+			if (ch == '\\')
+				m_state.token.text += parseEscapeSequence();
+			else
+				m_state.token.text += ch;
+		}
+	}
+	catch (const UnexpectedEOF&)
+	{
+		scriptError ("unterminated string");
+	}
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Advances the read position past any whitespace.
+//
+void Script::Parser::skipSpace()
 {
+	while (isspace (read()))
+		;
+
+	unread();
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads the next token like next() does, but raises a script error if a token of the desired
+// type could not be read.
+//
+void Script::Parser::mustGetNext (TokenType desiredType)
+{
+	if (not next (desiredType))
+	{
+		scriptError ("Expected %1, got %2",
+			TokenNames[desiredType],
+			TokenNames[m_rejectedToken.type]);
+	}
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads the next token into tok without advancing the parser. Returns false if there was no
+// token to read, in which case tok is left untouched.
+//
+bool Script::Parser::peekNext (Token& tok)
+{
+	SavedState pos = state();
+
+	if (next (TOK_Any))
+	{
+		tok = m_state.token;
+		setState (pos);
+		return true;
+	}
+
+	// Nothing to peek at; still leave the parser where it was.
+	setState (pos);
 	return false;
 }
 
-const Script::SavedPosition& Script::Parser::position() const
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Returns the current parser state.
+//
+const Script::SavedState& Script::Parser::state() const
 {
-	return m_position;
+	return m_state;
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Replaces the parser state with the given one.
+//
+void Script::Parser::setState (const SavedState& pos)
+{
+	m_state = pos;
 }
 
-void Script::Parser::setPosition(const SavedPosition& pos)
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads characters for as long as they are alphanumeric or underscores and returns them. The
+// result is empty if the very first character does not qualify.
+//
+QString Script::Parser::parseIdentifier()
 {
-	m_position = pos;
-}
+	char ch;
+	QString identifier;
+
+	while (not isspace (ch = read()))
+	{
+		if (isalnum (ch) == false and ch != '_')
+			break;
+
+		identifier += QChar::fromAscii (ch);
+	}
+
+	unread();
+	return identifier;
+}
\ No newline at end of file