diff -r 81887a77baa0 -r e15a577a0bfe src/script/parser.cpp
--- a/src/script/parser.cpp Mon Jan 26 12:46:58 2015 +0200
+++ b/src/script/parser.cpp Tue Feb 03 04:03:19 2015 +0200
@@ -1,13 +1,41 @@
+/*
+ * LDForge: LDraw parts authoring CAD
+ * Copyright (C) 2013 - 2015 Teemu Piippo
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
#include "parser.h"
static const char* TokenNames[] =
{
+ "if",
+ "then",
+ "else",
+ "endif",
+ "endmacro",
+ "macro",
+ "for",
+ "while",
+ "done",
+ "do",
"==",
"<=",
">=",
"&&",
"||",
- "$",
+ "!=",
":",
";",
".",
@@ -36,17 +64,43 @@
"~",
"`",
"%",
+ "",
"",
"",
"",
"",
};
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Constructs a parser for the given script text. The text is only stored in m_script here; the
+// constructor body itself is empty.
+//
Script::Parser::Parser(QString text) :
m_script(text) {}
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Destroys the parser. No explicit clean-up is performed here.
+//
+Script::Parser::~Parser() = default;
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Runs the parser over the stored script: preprocesses the text, resets the parser state and
+// then pulls tokens one at a time until next() reports failure, printing the text and the type
+// name of every token that was read.
+//
void Script::Parser::parse()
{
+	preprocess();
+	m_state.reset();
+
+	for (;;)
+	{
+		if (not next (TOK_Any))
+			break;
+
+		const Token& current = state().token;
+		print ("token: %1 (%2)\n", current.text, TokenNames[current.type]);
+	}
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+void Script::Parser::preprocess()
+{
bool inString = false;
bool inComment = false;
bool inBackslash = false;
@@ -54,97 +108,467 @@
int pos = 0;
// Preprocess
- for (QChar qch : text)
+ for (QChar qch : m_script)
{
char ch = qch.toAscii();
- if (not inComment && not inString && ch == '\0')
- scriptError ("bad character %s in script text on line %d", qch, ln);
+ if (not inComment and not inString and ch == '\0')
+ scriptError ("bad character %1 in script text on line %2", qch, ln);
- if (ch == '\\')
+ if (not inString)
{
- inBackslash = true;
- continue;
+ if (ch == '\\')
+ {
+ inBackslash = true;
+ continue;
+ }
+
+ if (inBackslash and ch != '\n')
+ scriptError ("misplaced backslash on line %1", ln);
}
- if (inBackslash)
- {
- if (inString)
- {
- switch (ch)
- {
- case 'n': data << '\n'; break;
- case 't': data << '\t'; break;
- case 'b': data << '\b'; break;
- case '\\': data << '\\'; break;
- default: scriptError ("misplaced backslash on line %d", ln);
- }
-
- ++pos;
- inBackslash == false;
- continue;
- }
- else if (ch != '\n')
- {
- scriptError ("misplaced backslash on line %d", ln);
- }
- }
+ if (ch == '"')
+ inString ^= 1;
if (ch == '\n')
{
if (inString)
- scriptError ("unterminated string on line %d", ln);
+ scriptError ("unterminated string on line %1", ln);
if (not inBackslash)
{
- m_data << ';';
- ++pos;
+ m_data.append (';');
+ m_data.append ('\n');
+ pos += 2;
inComment = false;
m_lineEndings << pos;
++ln;
}
else
- {
inBackslash = false;
- }
+
continue;
}
- if (ch == '#' && not inString)
+ if (ch == '#' and not inString)
{
inComment = true;
continue;
}
- m_data << ch;
- ++pos;
+ if (not inComment)
+ {
+ m_data.append (ch);
+ ++pos;
+ }
+ }
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+namespace Script
+{
+	// Exception thrown by Parser::read() when it is asked for a character past the end of the
+	// preprocessed script data.
+	class UnexpectedEOF : public std::exception
+	{
+	public:
+		// Returns a fixed, human-readable description of the error. Declared public so that it
+		// can be called on an UnexpectedEOF object directly and not only through a
+		// std::exception reference.
+		const char* what() const throw()
+		{
+			return "unexpected EOF";
+		}
+	};
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads one character from the preprocessed script data and advances the read position past it.
+// The line counter is bumped when the new position reaches the next recorded line ending.
+// Throws UnexpectedEOF if there is no data left to read.
+//
+char Script::Parser::read()
+{
+	if (m_state.position >= m_data.length())
+		throw UnexpectedEOF();
+
+	char ch = m_data[m_state.position];
+	m_state.position++;
+
+	// Text after the last recorded line ending (for instance a script that does not end in a
+	// newline) has no entry in m_lineEndings, so the index must be checked before it is used.
+	if (m_state.lineNumber < m_lineEndings.size()
+		and m_state.position == m_lineEndings[m_state.lineNumber])
+	{
+		m_state.lineNumber++;
+	}
+
+	return ch;
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Steps the read position back by one character. Does nothing when the position is already at
+// the start of the data. If the position being left equals the recorded end of the previous
+// line, the line counter is stepped back as well.
+//
+void Script::Parser::unread()
+{
+	if (m_state.position > 0)
+	{
+		const bool leavingLine = m_state.lineNumber > 0
+			and m_state.position == m_lineEndings[m_state.lineNumber - 1];
+
+		if (leavingLine)
+			--m_state.lineNumber;
+
+		--m_state.position;
+	}
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Takes a hexadecimal character and returns its numerical value (0-15). It is assumed that
+// isxdigit(xd) is true (if not, result is undefined).
+//
+int parseXDigit (char xd)
+{
+	// The letters a-f / A-F stand for ten through fifteen, so they are offset past the ten
+	// decimal digits.
+	if (xd >= 'a')
+		return xd - 'a' + 10;
+
+	if (xd >= 'A')
+		return xd - 'A' + 10;
+
+	return xd - '0';
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads the next token and checks it against desiredType; TOK_Any accepts every token type.
+// Returns true when a token was read and accepted. Returns false when getNextToken() fails
+// (the state is then left as getNextToken() left it) or when the token is of another type; in
+// the latter case the token is stored in m_rejectedToken and the parser state is restored to
+// what it was before the call.
+//
+bool Script::Parser::next (TokenType desiredType)
+{
+	const SavedState previousState = state();
+	const Token previousToken = m_state.token;
+
+	if (not getNextToken())
+		return false;
+
+	const bool acceptable = (desiredType == TOK_Any or m_state.token.type == desiredType);
+
+	if (not acceptable)
+	{
+		// Wrong kind of token: keep it for error reporting, then rewind the parser.
+		m_rejectedToken = m_state.token;
+		m_state.token = previousToken;
+		setState (previousState);
+		return false;
}
- m_position.reset();
+	return true;
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads the next token from the preprocessed data into m_state.token. After skipping
+// whitespace it tries, in this order: the fixed spellings in TokenNames (up to
+// LastNamedToken), a numeric literal, a double-quoted string, a $-prefixed variable and
+// finally a bare symbol. Returns true when a token was read and false when the end of the data
+// was hit first (UnexpectedEOF). A character that cannot start any token is a script error.
+//
+bool Script::Parser::getNextToken()
+{
+	try
+	{
+		m_state.token.text.clear();
+		m_state.token.number = 0;
+		m_state.token.real = 0.0;
+		skipSpace();
+
+		const char* data = m_data.constData() + m_state.position;
+
+		// Does this character start one of our tokens?
+		for (int tt = 0; tt <= LastNamedToken; ++tt)
+		{
+			if (strncmp (data, TokenNames[tt], strlen (TokenNames[tt])) != 0)
+				continue;
+
+			m_state.position += strlen (TokenNames[tt]);
+			m_state.token.text = QString::fromAscii (TokenNames[tt]);
+			m_state.token.type = TokenType (tt);
+			return true;
+		}
+
+		// Check for number
+		if (parseNumber())
+			return true;
+
+		// Check for string
+		if (*data == '"')
+		{
+			read();
+			parseString();
+			return true;
+		}
+
+		// Check for variable
+		if (*data == '$')
+		{
+			read();
+			m_state.token.text = parseIdentifier();
+			m_state.token.type = TOK_Variable;
+			return true;
+		}
+
+		// Must be a symbol of some sort then
+		m_state.token.text = parseIdentifier();
+		m_state.token.type = TOK_Symbol;
+
+		// parseIdentifier() consumes nothing when the character cannot be part of an
+		// identifier. Accepting the resulting empty symbol would leave the position unchanged
+		// and make every following call return the same empty token forever.
+		if (m_state.token.text.isEmpty())
+			scriptError (QString ("unexpected character '%1'").arg (QChar::fromAscii (*data)));
+	}
+	catch (const UnexpectedEOF&)
+	{
+		return false;
+	}
+
+	return true;
}
-bool Script::Parser::next(TokenType desiredType)
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Tries to read a numeric literal at the current position. Returns false, with the state
+// restored, if the next character is neither a digit nor a dot. Otherwise the literal is read
+// up to the first whitespace or non-literal character and stored in m_state.token as a
+// TOK_Number: text holds the literal's characters, number the integral value and real the
+// floating point value. A "0x" prefix selects base 16, "0b" base 2 and a plain leading zero
+// base 8; a literal containing a dot is always decimal. Malformed literals are script errors.
+//
+bool Script::Parser::parseNumber()
{
- SavedPosition oldpos = position();
+	SavedState pos = state();
+	char ch = read();
+	unread();
+	QString numberString;
+
+	if (not isdigit (ch) and ch != '.')
+	{
+		setState (pos);
+		return false;
+	}
+
+	int base = 10;
+	bool gotDot = false;
+
+	if (tryMatch ("0x", false))
+		base = 16;
+	elif (tryMatch ("0b", false))
+		base = 2;
+
+	// An explicit prefix fixes the base; a zero digit following it must not be mistaken for
+	// the leading zero of an octal literal.
+	const bool hasPrefix = (base != 10);
+	int (*checkFunc)(int) = base == 16 ? isxdigit : isdigit;
+
+	for (int n = 0; not isspace (ch = read()); ++n)
+	{
+		if (n == 0 and ch == '0' and not hasPrefix)
+			base = 8;
+
+		if (ch == '.')
+		{
+			if (gotDot)
+				scriptError ("multiple dots in numeric literal");
+
+			// If reading numbers like 0.1234 where the first number is zero, the parser
+			// will initially think the number is octal so we must take that into account here.
+			// Note that even if you have numbers like 05.612, it will still be decimal.
+			if (base != 10 and base != 8)
+				scriptError ("real number constant must be decimal");
+
+			base = 10;
+			gotDot = true;
+
+			// The dot is part of the literal: without it "0.5" would be converted as "05".
+			numberString += ch;
+		}
+		else if (checkFunc (ch))
+		{
+			if (base <= 10 and (ch - '0') >= base)
+				scriptError ("bad base-%1 numeric literal", base);
+
+			numberString += ch;
+		}
+		else if (isalpha (ch))
+			scriptError ("invalid digit %1 in literal", ch);
+		else
+			break;
+	}
+
+	// The character that ended the literal belongs to whatever comes next.
+	unread();
+	bool ok;
+
+	if (gotDot)
+	{
+		// Floating point number
+		m_state.token.real = numberString.toFloat (&ok);
+		m_state.token.number = m_state.token.real;
+	}
+	else
+	{
+		// Integral number
+		m_state.token.number = numberString.toInt (&ok, base);
+		m_state.token.real = m_state.token.number;
+	}
+
+	if (ok == false)
+		scriptError ("invalid numeric literal '%1'", numberString);
+
+	m_state.token.text = numberString;
+	m_state.token.type = TOK_Number;
+
+	return true;
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reports a script error by throwing a ParseError constructed from the given message text.
+// This function always throws.
+//
+void Script::Parser::scriptError (QString text)
+{
+ throw ParseError (text);
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Checks whether the data at the current parser position begins with the given string. The
+// string is expected not to have newlines. Only the first strlen (text) characters of the data
+// are compared, case-insensitively if caseSensitive is false. If they match, the parser jumps
+// over the text and true is returned; otherwise the position is left alone.
+//
+bool Script::Parser::tryMatch (const char* text, bool caseSensitive)
+{
+	assert (strstr (text, "\n") == NULL);
+	const char* data = m_data.constData() + m_state.position;
+	const size_t length = strlen (text);
+
+	// The comparison must be bounded by the length of the text: an unbounded comparison would
+	// only succeed if the text was the very last thing in the data.
+	int (*func) (const char*, const char*, size_t) = caseSensitive ? &strncmp : &strncasecmp;
+
+	if ((*func) (data, text, length) == 0)
+	{
+		m_state.position += length;
+		return true;
+	}
+
return false;
}
-void Script::Parser::mustGetNext(TokenType desiredType)
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads the character(s) that follow a backslash inside a string literal and returns the text
+// they stand for. Recognised sequences are \", \n, \t, \\ and \xNN (two hexadecimal digits,
+// with either a lower or upper case x). Anything else is reported through scriptError().
+//
+QString Script::Parser::parseEscapeSequence()
{
+	QString sequence;
+	char code = read();
+
+	if (code == 'x' or code == 'X')
+	{
+		// Two hexadecimal digits give the character code.
+		char high = read();
+		char low = read();
+
+		if (not (isxdigit (high) and isxdigit (low)))
+			scriptError ("bad hexa-decimal character \\x%1%2", high, low);
+
+		unsigned char value = parseXDigit (high) * 16 + parseXDigit (low);
+		sequence += char (value);
+	}
+	else if (code == '"')
+		sequence += "\"";
+	else if (code == 'n')
+		sequence += "\n";
+	else if (code == 't')
+		sequence += "\t";
+	else if (code == '\\')
+		sequence += "\\";
+	else
+		scriptError ("unknown escape sequence \\%1", code);
+
+	return sequence;
}
-bool Script::Parser::peekNext(Token& tok)
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads the body of a string literal into m_state.token, which becomes a TOK_String. Characters
+// are consumed up to and including the closing double quote; a backslash hands over to
+// parseEscapeSequence(). Running out of data before the closing quote is a script error.
+//
+void Script::Parser::parseString()
+{
+	m_state.token.type = TOK_String;
+	m_state.token.text.clear();
+
+	try
+	{
+		for (char ch = read(); ch != '"'; ch = read())
+		{
+			if (ch != '\\')
+				m_state.token.text += ch;
+			else
+				m_state.token.text += parseEscapeSequence();
+		}
+	}
+	catch (const UnexpectedEOF&)
+	{
+		scriptError ("unterminated string");
+	}
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Advances the read position past a run of whitespace so that it ends up on the first
+// character that is not whitespace. read() throws UnexpectedEOF if the data runs out first.
+//
+void Script::Parser::skipSpace()
{
+	char ch;
+
+	do
+		ch = read();
+	while (isspace (ch));
+
+	// The loop read one character too many; give it back.
+	unread();
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Like next(), except that failing to read a token of the desired type is reported as a
+// script error instead of being returned to the caller.
+//
+void Script::Parser::mustGetNext (TokenType desiredType)
+{
+	if (not next (desiredType))
+	{
+		// %1 is what the caller asked for, %2 is the token that next() rejected.
+		scriptError ("Expected %1, got %2",
+			TokenNames[desiredType],
+			TokenNames[m_rejectedToken.type]);
+	}
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Looks at the next token without consuming it. On success the token is copied into tok and
+// true is returned; if no token could be read, tok is left untouched and false is returned.
+// The parser state is restored to what it was before the call in both cases.
+//
+bool Script::Parser::peekNext (Token& tok)
+{
+	SavedState pos = state();
+
+	if (next (TOK_Any))
+	{
+		tok = m_state.token;
+		setState (pos);
+		return true;
+	}
+
+	// A failed read still clears the current token and moves the position, so the state has to
+	// be put back here as well for the peek to be free of side effects.
+	setState (pos);
return false;
}
-const Script::SavedPosition& Script::Parser::position() const
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Returns a read-only reference to the parser's current state (m_state).
+//
+const Script::SavedState& Script::Parser::state() const
{
- return m_position;
+ return m_state;
+}
+
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Replaces the parser's current state (m_state) with a copy of the given one.
+//
+void Script::Parser::setState (const SavedState& pos)
+{
+ m_state = pos;
}
-void Script::Parser::setPosition(const SavedPosition& pos)
+//
+// -------------------------------------------------------------------------------------------------
+//
+// Reads an identifier, i.e. a run of letters, digits and underscores, starting at the current
+// position and returns it. The character that ended the run is not consumed. The result is
+// empty if the very first character cannot be part of an identifier.
+//
+QString Script::Parser::parseIdentifier()
{
- m_position = pos;
-}
+	QString name;
+
+	for (;;)
+	{
+		const char current = read();
+
+		if (isspace (current) or (not isalnum (current) and current != '_'))
+			break;
+
+		name += QChar::fromAscii (current);
+	}
+
+	// Give back the character that terminated the identifier.
+	unread();
+	return name;
+}
\ No newline at end of file