Tue, 03 Feb 2015 04:03:19 +0200
- now parses to tokens
CMakeLists.txt | file | annotate | diff | comparison | revisions | |
src/main.cc | file | annotate | diff | comparison | revisions | |
src/script/ast.h | file | annotate | diff | comparison | revisions | |
src/script/objtype.cpp | file | annotate | diff | comparison | revisions | |
src/script/objtype.h | file | annotate | diff | comparison | revisions | |
src/script/parser.cpp | file | annotate | diff | comparison | revisions | |
src/script/parser.h | file | annotate | diff | comparison | revisions |
--- a/CMakeLists.txt Mon Jan 26 12:46:58 2015 +0200 +++ b/CMakeLists.txt Tue Feb 03 04:03:19 2015 +0200 @@ -72,6 +72,7 @@ src/editmodes/rectangleMode.cc src/editmodes/selectMode.cc src/script/parser.cpp + src/script/objtype.cpp ) set (LDFORGE_HEADERS
--- a/src/main.cc Mon Jan 26 12:46:58 2015 +0200 +++ b/src/main.cc Tue Feb 03 04:03:19 2015 +0200 @@ -48,11 +48,26 @@ // int main (int argc, char* argv[]) { - QFile fp ("script.txt", QIODevice::ReadOnly); - Script::Parser parser (QString::fromLocal8Bit (fp.readAll())); - parser.parse(); - QFile fp2 ("script.out.txt", QIODevice::WriteOnly); - fp2.write (parser.preprocessedScript(), parser.preprocessedScript().length()); + QFile fp ("script.txt"); + if (fp.open (QIODevice::ReadOnly)) + { + Script::Parser parser (QString::fromLocal8Bit (fp.readAll())); + try + { + parser.parse(); + QFile fp2 ("script.out.txt"); + + if (fp2.open (QIODevice::WriteOnly)) + { + fp2.write (parser.preprocessedScript().toLocal8Bit(), + parser.preprocessedScript().length()); + } + } + catch (Script::ParseError e) + { + print ("error: %1: %2\n", parser.state().lineNumber, e.message()); + } + } return 0; QApplication app (argc, argv);
--- a/src/script/ast.h Mon Jan 26 12:46:58 2015 +0200 +++ b/src/script/ast.h Tue Feb 03 04:03:19 2015 +0200 @@ -4,6 +4,8 @@ namespace Script { + using AstPointer = QSharedPointer<class AstNode>; + enum AstNodeType { @@ -12,10 +14,10 @@ class AstNode { public: - AstNode (QSharedPointer<AstNode> parent); + AstNode (AstPointer parent); private: - QVector<QSharedPointer<AstNode>> m_children; - QSharedPointer<AstNode> m_parent; + QVector<AstPointer> m_children; + AstPointer m_parent; }; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/script/objtype.cpp Tue Feb 03 04:03:19 2015 +0200 @@ -0,0 +1,57 @@ +/* + * LDForge: LDraw parts authoring CAD + * Copyright (C) 2013 - 2015 Teemu Piippo + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "objtype.h" + + +Script::Type::Type() {} + +QString Script::ContainerType::asString() const +{ + switch (m_kind) + { + case ARRAY: + return m_elementType->asString() + "[]"; + + case TUPLE: + return m_elementType->asString() + + "(" + QString::number(m_n1) + ")"; + + case MATRIX: + return m_elementType->asString() + + "(" + QString::number(m_n1) + + "," + QString::number(m_n2) + ")"; + } + + return "???"; +} + +QString Script::BasicType::asString() const +{ + static const char* names[] = + { + "var", + "int", + "real", + "string", + "type", + "object", + }; + + return names[int (m_kind)]; +} \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/script/objtype.h Tue Feb 03 04:03:19 2015 +0200 @@ -0,0 +1,76 @@ +/* + * LDForge: LDraw parts authoring CAD + * Copyright (C) 2013 - 2015 Teemu Piippo + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once +#include "../main.h" + +namespace Script +{ + class Type + { + public: + Type(); + virtual QString asString() const = 0; + }; + + class BasicType : public Type + { + public: + enum Kind + { + VAR, // mixed + INT, + REAL, + STRING, + TYPE, // heh + OBJECT, + }; + + QString asString() const override; + Kind kind() const { return m_kind; } + + private: + Kind m_kind; + }; + + class ContainerType : public Type + { + public: + enum Kind + { + ARRAY, + TUPLE, + MATRIX + }; + + ContainerType (Kind kind, int n1, int n2) : + m_kind (kind), m_n1 (n1), m_n2 (n2) {} + + QString asString() const override; + Kind kind() const { return m_kind; } + int n1() const { return m_n1; } + int n2() const { return m_n1; } + QSharedPointer<Type> elementType() const { return m_elementType; } + + private: + Kind m_kind; + QSharedPointer<Type> m_elementType; + int m_n1; + int m_n2; + }; +}
--- a/src/script/parser.cpp Mon Jan 26 12:46:58 2015 +0200 +++ b/src/script/parser.cpp Tue Feb 03 04:03:19 2015 +0200 @@ -1,13 +1,41 @@ +/* + * LDForge: LDraw parts authoring CAD + * Copyright (C) 2013 - 2015 Teemu Piippo + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + #include "parser.h" static const char* TokenNames[] = { + "if", + "then", + "else", + "endif", + "endmacro", + "macro", + "for", + "while", + "done", + "do", "==", "<=", ">=", "&&", "||", - "$", + "!=", ":", ";", ".", @@ -36,17 +64,43 @@ "~", "`", "%", + "<variable>", "<string>", "<symbol>", "<number>", "<any>", }; +// +// ------------------------------------------------------------------------------------------------- +// Script::Parser::Parser(QString text) : m_script(text) {} +// +// ------------------------------------------------------------------------------------------------- +// +Script::Parser::~Parser() {} + +// +// ------------------------------------------------------------------------------------------------- +// void Script::Parser::parse() { + preprocess(); + m_state.reset(); + + while (next (TOK_Any)) + { + print ("token: %1 (%2)\n", state().token.text, TokenNames[state().token.type]); + } +} + +// +// ------------------------------------------------------------------------------------------------- +// +void Script::Parser::preprocess() +{ bool inString = false; bool inComment = false; bool inBackslash = false; @@ -54,97 +108,467 @@ int pos = 0; // Preprocess - for (QChar qch : text) + for (QChar qch : m_script) { char ch = qch.toAscii(); - if (not inComment && not inString && ch == '\0') - scriptError ("bad character %s in script text on line %d", qch, ln); + if (not inComment and not inString and ch == '\0') + scriptError ("bad character %1 in script text on line %2", qch, ln); - if (ch == '\\') + if (not inString) { - inBackslash = true; - continue; + if (ch == '\\') + { + inBackslash = true; + continue; + } + + if (inBackslash and ch != '\n') + scriptError ("misplaced backslash on line %1", ln); } - if (inBackslash) - { - if (inString) - { - switch (ch) - { - case 'n': data << '\n'; break; - case 't': data << '\t'; break; - case 'b': data << '\b'; break; - case '\\': data << '\\'; break; - default: scriptError ("misplaced backslash on line %d", ln); - } - - ++pos; - inBackslash == false; - continue; - } - else if (ch != '\n') - { - scriptError ("misplaced backslash on line %d", ln); - } - } + if (ch == '"') + inString ^= 1; if (ch == '\n') { if (inString) - scriptError ("unterminated string on line %d", ln); + scriptError ("unterminated string on line %1", ln); if (not inBackslash) { - m_data << ';'; - ++pos; + m_data.append (';'); + m_data.append ('\n'); + pos += 2; inComment = false; m_lineEndings << pos; ++ln; } else - { inBackslash = false; - } + continue; } - if (ch == '#' && not inString) + if (ch == '#' and not inString) { inComment = true; continue; } - m_data << ch; - ++pos; + if (not inComment) + { + m_data.append (ch); + ++pos; + } + } +} + +// +// ------------------------------------------------------------------------------------------------- +// +namespace Script +{ + class UnexpectedEOF : public std::exception + { + const char* what() const throw() + { + return "unexpected EOF"; + } + }; +} + +// +// ------------------------------------------------------------------------------------------------- +// +char Script::Parser::read() +{ + if (m_state.position >= m_data.length()) + throw UnexpectedEOF(); + + char ch = m_data[m_state.position]; + m_state.position++; + + if (m_state.position == m_lineEndings[m_state.lineNumber]) + m_state.lineNumber++; + + return ch; +} + +// +// ------------------------------------------------------------------------------------------------- +// +void Script::Parser::unread() +{ + if (m_state.position <= 0) + return; + + if (m_state.lineNumber > 0 + and m_state.position == m_lineEndings[m_state.lineNumber - 1]) + { + m_state.lineNumber--; + } + + m_state.position--; +} + +// +// ------------------------------------------------------------------------------------------------- +// +// Takes a hexadecimal character and returns its numerical value. It is assumed that isxdigit(xd) +// is true (if not, result is undefined). +// +int parseXDigit (char xd) +{ + if (xd >= 'a') + return xd - 'a'; + + if (xd >= 'A') + return xd - 'A'; + + return xd - '0'; +} + +// +// ------------------------------------------------------------------------------------------------- +// +bool Script::Parser::next (TokenType desiredType) +{ + SavedState oldpos = state(); + Token oldtoken = m_state.token; + + if (not getNextToken()) + return false; + + if (desiredType != TOK_Any and m_state.token.type != desiredType) + { + // Did not find the token we wanted, revert back + m_rejectedToken = m_state.token; + m_state.token = oldtoken; + setState (oldpos); + return false; } - m_position.reset(); + return true; +} + +// +// ------------------------------------------------------------------------------------------------- +// +bool Script::Parser::getNextToken() +{ + try + { + m_state.token.text.clear(); + m_state.token.number = 0; + m_state.token.real = 0.0; + skipSpace(); + + const char* data = m_data.constData() + m_state.position; + + // Does this character start one of our tokens? + for (int tt = 0; tt <= LastNamedToken; ++tt) + { + if (strncmp (data, TokenNames[tt], strlen (TokenNames[tt])) != 0) + continue; + + m_state.position += strlen (TokenNames[tt]); + m_state.token.text = QString::fromAscii (TokenNames[tt]); + m_state.token.type = TokenType (tt); + return true; + } + + // Check for number + if (parseNumber()) + return true; + + // Check for string + if (*data == '"') + { + read(); + parseString(); + return true; + } + + // Check for variable + if (*data == '$') + { + read(); + m_state.token.text = parseIdentifier(); + m_state.token.type = TOK_Variable; + return true; + } + + // Must be a symbol of some sort then + m_state.token.text = parseIdentifier(); + m_state.token.type = TOK_Symbol; + } + catch (UnexpectedEOF) + { + return false; + } + + return true; } -bool Script::Parser::next(TokenType desiredType) +// +// ------------------------------------------------------------------------------------------------- +// +bool Script::Parser::parseNumber() { - SavedPosition oldpos = position(); + SavedState pos = state(); + char ch = read(); + unread(); + QString numberString; + + if (not isdigit (ch) and ch != '.') + { + setState (pos); + return false; + } + + int base = 10; + bool gotDot = false; + + if (tryMatch ("0x", false)) + base = 16; + elif (tryMatch ("0b", false)) + base = 2; + + int (*checkFunc)(int) = base == 16 ? isxdigit : isdigit; + + for (int n = 0; not isspace (ch = read()); ++n) + { + if (n == 0 && ch == '0') + base = 8; + + if (ch == '.') + { + if (gotDot) + scriptError ("multiple dots in numeric literal"); + + // If reading numbers like 0.1234 where the first number is zero, the parser + // will initially think the number is octal so we must take that into account here. + // Note that even if you have numbers like 05.612, it will still be decimal. + if (base != 10 and base != 8) + scriptError ("real number constant must be decimal"); + + base = 10; + gotDot = true; + } + else if (checkFunc (ch)) + { + if (base <= 10 and (ch - '0') >= base) + scriptError ("bad base-%1 numeric literal", base); + + numberString += ch; + } + else if (isalpha (ch)) + scriptError ("invalid digit %1 in literal", ch); + else + break; + } + + unread(); + bool ok; + + if (gotDot) + { + // Floating point number + m_state.token.real = numberString.toFloat (&ok); + m_state.token.number = m_state.token.real; + } + else + { + // Integral number + m_state.token.number = numberString.toInt (&ok, base); + m_state.token.real = m_state.token.number; + } + + if (ok == false) + scriptError ("invalid numeric literal '%1'", numberString); + + m_state.token.text = numberString; + m_state.token.type = TOK_Number; + + return true; +} + +// +// ------------------------------------------------------------------------------------------------- +// +void Script::Parser::scriptError (QString text) +{ + throw ParseError (text); +} + +// +// ------------------------------------------------------------------------------------------------- +// +// Checks whether the parser is at the beginning of the given string in the code. The string is +// expected not to have newlines. If true, the parser jumps over the text. +// +bool Script::Parser::tryMatch (const char* text, bool caseSensitive) +{ + assert (strstr (text, "\n") == NULL); + const char* data = m_data.constData() + m_state.position; + int (*func) (const char*, const char*) = caseSensitive ? &strcmp : &strcasecmp; + + if ((*func) (data, text) == 0) + { + m_state.position += strlen (text); + return true; + } + return false; } -void Script::Parser::mustGetNext(TokenType desiredType) +// +// ------------------------------------------------------------------------------------------------- +// +QString Script::Parser::parseEscapeSequence() { + char ch = read(); + QString result; + switch (ch) + { + case '"': + result += "\""; + break; + + case 'n': + result += "\n"; + break; + + case 't': + result += "\t"; + break; + + case '\\': + result += "\\"; + break; + + case 'x': + case 'X': + { + char n1 = read(); + char n2 = read(); + + if (not isxdigit(n1) or not isxdigit(n2)) + scriptError ("bad hexa-decimal character \\x%1%2", n1, n2); + + unsigned char num = parseXDigit(n1) * 16 + parseXDigit(n2); + result += char (num); + } + break; + + default: + scriptError ("unknown escape sequence \\%1", ch); + } + + return result; } -bool Script::Parser::peekNext(Token& tok) +// +// ------------------------------------------------------------------------------------------------- +// +void Script::Parser::parseString() +{ + m_state.token.type = TOK_String; + m_state.token.text.clear(); + + try + { + char ch; + + while ((ch = read()) != '"') + { + if (ch == '\\') + m_state.token.text += parseEscapeSequence(); + else + m_state.token.text += ch; + } + } + catch (UnexpectedEOF) + { + scriptError ("unterminated string"); + } +} + +// +// ------------------------------------------------------------------------------------------------- +// +void Script::Parser::skipSpace() { + while (isspace (read())) + ; + + unread(); +} + +// +// ------------------------------------------------------------------------------------------------- +// +void Script::Parser::mustGetNext (TokenType desiredType) +{ + if (not next (desiredType)) + { + scriptError ("Expected %1, got %2", + TokenNames[m_rejectedToken.type], + TokenNames[desiredType]); + } +} + +// +// ------------------------------------------------------------------------------------------------- +// +bool Script::Parser::peekNext (Token& tok) +{ + SavedState pos = state(); + + if (next (TOK_Any)) + { + tok = m_state.token; + setState (pos); + return true; + } + return false; } -const Script::SavedPosition& Script::Parser::position() const +// +// ------------------------------------------------------------------------------------------------- +// +const Script::SavedState& Script::Parser::state() const { - return m_position; + return m_state; +} + +// +// ------------------------------------------------------------------------------------------------- +// +void Script::Parser::setState (const SavedState& pos) +{ + m_state = pos; } -void Script::Parser::setPosition(const SavedPosition& pos) +// +// ------------------------------------------------------------------------------------------------- +// +QString Script::Parser::parseIdentifier() { - m_position = pos; -} + char ch; + QString identifier; + + while (not isspace (ch = read())) + { + if (isalnum (ch) == false and ch != '_') + break; + + identifier += QChar::fromAscii (ch); + } + + unread(); + return identifier; +} \ No newline at end of file
--- a/src/script/parser.h Mon Jan 26 12:46:58 2015 +0200 +++ b/src/script/parser.h Tue Feb 03 04:03:19 2015 +0200 @@ -1,3 +1,21 @@ +/* + * LDForge: LDraw parts authoring CAD + * Copyright (C) 2013 - 2015 Teemu Piippo + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + #pragma once #include "../main.h" #include "ast.h" @@ -6,12 +24,22 @@ { enum TokenType { + TOK_If, + TOK_Then, + TOK_Else, + TOK_EndIf, + TOK_EndMacro, + TOK_Macro, + TOK_For, + TOK_While, + TOK_Done, + TOK_Do, TOK_DoubleEquals, // == TOK_AngleLeftEquals, // <= TOK_AngleRightEquals, // >= TOK_DoubleAmperstand, // && TOK_DoubleBar, // || - TOK_Dollar, // $ + TOK_NotEquals, // != TOK_Colon, // : TOK_Semicolon, // ; TOK_Dot, // . @@ -40,6 +68,7 @@ TOK_Tilde, // ~ TOK_GraveAccent, // ` TOK_Percent, // % + TOK_Variable, // $var TOK_String, // "foo" TOK_Symbol, // bar TOK_Number, // 42 @@ -51,22 +80,6 @@ LastNamedToken = TOK_Percent }; - enum Type - { - TYPE_Var, // mixed - TYPE_Int, - TYPE_Real, - TYPE_String, - TYPE_Type, // heh - TYPE_Vertex, - TYPE_Object, - TYPE_Line, - TYPE_OptLine, - TYPE_Triangle, - TYPE_Quad, - TYPE_Reference, - }; - enum Function { FUNC_Abs, @@ -78,25 +91,31 @@ { TokenType type; QString text; + qint32 number; + qreal real; }; - struct SavedPosition + struct SavedState { int position; int lineNumber; + Token token; void reset() { position = 0; lineNumber = 1; + token.number = token.real = 0; + token.text.clear(); + token.type = TOK_Any; } }; - class ParseError : public std::exception + class ParseError { public: - ParseError(QString text) : m_text (text) {} - const char* what() const { return m_text; } + ParseError (QString text) : m_text (text) {} + const QString& message() const { return m_text; } private: QString m_text; @@ -109,20 +128,38 @@ ~Parser(); void parse(); - void scriptError(QString text) { throw ParseError(text); } - bool next(TokenType desiredType = TOK_Any); - void mustGetNext(TokenType desiredType = TOK_Any); - bool peekNext(Token& tok); - const SavedPosition& position() const; - void setPosition(const SavedPosition& pos); + void scriptError (QString text); + bool next (TokenType desiredType = TOK_Any); + void mustGetNext (TokenType desiredType = TOK_Any); + bool peekNext (Token& tok); + const SavedState& state() const; + void setState(const SavedState& pos); + void preprocess(); QString preprocessedScript() const { return QString::fromAscii (m_data); } + char read(); + void unread(); + void skipSpace(); + bool isAtEnd() const { return m_state.position >= m_data.length(); } + bool tryMatch (const char* text, bool caseSensitive); + + template<typename... Args> + void scriptError (QString text, Args... args) + { + scriptError (format (text, args...)); + } private: QString m_script; QByteArray m_data; QVector<int> m_lineEndings; - SavedPosition m_position; - Token m_token; + SavedState m_state; AstNode* m_astRoot; + Token m_rejectedToken; + + void parseString(); + bool parseNumber(); + QString parseEscapeSequence(); + QString parseIdentifier(); + bool getNextToken(); }; }