Mon, 03 Feb 2014 20:12:44 +0200
- committed work so far done on expressions
src/DataBuffer.cc | file | annotate | diff | comparison | revisions | |
src/DataBuffer.h | file | annotate | diff | comparison | revisions | |
src/Expression.cc | file | annotate | diff | comparison | revisions | |
src/Expression.h | file | annotate | diff | comparison | revisions | |
src/Lexer.h | file | annotate | diff | comparison | revisions | |
src/LexerScanner.cc | file | annotate | diff | comparison | revisions | |
src/Parser.cc | file | annotate | diff | comparison | revisions | |
src/Parser.h | file | annotate | diff | comparison | revisions | |
src/Tokens.h | file | annotate | diff | comparison | revisions |
--- a/src/DataBuffer.cc Mon Feb 03 11:23:56 2014 +0200 +++ b/src/DataBuffer.cc Mon Feb 03 20:12:44 2014 +0200 @@ -102,7 +102,7 @@ // ============================================================================ // -MarkReference* DataBuffer::AddReference (ByteMark* mark, bool writePlaceholder) +MarkReference* DataBuffer::AddReference (ByteMark* mark) { MarkReference* ref = new MarkReference; ref->target = mark; @@ -110,8 +110,7 @@ PushToReferences (ref); // Write a dummy placeholder for the reference - if (writePlaceholder) - WriteDWord (0xBEEFCAFE); + WriteDWord (0xBEEFCAFE); return ref; }
--- a/src/DataBuffer.h Mon Feb 03 11:23:56 2014 +0200 +++ b/src/DataBuffer.h Mon Feb 03 20:12:44 2014 +0200 @@ -70,7 +70,7 @@ DataBuffer* Clone(); ByteMark* AddMark (String name); - MarkReference* AddReference (ByteMark* mark, bool write_placeholder = true); + MarkReference* AddReference (ByteMark* mark); void CheckSpace (int bytes); void DeleteMark (int marknum); void AdjustMark (ByteMark* mark);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Expression.cc Mon Feb 03 20:12:44 2014 +0200 @@ -0,0 +1,277 @@ +#include "Expression.h" +#include "DataBuffer.h" +#include "Lexer.h" +#include "Variables.h" + +struct OperatorInfo +{ + EToken token; + int numoperands; + int priority; + EDataHeader header; +}; + +static const OperatorInfo gOperators[] = +{ + { tkExclamationMark, 0, 1, dhNegateLogical, }, + { tkMinus, 0, 1, dhUnaryMinus, }, + { tkMultiply, 10, 2, dhMultiply, }, + { tkDivide, 10, 2, dhDivide, }, + { tkModulus, 10, 2, dhModulus, }, + { tkPlus, 20, 2, dhAdd, }, + { tkMinus, 20, 2, dhSubtract, }, + { tkLeftShift, 30, 2, dhLeftShift, }, + { tkRightShift, 30, 2, dhRightShift, }, + { tkLesser, 40, 2, dhLessThan, }, + { tkGreater, 40, 2, dhGreaterThan, }, + { tkAtLeast, 40, 2, dhAtLeast, }, + { tkAtMost, 40, 2, dhAtMost, }, + { tkEquals, 50, 2, dhEquals }, + { tkNotEquals, 50, 2, dhNotEquals }, + { tkAmperstand, 60, 2, dhAndBitwise }, + { tkCaret, 70, 2, dhEorBitwise }, + { tkBar, 80, 2, dhOrBitwise }, + { tkDoubleAmperstand, 90, 2, dhAndLogical }, + { tkDoubleBar, 100, 2, dhOrLogical }, + { tkQuestionMark, 110, 3, (EDataHeader) 0 }, +}; + +/* + // There isn't a dataheader for ternary operator. Instead, we use dhIfNotGoto + // to create an "if-block" inside an expression. + // Behold, big block of writing madness! :P + ByteMark* mark1 = retbuf->AddMark (""); // start of "else" case + ByteMark* mark2 = retbuf->AddMark (""); // end of expression + retbuf->WriteDWord (dhIfNotGoto); // if the first operand (condition) + retbuf->AddReference (mark1); // didn't eval true, jump into mark1 + retbuf->MergeAndDestroy (rb); // otherwise, perform second operand (true case) + retbuf->WriteDWord (dhGoto); // afterwards, jump to the end, which is + retbuf->AddReference (mark2); // marked by mark2. + retbuf->AdjustMark (mark1); // move mark1 at the end of the true case + retbuf->MergeAndDestroy (tb); // perform third operand (false case) + retbuf->AdjustMark (mark2); // move the ending mark2 here +*/ + +// ============================================================================= +// +Expression::Expression (BotscriptParser* parser, EType reqtype, Lexer* lx) : + mParser (parser), + mLexer (lx), + mType (reqtype), + mResult (null) +{ + ExpressionSymbol* sym; + + while ((sym = ParseSymbol()) != null) + mSymbols << sym; + + if (mSymbols.IsEmpty()) + Error ("Expected expression"); + + Verify(); + mResult = Evaluate(); +} + +// ============================================================================= +// +Expression::~Expression() +{ + for (ExpressionSymbol* sym : mSymbols) + delete sym; + + delete mResult; +} + +// ============================================================================= +// +// Try to parse an expression symbol (i.e. an operator or operand or a colon) +// from the lexer. +// +ExpressionSymbol* Expression::ParseSymbol() +{ + int pos = mLexer->GetPosition(); + ExpressionValue* op = null; + enum ELocalException { failed }; + + try + { + ScriptVariable* globalvar; + mLexer->MustGetNext(); + + if (mLexer->GetTokenType() == tkColon) + return new ExpressionColon; + + // Check for operator + for (const OperatorInfo* op : gOperators) + if (mLexer->GetTokenType() == op->token) + return new ExpressionOperator (op - gOperators); + + // Check sub-expression + if (mLexer->GetTokenType() == tkParenStart) + { + mLexer->MustGetNext(); + Expression expr (mParser, mLexer, mType); + mLexer->MustGetNext (tkParenEnd); + return expr.GetResult(); + } + + op = new ExpressionValue; + + // Check function + if (CommandInfo* comm = FindCommandByName (GetTokenString())) + { + if (mType != EUnknownType && comm->returnvalue != mType) + Error ("%1 returns an incompatible data type", comm->name); + + op->SetBuffer (mParser->ParseCommand (comm)); + return op; + } + + // Check constant + if (ConstantInfo* constant = mParser->FindConstant (GetTokenString())) + { + if (mType != constant->type) + Error ("constant `%1` is %2, expression requires %3\n", + constant->name, GetTypeName (constant->type), + GetTypeName (mType)); + + switch (constant->type) + { + case EBoolType: + case EIntType: + op->SetValue (constant->val.ToLong()); + break; + + case EStringType: + op->SetValue (GetStringTableIndex (constant->val)); + break; + + case EVoidType: + case EUnknownType: + break; + } + + return op; + } + + // Check global variable + if ((globalvar = FindGlobalVariable (GetTokenString()))) + { + DataBuffer* buf = new DataBuffer (8); + buf->WriteDWord (dhPushGlobalVar); + buf->WriteDWord (globalvar->index); + op->SetBuffer (buf); + return op; + } + + EToken tt; + + // Check for literal + switch (mType) + { + case EVoidType: + case EUnknownType: + Error ("unknown identifier `%1` (expected keyword, function or variable)", GetTokenString()); + break; + + case EBoolType: + if ((tt = mLexer->GetTokenType()) == tkTrue || tt == tkFalse) + { + op->SetValue (tt == tkTrue ? 1 : 0); + return op; + } + case EIntType: + if (!mLexer->GetTokenType() != tkNumber) + throw failed; + + op->SetValue (GetTokenString().ToLong()); + return op; + + case EStringType: + if (!mLexer->GetTokenType() != tkString) + throw failed; + + op->SetValue (GetStringTableIndex (GetTokenString())); + return op; + } + + assert (false); + throw failed; + } + catch (ELocalException&) + { + // We use a local enum here since catch(...) would catch Error() calls. + mLexer->SetPosition (pos); + delete op; + return false; + } + + assert (false); + return false; +} + +// ============================================================================= +// +ExpressionValue* Expression::Evaluate() +{ + +} + +// ============================================================================= +// +ExpressionValue* Expression::GetResult() +{ + return mResult; +} + +// ============================================================================= +// +String Expression::GetTokenString() +{ + return mLexer->GetToken()->text; +} + +// ============================================================================= +// +ExpressionOperator::ExpressionOperator (int id) : + mID (id), + mType (eOperator) {} + +// ============================================================================= +// +ExpressionValue::ExpressionValue(EType valuetype) : + mBuffer (null), + mType (eOperand), + mValueType (valuetype) {} + +// ============================================================================= +// +void ExpressionValue::ConvertToBuffer() +{ + if (IsConstexpr() == false) + return; + + SetBuffer (new DataBuffer); + + switch (mValueType) + { + case EBoolType: + case EIntType: + GetBuffer()->WriteDWord (dhPushNumber); + GetBuffer()->WriteDWord (abs (mValue)); + + if (mValue < 0) + GetBuffer()->WriteDWord (dhUnaryMinus); + break; + + case EStringType: + GetBuffer()->WriteDWord (dhPushStringIndex); + GetBuffer()->WriteDWord (mValue); + break; + + case EVoidType: + case EUnknownType: + assert (false); + break; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Expression.h Mon Feb 03 20:12:44 2014 +0200 @@ -0,0 +1,88 @@ +#ifndef BOTC_EXPRESSION_H +#define BOTC_EXPRESSION_H +#include "Parser.h" + +class DataBuffer; +class ExpressionSymbol; +class ExpressionValue; + +// ============================================================================= +// +class Expression final +{ + public: + Expression (BotscriptParser* parser, EType reqtype, Lexer* lx); + ~Expression(); + ExpressionValue* GetResult(); + + private: + Lexer* mLexer; + List<ExpressionSymbol*> mSymbols; + EType mType; + ExpressionValue* mResult; + BotscriptParser* mParser; + + ExpressionValue* Evaluate(); // Process the expression and yield a result + ExpressionSymbol* ParseSymbol(); + String GetTokenString(); + void Verify(); // Ensure the expr is valid +}; + +// ============================================================================= +// +class ExpressionSymbol +{ + public: + enum EExpressionSymbolType + { + eOperator, + eOperand, + eColon, + }; + + PROPERTY (private, EExpressionSymbolType, Type, NO_OPS, STOCK_WRITE) +}; + +// ============================================================================= +// +class ExpressionOperator final : public ExpressionSymbol +{ + PROPERTY (public, int, ID, NO_OPS, STOCK_WRITE) + + public: + ExpressionOperator (int id); +}; + +// ============================================================================= +// +class ExpressionValue final : public ExpressionSymbol +{ + PROPERTY (public, int, Value, BOOL_OPS, STOCK_WRITE) + PROPERTY (public, DataBuffer*, Buffer, NO_OPS, STOCK_WRITE) + PROPERTY (public, EType, ValueType, NO_OPS, STOCK_WRITE) + + public: + ExpressionValue (EType valuetype); + + void ConvertToBuffer(); + + inline bool IsConstexpr() const + { + return GetBuffer() == null; + } +}; + +// ============================================================================= +// +// This class represents a ":" in the expression. It serves as the colon for the +// ternary ?: operator. It's not an operand nor is an operator, nor can we just +// skip it so it is its own thing here. +// +class ExpressionColon final : public ExpressionSymbol +{ + public: + ExpressionColon() : + mType (eColon) {} +}; + +#endif // BOTC_EXPRESSION_H \ No newline at end of file
--- a/src/Lexer.h Mon Feb 03 11:23:56 2014 +0200 +++ b/src/Lexer.h Mon Feb 03 20:12:44 2014 +0200 @@ -58,6 +58,10 @@ int GetOneSymbol (const StringList& syms); void TokenMustBe (EToken tok); bool PeekNext (Token* tk = null); + String PeekNextString (int a = 1); + String DescribePosition(); + + static Lexer* GetCurrentLexer(); inline bool HasValidToken() const { @@ -80,6 +84,21 @@ return GetToken()->type; } + inline void Skip (int a = 1) + { + mTokenPosition += a; + } + + inline int GetPosition() + { + return mTokenPosition - mTokens.begin(); + } + + inline void SetPosition (int pos) + { + mTokenPosition = mTokens.begin() + pos; + } + // If @tok is given, describes the token. If not, describes @tok_type. static inline String DescribeTokenType (EToken toktype) { @@ -91,16 +110,6 @@ return DescribeTokenPrivate (tok->type, tok); } - static Lexer* GetCurrentLexer(); - - inline void Skip (int a = 1) - { - mTokenPosition += a; - } - - String PeekNextString (int a = 1); - String DescribePosition(); - private: TokenList mTokens; Iterator mTokenPosition;
--- a/src/LexerScanner.cc Mon Feb 03 11:23:56 2014 +0200 +++ b/src/LexerScanner.cc Mon Feb 03 20:12:44 2014 +0200 @@ -36,13 +36,22 @@ static const String gTokenStrings[] = { + "<<=", + ">>=", "==", + "!=", "[]", "+=", "-=", "*=", "/=", "%=", + "<<", + ">>", + ">=", + "<=", + "&&", + "||", "'", "$", "(", @@ -65,6 +74,10 @@ ";", "#", "!", + "&", + "|", + "^", + "?", "->", "bool", "break", @@ -89,6 +102,8 @@ "str", "void", "while", + "true", + "false", "enum", "func", "return",
--- a/src/Parser.cc Mon Feb 03 11:23:56 2014 +0200 +++ b/src/Parser.cc Mon Feb 03 20:12:44 2014 +0200 @@ -34,6 +34,7 @@ #include "Containers.h" #include "Lexer.h" #include "DataBuffer.h" +#include "Expression.h" #define SCOPE(n) (mScopeStack[mScopeCursor - n]) @@ -1134,143 +1135,6 @@ } // ============================================================================ -// Parses an expression, potentially recursively -// -DataBuffer* BotscriptParser::ParseExpression (EType reqtype) -{ - DataBuffer* retbuf = new DataBuffer (64); - - // Parse first operand - retbuf->MergeAndDestroy (ParseExprValue (reqtype)); - - // Parse any and all operators we get - int oper; - - while ( (oper = ParseOperator (true)) != -1) - { - // We peeked the operator, move forward now - mLexer->Skip(); - - // Can't be an assignement operator, those belong in assignments. - if (IsAssignmentOperator (oper)) - Error ("assignment operator inside expression"); - - // Parse the right operand. - mLexer->MustGetNext(); - DataBuffer* rb = ParseExprValue (reqtype); - - if (oper == OPER_TERNARY) - { - // Ternary operator requires - naturally - a third operand. - mLexer->MustGetNext (tkColon); - mLexer->MustGetNext(); - DataBuffer* tb = ParseExprValue (reqtype); - - // It also is handled differently: there isn't a dataheader for ternary - // operator. Instead, we abuse PUSHNUMBER and IFNOTGOTO for this. - // Behold, big block of writing madness! :P - ByteMark* mark1 = retbuf->AddMark (""); // start of "else" case - ByteMark* mark2 = retbuf->AddMark (""); // end of expression - retbuf->WriteDWord (dhIfNotGoto); // if the first operand (condition) - retbuf->AddReference (mark1); // didn't eval true, jump into mark1 - retbuf->MergeAndDestroy (rb); // otherwise, perform second operand (true case) - retbuf->WriteDWord (dhGoto); // afterwards, jump to the end, which is - retbuf->AddReference (mark2); // marked by mark2. - retbuf->AdjustMark (mark1); // move mark1 at the end of the true case - retbuf->MergeAndDestroy (tb); // perform third operand (false case) - retbuf->AdjustMark (mark2); // move the ending mark2 here - } - else - { - // write to buffer - retbuf->MergeAndDestroy (rb); - retbuf->WriteDWord (GetDataHeaderByOperator (null, oper)); - } - } - - return retbuf; -} - -// ============================================================================ -// Parses an operator string. Returns the operator number code. -// -#define ISNEXT(C) (mLexer->PeekNextString (peek ? 1 : 0) == C) - -int BotscriptParser::ParseOperator (bool peek) -{ - String oper; - - if (peek) - oper += mLexer->PeekNextString(); - else - oper += GetTokenString(); - - if (-oper == "strlen") - return OPER_STRLEN; - - // Check one-char operators - bool equalsnext = ISNEXT ("="); - - int o = (oper == "=" && !equalsnext) ? OPER_ASSIGN : - (oper == ">" && !equalsnext && !ISNEXT (">")) ? OPER_GREATERTHAN : - (oper == "<" && !equalsnext && !ISNEXT ("<")) ? OPER_LESSTHAN : - (oper == "&" && !ISNEXT ("&")) ? OPER_BITWISEAND : - (oper == "|" && !ISNEXT ("|")) ? OPER_BITWISEOR : - (oper == "+" && !equalsnext) ? OPER_ADD : - (oper == "-" && !equalsnext) ? OPER_SUBTRACT : - (oper == "*" && !equalsnext) ? OPER_MULTIPLY : - (oper == "/" && !equalsnext) ? OPER_DIVIDE : - (oper == "%" && !equalsnext) ? OPER_MODULUS : - (oper == "^") ? OPER_BITWISEEOR : - (oper == "?") ? OPER_TERNARY : - -1; - - if (o != -1) - { - return o; - } - - // Two-char operators - oper += mLexer->PeekNextString (peek ? 1 : 0); - equalsnext = mLexer->PeekNextString (peek ? 2 : 1) == ("="); - - o = (oper == "+=") ? OPER_ASSIGNADD : - (oper == "-=") ? OPER_ASSIGNSUB : - (oper == "*=") ? OPER_ASSIGNMUL : - (oper == "/=") ? OPER_ASSIGNDIV : - (oper == "%=") ? OPER_ASSIGNMOD : - (oper == "==") ? OPER_EQUALS : - (oper == "!=") ? OPER_NOTEQUALS : - (oper == ">=") ? OPER_GREATERTHANEQUALS : - (oper == "<=") ? OPER_LESSTHANEQUALS : - (oper == "&&") ? OPER_AND : - (oper == "||") ? OPER_OR : - (oper == "<<" && !equalsnext) ? OPER_LEFTSHIFT : - (oper == ">>" && !equalsnext) ? OPER_RIGHTSHIFT : - -1; - - if (o != -1) - { - mLexer->MustGetNext(); - return o; - } - - // Three-char opers - oper += mLexer->PeekNextString (peek ? 2 : 1); - o = oper == "<<=" ? OPER_ASSIGNLEFTSHIFT : - oper == ">>=" ? OPER_ASSIGNRIGHTSHIFT : - -1; - - if (o != -1) - { - mLexer->MustGetNext(); - mLexer->MustGetNext(); - } - - return o; -} - -// ============================================================================ // String BotscriptParser::ParseFloat() { @@ -1291,124 +1155,6 @@ } // ============================================================================ -// Parses a value in the expression and returns the data needed to push -// it, contained in a data buffer. A value can be either a variable, a command, -// a literal or an expression. -// -DataBuffer* BotscriptParser::ParseExprValue (EType reqtype) -{ - DataBuffer* b = new DataBuffer (16); - ScriptVariable* g; - - // Prefixing "!" means negation. - bool negate = TokenIs (tkExclamationMark); - - if (negate) // Jump past the "!" - mLexer->Skip(); - - if (TokenIs (tkParenStart)) - { - // Expression - mLexer->MustGetNext(); - DataBuffer* c = ParseExpression (reqtype); - b->MergeAndDestroy (c); - mLexer->MustGetNext (tkParenEnd); - } - else if (CommandInfo* comm = FindCommandByName (GetTokenString())) - { - delete b; - - // Command - if (reqtype && comm->returnvalue != reqtype) - Error ("%1 returns an incompatible data type", comm->name); - - b = ParseCommand (comm); - } - else if (ConstantInfo* constant = FindConstant (GetTokenString())) - { - // Type check - if (reqtype != constant->type) - Error ("constant `%1` is %2, expression requires %3\n", - constant->name, GetTypeName (constant->type), - GetTypeName (reqtype)); - - switch (constant->type) - { - case EBoolType: - case EIntType: - { - b->WriteDWord (dhPushNumber); - b->WriteDWord (constant->val.ToLong()); - break; - } - - case EStringType: - { - b->WriteStringIndex (constant->val); - break; - } - - case EVoidType: - case EUnknownType: - break; - } - } - else if ((g = FindGlobalVariable (GetTokenString()))) - { - // Global variable - b->WriteDWord (dhPushGlobalVar); - b->WriteDWord (g->index); - } - else - { - // If nothing else, check for literal - switch (reqtype) - { - case EVoidType: - case EUnknownType: - { - Error ("unknown identifier `%1` (expected keyword, function or variable)", GetTokenString()); - break; - } - - case EBoolType: - case EIntType: - { - mLexer->TokenMustBe (tkNumber); - - // All values are written unsigned - thus we need to write the value's - // absolute value, followed by an unary minus for negatives. - b->WriteDWord (dhPushNumber); - - long v = GetTokenString().ToLong(); - b->WriteDWord (static_cast<word> (abs (v))); - - if (v < 0) - b->WriteDWord (dhUnaryMinus); - - break; - } - - case EStringType: - { - // PushToStringTable either returns the string index of the - // string if it finds it in the table, or writes it to the - // table and returns it index if it doesn't find it there. - mLexer->TokenMustBe (tkString); - b->WriteStringIndex (GetTokenString()); - break; - } - } - } - - // Negate it now if desired - if (negate) - b->WriteDWord (dhNegateLogical); - - return b; -} - -// ============================================================================ // Parses an assignment. An assignment starts with a variable name, followed // by an assignment operator, followed by an expression value. Expects current // token to be the name of the variable, and expects the variable to be given. @@ -1479,6 +1225,15 @@ // ============================================================================ // +void BotscriptParser::ParseExpression (EType reqtype) +{ + Expression expr (this, reqtype, mLexer); + expr.GetResult()->ConvertToBuffer(); + buffer()->MergeAndDestroy (expr.GetResult()->GetBuffer()); +} + +// ============================================================================ +// DataBuffer* BotscriptParser::ParseStatement() { if (FindConstant (GetTokenString())) // There should not be constants here.
--- a/src/Parser.h Mon Feb 03 11:23:56 2014 +0200 +++ b/src/Parser.h Mon Feb 03 20:12:44 2014 +0200 @@ -171,12 +171,11 @@ // METHODS BotscriptParser(); ~BotscriptParser(); + ConstantInfo* FindConstant (const String& tok); void ParseBotscript (String fileName); DataBuffer* ParseCommand (CommandInfo* comm); - DataBuffer* ParseExpression (EType reqtype); DataBuffer* ParseAssignment (ScriptVariable* var); int ParseOperator (bool peek = false); - DataBuffer* ParseExprValue (EType reqtype); String ParseFloat(); void PushScope(); DataBuffer* ParseStatement(); @@ -236,7 +235,6 @@ ScopeInfo mScopeStack[MAX_SCOPE]; DataBuffer* buffer(); - ConstantInfo* FindConstant (const String& tok); void ParseStateBlock(); void ParseEventBlock(); void ParseMainloop(); @@ -260,6 +258,7 @@ void ParseFuncdef(); void writeMemberBuffers(); void WriteStringTable(); + void ParseExpression (EType reqtype); }; #endif // BOTC_PARSER_H
--- a/src/Tokens.h Mon Feb 03 11:23:56 2014 +0200 +++ b/src/Tokens.h Mon Feb 03 20:12:44 2014 +0200 @@ -35,13 +35,22 @@ enum EToken { // Non-word tokens + tkLeftShiftAssign, // + tkRightShiftAssign, // tkEquals, // ----- 0 + tkNotEquals, tkBrackets, // - 1 tkAddAssign, // - 2 tkSubAssign, // - 3 tkMultiplyAssign, // - 4 tkDivideAssign, // ----- 5 tkModulusAssign, // - 6 + tkLeftShift, + tkRightShift, + tkAtLeast, + tkAtMost, + tkDoubleAmperstand, + tkDoubleBar, tkSingleQuote, // - 7 tkDollarSign, // - 8 tkParenStart, // - 9 @@ -64,6 +73,10 @@ tkSemicolon, // - 26 tkHash, // - 27 tkExclamationMark, // - 28 + tkAmperstand, + tkBar, + tkCaret, + tkQuestionMark, tkArrow, // - 29 // -------------- @@ -91,6 +104,8 @@ tkStr, // ----- 50 tkVoid, // - 51 tkWhile, // - 52 + tkTrue, + tkFalse, // These ones aren't implemented yet but I plan to do so, thus they are // reserved. Also serves as a to-do list of sorts for me. >:F