src/expression.cpp

changeset 119
bdf8d46c145f
child 125
85814c0918c5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/expression.cpp	Sun Mar 30 21:51:23 2014 +0300
@@ -0,0 +1,651 @@
+#include "expression.h"
+#include "dataBuffer.h"
+#include "lexer.h"
+
+struct OperatorInfo
+{
+	ETokenType	token;
+	int			priority;
+	int			numoperands;
+	DataHeader	header;
+};
+
+static const OperatorInfo g_Operators[] =
+{
+	{TK_ExclamationMark,	0,		1,	DH_NegateLogical,	},
+	{TK_Minus,				0,		1,	DH_UnaryMinus,		},
+	{TK_Multiply,			10,		2,	DH_Multiply,		},
+	{TK_Divide,				10,		2,	DH_Divide,			},
+	{TK_Modulus,			10,		2,	DH_Modulus,			},
+	{TK_Plus,				20,		2,	DH_Add,				},
+	{TK_Minus,				20,		2,	DH_Subtract,		},
+	{TK_LeftShift,			30,		2,	DH_LeftShift,		},
+	{TK_RightShift,			30,		2,	DH_RightShift,		},
+	{TK_Lesser,				40,		2,	DH_LessThan,		},
+	{TK_Greater,			40,		2,	DH_GreaterThan,		},
+	{TK_AtLeast,			40,		2,	DH_AtLeast,			},
+	{TK_AtMost,				40,		2,	DH_AtMost,			},
+	{TK_Equals,				50,		2,	DH_Equals			},
+	{TK_NotEquals,			50,		2,	DH_NotEquals		},
+	{TK_Amperstand,			60,		2,	DH_AndBitwise		},
+	{TK_Caret,				70,		2,	DH_EorBitwise		},
+	{TK_Bar,				80,		2,	DH_OrBitwise		},
+	{TK_DoubleAmperstand,	90,		2,	DH_AndLogical		},
+	{TK_DoubleBar,			100,	2,	DH_OrLogical		},
+	{TK_QuestionMark,		110,	3,	(DataHeader) 0		},
+};
+
+// =============================================================================
+//
+Expression::Expression (BotscriptParser* parser, Lexer* lx, DataType reqtype) :
+	m_parser (parser),
+	m_lexer (lx),
+	m_type (reqtype)
+{
+	ExpressionSymbol* sym;
+
+	while ((sym = parseSymbol()) != null)
+		m_symbols << sym;
+
+	// If we were unable to get any expression symbols, something's wonky with
+	// the script. Report an error. mBadTokenText is set to the token that
+	// ParseSymbol ends at when it returns false.
+	if (m_symbols.isEmpty())
+		error ("unknown identifier '%1'", m_badTokenText);
+
+	adjustOperators();
+	verify();
+	evaluate();
+}
+
+// =============================================================================
+//
+Expression::~Expression()
+{
+	for (ExpressionSymbol* sym : m_symbols)
+		delete sym;
+}
+
+// =============================================================================
+//
+// Try to parse an expression symbol (i.e. an OPER_erator or OPER_erand or a colon)
+// from the lexer.
+//
+ExpressionSymbol* Expression::parseSymbol()
+{
+	int pos = m_lexer->position();
+	ExpressionValue* op = null;
+
+	if (m_lexer->next (TK_Colon))
+		return new ExpressionColon;
+
+	// Check for OPER_erator
+	for (const OperatorInfo& op : g_Operators)
+		if (m_lexer->next (op.token))
+			return new ExpressionOperator ((ExpressionOperatorType) (&op - &g_Operators[0]));
+
+	// Check sub-expression
+	if (m_lexer->next (TK_ParenStart))
+	{
+		Expression expr (m_parser, m_lexer, m_type);
+		m_lexer->mustGetNext (TK_ParenEnd);
+		return expr.getResult()->clone();
+	}
+
+	op = new ExpressionValue (m_type);
+
+	// Check function
+	if (CommandInfo* comm = findCommandByName (m_lexer->peekNextString()))
+	{
+		m_lexer->skip();
+
+		if (m_type != TYPE_Unknown && comm->returnvalue != m_type)
+			error ("%1 returns an incompatible data type", comm->name);
+
+		op->setBuffer (m_parser->parseCommand (comm));
+		return op;
+	}
+
+	// Check for variables
+	if (m_lexer->next (TK_DollarSign))
+	{
+		m_lexer->mustGetNext (TK_Symbol);
+		Variable* var = m_parser->findVariable (getTokenString());
+
+		if (var == null)
+			error ("unknown variable %1", getTokenString());
+
+		if (var->type != m_type)
+			error ("expression requires %1, variable $%2 is of type %3",
+				dataTypeName (m_type), var->name, dataTypeName (var->type));
+
+		if (var->isarray)
+		{
+			m_lexer->mustGetNext (TK_BracketStart);
+			Expression expr (m_parser, m_lexer, TYPE_Int);
+			expr.getResult()->convertToBuffer();
+			DataBuffer* buf = expr.getResult()->buffer()->clone();
+			buf->writeDWord (DH_PushGlobalArray);
+			buf->writeDWord (var->index);
+			op->setBuffer (buf);
+			m_lexer->mustGetNext (TK_BracketEnd);
+		}
+		elif (var->writelevel == WRITE_Constexpr)
+			op->setValue (var->value);
+		else
+		{
+			DataBuffer* buf = new DataBuffer (8);
+
+			if (var->IsGlobal())
+				buf->writeDWord (DH_PushGlobalVar);
+			else
+				buf->writeDWord (DH_PushLocalVar);
+
+			buf->writeDWord (var->index);
+			op->setBuffer (buf);
+		}
+
+		return op;
+	}
+
+	// Check for literal
+	switch (m_type)
+	{
+		case TYPE_Void:
+		case TYPE_Unknown:
+		{
+			error ("unknown identifier `%1` (expected keyword, function or variable)", getTokenString());
+			break;
+		}
+
+		case TYPE_Bool:
+		{
+			if (m_lexer->next (TK_True) || m_lexer->next (TK_False))
+			{
+				ETokenType tt = m_lexer->tokenType();
+				op->setValue (tt == TK_True ? 1 : 0);
+				return op;
+			}
+		}
+
+		case TYPE_Int:
+		{
+			if (m_lexer->next (TK_Number))
+			{
+				op->setValue (getTokenString().toLong());
+				return op;
+			}
+		}
+
+		case TYPE_String:
+		{
+			if (m_lexer->next (TK_String))
+			{
+				op->setValue (getStringTableIndex (getTokenString()));
+				return op;
+			}
+		}
+	}
+
+	m_badTokenText = m_lexer->token()->text;
+	m_lexer->setPosition (pos);
+	delete op;
+	return null;
+}
+
+// =============================================================================
+//
+// The symbol parsing process only does token-based checking for OPER_erators.
+// Thus ALL minus OPER_erators are actually unary minuses simply because both
+// have TK_Minus as their token and the unary minus is prior to the binary minus
+// in the OPER_erator table. Now that we have all symbols present, we can
+// correct cases like this.
+//
+void Expression::adjustOperators()
+{
+	for (auto it = m_symbols.begin() + 1; it != m_symbols.end(); ++it)
+	{
+		if ((*it)->type() != EXPRSYM_Operator)
+			continue;
+
+		ExpressionOperator* op = static_cast<ExpressionOperator*> (*it);
+
+		// Unary minus with a value as the previous symbol cannot really be
+		// unary; replace with binary minus.
+		if (op->id() == OPER_UnaryMinus && (*(it - 1))->type() == EXPRSYM_Value)
+			op->setID (OPER_Subtraction);
+	}
+}
+
+// =============================================================================
+//
+// Verifies a single value. Helper function for Expression::verify.
+//
+void Expression::tryVerifyValue (bool* verified, SymbolList::Iterator it)
+{
+	// If it's an unary OPER_erator we skip to its value. The actual OPER_erator will
+	// be verified separately.
+	if ((*it)->type() == EXPRSYM_Operator &&
+			g_Operators[static_cast<ExpressionOperator*> (*it)->id()].numoperands == 1)
+	{
+		++it;
+	}
+
+	int i = it - m_symbols.begin();
+
+	// Ensure it's an actual value
+	if ((*it)->type() != EXPRSYM_Value)
+		error ("malformed expression (symbol #%1 is not a value)", i);
+
+	verified[i] = true;
+}
+
+// =============================================================================
+//
+// Ensures the expression is valid and well-formed and not OMGWTFBBQ. Throws an
+// error if this is not the case.
+//
+void Expression::verify()
+{
+	if (m_symbols.size() == 1)
+	{
+		if (m_symbols[0]->type() != EXPRSYM_Value)
+			error ("bad expression");
+
+		return;
+	}
+
+	if (m_type == TYPE_String)
+		error ("Cannot perform OPER_erations on strings");
+
+	bool* verified = new bool[m_symbols.size()];
+	memset (verified, 0, m_symbols.size() * sizeof (decltype (*verified)));
+	const auto last = m_symbols.end() - 1;
+	const auto first = m_symbols.begin();
+
+	for (auto it = m_symbols.begin(); it != m_symbols.end(); ++it)
+	{
+		int i = (it - first);
+
+		if ((*it)->type() != EXPRSYM_Operator)
+			continue;
+
+		ExpressionOperator* op = static_cast<ExpressionOperator*> (*it);
+		int numoperands = g_Operators[op->id()].numoperands;
+
+		switch (numoperands)
+		{
+			case 1:
+			{
+				// Ensure that:
+				// -	unary OPER_erator is not the last symbol
+				// -	unary OPER_erator is succeeded by a value symbol
+				// -	neither symbol overlaps with something already verified
+				tryVerifyValue (verified, it + 1);
+
+				if (it == last || verified[i] == true)
+					error ("malformed expression");
+
+				verified[i] = true;
+				break;
+			}
+
+			case 2:
+			{
+				// Ensure that:
+				// -	binary OPER_erator is not the first or last symbol
+				// -	is preceded and succeeded by values
+				// -	none of the three tokens are already verified
+				//
+				// Basically similar logic as above.
+				if (it == first || it == last || verified[i] == true)
+					error ("malformed expression");
+
+				tryVerifyValue (verified, it + 1);
+				tryVerifyValue (verified, it - 1);
+				verified[i] = true;
+				break;
+			}
+
+			case 3:
+			{
+				// Ternary OPER_erator case. This goes a bit nuts.
+				// This time we have the following:
+				//
+				// (VALUE) ? (VALUE) : (VALUE)
+				//         ^
+				// --------/ we are here
+				//
+				// Check that the:
+				// -	questionmark OPER_erator is not misplaced (first or last)
+				// -	the value behind the OPER_erator (-1) is valid
+				// -	the value after the OPER_erator (+1) is valid
+				// -	the value after the colon (+3) is valid
+				// -	none of the five tokens are verified
+				//
+				tryVerifyValue (verified, it - 1);
+				tryVerifyValue (verified, it + 1);
+				tryVerifyValue (verified, it + 3);
+
+				if (it == first ||
+					it >= m_symbols.end() - 3 ||
+					verified[i] == true ||
+					verified[i + 2] == true ||
+					(*(it + 2))->type() != EXPRSYM_Colon)
+				{
+					error ("malformed expression");
+				}
+
+				verified[i] = true;
+				verified[i + 2] = true;
+				break;
+			}
+
+			default:
+				error ("WTF OPER_erator with %1 OPER_erands", numoperands);
+		}
+	}
+
+	for (int i = 0; i < m_symbols.size(); ++i)
+		if (verified[i] == false)
+			error ("malformed expression: expr symbol #%1 is was left unverified", i);
+
+	delete verified;
+}
+
+
+// =============================================================================
+//
+// Which operator to evaluate?
+//
+Expression::SymbolList::Iterator Expression::findPrioritizedOperator()
+{
+	SymbolList::Iterator	best = m_symbols.end();
+	int						bestpriority = __INT_MAX__;
+
+	for (SymbolList::Iterator it = m_symbols.begin(); it != m_symbols.end(); ++it)
+	{
+		if ((*it)->type() != EXPRSYM_Operator)
+			continue;
+
+		ExpressionOperator* op = static_cast<ExpressionOperator*> (*it);
+		const OperatorInfo* info = &g_Operators[op->id()];
+
+		if (info->priority < bestpriority)
+		{
+			best = it;
+			bestpriority = info->priority;
+		}
+	}
+
+	return best;
+}
+
+// =============================================================================
+//
+// Process the given OPER_erator and values into a new value.
+//
+ExpressionValue* Expression::evaluateOperator (const ExpressionOperator* op,
+											   const List<ExpressionValue*>& values)
+{
+	const OperatorInfo* info = &g_Operators[op->id()];
+	bool isconstexpr = true;
+	assert (values.size() == info->numoperands);
+
+	for (ExpressionValue* val : values)
+	{
+		if (val->isConstexpr() == false)
+		{
+			isconstexpr = false;
+			break;
+		}
+	}
+
+	// If not all of the values are constant expressions, none of them shall be.
+	if (isconstexpr == false)
+		for (ExpressionValue* val : values)
+			val->convertToBuffer();
+
+	ExpressionValue* newval = new ExpressionValue (m_type);
+
+	if (isconstexpr == false)
+	{
+		// This is not a constant expression so we'll have to use databuffers
+		// to convey the expression to bytecode. Actual value cannot be evaluated
+		// until Zandronum processes it at run-time.
+		newval->setBuffer (new DataBuffer);
+
+		if (op->id() == OPER_Ternary)
+		{
+			// There isn't a dataheader for ternary OPER_erator. Instead, we use DH_IfNotGoto
+			// to create an "if-block" inside an expression.
+			// Behold, big block of writing madness! :P
+			//
+			DataBuffer* buf = newval->buffer();
+			DataBuffer* b0 = values[0]->buffer();
+			DataBuffer* b1 = values[1]->buffer();
+			DataBuffer* b2 = values[2]->buffer();
+			ByteMark* mark1 = buf->addMark (""); // start of "else" case
+			ByteMark* mark2 = buf->addMark (""); // end of expression
+			buf->mergeAndDestroy (b0);
+			buf->writeDWord (DH_IfNotGoto); // if the first OPER_erand (condition)
+			buf->addReference (mark1); // didn't eval true, jump into mark1
+			buf->mergeAndDestroy (b1); // otherwise, perform second OPER_erand (true case)
+			buf->writeDWord (DH_Goto); // afterwards, jump to the end, which is
+			buf->addReference (mark2); // marked by mark2.
+			buf->adjustMark (mark1); // move mark1 at the end of the true case
+			buf->mergeAndDestroy (b2); // perform third OPER_erand (false case)
+			buf->adjustMark (mark2); // move the ending mark2 here
+
+			for (int i = 0; i < 3; ++i)
+				values[i]->setBuffer (null);
+		}
+		else
+		{
+			// Generic case: write all arguments and apply the OPER_erator's
+			// data header.
+			for (ExpressionValue* val : values)
+			{
+				newval->buffer()->mergeAndDestroy (val->buffer());
+
+				// Null the pointer out so that the value's destructor will not
+				// attempt to double-free it.
+				val->setBuffer (null);
+			}
+
+			newval->buffer()->writeDWord (info->header);
+		}
+	}
+	else
+	{
+		// We have a constant expression. We know all the values involved and
+		// can thus compute the result of this expression on compile-time.
+		List<int> nums;
+		int a;
+
+		for (ExpressionValue* val : values)
+			nums << val->value();
+
+		switch (op->id())
+		{
+			case OPER_Addition:				a = nums[0] + nums[1];					break;
+			case OPER_Subtraction:			a = nums[0] - nums[1];					break;
+			case OPER_Multiplication:		a = nums[0] * nums[1];					break;
+			case OPER_UnaryMinus:			a = -nums[0];							break;
+			case OPER_NegateLogical:		a = !nums[0];							break;
+			case OPER_LeftShift:			a = nums[0] << nums[1];					break;
+			case OPER_RightShift:			a = nums[0] >> nums[1];					break;
+			case OPER_CompareLesser:		a = (nums[0] < nums[1]) ? 1 : 0;		break;
+			case OPER_CompareGreater:		a = (nums[0] > nums[1]) ? 1 : 0;		break;
+			case OPER_CompareAtLeast:		a = (nums[0] <= nums[1]) ? 1 : 0;		break;
+			case OPER_CompareAtMost:		a = (nums[0] >= nums[1]) ? 1 : 0;		break;
+			case OPER_CompareEquals:		a = (nums[0] == nums[1]) ? 1 : 0;		break;
+			case OPER_CompareNotEquals:		a = (nums[0] != nums[1]) ? 1 : 0;		break;
+			case OPER_BitwiseAnd:			a = nums[0] & nums[1];					break;
+			case OPER_BitwiseOr:			a = nums[0] | nums[1];					break;
+			case OPER_BitwiseXOr:			a = nums[0] ^ nums[1];					break;
+			case OPER_LogicalAnd:			a = (nums[0] && nums[1]) ? 1 : 0;		break;
+			case OPER_LogicalOr:			a = (nums[0] || nums[1]) ? 1 : 0;		break;
+			case OPER_Ternary:				a = (nums[0] != 0) ? nums[1] : nums[2];	break;
+
+			case OPER_Division:
+			{
+				if (nums[1] == 0)
+					error ("division by zero in constant expression");
+
+				a = nums[0] / nums[1];
+				break;
+			}
+
+			case OPER_Modulus:
+			{
+				if (nums[1] == 0)
+					error ("modulus by zero in constant expression");
+
+				a = nums[0] % nums[1];
+				break;
+			}
+		}
+
+		newval->setValue (a);
+	}
+
+	// The new value has been generated. We don't need the old stuff anymore.
+	for (ExpressionValue* val : values)
+		delete val;
+
+	delete op;
+	return newval;
+}
+
+// =============================================================================
+//
+ExpressionValue* Expression::evaluate()
+{
+	SymbolList::Iterator it;
+
+	while ((it = findPrioritizedOperator()) != m_symbols.end())
+	{
+		int i = it - m_symbols.begin();
+		List<SymbolList::Iterator> OPER_erands;
+		ExpressionOperator* op = static_cast<ExpressionOperator*> (*it);
+		const OperatorInfo* info = &g_Operators[op->id()];
+		int lower, upper; // Boundaries of area to replace
+
+		switch (info->numoperands)
+		{
+			case 1:
+			{
+				lower = i;
+				upper = i + 1;
+				OPER_erands << it + 1;
+				break;
+			}
+
+			case 2:
+			{
+				lower = i - 1;
+				upper = i + 1;
+				OPER_erands << it - 1
+				         << it + 1;
+				break;
+			}
+
+			case 3:
+			{
+				lower = i - 1;
+				upper = i + 3;
+				OPER_erands << it - 1
+				         << it + 1
+				         << it + 3;
+				break;
+			}
+
+			default:
+				assert (false);
+		}
+
+		List<ExpressionValue*> values;
+
+		for (auto it : OPER_erands)
+			values << static_cast<ExpressionValue*> (*it);
+
+		// Note: @op and all of @values are invalid after this call.
+		ExpressionValue* newvalue = evaluateOperator (op, values);
+
+		for (int i = upper; i >= lower; --i)
+			m_symbols.removeAt (i);
+
+		m_symbols.insert (lower, newvalue);
+	}
+
+	assert (m_symbols.size() == 1 && m_symbols.first()->type() == EXPRSYM_Value);
+	ExpressionValue* val = static_cast<ExpressionValue*> (m_symbols.first());
+	return val;
+}
+
+// =============================================================================
+//
+ExpressionValue* Expression::getResult()
+{
+	return static_cast<ExpressionValue*> (m_symbols.first());
+}
+
+// =============================================================================
+//
+String Expression::getTokenString()
+{
+	return m_lexer->token()->text;
+}
+
+// =============================================================================
+//
+ExpressionOperator::ExpressionOperator (ExpressionOperatorType id) :
+	ExpressionSymbol (EXPRSYM_Operator),
+	m_id (id) {}
+
+// =============================================================================
+//
+ExpressionValue::ExpressionValue (DataType valuetype) :
+	ExpressionSymbol (EXPRSYM_Value),
+	m_buffer (null),
+	m_valueType (valuetype) {}
+
+// =============================================================================
+//
+ExpressionValue::~ExpressionValue()
+{
+	delete m_buffer;
+}
+
+// =============================================================================
+//
+void ExpressionValue::convertToBuffer()
+{
+	if (isConstexpr() == false)
+		return;
+
+	setBuffer (new DataBuffer);
+
+	switch (m_valueType)
+	{
+		case TYPE_Bool:
+		case TYPE_Int:
+			buffer()->writeDWord (DH_PushNumber);
+			buffer()->writeDWord (abs (value()));
+
+			if (value() < 0)
+				buffer()->writeDWord (DH_UnaryMinus);
+			break;
+
+		case TYPE_String:
+			buffer()->writeDWord (DH_PushStringIndex);
+			buffer()->writeDWord (value());
+			break;
+
+		case TYPE_Void:
+		case TYPE_Unknown:
+			assert (false);
+			break;
+	}
+}

mercurial