--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/parser.cc Mon Jan 13 23:44:15 2014 +0200 @@ -0,0 +1,1450 @@ +/* + Copyright (c) 2012-2014, Santeri Piippo + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the <organization> nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "object_writer.h" +#include "parser.h" +#include "events.h" +#include "commands.h" +#include "stringtable.h" +#include "variables.h" +#include "containers.h" +#include "lexer.h" + +#define SCOPE(n) scopestack[g_ScopeCursor - n] + +// TODO: make these static +int g_NumStates = 0; +int g_NumEvents = 0; +parsermode_e g_CurMode = MODE_TOPLEVEL; +string g_CurState = ""; +bool g_stateSpawnDefined = false; +bool g_GotMainLoop = false; +int g_ScopeCursor = 0; +data_buffer* g_IfExpression = null; +bool g_CanElse = false; +static string* g_undefined_labels[MAX_MARKS]; // TODO: make a list +list<constant_info> g_ConstInfo; + +static botscript_parser* g_current_parser = null; + +// ============================================================================ +// +botscript_parser::botscript_parser() : + m_lx (new lexer) {} + +// ============================================================================ +// +botscript_parser::~botscript_parser() +{ + delete m_lx; +} + +// ============================================================================ +// +void botscript_parser::check_toplevel() +{ + if (g_CurMode != MODE_TOPLEVEL) + error ("%1-statements may only be defined at top level!", token_string().chars()); +} + +// ============================================================================ +// +void botscript_parser::check_not_toplevel() +{ + if (g_CurMode == MODE_TOPLEVEL) + error ("%1-statements must not be defined at top level!", token_string().chars()); +} + +// ============================================================================ +// Main parser code. Begins read of the script file, checks the syntax of it +// and writes the data to the object file via Objwriter - which also takes care +// of necessary buffering so stuff is written in the correct order. +void botscript_parser::parse_botscript (string file_name, object_writer* w) +{ + m_writer = w; + + // Lex and preprocess the file + m_lx->process_file (file_name); + + // Zero the entire block stack first + // TODO: this shouldn't be necessary + for (int i = 0; i < MAX_SCOPE; i++) + ZERO (scopestack[i]); + + for (int i = 0; i < MAX_MARKS; i++) + g_undefined_labels[i] = null; + + while (m_lx->get_next()) + { + // Check if else is potentically valid + if (token_is (tk_else) && !g_CanElse) + error ("else without preceding if"); + + if (!token_is (tk_else)) + g_CanElse = false; + + switch (m_lx->get_token()->type) + { + case tk_state: + parse_state_block(); + break; + + case tk_event: + parse_event_block(); + break; + + case tk_mainloop: + parse_mainloop(); + break; + + case tk_onenter: + case tk_onexit: + parse_on_enter_exit(); + break; + + case tk_int: + case tk_str: + case tk_void: + parse_variable_declaration(); + break; + + case tk_goto: + parse_goto(); + break; + + case tk_if: + parse_if(); + break; + + case tk_else: + parse_else(); + break; + + case tk_while: + parse_while_block(); + break; + + case tk_for: + parse_for_block(); + break; + + case tk_do: + parse_do_block(); + break; + + case tk_switch: + parse_switch_block(); + break; + + case tk_case: + parse_switch_case(); + break; + + case tk_default: + parse_switch_default(); + break; + + case tk_break: + parse_break(); + break; + + case tk_continue: + parse_continue(); + break; + + case tk_brace_end: + parse_block_end(); + break; + + case tk_const: + parse_const(); + break; + + default: + { + // Check for labels + lexer::token next; + + if (token_is (tk_symbol) && + m_lx->peek_next (&next) && + next.type == tk_colon) + { + parse_label(); + break; + } + + // Check if it's a command + command_info* comm = find_command_by_name (token_string()); + + if (comm) + { + m_writer->get_current_buffer()->merge (ParseCommand (comm)); + m_lx->must_get_next (tk_semicolon); + continue; + } + + // If nothing else, parse it as a statement + data_buffer* b = parse_statement (w); + + if (!b) + error ("unknown token `%1`", token_string()); + + m_writer->write_buffer (b); + m_lx->must_get_next (tk_semicolon); + } + break; + } + } + + // =============================================================================== + // Script file ended. Do some last checks and write the last things to main buffer + if (g_CurMode != MODE_TOPLEVEL) + error ("script did not end at top level; a `}` is missing somewhere"); + + // stateSpawn must be defined! + if (!g_stateSpawnDefined) + error ("script must have a state named `stateSpawn`!"); + + for (int i = 0; i < MAX_MARKS; i++) + if (g_undefined_labels[i]) + error ("label `%s` is referenced via `goto` but isn't defined\n", g_undefined_labels[i]->chars()); + + // Dump the last state's onenter and mainloop + m_writer->write_member_buffers(); + + // String table + m_writer->write_string_table(); +} + +// ============================================================================ +// +void botscript_parser::parse_state_block() +{ + check_toplevel(); + m_lx->must_get_next (tk_string); + string statename = token_string(); + + // State name must be a word. + if (statename.first (" ") != -1) + error ("state name must be a single word, got `%1`", statename); + + // stateSpawn is special - it *must* be defined. If we + // encountered it, then mark down that we have it. + if (-statename == "statespawn") + g_stateSpawnDefined = true; + + // Must end in a colon + m_lx->must_get_next (tk_colon); + + // write the previous state's onenter and + // mainloop buffers to file now + if (g_CurState.is_empty() == false) + m_writer->write_member_buffers(); + + m_writer->write (dh_state_name); + m_writer->write_string (statename); + m_writer->write (dh_state_index); + m_writer->write (g_NumStates); + + g_NumStates++; + g_CurState = statename; + g_GotMainLoop = false; +} + +// ============================================================================ +// +void botscript_parser::parse_event_block() +{ + check_toplevel(); + m_lx->must_get_next (tk_string); + + event_info* e = find_event_by_name (token_string()); + + if (!e) + error ("bad event, got `%1`\n", token_string()); + + m_lx->must_get_next (tk_brace_start); + g_CurMode = MODE_EVENT; + m_writer->write (dh_event); + m_writer->write (e->number); + g_NumEvents++; +} + +// ============================================================================ +// +void botscript_parser::parse_mainloop() +{ + check_toplevel(); + m_lx->must_get_next (tk_brace_start); + + // Mode must be set before dataheader is written here! + g_CurMode = MODE_MAINLOOP; + m_writer->write (dh_main_loop); +} + +// ============================================================================ +// +void botscript_parser::parse_on_enter_exit() +{ + check_toplevel(); + bool onenter = (token_is (tk_onenter)); + m_lx->must_get_next (tk_brace_start); + + // Mode must be set before dataheader is written here, + // because onenter goes to a separate buffer. + g_CurMode = onenter ? MODE_ONENTER : MODE_ONEXIT; + m_writer->write (onenter ? dh_on_enter : dh_on_exit); +} + +// ============================================================================ +// +void botscript_parser::parse_variable_declaration() +{ + // For now, only globals are supported + if (g_CurMode != MODE_TOPLEVEL || g_CurState.is_empty() == false) + error ("variables must only be global for now"); + + type_e type = (token_is (tk_int)) ? TYPE_INT : + (token_is (tk_str)) ? TYPE_STRING : + TYPE_BOOL; + + m_lx->must_get_next(); + string varname = token_string(); + + // Var name must not be a number + if (varname.is_numeric()) + error ("variable name must not be a number"); + + script_variable* var = declare_global_variable (type, varname); + (void) var; + m_lx->must_get_next (tk_semicolon); +} + +// ============================================================================ +// +void botscript_parser::parse_goto() +{ + check_not_toplevel(); + + // Get the name of the label + m_lx->must_get_next(); + + // Find the mark this goto statement points to + string target = token_string(); + int m = m_writer->find_byte_mark (target); + + // If not set, define it + if (m == MAX_MARKS) + { + m = m_writer->add_mark (target); + g_undefined_labels[m] = new string (target); + } + + // Add a reference to the mark. + m_writer->write (dh_goto); + m_writer->add_reference (m); + m_lx->must_get_next (tk_semicolon); + continue; +} + +// ============================================================================ +// +void botscript_parser::parse_if() +{ + check_not_toplevel(); + push_scope(); + + // Condition + m_lx->must_get_next (tk_paren_start); + + // Read the expression and write it. + m_lx->must_get_next(); + data_buffer* c = parse_expression (TYPE_INT); + m_writer->write_buffer (c); + + m_lx->must_get_next (tk_paren_end); + m_lx->must_get_next (tk_brace_start); + + // Add a mark - to here temporarily - and add a reference to it. + // Upon a closing brace, the mark will be adjusted. + int marknum = m_writer->add_mark (""); + + // Use dh_if_not_goto - if the expression is not true, we goto the mark + // we just defined - and this mark will be at the end of the scope block. + m_writer->write (dh_if_not_goto); + m_writer->add_reference (marknum); + + // Store it + SCOPE (0).mark1 = marknum; + SCOPE (0).type = e_if_scope; +} + +// ============================================================================ +// +void botscript_parser::parse_else() +{ + check_not_toplevel(); + m_lx->must_get_next (tk_brace_start); + + // Don't use PushScope as it resets the scope + g_ScopeCursor++; + + if (g_ScopeCursor >= MAX_SCOPE) + error ("too deep scope"); + + if (SCOPE (0).type != e_if_scope) + error ("else without preceding if"); + + // write down to jump to the end of the else statement + // Otherwise we have fall-throughs + SCOPE (0).mark2 = m_writer->add_mark (""); + + // Instruction to jump to the end after if block is complete + m_writer->write (dh_goto); + m_writer->add_reference (SCOPE (0).mark2); + + // Move the ifnot mark here and set type to else + m_writer->move_mark (SCOPE (0).mark1); + SCOPE (0).type = e_else_scope; +} + +// ============================================================================ +// +void botscript_parser::parse_while_block() +{ + check_not_toplevel(); + push_scope(); + + // While loops need two marks - one at the start of the loop and one at the + // end. The condition is checked at the very start of the loop, if it fails, + // we use goto to skip to the end of the loop. At the end, we loop back to + // the beginning with a go-to statement. + int mark1 = m_writer->add_mark (""); // start + int mark2 = m_writer->add_mark (""); // end + + // Condition + m_lx->must_get_next (tk_paren_start); + m_lx->must_get_next(); + data_buffer* expr = parse_expression (TYPE_INT); + m_lx->must_get_next (tk_paren_end); + m_lx->must_get_next (tk_brace_start); + + // write condition + m_writer->write_buffer (expr); + + // Instruction to go to the end if it fails + m_writer->write (dh_if_not_goto); + m_writer->add_reference (mark2); + + // Store the needed stuff + SCOPE (0).mark1 = mark1; + SCOPE (0).mark2 = mark2; + SCOPE (0).type = e_while_scope; +} + +// ============================================================================ +// +void botscript_parser::parse_for_block() +{ + check_not_toplevel(); + push_scope(); + + // Initializer + m_lx->must_get_next (tk_paren_start); + m_lx->must_get_next(); + data_buffer* init = parse_statement (w); + + if (!init) + error ("bad statement for initializer of for"); + + m_lx->must_get_next (tk_semicolon); + + // Condition + m_lx->must_get_next(); + data_buffer* cond = parse_expression (TYPE_INT); + + if (!cond) + error ("bad statement for condition of for"); + + m_lx->must_get_next (tk_semicolon); + + // Incrementor + m_lx->must_get_next(); + data_buffer* incr = parse_statement (w); + + if (!incr) + error ("bad statement for incrementor of for"); + + m_lx->must_get_next (tk_paren_end); + m_lx->must_get_next (tk_brace_start); + + // First, write out the initializer + m_writer->write_buffer (init); + + // Init two marks + int mark1 = m_writer->add_mark (""); + int mark2 = m_writer->add_mark (""); + + // Add the condition + m_writer->write_buffer (cond); + m_writer->write (dh_if_not_goto); + m_writer->add_reference (mark2); + + // Store the marks and incrementor + SCOPE (0).mark1 = mark1; + SCOPE (0).mark2 = mark2; + SCOPE (0).buffer1 = incr; + SCOPE (0).type = e_for_scope; +} + +// ============================================================================ +// +void botscript_parser::parse_do_block() +{ + check_not_toplevel(); + push_scope(); + m_lx->must_get_next (tk_brace_start); + SCOPE (0).mark1 = m_writer->add_mark (""); + SCOPE (0).type = e_do_scope; +} + +// ============================================================================ +// +void botscript_parser::parse_switch_block() +{ + // This gets a bit tricky. switch is structured in the + // bytecode followingly: + // + // (expression) + // case a: goto casemark1 + // case b: goto casemark2 + // case c: goto casemark3 + // goto mark1 // jump to end if no matches + // casemark1: ... + // casemark2: ... + // casemark3: ... + // mark1: // end mark + + check_not_toplevel(); + push_scope(); + m_lx->must_get_next (tk_paren_start); + m_lx->must_get_next(); + m_writer->write_buffer (parse_expression (TYPE_INT)); + m_lx->must_get_next (tk_paren_end); + m_lx->must_get_next (tk_brace_start); + SCOPE (0).type = e_switch_scope; + SCOPE (0).mark1 = m_writer->add_mark (""); // end mark + SCOPE (0).buffer1 = null; // default header +} + +// ============================================================================ +// +void botscript_parser::parse_switch_case() +{ + // case is only allowed inside switch + if (SCOPE (0).type != e_switch_scope) + error ("case label outside switch"); + + // Get the literal (Zandronum does not support expressions here) + m_lx->must_get_next (tk_number); + int num = m_lx->get_token()->text.to_long(); + m_lx->must_get_next (tk_colon); + + for (int i = 0; i < MAX_CASE; i++) + if (SCOPE (0).casenumbers[i] == num) + error ("multiple case %d labels in one switch", num); + + // write down the expression and case-go-to. This builds + // the case tree. The closing event will write the actual + // blocks and move the marks appropriately. + // AddSwitchCase will add the reference to the mark + // for the case block that this heralds, and takes care + // of buffering setup and stuff like that. + // null the switch buffer for the case-go-to statement, + // we want it all under the switch, not into the case-buffers. + m_writer->SwitchBuffer = null; + m_writer->write (dh_case_goto); + m_writer->write (num); + add_switch_case (m_writer, null); + SCOPE (0).casenumbers[SCOPE (0).casecursor] = num; +} + +// ============================================================================ +// +void botscript_parser::parse_switch_default() +{ + if (SCOPE (0).type != e_switch_scope) + error ("default label outside switch"); + + if (SCOPE (0).buffer1) + error ("multiple default labels in one switch"); + + m_lx->must_get_next (tk_colon); + + // The default header is buffered into buffer1, since + // it has to be the last of the case headers + // + // Since the expression is pushed into the switch + // and is only popped when case succeeds, we have + // to pop it with dh_drop manually if we end up in + // a default. + data_buffer* b = new data_buffer; + SCOPE (0).buffer1 = b; + b->write (dh_drop); + b->write (dh_goto); + add_switch_case (m_writer, b); +} + +// ============================================================================ +// +void botscript_parser::parse_break() +{ + if (!g_ScopeCursor) + error ("unexpected `break`"); + + m_writer->write (dh_goto); + + // switch and if use mark1 for the closing point, + // for and while use mark2. + switch (SCOPE (0).type) + { + case e_if_scope: + case e_switch_scope: + { + m_writer->add_reference (SCOPE (0).mark1); + } break; + + case e_for_scope: + case e_while_scope: + { + m_writer->add_reference (SCOPE (0).mark2); + } break; + + default: + { + error ("unexpected `break`"); + } break; + } + + m_lx->must_get_next (tk_semicolon); +} + +// ============================================================================ +// +void botscript_parser::parse_continue() +{ + m_lx->must_get_next (tk_semicolon); + + int curs; + bool found = false; + + // Fall through the scope until we find a loop block + for (curs = g_ScopeCursor; curs > 0 && !found; curs--) + { + switch (scopestack[curs].type) + { + case e_for_scope: + case e_while_scope: + case e_do_scope: + { + m_writer->write (dh_goto); + m_writer->add_reference (scopestack[curs].mark1); + found = true; + } break; + + default: + break; + } + } + + // No loop blocks + if (!found) + error ("`continue`-statement not inside a loop"); +} + +// ============================================================================ +// +void botscript_parser::parse_block_end() +{ + // Closing brace + // If we're in the block stack, we're descending down from it now + if (g_ScopeCursor > 0) + { + switch (SCOPE (0).type) + { + case e_if_scope: + // Adjust the closing mark. + m_writer->move_mark (SCOPE (0).mark1); + + // We're returning from if, thus else can be next + g_CanElse = true; + break; + + case e_else_scope: + // else instead uses mark1 for itself (so if expression + // fails, jump to else), mark2 means end of else + m_writer->move_mark (SCOPE (0).mark2); + break; + + case e_for_scope: + // write the incrementor at the end of the loop block + m_writer->write_buffer (SCOPE (0).buffer1); + + // fall-thru + case e_while_scope: + // write down the instruction to go back to the start of the loop + m_writer->write (dh_goto); + m_writer->add_reference (SCOPE (0).mark1); + + // Move the closing mark here since we're at the end of the while loop + m_writer->move_mark (SCOPE (0).mark2); + break; + + case e_do_scope: + { + m_lx->must_get_next (tk_while); + m_lx->must_get_next (tk_paren_start); + m_lx->must_get_next(); + data_buffer* expr = parse_expression (TYPE_INT); + m_lx->must_get_next (tk_paren_end); + m_lx->must_get_next (tk_semicolon); + + // If the condition runs true, go back to the start. + m_writer->write_buffer (expr); + m_writer->write (dh_if_goto); + m_writer->add_reference (SCOPE (0).mark1); + break; + } + + case e_switch_scope: + { + // Switch closes. Move down to the record buffer of + // the lower block. + if (SCOPE (1).casecursor != -1) + m_writer->SwitchBuffer = SCOPE (1).casebuffers[SCOPE (1).casecursor]; + else + m_writer->SwitchBuffer = null; + + // If there was a default in the switch, write its header down now. + // If not, write instruction to jump to the end of switch after + // the headers (thus won't fall-through if no case matched) + if (SCOPE (0).buffer1) + m_writer->write_buffer (SCOPE (0).buffer1); + else + { + m_writer->write (dh_drop); + m_writer->write (dh_goto); + m_writer->add_reference (SCOPE (0).mark1); + } + + // Go through all of the buffers we + // recorded down and write them. + for (int u = 0; u < MAX_CASE; u++) + { + if (!SCOPE (0).casebuffers[u]) + continue; + + m_writer->move_mark (SCOPE (0).casemarks[u]); + m_writer->write_buffer (SCOPE (0).casebuffers[u]); + } + + // Move the closing mark here + m_writer->move_mark (SCOPE (0).mark1); + break; + } + + case e_unknown_scope: + break; + } + + // Descend down the stack + g_ScopeCursor--; + continue; + } + + int dataheader = (g_CurMode == MODE_EVENT) ? dh_end_event : + (g_CurMode == MODE_MAINLOOP) ? dh_end_main_loop : + (g_CurMode == MODE_ONENTER) ? dh_end_on_enter : + (g_CurMode == MODE_ONEXIT) ? dh_end_on_exit : -1; + + if (dataheader == -1) + error ("unexpected `}`"); + + // Data header must be written before mode is changed because + // onenter and mainloop go into special buffers, and we want + // the closing data headers into said buffers too. + m_writer->write (dataheader); + g_CurMode = MODE_TOPLEVEL; + m_lx->get_next (tk_semicolon); +} + +// ============================================================================ +// +void botscript_parser::parse_const() +{ + constant_info info; + + // Get the type + m_lx->must_get_next(); + string typestring = token_string(); + info.type = GetTypeByName (typestring); + + if (info.type == TYPE_UNKNOWN || info.type == TYPE_VOID) + error ("unknown type `%1` for constant", typestring); + + m_lx->must_get_next(); + info.name = token_string(); + + m_lx->must_get_next (tk_assign); + + switch (info.type) + { + case TYPE_BOOL: + case TYPE_INT: + { + m_lx->must_get_next (tk_number); + } break; + + case TYPE_STRING: + { + m_lx->must_get_next (tk_string); + } break; + + case TYPE_UNKNOWN: + case TYPE_VOID: + break; + } + + info.val = m_lx->get_token()->text; + g_ConstInfo << info; + + m_lx->must_get_next (tk_semicolon); +} + +// ============================================================================ +// +void botscript_parser::parse_label() +{ + check_not_toplevel(); + string label_name = token_string(); + + // want no conflicts.. + if (find_command_by_name (label_name)) + error ("label name `%s` conflicts with command name\n", label_name); + + if (find_global_variable (label_name)) + error ("label name `%s` conflicts with variable\n", label_name); + + // See if a mark already exists for this label + int mark = -1; + + for (int i = 0; i < MAX_MARKS; i++) + { + if (g_undefined_labels[i] && *g_undefined_labels[i] == label_name) + { + mark = i; + m_writer->move_mark (i); + + // No longer undefinde + delete g_undefined_labels[i]; + g_undefined_labels[i] = null; + } + } + + // Not found in unmarked lists, define it now + if (mark == -1) + m_writer->add_mark (label_name); + + m_lx->must_get_next (tk_colon); +} + +// ============================================================================ +// Parses a command call +data_buffer* botscript_parser::ParseCommand (command_info* comm) +{ + data_buffer* r = new data_buffer (64); + + if (g_CurMode == MODE_TOPLEVEL) + error ("command call at top level"); + + m_lx->must_get_next (tk_paren_start); + m_lx->must_get_next(); + + int curarg = 0; + + while (1) + { + if (token_is (tk_paren_end)) + { + if (curarg < comm->numargs) + error ("too few arguments passed to %s\n\tprototype: %s", + comm->name.chars(), get_command_signature (comm).chars()); + + break; + curarg++; + } + + if (curarg >= comm->maxargs) + error ("too many arguments passed to %s\n\tprototype: %s", + comm->name.chars(), get_command_signature (comm).chars()); + + r->merge (parse_expression (comm->args[curarg].type)); + m_lx->must_get_next(); + + if (curarg < comm->numargs - 1) + { + m_lx->must_be (tk_comma); + m_lx->must_get_next(); + } + else if (curarg < comm->maxargs - 1) + { + // Can continue, but can terminate as well. + if (token_is (tk_paren_end)) + { + curarg++; + break; + } + else + { + m_lx->must_be (tk_comma); + m_lx->must_get_next(); + } + } + + curarg++; + } + + // If the script skipped any optional arguments, fill in defaults. + while (curarg < comm->maxargs) + { + r->write (dh_push_number); + r->write (comm->args[curarg].defvalue); + curarg++; + } + + r->write (dh_command); + r->write (comm->number); + r->write (comm->maxargs); + + return r; +} + +// ============================================================================ +// Is the given operator an assignment operator? +static bool is_assignment_operator (int oper) +{ + switch (oper) + { + case OPER_ASSIGNADD: + case OPER_ASSIGNSUB: + case OPER_ASSIGNMUL: + case OPER_ASSIGNDIV: + case OPER_ASSIGNMOD: + case OPER_ASSIGNLEFTSHIFT: + case OPER_ASSIGNRIGHTSHIFT: + case OPER_ASSIGN: + return true; + } + + return false; +} + +// ============================================================================ +// Finds an operator's corresponding dataheader +static word get_data_header_by_operator (script_variable* var, int oper) +{ + if (is_assignment_operator (oper)) + { + if (!var) + error ("operator %d requires left operand to be a variable\n", oper); + + // TODO: At the moment, vars only are global + // OPER_ASSIGNLEFTSHIFT and OPER_ASSIGNRIGHTSHIFT do not + // have data headers, instead they are expanded out in + // the operator parser + switch (oper) + { + case OPER_ASSIGNADD: return dh_add_global_var; + case OPER_ASSIGNSUB: return dh_subtract_global_var; + case OPER_ASSIGNMUL: return dh_multiply_global_var; + case OPER_ASSIGNDIV: return dh_divide_global_var; + case OPER_ASSIGNMOD: return dh_mod_global_var; + case OPER_ASSIGN: return dh_assign_global_var; + + default: error ("bad assignment operator!!\n"); + } + } + + switch (oper) + { + case OPER_ADD: return dh_add; + case OPER_SUBTRACT: return dh_subtract; + case OPER_MULTIPLY: return dh_multiply; + case OPER_DIVIDE: return dh_divide; + case OPER_MODULUS: return dh_modulus; + case OPER_EQUALS: return dh_equals; + case OPER_NOTEQUALS: return dh_not_equals; + case OPER_LESSTHAN: return dh_less_than; + case OPER_GREATERTHAN: return dh_greater_than; + case OPER_LESSTHANEQUALS: return dh_at_most; + case OPER_GREATERTHANEQUALS: return dh_at_least; + case OPER_LEFTSHIFT: return dh_left_shift; + case OPER_RIGHTSHIFT: return dh_right_shift; + case OPER_OR: return dh_or_logical; + case OPER_AND: return dh_and_logical; + case OPER_BITWISEOR: return dh_or_bitwise; + case OPER_BITWISEEOR: return dh_eor_bitwise; + case OPER_BITWISEAND: return dh_and_bitwise; + } + + error ("DataHeaderByOperator: couldn't find dataheader for operator %d!\n", oper); + return 0; +} + +// ============================================================================ +// Parses an expression, potentially recursively +data_buffer* botscript_parser::parse_expression (type_e reqtype) +{ + data_buffer* retbuf = new data_buffer (64); + + // Parse first operand + retbuf->merge (parse_expr_value (reqtype)); + + // Parse any and all operators we get + int oper; + + while ( (oper = parse_operator (true)) != -1) + { + // We peeked the operator, move forward now + m_lx->skip(); + + // Can't be an assignement operator, those belong in assignments. + if (is_assignment_operator (oper)) + error ("assignment operator inside expression"); + + // Parse the right operand. + m_lx->must_get_next(); + data_buffer* rb = parse_expr_value (reqtype); + + if (oper == OPER_TERNARY) + { + // Ternary operator requires - naturally - a third operand. + m_lx->must_get_next (tk_colon); + m_lx->must_get_next(); + data_buffer* tb = parse_expr_value (reqtype); + + // It also is handled differently: there isn't a dataheader for ternary + // operator. Instead, we abuse PUSHNUMBER and IFNOTGOTO for this. + // Behold, big block of writing madness! :P + int mark1 = retbuf->add_mark (""); // start of "else" case + int mark2 = retbuf->add_mark (""); // end of expression + retbuf->write (dh_if_not_goto); // if the first operand (condition) + retbuf->add_reference (mark1); // didn't eval true, jump into mark1 + retbuf->merge (rb); // otherwise, perform second operand (true case) + retbuf->write (dh_goto); // afterwards, jump to the end, which is + retbuf->add_reference (mark2); // marked by mark2. + retbuf->move_mark (mark1); // move mark1 at the end of the true case + retbuf->merge (tb); // perform third operand (false case) + retbuf->move_mark (mark2); // move the ending mark2 here + } + else + { + // write to buffer + retbuf->merge (rb); + retbuf->write (get_data_header_by_operator (null, oper)); + } + } + + return retbuf; +} + +// ============================================================================ +// Parses an operator string. Returns the operator number code. +#define ISNEXT(C) (m_lx->peek_next_string (peek ? 1 : 0) == C) +int botscript_parser::parse_operator (bool peek) +{ + string oper; + + if (peek) + oper += m_lx->peek_next_string(); + else + oper += token_string(); + + if (-oper == "strlen") + return OPER_STRLEN; + + // Check one-char operators + bool equalsnext = ISNEXT ("="); + + int o = (oper == "=" && !equalsnext) ? OPER_ASSIGN : + (oper == ">" && !equalsnext && !ISNEXT (">")) ? OPER_GREATERTHAN : + (oper == "<" && !equalsnext && !ISNEXT ("<")) ? OPER_LESSTHAN : + (oper == "&" && !ISNEXT ("&")) ? OPER_BITWISEAND : + (oper == "|" && !ISNEXT ("|")) ? OPER_BITWISEOR : + (oper == "+" && !equalsnext) ? OPER_ADD : + (oper == "-" && !equalsnext) ? OPER_SUBTRACT : + (oper == "*" && !equalsnext) ? OPER_MULTIPLY : + (oper == "/" && !equalsnext) ? OPER_DIVIDE : + (oper == "%" && !equalsnext) ? OPER_MODULUS : + (oper == "^") ? OPER_BITWISEEOR : + (oper == "?") ? OPER_TERNARY : + -1; + + if (o != -1) + { + return o; + } + + // Two-char operators + oper += m_lx->peek_next_string (peek ? 1 : 0); + equalsnext = m_lx->peek_next_string (peek ? 2 : 1) == ("="); + + o = (oper == "+=") ? OPER_ASSIGNADD : + (oper == "-=") ? OPER_ASSIGNSUB : + (oper == "*=") ? OPER_ASSIGNMUL : + (oper == "/=") ? OPER_ASSIGNDIV : + (oper == "%=") ? OPER_ASSIGNMOD : + (oper == "==") ? OPER_EQUALS : + (oper == "!=") ? OPER_NOTEQUALS : + (oper == ">=") ? OPER_GREATERTHANEQUALS : + (oper == "<=") ? OPER_LESSTHANEQUALS : + (oper == "&&") ? OPER_AND : + (oper == "||") ? OPER_OR : + (oper == "<<" && !equalsnext) ? OPER_LEFTSHIFT : + (oper == ">>" && !equalsnext) ? OPER_RIGHTSHIFT : + -1; + + if (o != -1) + { + m_lx->must_get_next(); + return o; + } + + // Three-char opers + oper += m_lx->peek_next_string (peek ? 2 : 1); + o = oper == "<<=" ? OPER_ASSIGNLEFTSHIFT : + oper == ">>=" ? OPER_ASSIGNRIGHTSHIFT : + -1; + + if (o != -1) + { + m_lx->must_get_next(); + m_lx->must_get_next(); + } + + return o; +} + +// ============================================================================ +string botscript_parser::parse_float() +{ + m_lx->must_be (tk_number); + string floatstring = token_string(); + lexer::token tok; + + // Go after the decimal point + if (m_lx->peek_next (&tok) && tok.type == tk_dot) + { + m_lx->skip(); + m_lx->must_get_next (tk_number); + floatstring += "."; + floatstring += token_string(); + } + + return floatstring; +} + +// ============================================================================ +// Parses a value in the expression and returns the data needed to push +// it, contained in a data buffer. A value can be either a variable, a command, +// a literal or an expression. +data_buffer* botscript_parser::parse_expr_value (type_e reqtype) +{ + data_buffer* b = new data_buffer (16); + script_variable* g; + + // Prefixing "!" means negation. + bool negate = token_is (tk_exclamation_mark); + + if (negate) // Jump past the "!" + m_lx->skip(); + + // Handle strlen + /* if (token_string() == "strlen") + { + m_lx->must_get_next (tk_paren_start); + m_lx->must_get_next(); + + // By this token we should get a string constant. + constant_info* constant = find_constant (token_string()); + + if (!constant || constant->type != TYPE_STRING) + error ("strlen only works with const str"); + + if (reqtype != TYPE_INT) + error ("strlen returns int but %s is expected\n", GetTypeName (reqtype).c_str()); + + b->write (dh_push_number); + b->write (constant->val.len()); + + m_lx->must_get_next (tk_paren_end); + } + else */ + if (token_is (tk_paren_start)) + { + // Expression + m_lx->must_get_next(); + data_buffer* c = parse_expression (reqtype); + b->merge (c); + m_lx->must_get_next (tk_paren_end); + } + else if (command_info* comm = find_command_by_name (token_string())) + { + delete b; + + // Command + if (reqtype && comm->returnvalue != reqtype) + error ("%s returns an incompatible data type", comm->name.chars()); + + b = ParseCommand (comm); + } + else if (constant_info* constant = find_constant (token_string())) + { + // Type check + if (reqtype != constant->type) + error ("constant `%s` is %s, expression requires %s\n", + constant->name.c_str(), GetTypeName (constant->type).c_str(), + GetTypeName (reqtype).c_str()); + + switch (constant->type) + { + case TYPE_BOOL: + case TYPE_INT: + b->write (dh_push_number); + b->write (atoi (constant->val)); + break; + + case TYPE_STRING: + b->write_string (constant->val); + break; + + case TYPE_VOID: + case TYPE_UNKNOWN: + break; + } + } + else if ((g = find_global_variable (token_string()))) + { + // Global variable + b->write (dh_push_global_var); + b->write (g->index); + } + else + { + // If nothing else, check for literal + switch (reqtype) + { + case TYPE_VOID: + case TYPE_UNKNOWN: + error ("unknown identifier `%1` (expected keyword, function or variable)", token_string()); + break; + + case TYPE_BOOL: + case TYPE_INT: + { + m_lx->must_be (tk_number); + + // All values are written unsigned - thus we need to write the value's + // absolute value, followed by an unary minus for negatives. + b->write (dh_push_number); + + long v = token_string().to_long(); + b->write (static_cast<word> (abs (v))); + + if (v < 0) + b->write (dh_unary_minus); + + break; + } + + case TYPE_STRING: + // PushToStringTable either returns the string index of the + // string if it finds it in the table, or writes it to the + // table and returns it index if it doesn't find it there. + m_lx->must_be (tk_string); + b->write_string (token_string()); + break; + } + } + + // Negate it now if desired + if (negate) + b->write (dh_negate_logical); + + return b; +} + +// ============================================================================ +// Parses an assignment. An assignment starts with a variable name, followed +// by an assignment operator, followed by an expression value. Expects current +// token to be the name of the variable, and expects the variable to be given. +data_buffer* botscript_parser::ParseAssignment (script_variable* var) +{ + bool global = !var->statename.len(); + + // Get an operator + m_lx->must_get_next(); + int oper = parse_operator(); + + if (!is_assignment_operator (oper)) + error ("expected assignment operator"); + + if (g_CurMode == MODE_TOPLEVEL) + error ("can't alter variables at top level"); + + // Parse the right operand + m_lx->must_get_next(); + data_buffer* retbuf = new data_buffer; + data_buffer* expr = parse_expression (var->type); + + // <<= and >>= do not have data headers. Solution: expand them. + // a <<= b -> a = a << b + // a >>= b -> a = a >> b + if (oper == OPER_ASSIGNLEFTSHIFT || oper == OPER_ASSIGNRIGHTSHIFT) + { + retbuf->write (global ? dh_push_global_var : dh_push_local_var); + retbuf->write (var->index); + retbuf->merge (expr); + retbuf->write ((oper == OPER_ASSIGNLEFTSHIFT) ? dh_left_shift : dh_right_shift); + retbuf->write (global ? dh_assign_global_var : dh_assign_local_var); + retbuf->write (var->index); + } + else + { + retbuf->merge (expr); + long dh = get_data_header_by_operator (var, oper); + retbuf->write (dh); + retbuf->write (var->index); + } + + return retbuf; +} + +void botscript_parser::push_scope() +{ + g_ScopeCursor++; + + if (g_ScopeCursor >= MAX_SCOPE) + error ("too deep scope"); + + ScopeInfo* info = &SCOPE (0); + info->type = e_unknown_scope; + info->mark1 = 0; + info->mark2 = 0; + info->buffer1 = null; + info->casecursor = -1; + + for (int i = 0; i < MAX_CASE; i++) + { + info->casemarks[i] = MAX_MARKS; + info->casebuffers[i] = null; + info->casenumbers[i] = -1; + } +} + +data_buffer* botscript_parser::parse_statement (object_writer* w) +{ + if (find_constant (token_string())) // There should not be constants here. + error ("invalid use for constant\n"); + + // If it's a variable, expect assignment. + if (script_variable* var = find_global_variable (token_string())) + return ParseAssignment (var); + + return null; +} + +void botscript_parser::add_switch_case (object_writer* w, data_buffer* b) +{ + ScopeInfo* info = &SCOPE (0); + + info->casecursor++; + + if (info->casecursor >= MAX_CASE) + error ("too many cases in one switch"); + + // Init a mark for the case buffer + int m = m_writer->add_mark (""); + info->casemarks[info->casecursor] = m; + + // Add a reference to the mark. "case" and "default" both + // add the necessary bytecode before the reference. + if (b) + b->add_reference (m); + else + m_writer->add_reference (m); + + // Init a buffer for the case block and tell the object + // writer to record all written data to it. + info->casebuffers[info->casecursor] = m_writer->SwitchBuffer = new data_buffer; +} + +// ============================================================================ +// +constant_info* find_constant (const string& tok) +{ + for (int i = 0; i < g_ConstInfo.size(); i++) + if (g_ConstInfo[i].name == tok) + return &g_ConstInfo[i]; + + return null; +} + +// ============================================================================ +// +bool botscript_parser::token_is (e_token a) +{ + return (m_lx->get_token_type() == a); +} + +// ============================================================================ +// +string botscript_parser::token_string() +{ + return m_lx->get_token()->text; +} + +// ============================================================================ +// +string botscript_parser::describe_position() const +{ + lexer::token* tok = m_lx->get_token(); + return tok->file + ":" + string (tok->line) + ":" + string (tok->column); +}