src/lexer.cc

changeset 73
1ee9b312dc18
child 74
007fbadfa7f9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lexer.cc	Sat Jan 11 22:36:31 2014 +0200
@@ -0,0 +1,253 @@
+/*
+	Copyright (c) 2013-2014, Santeri Piippo
+	All rights reserved.
+
+	Redistribution and use in source and binary forms, with or without
+	modification, are permitted provided that the following conditions are met:
+
+		* Redistributions of source code must retain the above copyright
+		  notice, this list of conditions and the following disclaimer.
+
+		* Redistributions in binary form must reproduce the above copyright
+		  notice, this list of conditions and the following disclaimer in the
+		  documentation and/or other materials provided with the distribution.
+
+		* Neither the name of the <organization> nor the
+		  names of its contributors may be used to endorse or promote products
+		  derived from this software without specific prior written permission.
+
+	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+	DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+	DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+	ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <cstring>
+#include "lexer.h"
+
+static string_list	g_file_name_stack;
+static lexer*		g_main_lexer = null;
+
+lexer::lexer()
+{
+	assert (g_main_lexer == null);
+	g_main_lexer = this;
+	devf ("Lexer initialized\n");
+}
+
+lexer::~lexer()
+{
+	g_main_lexer = null;
+	devf ("Lexer de-initialized\n");
+}
+
+void lexer::process_file (string file_name)
+{
+	devf ("Lexer: processing %1\n", file_name);
+	FILE* fp = fopen (file_name, "r");
+
+	if (fp == null)
+		error ("couldn't open %1 for reading: %2", file_name, strerror (errno));
+
+	lexer_scanner sc (fp);
+
+	while (sc.get_next_token())
+	{
+		// Preprocessor commands:
+		if (sc.get_e_token() == tk_hash)
+		{
+			must_get_next_from_scanner (sc, tk_symbol);
+
+			if (sc.get_token_text() == "include")
+			{
+				devf ("Lexer: encountered #include\n");
+
+				must_get_next_from_scanner (sc, tk_string);
+				string file_name = sc.get_token_text();
+
+				if (g_file_name_stack.contains (file_name))
+					error ("attempted to #include %1 recursively", sc.get_token_text());
+
+				process_file (file_name);
+			}
+			else
+				error ("unknown preprocessor directive \"#%1\"", sc.get_token_text());
+		}
+		else
+		{
+			token tok;
+			tok.file = file_name;
+			tok.line = sc.get_line();
+			tok.column = sc.get_column();
+			tok.type = sc.get_e_token();
+			tok.text = sc.get_token_text();
+			m_tokens << tok;
+			devf ("Lexer: added %1 (%2)\n", describe_e_token (tok.type),
+				  describe_token (&tok));
+		}
+	}
+
+	devf ("Lexer: File %1 processed.\n", file_name);
+	m_token_position = m_tokens.begin() - 1;
+}
+
+// =============================================================================
+//
+bool lexer::get_next (e_token req)
+{
+	iterator pos = m_token_position;
+	devf ("Lexer: Requested next token, requirement: %1\n", describe_e_token (req));
+
+	if (is_at_end())
+	{
+		devf ("Lexer: at end of tokens. Failed.\n");
+		return false;
+	}
+
+	m_token_position++;
+
+	if (req != tk_any && get_token() != req)
+	{
+		devf ("Lexer: Token %1 does not meet the requirement\n", describe_token (get_token()));
+		m_token_position = pos;
+		return false;
+	}
+
+	devf ("Lexer: Get successful: %1\n", describe_token (get_token()));
+	return true;
+}
+
+// =============================================================================
+//
+void lexer::must_get_next (e_token tok)
+{
+	if (!get_next())
+		error ("unexpected EOF");
+
+	must_be (tok);
+}
+
+// =============================================================================
+// eugh..
+//
+void lexer::must_get_next_from_scanner (lexer_scanner& sc, e_token tok)
+{
+	if (!sc.get_next_token())
+		error ("unexpected EOF");
+
+	if (tok != tk_any && sc.get_e_token() != tok)
+		error ("expected %1, got %2", describe_e_token (tok),
+			   describe_token (get_token()));
+}
+
+// =============================================================================
+//
+void lexer::must_get_any_of (const list<e_token>& toks)
+{
+	devf ("Lexer: need to get a token that is any of: %1\n", toks);
+
+	if (!get_next())
+		error ("unexpected EOF");
+
+	for (e_token tok : toks)
+		if (get_token() == tok)
+			return;
+
+	string toknames;
+
+	for (const e_token& tok_type : toks)
+	{
+		if (&tok_type == &toks.last())
+			toknames += " or ";
+		elif (toknames.is_empty() == false)
+			toknames += ", ";
+
+		toknames += describe_e_token (tok_type);
+	}
+
+	error ("expected %1, got %2", toknames, describe_token (get_token()));
+}
+
+// =============================================================================
+//
+int lexer::get_one_symbol (const string_list& syms)
+{
+	if (!get_next())
+		error ("unexpected EOF");
+
+	if (get_token() == tk_symbol)
+	{
+		for (int i = 0; i < syms.size(); ++i)
+		{
+			if (syms[i] == get_token()->text)
+				return i;
+		}
+	}
+
+	error ("expected one of %1, got %2", syms, describe_token (get_token()));
+	return -1;
+}
+
+// =============================================================================
+//
+void lexer::must_be (e_token tok)
+{
+	if (get_token() != tok)
+		error ("expected %1, got %2", describe_e_token (tok),
+			describe_token (get_token()));
+}
+
+// =============================================================================
+//
+string lexer::describe_token_private (e_token tok_type, lexer::token* tok)
+{
+	if ( (int) tok_type < (int) last_named_token)
+		return "\"" + lexer_scanner::get_token_string (tok_type) + "\"";
+
+	switch (tok_type)
+	{
+		case tk_symbol:
+			return tok ? tok->text : "a symbol";
+
+		case tk_number:
+			return tok ? tok->text : "a number";
+
+		case tk_string:
+			return tok ? ("\"" + tok->text + "\"") : "a string";
+
+		case tk_any:
+			return tok ? tok->text : "any token";
+
+		default:
+			break;
+	}
+
+	return "";
+}
+
+// =============================================================================
+//
+bool lexer::peek_next (lexer::token* tk)
+{
+	iterator pos = m_token_position;
+	bool r = get_next();
+
+	if (r && tk != null)
+		*tk = *m_token_position;
+
+	m_token_position = pos;
+	return r;
+}
+
+// =============================================================================
+//
+lexer* lexer::get_main_lexer()
+{
+	return g_main_lexer;
+}

mercurial