src/lexer_scanner.cc

changeset 73
1ee9b312dc18
child 75
bf8c57437231
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lexer_scanner.cc	Sat Jan 11 22:36:31 2014 +0200
@@ -0,0 +1,249 @@
+/*
+	Copyright (c) 2013-2014, Santeri Piippo
+	All rights reserved.
+
+	Redistribution and use in source and binary forms, with or without
+	modification, are permitted provided that the following conditions are met:
+
+		* Redistributions of source code must retain the above copyright
+		  notice, this list of conditions and the following disclaimer.
+
+		* Redistributions in binary form must reproduce the above copyright
+		  notice, this list of conditions and the following disclaimer in the
+		  documentation and/or other materials provided with the distribution.
+
+		* Neither the name of the <organization> nor the
+		  names of its contributors may be used to endorse or promote products
+		  derived from this software without specific prior written permission.
+
+	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+	DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+	DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+	ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+#include <string>
+#include "lexer_scanner.h"
+
+// Textual form of every named token, indexed by token number.
+//
+// NOTE(review): get_next_token() casts an index into this table straight to
+// e_token, so the order here presumably has to mirror the e_token enumeration
+// declared elsewhere - confirm before reordering or inserting entries.
+static const string g_token_strings[] =
+{
+	"==",
+	"[]",
+	"+=",
+	"-=",
+	"*=",
+	"/=",
+	"%=",
+	"'",
+	"$",
+	"(",
+	")",
+	"[",
+	"]",
+	"{",
+	"}",
+	"=",
+	"+",
+	"-",
+	"*",
+	"/",
+	"%",
+	",",
+	"<",
+	">",
+	".",
+	":",
+	";",
+	"#",
+	"!",
+	"->",
+	"bool",
+	"break",
+	"case",
+	"continue",
+	"const",
+	"default",
+	"do",
+	"else",
+	"event",
+	"for",
+	"goto",
+	"if",
+	"int",
+	"mainloop",
+	"onenter",
+	"onexit",
+	"state",
+	"switch",
+	// The comma after "str" matters: without it the adjacent literals are
+	// concatenated into a single "strvoid" entry and every later index shifts.
+	"str",
+	"void",
+	"while",
+	"enum",
+	"func",
+	"return",
+};
+
+// Catch the table and the token enumeration drifting apart at compile time.
+static_assert (countof (g_token_strings) == (int) last_named_token + 1,
+	"Count of g_token_strings is not the same as the amount of named token identifiers.");
+
+// =============================================================================
+// Reads the whole contents of fp into a NUL-terminated buffer owned by the
+// scanner and places the cursor at its first byte. Line counting starts at 1.
+// The stream is left open; it is not closed here.
+//
+lexer_scanner::lexer_scanner (FILE* fp) :
+	m_line (1)
+{
+	fseek (fp, 0l, SEEK_END);
+	long fsize = ftell (fp);
+	rewind (fp);
+
+	// ftell() reports -1 when the position cannot be determined. Treat that as
+	// empty input so the size arithmetic below stays valid.
+	if (fsize < 0)
+		fsize = 0;
+
+	// One extra byte for the terminator: the scanning code detects the end of
+	// input by looking for '\0' and uses strncmp() on the buffer.
+	m_data = new char[fsize + 1];
+	m_ptr = m_line_break_pos = &m_data[0];
+
+	const long bytes = (long) fread (m_data, 1, fsize, fp);
+	assert (bytes >= fsize);
+
+	// Terminate after what was actually read, in case of a short read.
+	m_data[bytes] = '\0';
+}
+
+// =============================================================================
+// Releases the buffer that the constructor allocated for the file contents.
+//
+lexer_scanner::~lexer_scanner()
+{
+	// m_data comes from new[], so it has to be released with delete[].
+	delete[] m_data;
+}
+
+// =============================================================================
+// Tests whether the text at the cursor begins with the string c.
+//
+// flags is a combination of:
+//   f_check_word - additionally require a whitespace character directly after
+//                  the matched text
+//   f_check_peek - leave the cursor where it is even if the match succeeds
+//
+// Returns true on a match. Unless f_check_peek is given, the cursor is then
+// moved past the matched text.
+//
+bool lexer_scanner::check_string (const char* c, int flags)
+{
+	const size_t length = strlen (c);
+
+	if (strncmp (m_ptr, c, length) != 0)
+		return false;
+
+	// There is to be whitespace after words. The character is converted to
+	// unsigned char first because isspace() is undefined for negative values.
+	if ((flags & f_check_word) && !isspace ((unsigned char) m_ptr[length]))
+		return false;
+
+	// Advance the cursor unless we want to just peek
+	if (! (flags & f_check_peek))
+		m_ptr += length;
+
+	return true;
+}
+
+// =============================================================================
+// Scans the next token starting at the cursor. On success the token's text is
+// stored in m_token_text and its type in m_e_token, and the cursor is left
+// just past the token.
+//
+// Tried in order: the named tokens of g_token_strings, a double-quoted string
+// literal (with \n, \t and \" escapes), a run of digits, a run of symbol
+// characters.
+//
+// Returns false at the end of input, on an unterminated string literal, or
+// when the character at the cursor cannot start any token.
+//
+bool lexer_scanner::get_next_token()
+{
+	m_token_text = "";
+
+	// Skip leading whitespace and keep the line bookkeeping up to date.
+	// isspace() returns an arbitrary non-zero value on a match, so its result
+	// is tested for truth rather than compared against `true`.
+	while (isspace ((unsigned char) *m_ptr))
+	{
+		if (*m_ptr == '\n')
+		{
+			m_line++;
+			m_line_break_pos = m_ptr;
+		}
+
+		m_ptr++;
+	}
+
+	if (*m_ptr == '\0')
+		return false;
+
+	// Check tokens
+	const int token_count = (int) (sizeof g_token_strings / sizeof * g_token_strings);
+
+	for (int i = 0; i < token_count; ++i)
+	{
+		if (check_string (g_token_strings[i], f_check_word))
+		{
+			m_token_text = g_token_strings[i];
+			m_e_token = (e_token) i;
+			return true;
+		}
+	}
+
+	// Check and parse string
+	if (*m_ptr == '\"')
+	{
+		m_ptr++;
+
+		while (*m_ptr != '\"')
+		{
+			// Ran into the end of input before the closing quote
+			if (!*m_ptr)
+				return false;
+
+			// Escape sequences: check_string() consumes the two-character
+			// sequence and the character it stands for is appended instead.
+			if (check_string ("\\n"))
+			{
+				m_token_text += '\n';
+				continue;
+			}
+
+			if (check_string ("\\t"))
+			{
+				m_token_text += '\t';
+				continue;
+			}
+
+			if (check_string ("\\\""))
+			{
+				m_token_text += '"';
+				continue;
+			}
+
+			m_token_text += *m_ptr++;
+		}
+
+		m_e_token = tk_string;
+		m_ptr++; // skip the final quote
+		return true;
+	}
+
+	m_e_token = tk_symbol;
+
+	if (isdigit ((unsigned char) *m_ptr))
+	{
+		while (isdigit ((unsigned char) *m_ptr))
+			m_token_text += *m_ptr++;
+
+		m_e_token = tk_number;
+		return true;
+	}
+
+	if (is_symbol_char (*m_ptr))
+	{
+		// Collect symbol characters until the end of input, a non-symbol
+		// character, or a position where a named token begins.
+		while (*m_ptr != '\0' && is_symbol_char (*m_ptr))
+		{
+			bool stop_here = false;
+
+			for (const string& i : g_token_strings)
+			{
+				if (check_string (i, f_check_peek | f_check_word))
+				{
+					stop_here = true;
+					break;
+				}
+			}
+
+			if (stop_here)
+				break;
+
+			m_token_text += *m_ptr++;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
+// =============================================================================
+// Returns the text of the named token a, looked up in g_token_strings. Only
+// tokens up to and including last_named_token have an entry there.
+//
+string lexer_scanner::get_token_string (e_token a)
+{
+	const int index = (int) a;
+	assert (index <= (int) last_named_token);
+	return g_token_strings[index];
+}

mercurial