src/LexerScanner.cc

changeset 88
5def6ff8b466
child 91
427eb377d53e
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/LexerScanner.cc	Sun Feb 02 17:06:39 2014 +0200
@@ -0,0 +1,290 @@
+/*
+	Copyright 2012-2014 Santeri Piippo
+	All rights reserved.
+
+	Redistribution and use in source and binary forms, with or without
+	modification, are permitted provided that the following conditions
+	are met:
+
+	1. Redistributions of source code must retain the above copyright
+	   notice, this list of conditions and the following disclaimer.
+	2. Redistributions in binary form must reproduce the above copyright
+	   notice, this list of conditions and the following disclaimer in the
+	   documentation and/or other materials provided with the distribution.
+	3. The name of the author may not be used to endorse or promote products
+	   derived from this software without specific prior written permission.
+
+	THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+	IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+	OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+	IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+	INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+	NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+	THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+#include <string>
+#include "LexerScanner.h"
+#include "Lexer.h"
+
+// Spellings of the tokens that have a fixed text, indexed by EToken value:
+// GetNextToken() casts the index of the matching entry to EToken, and
+// GetTokenString() uses an EToken as the index into this table. The order
+// therefore has to stay in sync with the EToken enumeration.
+//
+// GetNextToken() tries the entries from first to last and accepts the first
+// one that matches, which is why e.g. "==" and "+=" are listed before "=" and
+// "+". Entries at index tkFirstNamedToken and above are matched with
+// FCheckWord, i.e. they must not be followed by a symbol character.
+//
+// NOTE(review): "->" is listed after "-", so with first-match scanning "-"
+// appears to always win and "->" can never be produced -- confirm, and if so
+// reorder it together with the EToken enumeration.
+static const String gTokenStrings[] =
+{
+	"==",
+	"[]",
+	"+=",
+	"-=",
+	"*=",
+	"/=",
+	"%=",
+	"'",
+	"$",
+	"(",
+	")",
+	"[",
+	"]",
+	"{",
+	"}",
+	"=",
+	"+",
+	"-",
+	"*",
+	"/",
+	"%",
+	",",
+	"<",
+	">",
+	".",
+	":",
+	";",
+	"#",
+	"!",
+	"->",
+	"bool",
+	"break",
+	"case",
+	"continue",
+	"const",
+	"default",
+	"do",
+	"else",
+	"event",
+	"eventdef",
+	"for",
+	"funcdef",
+	"goto",
+	"if",
+	"int",
+	"mainloop",
+	"onenter",
+	"onexit",
+	"state",
+	"switch",
+	"str",
+	"void",
+	"while",
+	"enum",
+	"func",
+	"return",
+};
+
+// Compile-time guard: the table must have exactly one entry for each token
+// value from 0 up to and including tkLastNamedToken.
+static_assert (countof (gTokenStrings) == (int) tkLastNamedToken + 1,
+	"Count of gTokenStrings is not the same as the amount of named token identifiers.");
+
+// =============================================================================
+//
+// Reads the entire contents of fp into a heap buffer and positions the
+// scanner at its first character, with the line counter starting at 1.
+//
+// fp must be an open, seekable stream; it is rewound and read from the start
+// but not closed here. The buffer is always terminated with '\0' because the
+// scanning routines use that character as their end-of-input sentinel.
+LexerScanner::LexerScanner (FILE* fp) :
+	mLine (1)
+{
+	long fsize = 0;
+
+	if (fseek (fp, 0l, SEEK_END) == 0)
+		fsize = ftell (fp);
+
+	// ftell reports failure as -1; treat that as an empty input rather than
+	// passing a negative size to new[].
+	if (fsize < 0)
+		fsize = 0;
+
+	rewind (fp);
+
+	// One extra byte for the terminating '\0'.
+	mData = new char[fsize + 1];
+	mPosition = mLineBreakPosition = &mData[0];
+
+	long bytes = (long) fread (mData, 1, fsize, fp);
+
+	// Terminate at the number of bytes actually read so that a short read
+	// never exposes uninitialized memory to the scanner.
+	mData[bytes] = '\0';
+	assert (bytes >= fsize);
+}
+
+// =============================================================================
+//
+// Releases the file buffer. mData is allocated with new char[] in the
+// constructor, so it has to be released with the array form of delete.
+LexerScanner::~LexerScanner()
+{
+	delete[] mData;
+}
+
+// =============================================================================
+//
+// Tests whether the text at the cursor starts with the string c.
+//
+// flags is a bit mask:
+//   FCheckWord - additionally require that the character following the match
+//                is not a symbol character (as judged by IsSymbolChar).
+//   FCheckPeek - leave the cursor where it is even when the match succeeds.
+//
+// Returns true on a match; unless FCheckPeek is set the cursor is then moved
+// past the matched text. On a mismatch the cursor is left untouched.
+bool LexerScanner::CheckString (const char* c, int flags)
+{
+	const size_t length = strlen (c);
+
+	if (strncmp (mPosition, c, length) != 0)
+		return false;
+
+	// A word must be followed by something that cannot continue a symbol
+	if ((flags & FCheckWord) && IsSymbolChar (mPosition[length], true))
+		return false;
+
+	// Consume the match unless the caller only wanted to look
+	if ((flags & FCheckPeek) == 0)
+		mPosition += length;
+
+	return true;
+}
+
+// =============================================================================
+//
+// Scans the next token starting at the cursor.
+//
+// Leading whitespace, "//" line comments and "/* */" block comments are
+// skipped first. The token's text is then stored in mTokenText and its type
+// in mTokenType: an entry of gTokenStrings (type = its index), a quoted
+// string (tkString, with \n, \t and \" escapes decoded), a run of digits
+// (tkNumber) or a symbol (tkSymbol).
+//
+// Returns true if a token was read and false once the terminating '\0' of the
+// input is reached. Malformed input is reported through Error(); should
+// Error() return, false is returned as well.
+bool LexerScanner::GetNextToken()
+{
+	mTokenText = "";
+
+	// Skip any mix of whitespace and comments. This is a loop rather than a
+	// recursive call so that long runs of comments cannot exhaust the stack.
+	for (;;)
+	{
+		// The <cctype> functions require a value representable as unsigned
+		// char, hence the casts on plain char.
+		while (isspace ((unsigned char) *mPosition))
+			Skip();
+
+		if (strncmp (mPosition, "//", 2) == 0)
+		{
+			mPosition += 2;
+
+			// A comment on the last line may end at the terminator instead of
+			// at a line break.
+			while (*mPosition != '\n' && *mPosition != '\0')
+				Skip();
+
+			continue;
+		}
+
+		if (strncmp (mPosition, "/*", 2) == 0)
+		{
+			Skip (2); // skip the start symbols
+
+			while (*mPosition != '\0' && strncmp (mPosition, "*/", 2) != 0)
+				Skip();
+
+			if (*mPosition == '\0')
+			{
+				Error ("unterminated block comment");
+				return false;
+			}
+
+			Skip (2); // skip the end symbols
+			continue;
+		}
+
+		break;
+	}
+
+	if (*mPosition == '\0')
+		return false;
+
+	// Check tokens
+	for (int i = 0; i < countof (gTokenStrings); ++i)
+	{
+		int flags = 0;
+
+		// Entries from tkFirstNamedToken on must match as whole words
+		if (i >= tkFirstNamedToken)
+			flags |= FCheckWord;
+
+		if (CheckString (gTokenStrings[i], flags))
+		{
+			mTokenText = gTokenStrings[i];
+			mTokenType = (EToken) i;
+			return true;
+		}
+	}
+
+	// Check and parse string
+	if (*mPosition == '\"')
+	{
+		mPosition++;
+
+		while (*mPosition != '\"')
+		{
+			if (!*mPosition)
+			{
+				Error ("unterminated string");
+				return false; // never scan beyond the terminator
+			}
+
+			if (CheckString ("\\n"))
+				mTokenText += '\n';
+			else if (CheckString ("\\t"))
+				mTokenText += '\t';
+			else if (CheckString ("\\\""))
+				mTokenText += '"';
+			else
+				mTokenText += *mPosition++;
+		}
+
+		mTokenType = tkString;
+		Skip(); // skip the final quote
+		return true;
+	}
+
+	if (isdigit ((unsigned char) *mPosition))
+	{
+		while (isdigit ((unsigned char) *mPosition))
+			mTokenText += *mPosition++;
+
+		mTokenType = tkNumber;
+		return true;
+	}
+
+	if (IsSymbolChar (*mPosition, false))
+	{
+		mTokenType = tkSymbol;
+
+		do
+		{
+			if (!IsSymbolChar (*mPosition, true))
+				break;
+
+			mTokenText += *mPosition++;
+		} while (*mPosition != '\0');
+
+		return true;
+	}
+
+	Error ("unknown character \"%1\"", *mPosition);
+	return false;
+}
+
+// =============================================================================
+//
+// Moves the cursor forward by one character. When the character being left
+// behind is a line break, the line counter is incremented and the position
+// of that line break is remembered in mLineBreakPosition.
+void LexerScanner::Skip()
+{
+	const bool atLineBreak = (*mPosition == '\n');
+
+	if (atLineBreak)
+	{
+		mLineBreakPosition = mPosition;
+		++mLine;
+	}
+
+	++mPosition;
+}
+
+// =============================================================================
+//
+// Moves the cursor forward by the given number of characters, one Skip() at
+// a time so that line breaks passed on the way are accounted for. Does
+// nothing when chars is zero or negative.
+void LexerScanner::Skip (int chars)
+{
+	while (chars-- > 0)
+		Skip();
+}
+
+// =============================================================================
+//
+// Returns the fixed spelling of token a as stored in gTokenStrings.
+// Only values up to tkLastNamedToken have an entry; this is asserted.
+String LexerScanner::GetTokenString (EToken a)
+{
+	const int index = (int) a;
+	assert (index <= tkLastNamedToken);
+
+	return gTokenStrings[index];
+}
+
+// =============================================================================
+//
+// Returns the text from the cursor up to, but not including, the next line
+// break, and leaves the cursor on that line break. If the input ends before
+// a line break is found, reading stops at the terminating '\0' instead of
+// running off the end of the buffer.
+String LexerScanner::ReadLine()
+{
+	String line;
+
+	while (*mPosition != '\n' && *mPosition != '\0')
+		line += *(mPosition++);
+
+	return line;
+}
\ No newline at end of file

mercurial