scanner.cpp

changeset 0
c51cce84a9ac
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scanner.cpp	Fri Mar 15 20:11:18 2013 +0200
@@ -0,0 +1,409 @@
+/*
+ *	botc source code
+ *	Copyright (C) 2012 Santeri `azimuth` Piippo
+ *	All rights reserved.
+ *	
+ *	Redistribution and use in source and binary forms, with or without
+ *	modification, are permitted provided that the following conditions are met:
+ *	
+ *	1. Redistributions of source code must retain the above copyright notice,
+ *	   this list of conditions and the following disclaimer.
+ *	2. Redistributions in binary form must reproduce the above copyright notice,
+ *	   this list of conditions and the following disclaimer in the documentation
+ *	   and/or other materials provided with the distribution.
+ *	3. Neither the name of the developer nor the names of its contributors may
+ *	   be used to endorse or promote products derived from this software without
+ *	   specific prior written permission.
+ *	4. Redistributions in any form must be accompanied by information on how to
+ *	   obtain complete source code for the software and any accompanying
+ *	   software that uses the software. The source code must either be included
+ *	   in the distribution or be available for no more than the cost of
+ *	   distribution plus a nominal fee, and must be freely redistributable
+ *	   under reasonable conditions. For an executable file, complete source
+ *	   code means the source code for all modules it contains. It does not
+ *	   include source code for modules or files that typically accompany the
+ *	   major components of the operating system on which the executable file
+ *	   runs.
+ *	
+ *	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ *	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *	POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "string.h"
+#include "str.h"
+#include "common.h"
+#include "scanner.h"
+#include "stdarg.h"
+
+// Snapshots the line-tracking state (the at-newline flag plus the line and
+// column counters of the file at index `fc`) into local constants. Because it
+// declares locals, it can be expanded only once per scope.
+#define STORE_POSITION \
+	const bool _atnewline = bAtNewLine; \
+	const ulong ulStoredLineNumber = ulaLineNumber[fc]; \
+	const ulong ulStoredCurChar = ulaCurChar[fc];
+
+// Writes back the values captured by STORE_POSITION in the same scope. `fc`
+// is evaluated again here, so the values go to whichever file is current at
+// the point of expansion.
+#define RESTORE_POSITION \
+	bAtNewLine = _atnewline; \
+	ulaLineNumber[fc] = ulStoredLineNumber; \
+	ulaCurChar[fc] = ulStoredCurChar;
+
+// ============================================================================
+// Builds a scanner with an empty file stack and then opens `path` as the
+// first file on that stack.
+Scanner::Scanner (str path) {
+	// Every slot of the file stack starts out unused.
+	unsigned int slot = MAX_FILESTACK;
+	while (slot > 0)
+		fp[--slot] = NULL;
+	
+	// -1 marks an empty stack; OpenFile advances it to 0.
+	fc = -1;
+	lPrevPos = 0;
+	zPrevToken = "";
+	token = "";
+	dCommentMode = 0;
+	
+	OpenFile (path);
+}
+
+// ============================================================================
+// Reports an unterminated block comment, then closes every file that is
+// still on the stack, warning about each one.
+Scanner::~Scanner () {
+	// If comment mode is 2 by the time the file ended, the
+	// comment was left unterminated. 1 is no problem, since
+	// it's terminated by newlines anyway.
+	if (dCommentMode == 2)
+		ParserError ("unterminated `/*`-style comment");
+	
+	// Walk the stack from the top down. CloseFile decrements `fc` and then
+	// seeks in fp[fc]; going bottom-up would close that very entry first and
+	// leave CloseFile seeking in a NULL stream.
+	for (unsigned int u = MAX_FILESTACK; u-- > 0;) {
+		if (fp[u]) {
+			ParserWarning ("file idx %u remained open after parsing", u);
+			CloseFile (u);
+		}
+	}
+}
+
+// ============================================================================
+// Opens `path` for reading and pushes it onto the file stack, making it the
+// current file. The read offset of the file that was current until now is
+// saved in laSavedPos so that CloseFile can resume from it. Terminates
+// through ParserError if the stack is full or the file cannot be opened.
+void Scanner::OpenFile (str path) {
+	if (fc+1 >= MAX_FILESTACK)
+		ParserError ("supposed to open file `%s` but file stack is full! do you have recursive `#include` directives?",
+			path.chars());
+	
+	// Open before touching the stack: if this fails, `fc` still refers to the
+	// file that asked for `path` (or to no file at all), so the error is
+	// reported with initialized position data rather than from an unset slot.
+	// ParserError does not return, so no further exit is needed.
+	FILE* newfp = fopen (path.chars(), "r");
+	if (!newfp)
+		ParserError ("couldn't open %s for reading!\n", path.chars ());
+	
+	// Save the position of the file that is being suspended.
+	if (fc != -1)
+		laSavedPos[fc] = ftell (fp[fc]);
+	
+	fc++;
+	fp[fc] = newfp;
+	
+	// NOTE(review): this keeps the pointer returned by path.chars() although
+	// `path` is a by-value parameter - confirm the buffer outlives this call.
+	saFilePath[fc] = path.chars();
+	ulaLineNumber[fc] = 1;
+	ulaCurChar[fc] = 1;
+	ulaPosition[fc] = 0;
+	bAtNewLine = false;
+}
+
+// ============================================================================
+// Closes the file at stack index `u` and pops the stack. An out-of-range `u`
+// selects the current file. Afterwards the file that becomes current, if
+// any, is repositioned to the offset stored for it in laSavedPos.
+void Scanner::CloseFile (unsigned int u) {
+	if (u >= MAX_FILESTACK) {
+		// Nothing is open: converting fc == -1 to unsigned would index far
+		// outside fp.
+		if (fc < 0)
+			return;
+		
+		u = fc;
+	}
+	
+	if (!fp[u])
+		return;
+	
+	fclose (fp[u]);
+	fp[u] = NULL;
+	fc--;
+	
+	// Resume the file underneath, unless the stack is now empty or that slot
+	// has itself been closed already.
+	if (fc >= 0 && fp[fc])
+		fseek (fp[fc], laSavedPos[fc], SEEK_SET);
+}
+
+// ============================================================================
+// Reads one character from the current file and updates the line and column
+// counters. Returns 0 when the stream is at end-of-file or the read fails.
+char Scanner::ReadChar () {
+	FILE* const stream = fp[fc];
+	char ch;
+	
+	if (feof (stream) || fread (&ch, 1, 1, stream) == 0)
+		return 0;
+	
+	// The previous character was a line break, so this one opens a new line.
+	// The column is zeroed here and bumped to 1 just below.
+	if (bAtNewLine) {
+		ulaLineNumber[fc]++;
+		ulaCurChar[fc] = 0;
+		bAtNewLine = false;
+	}
+	
+	ulaCurChar[fc]++;
+	bAtNewLine = (ch == '\n');
+	return ch;
+}
+
+// ============================================================================
+// Returns the character `offset` positions past the read cursor of the
+// current file without consuming anything, or 0 if no character could be
+// read there.
+char Scanner::PeekChar (int offset) {
+	// Remember where we are so that the cursor can be put back.
+	long curpos = ftell (fp[fc]);
+	STORE_POSITION
+	
+	// Forward by offset
+	fseek (fp[fc], offset, SEEK_CUR);
+	
+	// A plain local is enough for a single byte; nothing is allocated, so
+	// nothing can leak.
+	char ch = 0;
+	const size_t got = fread (&ch, sizeof (char), 1, fp[fc]);
+	
+	// Rewind back, whether or not the read succeeded.
+	fseek (fp[fc], curpos, SEEK_SET);
+	RESTORE_POSITION
+	
+	return got ? ch : 0;
+}
+
+// ============================================================================
+// Reads the next token from the current file into `token`, moving the old
+// value to zPrevToken. Comments are skipped. When a file ends and `peek` is
+// false, it is closed and reading continues in the file below it on the
+// stack. Returns true if a token was found; otherwise `token` is emptied and
+// false is returned.
+bool Scanner::Next (bool peek) {
+	// No file is open any more, so there is nothing left to read.
+	if (fc < 0) {
+		token = "";
+		return false;
+	}
+	
+	lPrevPos = ftell (fp[fc]);
+	str tmp = "";
+	
+	while (1) {
+		// Check end-of-file
+		if (feof (fp[fc])) {
+			// If we're just peeking, we shouldn't
+			// actually close anything.. 
+			if (peek)
+				break;
+			
+			CloseFile ();
+			if (fc == -1)
+				break;
+		}
+		
+		// Check if the next token possibly starts a comment. This only counts
+		// outside of comments: a `//` inside a `/* */` block (or a `/*` after
+		// `//`) must not switch the comment style.
+		if (PeekChar () == '/' && !tmp.len() && dCommentMode == 0) {
+			char c2 = PeekChar (1);
+			// C++-style comment
+			if (c2 == '/')
+				dCommentMode = 1;
+			else if (c2 == '*')
+				dCommentMode = 2;
+			
+			// We don't need to actually read in the
+			// comment characters, since they will get
+			// ignored due to comment mode anyway.
+		}
+		
+		c = ReadChar ();
+		
+		// If this is a comment we're reading, check if this character
+		// gets the comment terminated, otherwise ignore it.
+		if (dCommentMode > 0) {
+			if (dCommentMode == 1 && c == '\n') {
+				// C++-style comments are terminated by a newline
+				dCommentMode = 0;
+				continue;
+			} else if (dCommentMode == 2 && c == '*') {
+				// C-style comments are terminated by a `*/`
+				if (PeekChar() == '/') {
+					dCommentMode = 0;
+					ReadChar ();
+				}
+			}
+			
+			// Otherwise, ignore it.
+			continue;
+		}
+		
+		// Non-alphanumeric characters (sans underscore) break the word too.
+		// If there was prior data, the delimiter pushes the cursor back so
+		// that the next character will be the same delimiter. If there isn't,
+		// the delimiter itself is included (and thus becomes a token itself.)
+		if ((c >= 33 && c <= 47) ||
+			(c >= 58 && c <= 64) ||
+			(c >= 91 && c <= 96 && c != '_') ||
+			(c >= 123 && c <= 126)) {
+			if (tmp.len())
+				fseek (fp[fc], ftell (fp[fc]) - 1, SEEK_SET);
+			else
+				tmp += c;
+			break;
+		}
+		
+		// Whitespace, control characters and everything outside printable
+		// ASCII separate tokens.
+		if (c <= 32 || c >= 127) {
+			// Don't break if we haven't gathered anything yet.
+			if (tmp.len())
+				break;
+		} else {
+			tmp += c;
+		}
+	}
+	
+	// If we got nothing here, read failed. This should
+	// only happen in the case of EOF.
+	if (!tmp.len()) {
+		token = "";
+		return false;
+	}
+	
+	ulaPosition[fc]++;
+	zPrevToken = token;
+	token = tmp;
+	return true;
+}
+
+// ============================================================================
+// Returns the token `offset` tokens ahead of the current one (0 being the
+// very next token) without advancing the cursor. Returns an empty string if
+// the current file ends before that token is reached. Either way the scanner
+// state is put back to what it was before the call.
+str Scanner::PeekNext (int offset) {
+	// Store current information
+	str storedtoken = token;
+	str storedprevtoken = zPrevToken;
+	const long cpos = ftell (fp[fc]);
+	const long storedprevpos = lPrevPos;
+	const ulong ulStoredPosition = ulaPosition[fc];
+	const int storedcommentmode = dCommentMode;
+	STORE_POSITION
+	
+	// Advance token by token; stop early once the input runs out.
+	bool found = true;
+	while (found && offset >= 0) {
+		found = Next (true);
+		offset--;
+	}
+	
+	// An empty result marks a failed peek.
+	str tmp = "";
+	if (found)
+		tmp = token;
+	
+	// Undo everything Next() touched, on the failure path too. The token
+	// counter is reset to its saved value rather than decremented, since
+	// Next() may have run several times.
+	fseek (fp[fc], cpos, SEEK_SET);
+	ulaPosition[fc] = ulStoredPosition;
+	token = storedtoken;
+	zPrevToken = storedprevtoken;
+	lPrevPos = storedprevpos;
+	dCommentMode = storedcommentmode;
+	RESTORE_POSITION
+	return tmp;
+}
+
+// ============================================================================
+// Skips n+1 tokens. With SEEK_SET the current file is first rewound to its
+// beginning; with SEEK_CUR skipping starts at the current position. SEEK_END
+// is not supported: a notice is printed and skipping proceeds as for
+// SEEK_CUR.
+void Scanner::Seek (unsigned int n, int origin) {
+	switch (origin) {
+	case SEEK_SET:
+		fseek (fp[fc], 0, SEEK_SET);
+		ulaPosition[fc] = 0;
+		// Line tracking has to restart as well, as for a freshly opened file;
+		// otherwise diagnostics after the rewind report stale positions.
+		ulaLineNumber[fc] = 1;
+		ulaCurChar[fc] = 1;
+		bAtNewLine = false;
+		break;
+	case SEEK_CUR:
+		break;
+	case SEEK_END:
+		printf ("Scanner::Seek: SEEK_END not yet supported.\n");
+		break;
+	}
+	
+	for (unsigned int i = 0; i < n+1; i++)
+		Next();
+}
+
+// ============================================================================
+// Advances to the next token, aborting through ParserError if the input has
+// run out. When `c` is a non-empty string, the new token must also equal it.
+void Scanner::MustNext (const char* c) {
+	const bool gotToken = Next();
+	const bool expectsLiteral = (strlen (c) != 0);
+	
+	if (!gotToken) {
+		if (expectsLiteral)
+			ParserError ("expected `%s`, reached end of file instead\n", c);
+		else
+			ParserError ("expected a token, reached end of file instead\n");
+	}
+	
+	if (expectsLiteral)
+		MustThis (c);
+}
+
+// ============================================================================
+// Requires the current token to equal `c`; any other token is reported
+// through ParserError.
+void Scanner::MustThis (const char* c) {
+	const bool matches = (token.compare (c) == 0);
+	if (matches)
+		return;
+	
+	ParserError ("expected `%s`, got `%s` instead", c, token.chars());
+}
+
+// ============================================================================
+// Reports `message` as an error through ParserMessage and terminates the
+// program with exit status 1; this function does not return.
+void Scanner::ParserError (const char* message, ...) {
+	// NOTE(review): PERFORM_FORMAT is defined outside this file; presumably it
+	// expands the printf-style arguments into `outmessage` - confirm.
+	PERFORM_FORMAT (message, outmessage);
+	ParserMessage ("\nError: ", outmessage);
+	exit (1);
+}
+
+// ============================================================================
+// Reports `message` as a warning through ParserMessage and returns to the
+// caller.
+void Scanner::ParserWarning (const char* message, ...) {
+	// NOTE(review): PERFORM_FORMAT is defined outside this file; presumably it
+	// expands the printf-style arguments into `outmessage` - confirm.
+	PERFORM_FORMAT (message, outmessage);
+	ParserMessage ("Warning: ", outmessage);
+}
+
+// ============================================================================
+// Writes `header` followed by `message` to stderr. While `fc` is a valid
+// stack index, the path, line number and column recorded for that file are
+// inserted between the two.
+void Scanner::ParserMessage (const char* header, char* message) {
+	// Without a valid current file there is no location to show.
+	if (fc < 0 || fc >= MAX_FILESTACK) {
+		fprintf (stderr, "%s%s\n", header, message);
+		return;
+	}
+	
+	fprintf (stderr, "%s%s:%lu:%lu: %s\n",
+		header, saFilePath[fc], ulaLineNumber[fc], ulaCurChar[fc], message);
+}
+
+// ============================================================================
+// Reads a string literal into `token`, without the surrounding quotes.
+// If gotquote is true, the current token must already be the opening
+// quotation mark; otherwise the next token must be it.
+void Scanner::MustString (bool gotquote) {
+	if (!gotquote)
+		MustNext ("\"");
+	else
+		MustThis ("\"");
+	
+	// Collect raw characters up to, but not including, the closing quote.
+	str contents;
+	for (;;) {
+		// Running out of input here means the literal was never closed.
+		if (feof (fp[fc]))
+			ParserError ("unterminated string");
+		
+		char ch = ReadChar ();
+		if (ch == '"')
+			break;
+		
+		contents += ch;
+	}
+	
+	token = contents;
+}
+
+// ============================================================================
+// Requires a number and stores its textual form in `token`. Unless
+// `fromthis` is true, a new token is read first. A lone `-` token is joined
+// with the token that follows it, and `true` / `false` (as matched by
+// icompare) become "1" / "0".
+void Scanner::MustNumber (bool fromthis) {
+	if (!fromthis)
+		MustNext ();
+	
+	str num = token;
+	if (!num.compare ("-")) {
+		MustNext ();
+		num += token;
+	}
+	
+	// "true" and "false" are valid numbers
+	if (!token.icompare ("true"))
+		token = "1";
+	else if (!token.icompare ("false"))
+		token = "0";
+	else {
+		if (!token.isnumber())
+			ParserError ("expected a number, got `%s`", num.chars());
+		
+		// Round-trip the whole literal, sign included, through atoi. Comparing
+		// only `token` (the bare digits) against the signed result would flag
+		// every negative number as too large.
+		str check;
+		check.appendformat ("%d", atoi (num.chars ()));
+		if (num.compare (check) != 0)
+			ParserWarning ("integer too large: %s -> %s", num.chars(), check.chars());
+		
+		token = num;
+	}
+}
\ No newline at end of file

mercurial