scriptreader.cxx

Mon, 16 Jul 2012 04:07:15 +0300

author
Teemu Piippo <crimsondusk64@gmail.com>
date
Mon, 16 Jul 2012 04:07:15 +0300
changeset 28
fb46d3d40064
parent 22
b48e10ca8832
child 30
6c4efed2dbdd
permissions
-rw-r--r--

Added comment support

/*
 *	botc source code
 *	Copyright (C) 2012 Santeri `Dusk` Piippo
 *	All rights reserved.
 *	
 *	Redistribution and use in source and binary forms, with or without
 *	modification, are permitted provided that the following conditions are met:
 *	
 *	1. Redistributions of source code must retain the above copyright notice,
 *	   this list of conditions and the following disclaimer.
 *	2. Redistributions in binary form must reproduce the above copyright notice,
 *	   this list of conditions and the following disclaimer in the documentation
 *	   and/or other materials provided with the distribution.
 *	3. Neither the name of the developer nor the names of its contributors may
 *	   be used to endorse or promote products derived from this software without
 *	   specific prior written permission.
 *	4. Redistributions in any form must be accompanied by information on how to
 *	   obtain complete source code for the software and any accompanying
 *	   software that uses the software. The source code must either be included
 *	   in the distribution or be available for no more than the cost of
 *	   distribution plus a nominal fee, and must be freely redistributable
 *	   under reasonable conditions. For an executable file, complete source
 *	   code means the source code for all modules it contains. It does not
 *	   include source code for modules or files that typically accompany the
 *	   major components of the operating system on which the executable file
 *	   runs.
 *	
 *	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 *	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 *	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 *	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *	POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include "string.h"
#include "str.h"
#include "common.h"
#include "scriptreader.h"

// Tells whether a character counts as whitespace for tokenizing purposes.
// Everything up to and including the space character (32), as well as
// DEL (127) and 255, is invisible and thus treated as whitespace.
static bool IsWhitespace (char c) {
	return c <= 32 || c == 127 || c == 255;
}

// Opens the script at `path` for reading and resets the reader's state.
// If the file cannot be opened, an error is reported and the program
// exits with status 1.
ScriptReader::ScriptReader (str path) {
	fp = fopen (path, "r");
	if (!fp) {
		error ("couldn't open %s for reading!\n", path.chars ());
		exit (1);
	}
	
	// Remember where we are reading from; used in parser messages.
	filepath = path;
	
	// No token has been read yet.
	token = "";
	pos = 0;
	
	// Line and column tracking starts at 1:1, outside of any comment.
	curline = 1;
	curchar = 1;
	atnewline = false;
	commentmode = 0;
}

// Runs the end-of-file checks and then closes the file.
// FinalChecks() goes first: it raises a parser error (which exits the
// program) if the file ended inside an unterminated `/*` comment.
ScriptReader::~ScriptReader () {
	FinalChecks ();
	fclose (fp);
}

// Reads a single character from the file and updates the line/column
// bookkeeping. Returns the character read, or 0 if nothing could be
// read (e.g. at end of file).
char ScriptReader::ReadChar () {
	// Read into a local variable. A one-byte heap buffer was previously
	// allocated here on every call and never freed.
	char ch;
	if (!fread (&ch, sizeof (char), 1, fp))
		return 0;
	
	// We're at a newline, thus next char read will begin the next line
	if (atnewline) {
		atnewline = false;
		curline++;
		curchar = 0; // gets incremented to 1
	}
	
	if (ch == '\n')
		atnewline = true;
	
	curchar++;
	return ch;
}

// Returns the character `offset` bytes ahead of the current file position
// without moving the position or touching the line/column bookkeeping.
// Returns 0 if there is no character to read there (e.g. past end of file).
char ScriptReader::PeekChar (int offset) {
	// Store current position
	long curpos = ftell (fp);
	
	// Forward by offset
	fseek (fp, offset, SEEK_CUR);
	
	// Read the character into a local; no heap allocation is needed.
	char ch = 0;
	if (!fread (&ch, sizeof (char), 1, fp))
		ch = 0;
	
	// Rewind back. This must happen even when the read failed, otherwise
	// peeking past the end of the file would leave the position there and
	// the characters that are still unread would be lost. Note that a
	// successful fseek also clears the stream's end-of-file indicator.
	fseek (fp, curpos, SEEK_SET);
	
	return ch;
}

// true if was found, false if not.
bool ScriptReader::Next () {
	str tmp = "";
	
	while (!feof (fp)) {
		// Check if the next token possibly starts a comment.
		if (PeekChar () == '/' && !tmp.len()) {
			char c2 = PeekChar (1);
			// C++-style comment
			if (c2 == '/')
				commentmode = 1;
			else if (c2 == '*')
				commentmode = 2;
			
			// We don't need to actually read in the
			// comment characters, since they will get
			// ignored due to comment mode anyway.
		}
		
		c = ReadChar ();
		
		// If this is a comment we're reading, check if this character
		// gets the comment terminated, otherwise ignore it.
		if (commentmode > 0) {
			if (commentmode == 1 && c == '\n') {
				// C++-style comments are terminated by a newline
				commentmode = 0;
				continue;
			} else if (commentmode == 2 && c == '*') {
				// C-style comments are terminated by a `*/`
				if (PeekChar() == '/') {
					commentmode = 0;
					// Now the char has to be read in since we
					// no longer are reading a comment
					ReadChar ();
				}
			}
			
			// Otherwise, ignore it.
			continue;
		}
		
		// Non-alphanumber characters (sans underscore) break the word too.
		// If there was prior data, the delimeter pushes the cursor back so
		// that the next character will be the same delimeter. If there isn't,
		// the delimeter itself is included (and thus becomes a token itself.)
		if ((c >= 33 && c <= 47) ||
			(c >= 58 && c <= 64) ||
			(c >= 91 && c <= 96 && c != '_') ||
			(c >= 123 && c <= 126)) {
			if (tmp.len())
				fseek (fp, ftell (fp) - 1, SEEK_SET);
			else
				tmp += c;
			break;
		}
		
		if (IsWhitespace (c)) {
			// Don't break if we haven't gathered anything yet.
			if (tmp.len())
				break;
		} else {
			tmp += c;
		}
	}
	
	// If we got nothing here, read failed. This should
	// only hapen in the case of EOF.
	if (!tmp.len()) {
		token = "";
		return false;
	}
	
	pos++;
	token = tmp;
	return true;
}

// Returns the next token without advancing the cursor. Returns an empty
// string if there is no next token; in that case the reader state is left
// as Next() left it.
//
// Note that `token` is still overwritten with the peeked token, since
// Next() is used to read it.
str ScriptReader::PeekNext () {
	// Store current position. ftell returns a long, so keep it as one.
	long cpos = ftell (fp);
	
	// Store the line/column bookkeeping as well. Since the file position
	// is rewound below, leaving these advanced would count the same
	// characters (and newlines) twice once the token is read for real.
	// NOTE(review): curline/curchar are assumed to be unsigned ints, as
	// they are printed with %u in ParserMessage - confirm against header.
	unsigned int savedline = curline;
	unsigned int savedchar = curchar;
	bool savednewline = atnewline;
	
	// Advance on the token.
	if (!Next ())
		return "";
	
	str tmp = token;
	
	// Restore position
	fseek (fp, cpos, SEEK_SET);
	curline = savedline;
	curchar = savedchar;
	atnewline = savednewline;
	pos--;
	
	return tmp;
}

void ScriptReader::Seek (unsigned int n, int origin) {
	switch (origin) {
	case SEEK_SET:
		fseek (fp, 0, SEEK_SET);
		pos = 0;
		break;
	case SEEK_CUR:
		break;
	case SEEK_END:
		printf ("ScriptReader::Seek: SEEK_END not yet supported.\n");
		break;
	}
	
	for (unsigned int i = 0; i < n+1; i++)
		Next();
}

// Reads the next token and requires it to exist. If `c` is a non-empty
// string, the token must additionally be equal to it. Any violation is
// reported as a parser error.
void ScriptReader::MustNext (const char* c) {
	// An empty string means that any token will do.
	const bool wantspecific = strlen (c) > 0;
	
	if (!Next()) {
		if (wantspecific)
			ParserError ("expected `%s`, reached end of file instead\n", c);
		else
			ParserError ("expected a token, reached end of file instead\n");
	}
	
	if (!wantspecific)
		return;
	
	if (token.compare (c) != 0)
		ParserError ("expected `%s`, got `%s` instead", c, token.chars());
}

// Reports a fatal parse error with a printf-style message and exits the
// program with status 1. This function does not return.
void ScriptReader::ParserError (const char* message, ...) {
	// NOTE(review): PERFORM_FORMAT is a macro defined elsewhere; presumably
	// it formats `message` with the variadic arguments into `outmessage`.
	PERFORM_FORMAT (message, outmessage);
	ParserMessage ("\nParse error\n", outmessage);
	exit (1);
}

// Reports a non-fatal warning with a printf-style message. Unlike
// ParserError, this returns to the caller.
void ScriptReader::ParserWarning (const char* message, ...) {
	// NOTE(review): PERFORM_FORMAT is a macro defined elsewhere; presumably
	// it formats `message` with the variadic arguments into `outmessage`.
	PERFORM_FORMAT (message, outmessage);
	ParserMessage ("Warning: ", outmessage);
}

// Prints `message` to stderr, prefixed with `header` and the current
// file path, line and column.
void ScriptReader::ParserMessage (const char* header, char* message) {
	fprintf (stderr, "%sIn file %s, at line %u, col %u: %s\n",
		header, filepath.chars(), curline, curchar, message);
}

void ScriptReader::MustString () {
	MustNext ("\"");
	
	str string;
	// Keep reading characters until we find a terminating quote.
	while (1) {
		// can't end here!
		if (feof (fp))
			ParserError ("unterminated string");
		
		char c = ReadChar ();
		if (c == '"')
			break;
		
		string += c;
	}
	
	token = string;
}

// Reads the next token and requires it to be a number; anything else
// is a parser error.
void ScriptReader::MustNumber () {
	MustNext ();
	
	if (token.isnumber())
		return;
	
	ParserError ("expected a number, got `%s`", token.chars());
}

// Reads the next token and requires it to be one of the accepted boolean
// spellings: 0, 1, true, false, yes or no. Anything else is a parser error.
void ScriptReader::MustBool () {
	static const char* const accepted[] = {
		"0", "1", "true", "false", "yes", "no"
	};
	const unsigned int numaccepted = sizeof accepted / sizeof accepted[0];
	
	MustNext();
	
	for (unsigned int i = 0; i < numaccepted; i++) {
		// compare yields 0 on a match
		if (token.compare (accepted[i]) == 0)
			return;
	}
	
	ParserError ("expected a boolean value, got `%s`", token.chars());
}

// Interprets the current token as a boolean: `1`, `true` and `yes` are
// true, everything else is false.
bool ScriptReader::BoolValue () {
	if (!token.compare ("1"))
		return true;
	
	if (!token.compare ("true"))
		return true;
	
	return !token.compare ("yes");
}

// Requires a value of the given type to follow, dispatching to the
// matching Must* reader. Types other than the three handled here are
// silently ignored.
void ScriptReader::MustValue (int type) {
	if (type == RETURNVAL_INT)
		MustNumber ();
	else if (type == RETURNVAL_STRING)
		MustString ();
	else if (type == RETURNVAL_BOOLEAN)
		MustBool ();
}

// Sanity checks to run once the file has been read to its end.
void ScriptReader::FinalChecks () {
	// Comment mode 1 (`//`) ends with the line, so being in it at the end
	// of the file is harmless. Only mode 2 (`/*`) can be left open.
	if (commentmode != 2)
		return;
	
	ParserError ("unterminated `/*`-style comment");
}

mercurial