Added a preprocessor with proper #include support. Macro support via #define is planned too. God, was it a B-I-T-C-H to get working right, though..

Tue, 17 Jul 2012 20:35:43 +0300

author
Teemu Piippo <crimsondusk64@gmail.com>
date
Tue, 17 Jul 2012 20:35:43 +0300
changeset 33
fd35f6cb5f28
parent 32
d11a034aabfd
child 34
0a9a5902beaa

Added a preprocessor with proper #include support. Macro support via #define is planned too. God, was it a B-I-T-C-H to get working right, though..

Makefile file | annotate | diff | comparison | revisions
commands.cxx file | annotate | diff | comparison | revisions
common.h file | annotate | diff | comparison | revisions
main.cxx file | annotate | diff | comparison | revisions
objwriter.cxx file | annotate | diff | comparison | revisions
parser.cxx file | annotate | diff | comparison | revisions
preprocessor.cxx file | annotate | diff | comparison | revisions
scriptreader.cxx file | annotate | diff | comparison | revisions
scriptreader.h file | annotate | diff | comparison | revisions
--- a/Makefile	Mon Jul 16 16:15:16 2012 +0300
+++ b/Makefile	Tue Jul 17 20:35:43 2012 +0300
@@ -8,7 +8,9 @@
 	g++ -Wall -c -o commands.o commands.cxx
 	g++ -Wall -c -o stringtable.o stringtable.cxx
 	g++ -Wall -c -o variables.o variables.cxx
-	g++ -Wall -o botc scriptreader.o objwriter.o str.o main.o parser.o events.o commands.o stringtable.o variables.o
+	g++ -Wall -c -o preprocessor.o preprocessor.cxx
+	g++ -Wall -o botc scriptreader.o objwriter.o str.o main.o parser.o events.o \
+		commands.o stringtable.o variables.o preprocessor.o
 
 clean:
 	rm -f *.o *~ botc
--- a/commands.cxx	Mon Jul 16 16:15:16 2012 +0300
+++ b/commands.cxx	Tue Jul 17 20:35:43 2012 +0300
@@ -142,6 +142,7 @@
 		numCommDefs++;
 	}
 	
+	r->CloseFile ();
 	delete r;
 	
 	if (!numCommDefs)
--- a/common.h	Mon Jul 16 16:15:16 2012 +0300
+++ b/common.h	Tue Jul 17 20:35:43 2012 +0300
@@ -54,7 +54,7 @@
 
 // On Windows, files are case-insensitive
 #if (defined(WIN32) || defined(_WIN32) || defined(__WIN32)) && !defined(__CYGWIN__)
-	#define FILE_CASEINSENSITIVE 0
+	#define FILE_CASEINSENSITIVE
 #endif
 
 // Where is the parser at?
@@ -107,4 +107,9 @@
 	return r;
 }
 
+// Whitespace check: byte values 0-32, 127 and 255 (signedness of `char` is masked out)
+inline bool IsCharWhitespace (char c) {
+	return ((c & 0xFF) <= 32 || c == 127 || (c & 0xFF) == 255);
+}
+
 #endif // __COMMON_H__
\ No newline at end of file
--- a/main.cxx	Mon Jul 16 16:15:16 2012 +0300
+++ b/main.cxx	Tue Jul 17 20:35:43 2012 +0300
@@ -161,7 +161,7 @@
 char* ObjectFileName (str s) {
 	// Locate the extension and chop it out
 	unsigned int extdot = s.last (".");
-	if (extdot != s.len() && extdot >= s.len()-4)
+	if (extdot >= s.len()-4)
 		s.trim (s.len() - extdot);
 	
 	// Add new ".o" extension
--- a/objwriter.cxx	Mon Jul 16 16:15:16 2012 +0300
+++ b/objwriter.cxx	Tue Jul 17 20:35:43 2012 +0300
@@ -113,7 +113,7 @@
 	for (unsigned int x = 0; x < MainBuffer->writesize; x++) {
 		unsigned char c = *(MainBuffer->buffer+x);
 		fwrite (&c, 1, 1, fp);
-		numWrittenBytes ++;
+		numWrittenBytes++;
 	}
 	
 	printf ("-- %u byte%s written to %s\n", numWrittenBytes, PLURAL (numWrittenBytes), filepath.chars());
--- a/parser.cxx	Mon Jul 16 16:15:16 2012 +0300
+++ b/parser.cxx	Tue Jul 17 20:35:43 2012 +0300
@@ -63,20 +63,6 @@
 
 void ScriptReader::BeginParse (ObjWriter* w) {
 	while (Next()) {
-		// printf ("got token %s\n", token.chars());
-		if (!token.icompare ("#include")) {
-			MustString ();
-			
-			// First ensure that the file can be opened
-			FILE* newfile = fopen (token.chars(), "r");
-			if (!newfile)
-				ParserError ("couldn't open included file `%s`!", token.chars());
-			fclose (newfile);
-			ScriptReader* newreader = new ScriptReader (token.chars());
-			newreader->BeginParse (w);
-			continue;
-		}
-		
 		if (!token.icompare ("state")) {
 			MUST_TOPLEVEL
 			
@@ -89,7 +75,7 @@
 			
 			// stateSpawn is special - it *must* be defined. If we
 			// encountered it, then mark down that we have it.
-			if (!token.icompare ("stateSpawn"))
+			if (!token.icompare ("statespawn"))
 				g_stateSpawnDefined = true;
 			
 			// Must end in a colon
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocessor.cxx	Tue Jul 17 20:35:43 2012 +0300
@@ -0,0 +1,117 @@
+/*
+ *	botc source code
+ *	Copyright (C) 2012 Santeri `Dusk` Piippo
+ *	All rights reserved.
+ *	
+ *	Redistribution and use in source and binary forms, with or without
+ *	modification, are permitted provided that the following conditions are met:
+ *	
+ *	1. Redistributions of source code must retain the above copyright notice,
+ *	   this list of conditions and the following disclaimer.
+ *	2. Redistributions in binary form must reproduce the above copyright notice,
+ *	   this list of conditions and the following disclaimer in the documentation
+ *	   and/or other materials provided with the distribution.
+ *	3. Neither the name of the developer nor the names of its contributors may
+ *	   be used to endorse or promote products derived from this software without
+ *	   specific prior written permission.
+ *	4. Redistributions in any form must be accompanied by information on how to
+ *	   obtain complete source code for the software and any accompanying
+ *	   software that uses the software. The source code must either be included
+ *	   in the distribution or be available for no more than the cost of
+ *	   distribution plus a nominal fee, and must be freely redistributable
+ *	   under reasonable conditions. For an executable file, complete source
+ *	   code means the source code for all modules it contains. It does not
+ *	   include source code for modules or files that typically accompany the
+ *	   major components of the operating system on which the executable file
+ *	   runs.
+ *	
+ *	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ *	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *	POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define __PARSER_CXX__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "common.h"
+#include "str.h"
+#include "scriptreader.h"
+
+/* Bare-bones ReadChar for the preprocessor. The preprocessor is itself
+ * called from ReadChar, so it reads with this variant to avoid recursive
+ * preprocessing. Returns the next character of the topmost file and bumps
+ * its column counter; returns 0 when nothing could be read (e.g. at EOF). */
+char ScriptReader::PPReadChar () {
+	char c; // read into a local: the malloc'd byte used before was never freed
+	if (!fread (&c, sizeof (char), 1, fp[fc]))
+		return 0;
+	curchar[fc]++;
+	return c;
+}
+
+void ScriptReader::PPMustChar (char c) {
+	// Read one character; getting anything other than `c` is a fatal error.
+	const char got = PPReadChar ();
+	if (got != c)
+		ParserError ("expected `%c`, got `%d`", c, got);
+}
+
+// Reads the next word: leading whitespace is skipped, then characters are
+// gathered up to whitespace/EOF/read error. `term` gets the last char read.
+str ScriptReader::PPReadWord (char &term) {
+	str word;
+	while (1) {
+		term = PPReadChar();
+		if (feof (fp[fc]) || ferror (fp[fc]) || (IsCharWhitespace (term) && word.len ()))
+			break;
+		if (!IsCharWhitespace (term)) // leading whitespace must not become part of the word
+			word += term;
+	}
+	return word;
+}
+
+void ScriptReader::PreprocessDirectives () { // probes for a `#directive` at the cursor
+	const long spos = ftell (fp[fc]); // ftell returns long (-1 on failure), not size_t
+	if (!DoDirectivePreprocessing () && spos >= 0) // no directive here: rewind the probe
+		fseek (fp[fc], spos, SEEK_SET);
+}
+
+/* Tries to process one directive at the cursor of the topmost file.
+ * Returns false, after reading at most one character, if the next
+ * character is not `#`; the caller (PreprocessDirectives) then seeks
+ * back so the text is parsed normally. Returns true once a directive
+ * was handled; an unknown directive name is a fatal ParserError. */
+bool ScriptReader::DoDirectivePreprocessing () {
+	// Every directive opens with a pound sign
+	if (PPReadChar() != '#')
+		return false;
+	
+	// The word right after the `#` names the directive; the
+	// character that terminated it is not needed.
+	char nameEnd;
+	str name = PPReadWord (nameEnd);
+	
+	// Reject anything that isn't a known directive
+	if (name.icompare ("include")) {
+		ParserError ("unknown directive `#%s`!", name.chars());
+		return false;
+	}
+	
+	// #include-directive: the next word is the path of the file to open
+	char fileEnd;
+	str file = PPReadWord (fileEnd);
+	if (!file.len())
+		ParserError ("expected file name for #include, got nothing instead");
+	
+	OpenFile (file);
+	return true;
+}
\ No newline at end of file
--- a/scriptreader.cxx	Mon Jul 16 16:15:16 2012 +0300
+++ b/scriptreader.cxx	Tue Jul 17 20:35:43 2012 +0300
@@ -45,76 +45,136 @@
 #include "common.h"
 #include "scriptreader.h"
 
-static bool IsWhitespace (char c) {
-	// These characters are invisible, thus considered whitespace
-	if (c <= 32 || c == 127 || c == 255)
-		return true;
+ScriptReader::ScriptReader (str path) {
+	token = "";
+	fc = -1;
 	
-	return false;
-}
-
-ScriptReader::ScriptReader (str path) {
-	if (!(fp = fopen (path, "r"))) {
-		error ("couldn't open %s for reading!\n", path.chars ());
-		exit (1);
-	}
+	for (unsigned int u = 0; u < MAX_FILESTACK; u++)
+		fp[u] = NULL;
 	
-	filepath = path;
-	curline = 1;
-	curchar = 1;
-	pos = 0;
-	token = "";
-	atnewline = false;
+	OpenFile (path);
 	commentmode = 0;
 }
 
 ScriptReader::~ScriptReader () {
 	FinalChecks ();
-	fclose (fp);
+	
+	for (unsigned int u = 0; u < MAX_FILESTACK; u++) {
+		if (fp[u]) {
+			ParserWarning ("file idx %u remained open after parsing", u);
+			CloseFile (u);
+		}
+	}
+}
+
+// Opens a file and pushes its pointer to stack. Failure is a fatal ParserError.
+void ScriptReader::OpenFile (str path) {
+	if (fc+1 >= MAX_FILESTACK)
+		ParserError ("supposed to open file `%s` but file stack is full! "
+			"do you have recursive `#include` directives?",
+			path.chars());
+	
+	// Open before pushing: were `fc` bumped first, a failure would make
+	// ParserMessage print the new slot's uninitialized filepath/line/column.
+	FILE* newfp = fopen (path, "r");
+	if (!newfp)
+		ParserError ("couldn't open %s for reading!\n", path.chars ());
+	
+	// Save the position of the including file, if there is one.
+	if (fc != -1)
+		savedpos[fc] = ftell (fp[fc]);
+	
+	fc++;
+	fp[fc] = newfp;
+	// NOTE(review): `path` is a by-value parameter; confirm the pointer
+	// returned by chars() stays valid after this function returns.
+	filepath[fc] = path.chars();
+	curline[fc] = 1;
+	curchar[fc] = 1;
+	pos[fc] = 0;
+	atnewline = 0;
+}
+
+void ScriptReader::CloseFile (unsigned int u) { // closes fp[u] and pops the file stack
+	if (u >= MAX_FILESTACK) // default argument: close the topmost file
+		u = fc;
+	// With no file open, fc == -1 converts to an out-of-range index: bail out
+	if (u >= MAX_FILESTACK || !fp[u])
+		return;
+	
+	fclose (fp[u]);
+	fp[u] = NULL;
+	fc--;
+	
+	if (fc != -1 && fp[fc]) // the file below may already be closed (destructor sweep)
+		fseek (fp[fc], savedpos[fc], SEEK_SET);
 }
 
 char ScriptReader::ReadChar () {
+	if (feof (fp[fc]))
+		return 0;
+	
 	char* c = (char*)malloc (sizeof (char));
-	if (!fread (c, sizeof (char), 1, fp))
+	if (!fread (c, sizeof (char), 1, fp[fc]))
 		return 0;
 	
 	// We're at a newline, thus next char read will begin the next line
 	if (atnewline) {
 		atnewline = false;
-		curline++;
-		curchar = 0; // gets incremented to 1
+		curline[fc]++;
+		curchar[fc] = 0; // gets incremented to 1
 	}
 	
-	if (c[0] == '\n')
+	if (c[0] == '\n') {
 		atnewline = true;
+		
+		// Check for pre-processor directives
+		PreprocessDirectives ();
+	}
 	
-	curchar++;
+	curchar[fc]++;
 	return c[0];
 }
 
 char ScriptReader::PeekChar (int offset) {
 	// Store current position
-	long curpos = ftell (fp);
+	long curpos = ftell (fp[fc]);
 	
 	// Forward by offset
-	fseek (fp, offset, SEEK_CUR);
+	fseek (fp[fc], offset, SEEK_CUR);
 	
 	// Read the character
 	char* c = (char*)malloc (sizeof (char));
-	if (!fread (c, sizeof (char), 1, fp))
+	
+	if (!fread (c, sizeof (char), 1, fp[fc])) {
+		fseek (fp[fc], curpos, SEEK_SET);
 		return 0;
+	}
 	
 	// Rewind back
-	fseek (fp, curpos, SEEK_SET);
+	fseek (fp[fc], curpos, SEEK_SET);
 	
 	return c[0];
 }
 
 // true if was found, false if not.
-bool ScriptReader::Next () {
+bool ScriptReader::Next (bool peek) {
 	str tmp = "";
+	// printf ("begin token\n");
 	
-	while (!feof (fp)) {
+	while (1) {
+		// Check end-of-file
+		if (feof (fp[fc])) {
+			// If we're just peeking, we shouldn't
+			// actually close anything.. 
+			if (peek)
+				break;
+			
+			CloseFile ();
+			if (fc == -1)
+				break;
+		}
+		
 		// Check if the next token possibly starts a comment.
 		if (PeekChar () == '/' && !tmp.len()) {
 			char c2 = PeekChar (1);
@@ -130,6 +190,7 @@
 		}
 		
 		c = ReadChar ();
+		// printf ("add char [%d] `%c`\n", c, c);
 		
 		// If this is a comment we're reading, check if this character
 		// gets the comment terminated, otherwise ignore it.
@@ -142,8 +203,6 @@
 				// C-style comments are terminated by a `*/`
 				if (PeekChar() == '/') {
 					commentmode = 0;
-					// Now the char has to be read in since we
-					// no longer are reading a comment
 					ReadChar ();
 				}
 			}
@@ -161,13 +220,13 @@
 			(c >= 91 && c <= 96 && c != '_') ||
 			(c >= 123 && c <= 126)) {
 			if (tmp.len())
-				fseek (fp, ftell (fp) - 1, SEEK_SET);
+				fseek (fp[fc], ftell (fp[fc]) - 1, SEEK_SET);
 			else
 				tmp += c;
 			break;
 		}
 		
-		if (IsWhitespace (c)) {
+		if (IsCharWhitespace (c)) {
 			// Don't break if we haven't gathered anything yet.
 			if (tmp.len())
 				break;
@@ -183,7 +242,7 @@
 		return false;
 	}
 	
-	pos++;
+	pos[fc]++;
 	token = tmp;
 	return true;
 }
@@ -191,17 +250,17 @@
 // Returns the next token without advancing the cursor.
 str ScriptReader::PeekNext () {
 	// Store current position
-	int cpos = ftell (fp);
+	int cpos = ftell (fp[fc]);
 	
 	// Advance on the token.
-	if (!Next ())
+	if (!Next (true))
 		return "";
 	
 	str tmp = token;
 	
 	// Restore position
-	fseek (fp, cpos, SEEK_SET);
-	pos--;
+	fseek (fp[fc], cpos, SEEK_SET);
+	pos[fc]--;
 	
 	return tmp;
 }
@@ -209,8 +268,8 @@
 void ScriptReader::Seek (unsigned int n, int origin) {
 	switch (origin) {
 	case SEEK_SET:
-		fseek (fp, 0, SEEK_SET);
-		pos = 0;
+		fseek (fp[fc], 0, SEEK_SET);
+		pos[fc] = 0;
 		break;
 	case SEEK_CUR:
 		break;
@@ -238,7 +297,7 @@
 
 void ScriptReader::ParserError (const char* message, ...) {
 	PERFORM_FORMAT (message, outmessage);
-	ParserMessage ("\nParse error\n", outmessage);
+	ParserMessage ("\nError: ", outmessage);
 	exit (1);
 }
 
@@ -248,8 +307,11 @@
 }
 
 void ScriptReader::ParserMessage (const char* header, char* message) {
-	fprintf (stderr, "%sIn file %s, at line %u, col %u: %s\n",
-		header, filepath.chars(), curline, curchar, message);
+	if (fc >= 0 && fc < MAX_FILESTACK)
+		fprintf (stderr, "%sIn file %s, at line %u, col %u: %s\n",
+			header, filepath[fc], curline[fc], curchar[fc], message);
+	else
+		fprintf (stderr, "%s%s\n", header, message);
 }
 
 void ScriptReader::MustString () {
@@ -259,7 +321,7 @@
 	// Keep reading characters until we find a terminating quote.
 	while (1) {
 		// can't end here!
-		if (feof (fp))
+		if (feof (fp[fc]))
 			ParserError ("unterminated string");
 		
 		char c = ReadChar ();
--- a/scriptreader.h	Mon Jul 16 16:15:16 2012 +0300
+++ b/scriptreader.h	Tue Jul 17 20:35:43 2012 +0300
@@ -46,15 +46,20 @@
 #include "objwriter.h"
 #include "commands.h"
 
+#define MAX_FILESTACK 8
+
 class ScriptReader {
 public:
 	// ====================================================================
 	// MEMBERS
-	FILE* fp;
-	str filepath;
-	unsigned int pos;
-	unsigned int curline;
-	unsigned int curchar;
+	FILE* fp[MAX_FILESTACK];
+	char* filepath[MAX_FILESTACK];
+	int fc;
+	
+	unsigned int pos[MAX_FILESTACK];
+	unsigned int curline[MAX_FILESTACK];
+	unsigned int curchar[MAX_FILESTACK];
+	long savedpos[MAX_FILESTACK]; // filepointer cursor position
 	str token;
 	int commentmode;
 	
@@ -63,9 +68,11 @@
 	// scriptreader.cxx:
 	ScriptReader (str path);
 	~ScriptReader ();
+	void OpenFile (str path);
+	void CloseFile (unsigned int u = MAX_FILESTACK);
 	char ReadChar ();
 	char PeekChar (int offset = 0);
-	bool Next ();
+	bool Next (bool peek = false);
 	str PeekNext ();
 	void Seek (unsigned int n, int origin);
 	void MustNext (const char* c = "");
@@ -84,10 +91,19 @@
 	void BeginParse (ObjWriter* w);
 	void ParseCommand (CommandDef* comm, ObjWriter* w);
 	
+	// preprocessor.cxx:
+	void PreprocessDirectives ();
+	void PreprocessMacros ();
+	
 private:
 	bool atnewline;
 	char c;
 	void ParserMessage (const char* header, char* message);
+	
+	bool DoDirectivePreprocessing ();
+	char PPReadChar ();
+	void PPMustChar (char c);
+	str PPReadWord (char &term);
 };
 
 #endif // __SCRIPTREADER_H__
\ No newline at end of file

mercurial