--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scanner.cpp Fri Mar 15 20:11:18 2013 +0200 @@ -0,0 +1,409 @@ +/* + * botc source code + * Copyright (C) 2012 Santeri `azimuth` Piippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. Neither the name of the developer nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * 4. Redistributions in any form must be accompanied by information on how to + * obtain complete source code for the software and any accompanying + * software that uses the software. The source code must either be included + * in the distribution or be available for no more than the cost of + * distribution plus a nominal fee, and must be freely redistributable + * under reasonable conditions. For an executable file, complete source + * code means the source code for all modules it contains. It does not + * include source code for modules or files that typically accompany the + * major components of the operating system on which the executable file + * runs. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include "string.h" +#include "str.h" +#include "common.h" +#include "scanner.h" +#include "stdarg.h" + +#define STORE_POSITION \ + const bool _atnewline = bAtNewLine; \ + const ulong ulStoredLineNumber = ulaLineNumber[fc]; \ + const ulong ulStoredCurChar = ulaCurChar[fc]; + +#define RESTORE_POSITION \ + bAtNewLine = _atnewline; \ + ulaLineNumber[fc] = ulStoredLineNumber; \ + ulaCurChar[fc] = ulStoredCurChar; + +// ============================================================================ +Scanner::Scanner (str path) { + token = ""; + zPrevToken = ""; + lPrevPos = 0; + fc = -1; + + for (unsigned int u = 0; u < MAX_FILESTACK; u++) + fp[u] = NULL; + + OpenFile (path); + dCommentMode = 0; +} + +// ============================================================================ +Scanner::~Scanner () { + // If comment mode is 2 by the time the file ended, the + // comment was left unterminated. 1 is no problem, since + // it's terminated by newlines anyway. + if (dCommentMode == 2) + ParserError ("unterminated `/*`-style comment"); + + for (unsigned int u = 0; u < MAX_FILESTACK; u++) { + if (fp[u]) { + ParserWarning ("file idx %u remained open after parsing", u); + CloseFile (u); + } + } +} + +// ============================================================================ +// Opens a file and pushes its pointer to stack +void Scanner::OpenFile (str path) { + if (fc+1 >= MAX_FILESTACK) + ParserError ("supposed to open file `%s` but file stack is full! do you have recursive `#include` directives?", + path.chars()); + + // Save the position first. + if (fc != -1) { + laSavedPos[fc] = ftell (fp[fc]); + } + + fc++; + + fp[fc] = fopen (path.chars(), "r"); + if (!fp[fc]) { + ParserError ("couldn't open %s for reading!\n", path.chars ()); + exit (1); + } + + fseek (fp[fc], 0, SEEK_SET); + saFilePath[fc] = path.chars(); + ulaLineNumber[fc] = 1; + ulaCurChar[fc] = 1; + ulaPosition[fc] = 0; + bAtNewLine = 0; +} + +// ============================================================================ +// Closes the current file +void Scanner::CloseFile (unsigned int u) { + if (u >= MAX_FILESTACK) + u = fc; + + if (!fp[u]) + return; + + fclose (fp[u]); + fp[u] = NULL; + fc--; + + if (fc != -1) + fseek (fp[fc], laSavedPos[fc], SEEK_SET); +} + +// ============================================================================ +char Scanner::ReadChar () { + if (feof (fp[fc])) + return 0; + + char c; + if (!fread (&c, 1, 1, fp[fc])) + return 0; + + // We're at a newline, thus next char read will begin the next line + if (bAtNewLine) { + bAtNewLine = false; + ulaLineNumber[fc]++; + ulaCurChar[fc] = 0; // gets incremented to 1 + } + + if (c == '\n') + bAtNewLine = true; + + ulaCurChar[fc]++; + return c; +} + +// ============================================================================ +// Peeks the next character +char Scanner::PeekChar (int offset) { + // Store current position + long curpos = ftell (fp[fc]); + STORE_POSITION + + // Forward by offset + fseek (fp[fc], offset, SEEK_CUR); + + // Read the character + char* c = (char*)malloc (sizeof (char)); + + if (!fread (c, sizeof (char), 1, fp[fc])) { + fseek (fp[fc], curpos, SEEK_SET); + return 0; + } + + // Rewind back + fseek (fp[fc], curpos, SEEK_SET); + RESTORE_POSITION + + return c[0]; +} + +// ============================================================================ +// Read a token from the file buffer. Returns true if token was found, false if not. +bool Scanner::Next (bool peek) { + lPrevPos = ftell (fp[fc]); + str tmp = ""; + + while (1) { + // Check end-of-file + if (feof (fp[fc])) { + // If we're just peeking, we shouldn't + // actually close anything.. + if (peek) + break; + + CloseFile (); + if (fc == -1) + break; + } + + // Check if the next token possibly starts a comment. + if (PeekChar () == '/' && !tmp.len()) { + char c2 = PeekChar (1); + // C++-style comment + if (c2 == '/') + dCommentMode = 1; + else if (c2 == '*') + dCommentMode = 2; + + // We don't need to actually read in the + // comment characters, since they will get + // ignored due to comment mode anyway. + } + + c = ReadChar (); + + // If this is a comment we're reading, check if this character + // gets the comment terminated, otherwise ignore it. + if (dCommentMode > 0) { + if (dCommentMode == 1 && c == '\n') { + // C++-style comments are terminated by a newline + dCommentMode = 0; + continue; + } else if (dCommentMode == 2 && c == '*') { + // C-style comments are terminated by a `*/` + if (PeekChar() == '/') { + dCommentMode = 0; + ReadChar (); + } + } + + // Otherwise, ignore it. + continue; + } + + // Non-alphanumber characters (sans underscore) break the word too. + // If there was prior data, the delimeter pushes the cursor back so + // that the next character will be the same delimeter. If there isn't, + // the delimeter itself is included (and thus becomes a token itself.) + if ((c >= 33 && c <= 47) || + (c >= 58 && c <= 64) || + (c >= 91 && c <= 96 && c != '_') || + (c >= 123 && c <= 126)) { + if (tmp.len()) + fseek (fp[fc], ftell (fp[fc]) - 1, SEEK_SET); + else + tmp += c; + break; + } + + if (c <= 32 || c >= 127) { + // Don't break if we haven't gathered anything yet. + if (tmp.len()) + break; + } else { + tmp += c; + } + } + + // If we got nothing here, read failed. This should + // only happen in the case of EOF. + if (!tmp.len()) { + token = ""; + return false; + } + + ulaPosition[fc]++; + zPrevToken = token; + token = tmp; + return true; +} + +// ============================================================================ +// Returns the next token without advancing the cursor. +str Scanner::PeekNext (int offset) { + // Store current information + str storedtoken = token; + int cpos = ftell (fp[fc]); + STORE_POSITION + + // Advance on the token. + while (offset >= 0) { + if (!Next (true)) + return ""; + offset--; + } + + str tmp = token; + + // Restore position + fseek (fp[fc], cpos, SEEK_SET); + ulaPosition[fc]--; + token = storedtoken; + RESTORE_POSITION + return tmp; +} + +// ============================================================================ +void Scanner::Seek (unsigned int n, int origin) { + switch (origin) { + case SEEK_SET: + fseek (fp[fc], 0, SEEK_SET); + ulaPosition[fc] = 0; + break; + case SEEK_CUR: + break; + case SEEK_END: + printf ("ScriptReader::Seek: SEEK_END not yet supported.\n"); + break; + } + + for (unsigned int i = 0; i < n+1; i++) + Next(); +} + +// ============================================================================ +void Scanner::MustNext (const char* c) { + if (!Next()) { + if (strlen (c)) + ParserError ("expected `%s`, reached end of file instead\n", c); + else + ParserError ("expected a token, reached end of file instead\n"); + } + + if (strlen (c)) + MustThis (c); +} + +// ============================================================================ +void Scanner::MustThis (const char* c) { + if (token.compare (c) != 0) + ParserError ("expected `%s`, got `%s` instead", c, token.chars()); +} + +// ============================================================================ +void Scanner::ParserError (const char* message, ...) { + PERFORM_FORMAT (message, outmessage); + ParserMessage ("\nError: ", outmessage); + exit (1); +} + +// ============================================================================ +void Scanner::ParserWarning (const char* message, ...) { + PERFORM_FORMAT (message, outmessage); + ParserMessage ("Warning: ", outmessage); +} + +// ============================================================================ +void Scanner::ParserMessage (const char* header, char* message) { + if (fc >= 0 && fc < MAX_FILESTACK) + fprintf (stderr, "%s%s:%lu:%lu: %s\n", + header, saFilePath[fc], ulaLineNumber[fc], ulaCurChar[fc], message); + else + fprintf (stderr, "%s%s\n", header, message); +} + +// ============================================================================ +// if gotquote == 1, the current token already holds the quotation mark. +void Scanner::MustString (bool gotquote) { + if (gotquote) + MustThis ("\""); + else + MustNext ("\""); + + str string; + // Keep reading characters until we find a terminating quote. + while (1) { + // can't end here! + if (feof (fp[fc])) + ParserError ("unterminated string"); + + char c = ReadChar (); + if (c == '"') + break; + + string += c; + } + + token = string; +} + +// ============================================================================ +void Scanner::MustNumber (bool fromthis) { + if (!fromthis) + MustNext (); + + str num = token; + if (!num.compare ("-")) { + MustNext (); + num += token; + } + + // "true" and "false" are valid numbers + if (!token.icompare ("true")) + token = "1"; + else if (!token.icompare ("false")) + token = "0"; + else { + if (!token.isnumber()) + ParserError ("expected a number, got `%s`", num.chars()); + + str check; + check.appendformat ("%d", atoi (num.chars ())); + if (token.compare (check) != 0) + ParserWarning ("integer too large: %s -> %s", num.chars(), check.chars()); + + token = num; + } +} \ No newline at end of file