Fri, 15 Mar 2013 20:11:18 +0200
Initial commit
/* * botc source code * Copyright (C) 2012 Santeri `azimuth` Piippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * 3. Neither the name of the developer nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * 4. Redistributions in any form must be accompanied by information on how to * obtain complete source code for the software and any accompanying * software that uses the software. The source code must either be included * in the distribution or be available for no more than the cost of * distribution plus a nominal fee, and must be freely redistributable * under reasonable conditions. For an executable file, complete source * code means the source code for all modules it contains. It does not * include source code for modules or files that typically accompany the * major components of the operating system on which the executable file * runs. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <stdio.h> #include <stdlib.h> #include "string.h" #include "str.h" #include "common.h" #include "scanner.h" #include "stdarg.h" #define STORE_POSITION \ const bool _atnewline = bAtNewLine; \ const ulong ulStoredLineNumber = ulaLineNumber[fc]; \ const ulong ulStoredCurChar = ulaCurChar[fc]; #define RESTORE_POSITION \ bAtNewLine = _atnewline; \ ulaLineNumber[fc] = ulStoredLineNumber; \ ulaCurChar[fc] = ulStoredCurChar; // ============================================================================ Scanner::Scanner (str path) { token = ""; zPrevToken = ""; lPrevPos = 0; fc = -1; for (unsigned int u = 0; u < MAX_FILESTACK; u++) fp[u] = NULL; OpenFile (path); dCommentMode = 0; } // ============================================================================ Scanner::~Scanner () { // If comment mode is 2 by the time the file ended, the // comment was left unterminated. 1 is no problem, since // it's terminated by newlines anyway. if (dCommentMode == 2) ParserError ("unterminated `/*`-style comment"); for (unsigned int u = 0; u < MAX_FILESTACK; u++) { if (fp[u]) { ParserWarning ("file idx %u remained open after parsing", u); CloseFile (u); } } } // ============================================================================ // Opens a file and pushes its pointer to stack void Scanner::OpenFile (str path) { if (fc+1 >= MAX_FILESTACK) ParserError ("supposed to open file `%s` but file stack is full! do you have recursive `#include` directives?", path.chars()); // Save the position first. if (fc != -1) { laSavedPos[fc] = ftell (fp[fc]); } fc++; fp[fc] = fopen (path.chars(), "r"); if (!fp[fc]) { ParserError ("couldn't open %s for reading!\n", path.chars ()); exit (1); } fseek (fp[fc], 0, SEEK_SET); saFilePath[fc] = path.chars(); ulaLineNumber[fc] = 1; ulaCurChar[fc] = 1; ulaPosition[fc] = 0; bAtNewLine = 0; } // ============================================================================ // Closes the current file void Scanner::CloseFile (unsigned int u) { if (u >= MAX_FILESTACK) u = fc; if (!fp[u]) return; fclose (fp[u]); fp[u] = NULL; fc--; if (fc != -1) fseek (fp[fc], laSavedPos[fc], SEEK_SET); } // ============================================================================ char Scanner::ReadChar () { if (feof (fp[fc])) return 0; char c; if (!fread (&c, 1, 1, fp[fc])) return 0; // We're at a newline, thus next char read will begin the next line if (bAtNewLine) { bAtNewLine = false; ulaLineNumber[fc]++; ulaCurChar[fc] = 0; // gets incremented to 1 } if (c == '\n') bAtNewLine = true; ulaCurChar[fc]++; return c; } // ============================================================================ // Peeks the next character char Scanner::PeekChar (int offset) { // Store current position long curpos = ftell (fp[fc]); STORE_POSITION // Forward by offset fseek (fp[fc], offset, SEEK_CUR); // Read the character char* c = (char*)malloc (sizeof (char)); if (!fread (c, sizeof (char), 1, fp[fc])) { fseek (fp[fc], curpos, SEEK_SET); return 0; } // Rewind back fseek (fp[fc], curpos, SEEK_SET); RESTORE_POSITION return c[0]; } // ============================================================================ // Read a token from the file buffer. Returns true if token was found, false if not. bool Scanner::Next (bool peek) { lPrevPos = ftell (fp[fc]); str tmp = ""; while (1) { // Check end-of-file if (feof (fp[fc])) { // If we're just peeking, we shouldn't // actually close anything.. if (peek) break; CloseFile (); if (fc == -1) break; } // Check if the next token possibly starts a comment. if (PeekChar () == '/' && !tmp.len()) { char c2 = PeekChar (1); // C++-style comment if (c2 == '/') dCommentMode = 1; else if (c2 == '*') dCommentMode = 2; // We don't need to actually read in the // comment characters, since they will get // ignored due to comment mode anyway. } c = ReadChar (); // If this is a comment we're reading, check if this character // gets the comment terminated, otherwise ignore it. if (dCommentMode > 0) { if (dCommentMode == 1 && c == '\n') { // C++-style comments are terminated by a newline dCommentMode = 0; continue; } else if (dCommentMode == 2 && c == '*') { // C-style comments are terminated by a `*/` if (PeekChar() == '/') { dCommentMode = 0; ReadChar (); } } // Otherwise, ignore it. continue; } // Non-alphanumber characters (sans underscore) break the word too. // If there was prior data, the delimeter pushes the cursor back so // that the next character will be the same delimeter. If there isn't, // the delimeter itself is included (and thus becomes a token itself.) if ((c >= 33 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96 && c != '_') || (c >= 123 && c <= 126)) { if (tmp.len()) fseek (fp[fc], ftell (fp[fc]) - 1, SEEK_SET); else tmp += c; break; } if (c <= 32 || c >= 127) { // Don't break if we haven't gathered anything yet. if (tmp.len()) break; } else { tmp += c; } } // If we got nothing here, read failed. This should // only happen in the case of EOF. if (!tmp.len()) { token = ""; return false; } ulaPosition[fc]++; zPrevToken = token; token = tmp; return true; } // ============================================================================ // Returns the next token without advancing the cursor. str Scanner::PeekNext (int offset) { // Store current information str storedtoken = token; int cpos = ftell (fp[fc]); STORE_POSITION // Advance on the token. while (offset >= 0) { if (!Next (true)) return ""; offset--; } str tmp = token; // Restore position fseek (fp[fc], cpos, SEEK_SET); ulaPosition[fc]--; token = storedtoken; RESTORE_POSITION return tmp; } // ============================================================================ void Scanner::Seek (unsigned int n, int origin) { switch (origin) { case SEEK_SET: fseek (fp[fc], 0, SEEK_SET); ulaPosition[fc] = 0; break; case SEEK_CUR: break; case SEEK_END: printf ("ScriptReader::Seek: SEEK_END not yet supported.\n"); break; } for (unsigned int i = 0; i < n+1; i++) Next(); } // ============================================================================ void Scanner::MustNext (const char* c) { if (!Next()) { if (strlen (c)) ParserError ("expected `%s`, reached end of file instead\n", c); else ParserError ("expected a token, reached end of file instead\n"); } if (strlen (c)) MustThis (c); } // ============================================================================ void Scanner::MustThis (const char* c) { if (token.compare (c) != 0) ParserError ("expected `%s`, got `%s` instead", c, token.chars()); } // ============================================================================ void Scanner::ParserError (const char* message, ...) { PERFORM_FORMAT (message, outmessage); ParserMessage ("\nError: ", outmessage); exit (1); } // ============================================================================ void Scanner::ParserWarning (const char* message, ...) { PERFORM_FORMAT (message, outmessage); ParserMessage ("Warning: ", outmessage); } // ============================================================================ void Scanner::ParserMessage (const char* header, char* message) { if (fc >= 0 && fc < MAX_FILESTACK) fprintf (stderr, "%s%s:%lu:%lu: %s\n", header, saFilePath[fc], ulaLineNumber[fc], ulaCurChar[fc], message); else fprintf (stderr, "%s%s\n", header, message); } // ============================================================================ // if gotquote == 1, the current token already holds the quotation mark. void Scanner::MustString (bool gotquote) { if (gotquote) MustThis ("\""); else MustNext ("\""); str string; // Keep reading characters until we find a terminating quote. while (1) { // can't end here! if (feof (fp[fc])) ParserError ("unterminated string"); char c = ReadChar (); if (c == '"') break; string += c; } token = string; } // ============================================================================ void Scanner::MustNumber (bool fromthis) { if (!fromthis) MustNext (); str num = token; if (!num.compare ("-")) { MustNext (); num += token; } // "true" and "false" are valid numbers if (!token.icompare ("true")) token = "1"; else if (!token.icompare ("false")) token = "0"; else { if (!token.isnumber()) ParserError ("expected a number, got `%s`", num.chars()); str check; check.appendformat ("%d", atoi (num.chars ())); if (token.compare (check) != 0) ParserWarning ("integer too large: %s -> %s", num.chars(), check.chars()); token = num; } }