src/lexer_scanner.cc

changeset 79
2425fa6a4f21
parent 75
bf8c57437231
child 82
841562f5a32f
--- a/src/lexer_scanner.cc	Fri Jan 17 21:42:02 2014 +0200
+++ b/src/lexer_scanner.cc	Fri Jan 17 22:55:13 2014 +0200
@@ -86,7 +86,7 @@
 	"onexit",
 	"state",
 	"switch",
-	"str"
+	"str",
 	"void",
 	"while",
 	"enum",
@@ -94,7 +94,7 @@
 	"return",
 };
 
-static_assert (countof (g_token_strings) == (int) last_named_token,
+static_assert (countof (g_token_strings) == (int) tk_last_named_token + 1,
 	"Count of g_token_strings is not the same as the amount of named token identifiers.");
 
 // =============================================================================
@@ -126,8 +126,8 @@
 {
 	bool r = strncmp (m_ptr, c, strlen (c)) == 0;
 
-	// There is to be whitespace after words
-	if (r && (flags & f_check_word) && !isspace (m_ptr[strlen (c)]))
+	// There is to be a non-symbol character after words
+	if (r && (flags & f_check_word) && is_symbol_char (m_ptr[strlen (c)], true))
 		r = false;
 
 	// Advance the cursor unless we want to just peek
@@ -143,41 +143,27 @@
 {
 	m_token_text = "";
 
-	while (isspace (*m_ptr) == true)
-	{
-		if (*m_ptr == '\n')
-		{
-			m_line++;
-			m_line_break_pos = m_ptr;
-		}
-
-		m_ptr++;
-	}
+	while (isspace (*m_ptr))
+		skip();
 
 	// Check for comments
 	if (strncmp (m_ptr, "//", 2) == 0)
 	{
 		m_ptr += 2;
 
-		while (*(++m_ptr) != '\n')
-			;
+		while (*m_ptr != '\n')
+			skip();
 
 		return get_next_token();
 	}
 	elif (strncmp (m_ptr, "/*", 2) == 0)
 	{
-		m_ptr += 2;
+		skip (2); // skip the start symbols
 
-		while (strncmp (++m_ptr, "*/", 2) != 0)
-		{
-			if (*m_ptr == '\n')
-			{
-				m_line++;
-				m_line_break_pos = m_ptr;
-			}
-		}
+		while (strncmp (m_ptr, "*/", 2) != 0)
+			skip();
 
-		m_ptr += 2; // skip the */
+		skip (2); // skip the end symbols
 		return get_next_token();
 	}
 
@@ -185,9 +171,14 @@
 		return false;
 
 	// Check tokens
-	for (int i = 0; i < (int) (sizeof g_token_strings / sizeof * g_token_strings); ++i)
+	for (int i = 0; i < countof (g_token_strings); ++i)
 	{
-		if (check_string (g_token_strings[i], f_check_word))
+		int flags = 0;
+
+		if (i >= tk_first_named_token)
+			flags |= f_check_word;
+
+		if (check_string (g_token_strings[i], flags))
 		{
 			m_token_text = g_token_strings[i];
 			m_token_type = (e_token) i;
@@ -229,8 +220,6 @@
 		return true;
 	}
 
-	m_token_type = tk_symbol;
-
 	if (isdigit (*m_ptr))
 	{
 		while (isdigit (*m_ptr))
@@ -240,25 +229,13 @@
 		return true;
 	}
 
-	if (is_symbol_char (*m_ptr))
+	if (is_symbol_char (*m_ptr, false))
 	{
+		m_token_type = tk_symbol;
+
 		while (m_ptr != '\0')
 		{
-			if (!is_symbol_char (*m_ptr))
-				break;
-
-			bool stop_here = false;
-
-			for (string i : g_token_strings)
-			{
-				if (check_string (i, f_check_peek | f_check_word))
-				{
-					stop_here = true;
-					break;
-				}
-			}
-
-			if (stop_here)
+			if (!is_symbol_char (*m_ptr, true))
 				break;
 
 			m_token_text += *m_ptr++;
@@ -273,8 +250,29 @@
 
 // =============================================================================
 //
+void lexer_scanner::skip()
+{
+	if (*m_ptr == '\n')
+	{
+		m_line++;
+		m_line_break_pos = m_ptr;
+	}
+
+	m_ptr++;
+}
+
+// =============================================================================
+//
+void lexer_scanner::skip (int chars)
+{
+	for (int i = 0; i < chars; ++i)
+		skip();
+}
+
+// =============================================================================
+//
 string lexer_scanner::get_token_string (e_token a)
 {
-	assert ((int) a <= last_named_token);
+	assert ((int) a <= tk_last_named_token);
 	return g_token_strings[a];
 }

mercurial