botc: comparison src/lexer_scanner.cc

-:e6d7e32e6481
+:2425fa6a4f21
 	"mainloop",
 	"onenter",
 	"onexit",
 	"state",
 	"switch",
-	"str"
+	"str",
 	"void",
 	"while",
 	"enum",
 	"func",
 	"return",
 };
-static_assert (countof (g_token_strings) == (int) last_named_token,
+static_assert (countof (g_token_strings) == (int) tk_last_named_token + 1, // +1: the table needs one entry for every index 0..tk_last_named_token
 	"Count of g_token_strings is not the same as the amount of named token identifiers.");
 // =============================================================================
 //
 lexer_scanner::lexer_scanner (FILE* fp) :
 //
 bool lexer_scanner::check_string (const char* c, int flags)
 {
 	bool r = strncmp (m_ptr, c, strlen (c)) == 0;
-	// There is to be whitespace after words
+	// There is to be a non-symbol character after words
-	if (r && (flags & f_check_word) && !isspace (m_ptr[strlen (c)]))
+	if (r && (flags & f_check_word) && is_symbol_char (m_ptr[strlen (c)], true))
 		r = false;
 	// Advance the cursor unless we want to just peek
 	if (r && !(flags & f_check_peek))
 		m_ptr += strlen (c);
 //
 bool lexer_scanner::get_next_token()
 {
 	m_token_text = "";
-	while (isspace (*m_ptr) == true)
+	while (isspace (*m_ptr))
-	{
+		skip();
-		if (*m_ptr == '\n')
-		{
-			m_line++;
-			m_line_break_pos = m_ptr;
-		}
-		m_ptr++;
-	}
 	// Check for comments
 	if (strncmp (m_ptr, "//", 2) == 0)
 	{
 		m_ptr += 2;
-		while (*(++m_ptr) != '\n')
+		while (*m_ptr != '\n')
-			;
+			skip();
 		return get_next_token();
 	}
 	elif (strncmp (m_ptr, "/*", 2) == 0)
 	{
-		m_ptr += 2;
+		skip (2); // skip the start symbols
-		while (strncmp (++m_ptr, "*/", 2) != 0)
+		while (strncmp (m_ptr, "*/", 2) != 0)
-		{
+			skip();
-			if (*m_ptr == '\n')
-			{
+		skip (2); // skip the end symbols
-				m_line++;
-				m_line_break_pos = m_ptr;
-			}
-		}
-		m_ptr += 2; // skip the */
 		return get_next_token();
 	}
 	if (*m_ptr == '\0')
 		return false;
 	// Check tokens
-	for (int i = 0; i < (int) (sizeof g_token_strings / sizeof * g_token_strings); ++i)
+	for (int i = 0; i < countof (g_token_strings); ++i)
 	{
-		if (check_string (g_token_strings[i], f_check_word))
+		int flags = 0;
+		if (i >= tk_first_named_token)
+			flags |= f_check_word;
+		if (check_string (g_token_strings[i], flags))
 		{
 			m_token_text = g_token_strings[i];
 			m_token_type = (e_token) i;
 			return true;
 		}
 		m_token_type = tk_string;
 		m_ptr++; // skip the final quote
 		return true;
 	}
-	m_token_type = tk_symbol;
 	if (isdigit (*m_ptr))
 	{
 		while (isdigit (*m_ptr))
 			m_token_text += *m_ptr++;
 		m_token_type = tk_number;
 		return true;
 	}
-	if (is_symbol_char (*m_ptr))
+	if (is_symbol_char (*m_ptr, false))
 	{
+		m_token_type = tk_symbol;
 		while (m_ptr != '\0')
 		{
-			if (!is_symbol_char (*m_ptr))
+			if (!is_symbol_char (*m_ptr, true))
-				break;
-			bool stop_here = false;
-			for (string i : g_token_strings)
-			{
-				if (check_string (i, f_check_peek | f_check_word))
-				{
-					stop_here = true;
-					break;
-				}
-			}
-			if (stop_here)
 				break;
 			m_token_text += *m_ptr++;
 		}
 	return false;
 }
 // =============================================================================
 //
+void lexer_scanner::skip() // Advances the cursor by one character while keeping the line bookkeeping current.
+{
+	if (*m_ptr == '\n')
+	{
+		m_line++; // the character being consumed is a newline, so bump the line counter
+		m_line_break_pos = m_ptr; // remember where this line break sits in the buffer
+	}
+	m_ptr++; // unconditional: no end-of-input check here. NOTE(review): callers presumably must not call this at the terminating '\0' - confirm
+}
+// =============================================================================
+//
+void lexer_scanner::skip (int chars) // Advances the cursor by `chars` characters; a count of zero or less does nothing.
+{
+	for (int i = 0; i < chars; ++i)
+		skip(); // go through the single-step overload so each skipped newline is still counted
+}
+// =============================================================================
+//
 string lexer_scanner::get_token_string (e_token a) // Returns the g_token_strings entry for token `a`.
 {
-	assert ((int) a <= last_named_token);
+	assert ((int) a <= tk_last_named_token); // only an upper bound is checked; values past the last named token are rejected
 	return g_token_strings[a];
 }

comparison: src/lexer_scanner.cc

src/lexer_scanner.cc

Mercurial > botc / file comparison

comparison: src/lexer_scanner.cc

src/lexer_scanner.cc