src/lexer_scanner.cc

changeset 79
2425fa6a4f21
parent 75
bf8c57437231
child 82
841562f5a32f
equal deleted inserted replaced
78:e6d7e32e6481 79:2425fa6a4f21
84 "mainloop", 84 "mainloop",
85 "onenter", 85 "onenter",
86 "onexit", 86 "onexit",
87 "state", 87 "state",
88 "switch", 88 "switch",
89 "str" 89 "str",
90 "void", 90 "void",
91 "while", 91 "while",
92 "enum", 92 "enum",
93 "func", 93 "func",
94 "return", 94 "return",
95 }; 95 };
96 96
97 static_assert (countof (g_token_strings) == (int) last_named_token, 97 static_assert (countof (g_token_strings) == (int) tk_last_named_token + 1,
98 "Count of g_token_strings is not the same as the amount of named token identifiers."); 98 "Count of g_token_strings is not the same as the amount of named token identifiers.");
99 99
100 // ============================================================================= 100 // =============================================================================
101 // 101 //
102 lexer_scanner::lexer_scanner (FILE* fp) : 102 lexer_scanner::lexer_scanner (FILE* fp) :
124 // 124 //
125 bool lexer_scanner::check_string (const char* c, int flags) 125 bool lexer_scanner::check_string (const char* c, int flags)
126 { 126 {
127 bool r = strncmp (m_ptr, c, strlen (c)) == 0; 127 bool r = strncmp (m_ptr, c, strlen (c)) == 0;
128 128
129 // There is to be whitespace after words 129 // There is to be a non-symbol character after words
130 if (r && (flags & f_check_word) && !isspace (m_ptr[strlen (c)])) 130 if (r && (flags & f_check_word) && is_symbol_char (m_ptr[strlen (c)], true))
131 r = false; 131 r = false;
132 132
133 // Advance the cursor unless we want to just peek 133 // Advance the cursor unless we want to just peek
134 if (r && !(flags & f_check_peek)) 134 if (r && !(flags & f_check_peek))
135 m_ptr += strlen (c); 135 m_ptr += strlen (c);
141 // 141 //
142 bool lexer_scanner::get_next_token() 142 bool lexer_scanner::get_next_token()
143 { 143 {
144 m_token_text = ""; 144 m_token_text = "";
145 145
146 while (isspace (*m_ptr) == true) 146 while (isspace (*m_ptr))
147 { 147 skip();
148 if (*m_ptr == '\n')
149 {
150 m_line++;
151 m_line_break_pos = m_ptr;
152 }
153
154 m_ptr++;
155 }
156 148
157 // Check for comments 149 // Check for comments
158 if (strncmp (m_ptr, "//", 2) == 0) 150 if (strncmp (m_ptr, "//", 2) == 0)
159 { 151 {
160 m_ptr += 2; 152 m_ptr += 2;
161 153
162 while (*(++m_ptr) != '\n') 154 while (*m_ptr != '\n')
163 ; 155 skip();
164 156
165 return get_next_token(); 157 return get_next_token();
166 } 158 }
167 elif (strncmp (m_ptr, "/*", 2) == 0) 159 elif (strncmp (m_ptr, "/*", 2) == 0)
168 { 160 {
169 m_ptr += 2; 161 skip (2); // skip the start symbols
170 162
171 while (strncmp (++m_ptr, "*/", 2) != 0) 163 while (strncmp (m_ptr, "*/", 2) != 0)
172 { 164 skip();
173 if (*m_ptr == '\n') 165
174 { 166 skip (2); // skip the end symbols
175 m_line++;
176 m_line_break_pos = m_ptr;
177 }
178 }
179
180 m_ptr += 2; // skip the */
181 return get_next_token(); 167 return get_next_token();
182 } 168 }
183 169
184 if (*m_ptr == '\0') 170 if (*m_ptr == '\0')
185 return false; 171 return false;
186 172
187 // Check tokens 173 // Check tokens
188 for (int i = 0; i < (int) (sizeof g_token_strings / sizeof * g_token_strings); ++i) 174 for (int i = 0; i < countof (g_token_strings); ++i)
189 { 175 {
190 if (check_string (g_token_strings[i], f_check_word)) 176 int flags = 0;
177
178 if (i >= tk_first_named_token)
179 flags |= f_check_word;
180
181 if (check_string (g_token_strings[i], flags))
191 { 182 {
192 m_token_text = g_token_strings[i]; 183 m_token_text = g_token_strings[i];
193 m_token_type = (e_token) i; 184 m_token_type = (e_token) i;
194 return true; 185 return true;
195 } 186 }
227 m_token_type = tk_string; 218 m_token_type = tk_string;
228 m_ptr++; // skip the final quote 219 m_ptr++; // skip the final quote
229 return true; 220 return true;
230 } 221 }
231 222
232 m_token_type = tk_symbol;
233
234 if (isdigit (*m_ptr)) 223 if (isdigit (*m_ptr))
235 { 224 {
236 while (isdigit (*m_ptr)) 225 while (isdigit (*m_ptr))
237 m_token_text += *m_ptr++; 226 m_token_text += *m_ptr++;
238 227
239 m_token_type = tk_number; 228 m_token_type = tk_number;
240 return true; 229 return true;
241 } 230 }
242 231
243 if (is_symbol_char (*m_ptr)) 232 if (is_symbol_char (*m_ptr, false))
244 { 233 {
234 m_token_type = tk_symbol;
235
245 while (m_ptr != '\0') 236 while (m_ptr != '\0')
246 { 237 {
247 if (!is_symbol_char (*m_ptr)) 238 if (!is_symbol_char (*m_ptr, true))
248 break;
249
250 bool stop_here = false;
251
252 for (string i : g_token_strings)
253 {
254 if (check_string (i, f_check_peek | f_check_word))
255 {
256 stop_here = true;
257 break;
258 }
259 }
260
261 if (stop_here)
262 break; 239 break;
263 240
264 m_token_text += *m_ptr++; 241 m_token_text += *m_ptr++;
265 } 242 }
266 243
271 return false; 248 return false;
272 } 249 }
273 250
274 // ============================================================================= 251 // =============================================================================
275 // 252 //
253 void lexer_scanner::skip()
254 {
255 if (*m_ptr == '\n')
256 {
257 m_line++;
258 m_line_break_pos = m_ptr;
259 }
260
261 m_ptr++;
262 }
263
264 // =============================================================================
265 //
266 void lexer_scanner::skip (int chars)
267 {
268 for (int i = 0; i < chars; ++i)
269 skip();
270 }
271
272 // =============================================================================
273 //
276 string lexer_scanner::get_token_string (e_token a) 274 string lexer_scanner::get_token_string (e_token a)
277 { 275 {
278 assert ((int) a <= last_named_token); 276 assert ((int) a <= tk_last_named_token);
279 return g_token_strings[a]; 277 return g_token_strings[a];
280 } 278 }

mercurial