src/lexer_scanner.cc

changeset 75
bf8c57437231
parent 73
1ee9b312dc18
child 79
2425fa6a4f21
equal deleted inserted replaced
74:007fbadfa7f9 75:bf8c57437231
1 /* 1 /*
2 Copyright (c) 2013-2014, Santeri Piippo 2 Copyright (c) 2014, Santeri Piippo
3 All rights reserved. 3 All rights reserved.
4 4
5 Redistribution and use in source and binary forms, with or without 5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met: 6 modification, are permitted provided that the following conditions are met:
7 7
32 #include <cstdlib> 32 #include <cstdlib>
33 #include <cassert> 33 #include <cassert>
34 #include <cstring> 34 #include <cstring>
35 #include <string> 35 #include <string>
36 #include "lexer_scanner.h" 36 #include "lexer_scanner.h"
37 #include "lexer.h"
37 38
38 static const string g_token_strings[] = 39 static const string g_token_strings[] =
39 { 40 {
40 "==", 41 "==",
41 "[]", 42 "[]",
91 "enum", 92 "enum",
92 "func", 93 "func",
93 "return", 94 "return",
94 }; 95 };
95 96
96 static_assert (countof (g_token_strings) == (int) last_named_token + 1, 97 static_assert (countof (g_token_strings) == (int) last_named_token,
97 "Count of g_token_strings is not the same as the amount of named token identifiers."); 98 "Count of g_token_strings is not the same as the amount of named token identifiers.");
98 99
99 // ============================================================================= 100 // =============================================================================
100 // 101 //
101 lexer_scanner::lexer_scanner (FILE* fp) : 102 lexer_scanner::lexer_scanner (FILE* fp) :
128 // There is to be whitespace after words 129 // There is to be whitespace after words
129 if (r && (flags & f_check_word) && !isspace (m_ptr[strlen (c)])) 130 if (r && (flags & f_check_word) && !isspace (m_ptr[strlen (c)]))
130 r = false; 131 r = false;
131 132
132 // Advance the cursor unless we want to just peek 133 // Advance the cursor unless we want to just peek
133 if (r && ! (flags & f_check_peek)) 134 if (r && !(flags & f_check_peek))
134 m_ptr += strlen (c); 135 m_ptr += strlen (c);
135 136
136 return r; 137 return r;
137 } 138 }
138 139
151 } 152 }
152 153
153 m_ptr++; 154 m_ptr++;
154 } 155 }
155 156
157 // Check for comments
158 if (strncmp (m_ptr, "//", 2) == 0)
159 {
160 m_ptr += 2;
161
162 while (*(++m_ptr) != '\n')
163 ;
164
165 return get_next_token();
166 }
167 elif (strncmp (m_ptr, "/*", 2) == 0)
168 {
169 m_ptr += 2;
170
171 while (strncmp (++m_ptr, "*/", 2) != 0)
172 {
173 if (*m_ptr == '\n')
174 {
175 m_line++;
176 m_line_break_pos = m_ptr;
177 }
178 }
179
180 m_ptr += 2; // skip the */
181 return get_next_token();
182 }
183
156 if (*m_ptr == '\0') 184 if (*m_ptr == '\0')
157 return false; 185 return false;
158 186
159 // Check tokens 187 // Check tokens
160 for (int i = 0; i < (int) (sizeof g_token_strings / sizeof * g_token_strings); ++i) 188 for (int i = 0; i < (int) (sizeof g_token_strings / sizeof * g_token_strings); ++i)
161 { 189 {
162 if (check_string (g_token_strings[i], f_check_word)) 190 if (check_string (g_token_strings[i], f_check_word))
163 { 191 {
164 m_token_text = g_token_strings[i]; 192 m_token_text = g_token_strings[i];
165 m_e_token = (e_token) i; 193 m_token_type = (e_token) i;
166 return true; 194 return true;
167 } 195 }
168 } 196 }
169 197
170 // Check and parse string 198 // Check and parse string
194 } 222 }
195 223
196 m_token_text += *m_ptr++; 224 m_token_text += *m_ptr++;
197 } 225 }
198 226
199 m_e_token = tk_string; 227 m_token_type = tk_string;
200 m_ptr++; // skip the final quote 228 m_ptr++; // skip the final quote
201 return true; 229 return true;
202 } 230 }
203 231
204 m_e_token = tk_symbol; 232 m_token_type = tk_symbol;
205 233
206 if (isdigit (*m_ptr)) 234 if (isdigit (*m_ptr))
207 { 235 {
208 while (isdigit (*m_ptr)) 236 while (isdigit (*m_ptr))
209 m_token_text += *m_ptr++; 237 m_token_text += *m_ptr++;
210 238
211 m_e_token = tk_number; 239 m_token_type = tk_number;
212 return true; 240 return true;
213 } 241 }
214 242
215 if (is_symbol_char (*m_ptr)) 243 if (is_symbol_char (*m_ptr))
216 { 244 {
237 } 265 }
238 266
239 return true; 267 return true;
240 } 268 }
241 269
270 error ("unknown character \"%1\"", *m_ptr);
242 return false; 271 return false;
243 } 272 }
244 273
274 // =============================================================================
275 //
245 string lexer_scanner::get_token_string (e_token a) 276 string lexer_scanner::get_token_string (e_token a)
246 { 277 {
247 assert ((int) a <= (int) last_named_token); 278 assert ((int) a <= last_named_token);
248 return g_token_strings[a]; 279 return g_token_strings[a];
249 } 280 }

mercurial