src/lexer_scanner.cc

changeset 88
5def6ff8b466
parent 87
8f65914e7046
child 89
029a330a9bef
equal deleted inserted replaced
87:8f65914e7046 88:5def6ff8b466
1 /*
2 Copyright 2012-2014 Santeri Piippo
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15 derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <cstdio>
30 #include <cstdlib>
31 #include <cassert>
32 #include <cstring>
33 #include <string>
34 #include "lexer_scanner.h"
35 #include "lexer.h"
36
37 static const string g_token_strings[] =
38 {
39 "==",
40 "[]",
41 "+=",
42 "-=",
43 "*=",
44 "/=",
45 "%=",
46 "'",
47 "$",
48 "(",
49 ")",
50 "[",
51 "]",
52 "{",
53 "}",
54 "=",
55 "+",
56 "-",
57 "*",
58 "/",
59 "%",
60 ",",
61 "<",
62 ">",
63 ".",
64 ":",
65 ";",
66 "#",
67 "!",
68 "->",
69 "bool",
70 "break",
71 "case",
72 "continue",
73 "const",
74 "default",
75 "do",
76 "else",
77 "event",
78 "eventdef",
79 "for",
80 "funcdef",
81 "goto",
82 "if",
83 "int",
84 "mainloop",
85 "onenter",
86 "onexit",
87 "state",
88 "switch",
89 "str",
90 "void",
91 "while",
92 "enum",
93 "func",
94 "return",
95 };
96
97 static_assert (countof (g_token_strings) == (int) tk_last_named_token + 1,
98 "Count of g_token_strings is not the same as the amount of named token identifiers.");
99
100 // =============================================================================
101 //
102 lexer_scanner::lexer_scanner (FILE* fp) :
103 m_line (1)
104 {
105 long fsize, bytes;
106
107 fseek (fp, 0l, SEEK_END);
108 fsize = ftell (fp);
109 rewind (fp);
110 m_data = new char[fsize];
111 m_ptr = m_line_break_pos = &m_data[0];
112 bytes = fread (m_data, 1, fsize, fp);
113 assert (bytes >= fsize);
114 }
115
116 // =============================================================================
117 //
118 lexer_scanner::~lexer_scanner()
119 {
120 delete m_data;
121 }
122
123 // =============================================================================
124 //
125 bool lexer_scanner::check_string (const char* c, int flags)
126 {
127 bool r = strncmp (m_ptr, c, strlen (c)) == 0;
128
129 // There is to be a non-symbol character after words
130 if (r && (flags & f_check_word) && is_symbol_char (m_ptr[strlen (c)], true))
131 r = false;
132
133 // Advance the cursor unless we want to just peek
134 if (r && !(flags & f_check_peek))
135 m_ptr += strlen (c);
136
137 return r;
138 }
139
140 // =============================================================================
141 //
142 bool lexer_scanner::get_next_token()
143 {
144 m_token_text = "";
145
146 while (isspace (*m_ptr))
147 skip();
148
149 // Check for comments
150 if (strncmp (m_ptr, "//", 2) == 0)
151 {
152 m_ptr += 2;
153
154 while (*m_ptr != '\n')
155 skip();
156
157 return get_next_token();
158 }
159 elif (strncmp (m_ptr, "/*", 2) == 0)
160 {
161 skip (2); // skip the start symbols
162
163 while (strncmp (m_ptr, "*/", 2) != 0)
164 skip();
165
166 skip (2); // skip the end symbols
167 return get_next_token();
168 }
169
170 if (*m_ptr == '\0')
171 return false;
172
173 // Check tokens
174 for (int i = 0; i < countof (g_token_strings); ++i)
175 {
176 int flags = 0;
177
178 if (i >= tk_first_named_token)
179 flags |= f_check_word;
180
181 if (check_string (g_token_strings[i], flags))
182 {
183 m_token_text = g_token_strings[i];
184 m_token_type = (e_token) i;
185 return true;
186 }
187 }
188
189 // Check and parse string
190 if (*m_ptr == '\"')
191 {
192 m_ptr++;
193
194 while (*m_ptr != '\"')
195 {
196 if (!*m_ptr)
197 error ("unterminated string");
198
199 if (check_string ("\\n"))
200 {
201 m_token_text += '\n';
202 continue;
203 }
204 elif (check_string ("\\t"))
205 {
206 m_token_text += '\t';
207 continue;
208 }
209 elif (check_string ("\\\""))
210 {
211 m_token_text += '"';
212 continue;
213 }
214
215 m_token_text += *m_ptr++;
216 }
217
218 m_token_type = tk_string;
219 skip(); // skip the final quote
220 return true;
221 }
222
223 if (isdigit (*m_ptr))
224 {
225 while (isdigit (*m_ptr))
226 m_token_text += *m_ptr++;
227
228 m_token_type = tk_number;
229 return true;
230 }
231
232 if (is_symbol_char (*m_ptr, false))
233 {
234 m_token_type = tk_symbol;
235
236 do
237 {
238 if (!is_symbol_char (*m_ptr, true))
239 break;
240
241 m_token_text += *m_ptr++;
242 } while (*m_ptr != '\0');
243
244 return true;
245 }
246
247 error ("unknown character \"%1\"", *m_ptr);
248 return false;
249 }
250
251 // =============================================================================
252 //
253 void lexer_scanner::skip()
254 {
255 if (*m_ptr == '\n')
256 {
257 m_line++;
258 m_line_break_pos = m_ptr;
259 }
260
261 m_ptr++;
262 }
263
264 // =============================================================================
265 //
266 void lexer_scanner::skip (int chars)
267 {
268 for (int i = 0; i < chars; ++i)
269 skip();
270 }
271
272 // =============================================================================
273 //
274 string lexer_scanner::get_token_string (e_token a)
275 {
276 assert ((int) a <= tk_last_named_token);
277 return g_token_strings[a];
278 }
279
280 // =============================================================================
281 //
282 string lexer_scanner::read_line()
283 {
284 string line;
285
286 while (*m_ptr != '\n')
287 line += *(m_ptr++);
288
289 return line;
290 }

mercurial