src/lexer_scanner.cc

changeset 73
1ee9b312dc18
child 75
bf8c57437231
equal deleted inserted replaced
72:03e4d9db3fd9 73:1ee9b312dc18
1 /*
2 Copyright (c) 2013-2014, Santeri Piippo
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 * Neither the name of the <organization> nor the
16 names of its contributors may be used to endorse or promote products
17 derived from this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
23 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cassert>
34 #include <cstring>
35 #include <string>
36 #include "lexer_scanner.h"
37
// Source text of every named token. The array index of an entry is used
// directly as its e_token value (see get_next_token() and get_token_string()),
// so the order here has to stay in step with that enumeration.
//
// get_next_token() takes the first entry that matches at the cursor, which is
// why a multi-character operator is listed before any shorter operator that
// is its prefix (e.g. "==" before "=").
static const string g_token_strings[] =
{
	"==",
	"[]",
	"+=",
	"-=",
	"*=",
	"/=",
	"%=",
	"'",
	"$",
	"(",
	")",
	"[",
	"]",
	"{",
	"}",
	"=",
	"+",
	"-",
	"*",
	"/",
	"%",
	",",
	"<",
	">",
	".",
	":",
	";",
	"#",
	"!",
	"->",
	"bool",
	"break",
	"case",
	"continue",
	"const",
	"default",
	"do",
	"else",
	"event",
	"for",
	"goto",
	"if",
	"int",
	"mainloop",
	"onenter",
	"onexit",
	"state",
	"switch",
	"str",   // the comma matters: without it "str" and "void" merge into "strvoid"
	"void",
	"while",
	"enum",
	"func",
	"return",
};
95
96 static_assert (countof (g_token_strings) == (int) last_named_token + 1,
97 "Count of g_token_strings is not the same as the amount of named token identifiers.");
98
99 // =============================================================================
100 //
101 lexer_scanner::lexer_scanner (FILE* fp) :
102 m_line (1)
103 {
104 long fsize, bytes;
105
106 fseek (fp, 0l, SEEK_END);
107 fsize = ftell (fp);
108 rewind (fp);
109 m_data = new char[fsize];
110 m_ptr = m_line_break_pos = &m_data[0];
111 bytes = fread (m_data, 1, fsize, fp);
112 assert (bytes >= fsize);
113 }
114
115 // =============================================================================
116 //
117 lexer_scanner::~lexer_scanner()
118 {
119 delete m_data;
120 }
121
122 // =============================================================================
123 //
124 bool lexer_scanner::check_string (const char* c, int flags)
125 {
126 bool r = strncmp (m_ptr, c, strlen (c)) == 0;
127
128 // There is to be whitespace after words
129 if (r && (flags & f_check_word) && !isspace (m_ptr[strlen (c)]))
130 r = false;
131
132 // Advance the cursor unless we want to just peek
133 if (r && ! (flags & f_check_peek))
134 m_ptr += strlen (c);
135
136 return r;
137 }
138
139 // =============================================================================
140 //
141 bool lexer_scanner::get_next_token()
142 {
143 m_token_text = "";
144
145 while (isspace (*m_ptr) == true)
146 {
147 if (*m_ptr == '\n')
148 {
149 m_line++;
150 m_line_break_pos = m_ptr;
151 }
152
153 m_ptr++;
154 }
155
156 if (*m_ptr == '\0')
157 return false;
158
159 // Check tokens
160 for (int i = 0; i < (int) (sizeof g_token_strings / sizeof * g_token_strings); ++i)
161 {
162 if (check_string (g_token_strings[i], f_check_word))
163 {
164 m_token_text = g_token_strings[i];
165 m_e_token = (e_token) i;
166 return true;
167 }
168 }
169
170 // Check and parse string
171 if (*m_ptr == '\"')
172 {
173 m_ptr++;
174
175 while (*m_ptr != '\"')
176 {
177 if (!*m_ptr)
178 return false;
179
180 if (check_string ("\\n"))
181 {
182 m_token_text += '\n';
183 continue;
184 }
185 elif (check_string ("\\t"))
186 {
187 m_token_text += '\t';
188 continue;
189 }
190 elif (check_string ("\\\""))
191 {
192 m_token_text += '"';
193 continue;
194 }
195
196 m_token_text += *m_ptr++;
197 }
198
199 m_e_token = tk_string;
200 m_ptr++; // skip the final quote
201 return true;
202 }
203
204 m_e_token = tk_symbol;
205
206 if (isdigit (*m_ptr))
207 {
208 while (isdigit (*m_ptr))
209 m_token_text += *m_ptr++;
210
211 m_e_token = tk_number;
212 return true;
213 }
214
215 if (is_symbol_char (*m_ptr))
216 {
217 while (m_ptr != '\0')
218 {
219 if (!is_symbol_char (*m_ptr))
220 break;
221
222 bool stop_here = false;
223
224 for (string i : g_token_strings)
225 {
226 if (check_string (i, f_check_peek | f_check_word))
227 {
228 stop_here = true;
229 break;
230 }
231 }
232
233 if (stop_here)
234 break;
235
236 m_token_text += *m_ptr++;
237 }
238
239 return true;
240 }
241
242 return false;
243 }
244
245 string lexer_scanner::get_token_string (e_token a)
246 {
247 assert ((int) a <= (int) last_named_token);
248 return g_token_strings[a];
249 }

mercurial