src/lexer.cc

changeset 73
1ee9b312dc18
child 74
007fbadfa7f9
equal deleted inserted replaced
72:03e4d9db3fd9 73:1ee9b312dc18
1 /*
2 Copyright (c) 2013-2014, Santeri Piippo
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 * Neither the name of the <organization> nor the
16 names of its contributors may be used to endorse or promote products
17 derived from this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
23 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <cstring>
32 #include "lexer.h"
33
// Names of files consulted by lexer::process_file() to reject a recursive
// #include.
// NOTE(review): nothing visible in this file appends to or removes from this
// list, so the recursion check may never trigger — confirm against the rest
// of the project.
34 static string_list g_file_name_stack;
// The single live lexer instance: set by the lexer constructor, reset to null
// by the destructor and returned by lexer::get_main_lexer().
35 static lexer* g_main_lexer = null;
36
37 lexer::lexer()
38 {
39 assert (g_main_lexer == null);
40 g_main_lexer = this;
41 devf ("Lexer initialized\n");
42 }
43
44 lexer::~lexer()
45 {
46 g_main_lexer = null;
47 devf ("Lexer de-initialized\n");
48 }
49
50 void lexer::process_file (string file_name)
51 {
52 devf ("Lexer: processing %1\n", file_name);
53 FILE* fp = fopen (file_name, "r");
54
55 if (fp == null)
56 error ("couldn't open %1 for reading: %2", file_name, strerror (errno));
57
58 lexer_scanner sc (fp);
59
60 while (sc.get_next_token())
61 {
62 // Preprocessor commands:
63 if (sc.get_e_token() == tk_hash)
64 {
65 must_get_next_from_scanner (sc, tk_symbol);
66
67 if (sc.get_token_text() == "include")
68 {
69 devf ("Lexer: encountered #include\n");
70
71 must_get_next_from_scanner (sc, tk_string);
72 string file_name = sc.get_token_text();
73
74 if (g_file_name_stack.contains (file_name))
75 error ("attempted to #include %1 recursively", sc.get_token_text());
76
77 process_file (file_name);
78 }
79 else
80 error ("unknown preprocessor directive \"#%1\"", sc.get_token_text());
81 }
82 else
83 {
84 token tok;
85 tok.file = file_name;
86 tok.line = sc.get_line();
87 tok.column = sc.get_column();
88 tok.type = sc.get_e_token();
89 tok.text = sc.get_token_text();
90 m_tokens << tok;
91 devf ("Lexer: added %1 (%2)\n", describe_e_token (tok.type),
92 describe_token (&tok));
93 }
94 }
95
96 devf ("Lexer: File %1 processed.\n", file_name);
97 m_token_position = m_tokens.begin() - 1;
98 }
99
100 // =============================================================================
101 //
102 bool lexer::get_next (e_token req)
103 {
104 iterator pos = m_token_position;
105 devf ("Lexer: Requested next token, requirement: %1\n", describe_e_token (req));
106
107 if (is_at_end())
108 {
109 devf ("Lexer: at end of tokens. Failed.\n");
110 return false;
111 }
112
113 m_token_position++;
114
115 if (req != tk_any && get_token() != req)
116 {
117 devf ("Lexer: Token %1 does not meet the requirement\n", describe_token (get_token()));
118 m_token_position = pos;
119 return false;
120 }
121
122 devf ("Lexer: Get successful: %1\n", describe_token (get_token()));
123 return true;
124 }
125
126 // =============================================================================
127 //
128 void lexer::must_get_next (e_token tok)
129 {
130 if (!get_next())
131 error ("unexpected EOF");
132
133 must_be (tok);
134 }
135
136 // =============================================================================
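137 // Fetches the next token straight from a scanner and checks its type. Used while a file is still being processed.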
138 //
139 void lexer::must_get_next_from_scanner (lexer_scanner& sc, e_token tok)
140 {
141 if (!sc.get_next_token())
142 error ("unexpected EOF");
143
144 if (tok != tk_any && sc.get_e_token() != tok)
145 error ("expected %1, got %2", describe_e_token (tok),
146 describe_token (get_token()));
147 }
148
149 // =============================================================================
150 //
151 void lexer::must_get_any_of (const list<e_token>& toks)
152 {
153 devf ("Lexer: need to get a token that is any of: %1\n", toks);
154
155 if (!get_next())
156 error ("unexpected EOF");
157
158 for (e_token tok : toks)
159 if (get_token() == tok)
160 return;
161
162 string toknames;
163
164 for (const e_token& tok_type : toks)
165 {
166 if (&tok_type == &toks.last())
167 toknames += " or ";
168 elif (toknames.is_empty() == false)
169 toknames += ", ";
170
171 toknames += describe_e_token (tok_type);
172 }
173
174 error ("expected %1, got %2", toknames, describe_token (get_token()));
175 }
176
177 // =============================================================================
178 //
179 int lexer::get_one_symbol (const string_list& syms)
180 {
181 if (!get_next())
182 error ("unexpected EOF");
183
184 if (get_token() == tk_symbol)
185 {
186 for (int i = 0; i < syms.size(); ++i)
187 {
188 if (syms[i] == get_token()->text)
189 return i;
190 }
191 }
192
193 error ("expected one of %1, got %2", syms, describe_token (get_token()));
194 return -1;
195 }
196
197 // =============================================================================
198 //
199 void lexer::must_be (e_token tok)
200 {
201 if (get_token() != tok)
202 error ("expected %1, got %2", describe_e_token (tok),
203 describe_token (get_token()));
204 }
205
206 // =============================================================================
207 //
208 string lexer::describe_token_private (e_token tok_type, lexer::token* tok)
209 {
210 if ( (int) tok_type < (int) last_named_token)
211 return "\"" + lexer_scanner::get_token_string (tok_type) + "\"";
212
213 switch (tok_type)
214 {
215 case tk_symbol:
216 return tok ? tok->text : "a symbol";
217
218 case tk_number:
219 return tok ? tok->text : "a number";
220
221 case tk_string:
222 return tok ? ("\"" + tok->text + "\"") : "a string";
223
224 case tk_any:
225 return tok ? tok->text : "any token";
226
227 default:
228 break;
229 }
230
231 return "";
232 }
233
234 // =============================================================================
235 //
236 bool lexer::peek_next (lexer::token* tk)
237 {
238 iterator pos = m_token_position;
239 bool r = get_next();
240
241 if (r && tk != null)
242 *tk = *m_token_position;
243
244 m_token_position = pos;
245 return r;
246 }
247
248 // =============================================================================
249 //
250 lexer* lexer::get_main_lexer()
251 {
252 return g_main_lexer;
253 }

mercurial