/*
	Copyright (c) 2013-2014, Santeri Piippo
	All rights reserved.

	Redistribution and use in source and binary forms, with or without
	modification, are permitted provided that the following conditions are met:

	* Redistributions of source code must retain the above copyright
	  notice, this list of conditions and the following disclaimer.

	* Redistributions in binary form must reproduce the above copyright
	  notice, this list of conditions and the following disclaimer in the
	  documentation and/or other materials provided with the distribution.

	* Neither the name of the <organization> nor the
	  names of its contributors may be used to endorse or promote products
	  derived from this software without specific prior written permission.

	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
	DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <cerrno>
#include <cstring>
#include "lexer.h"

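// The files currently being processed are kept on this stack so that
// recursive #includes can be detected; g_main_lexer points to the sole
// lexer instance in existence.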
static string_list g_file_name_stack;
static lexer* g_main_lexer = null;

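// =============================================================================
// Only one lexer instance may be alive at a time. Constructing one registers
// it as the main lexer; the assert trips if another instance already exists.
//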
lexer::lexer()
{
	assert (g_main_lexer == null);
	g_main_lexer = this;
	devf ("Lexer initialized\n");
}

lexer::~lexer()
{
	g_main_lexer = null;
	devf ("Lexer de-initialized\n");
}

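// =============================================================================
// Reads and tokenizes the given file. #include directives are expanded
// recursively; all other tokens are appended to m_tokens. The file name stays
// on g_file_name_stack while the file is being processed so that recursive
// inclusion can be detected.
//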
void lexer::process_file (string file_name)
{
	devf ("Lexer: processing %1\n", file_name);

	// Push this file onto the include stack so that recursive #includes of it
	// are caught by the check below.
	g_file_name_stack << file_name;

	FILE* fp = fopen (file_name, "r");

	if (fp == null)
		error ("couldn't open %1 for reading: %2", file_name, strerror (errno));

	lexer_scanner sc (fp);

	while (sc.get_next_token())
	{
		// Preprocessor commands:
		if (sc.get_e_token() == tk_hash)
		{
			must_get_next_from_scanner (sc, tk_symbol);

			if (sc.get_token_text() == "include")
			{
				devf ("Lexer: encountered #include\n");

				must_get_next_from_scanner (sc, tk_string);
				string file_name = sc.get_token_text();

				if (g_file_name_stack.contains (file_name))
					error ("attempted to #include %1 recursively", sc.get_token_text());

				process_file (file_name);
			}
			else
				error ("unknown preprocessor directive \"#%1\"", sc.get_token_text());
		}
		else
		{
			token tok;
			tok.file = file_name;
			tok.line = sc.get_line();
			tok.column = sc.get_column();
			tok.type = sc.get_e_token();
			tok.text = sc.get_token_text();
			m_tokens << tok;
			devf ("Lexer: added %1 (%2)\n", describe_e_token (tok.type),
				describe_token (&tok));
		}
	}

	devf ("Lexer: File %1 processed.\n", file_name);
	m_token_position = m_tokens.begin() - 1;

	// This file is done, pop it off the include stack again (assumes the
	// string_list container provides remove_one, mirroring contains above).
	g_file_name_stack.remove_one (file_name);
}

// =============================================================================
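// Attempts to advance to the next token. If req is not tk_any, the new token
// must be of the requested type; otherwise the read position is rolled back
// and false is returned.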
//
bool lexer::get_next (e_token req)
{
	iterator pos = m_token_position;
	devf ("Lexer: Requested next token, requirement: %1\n", describe_e_token (req));

	if (is_at_end())
	{
		devf ("Lexer: at end of tokens. Failed.\n");
		return false;
	}

	m_token_position++;

	if (req != tk_any && get_token()->type != req)
	{
		devf ("Lexer: Token %1 does not meet the requirement\n", describe_token (get_token()));
		m_token_position = pos;
		return false;
	}

	devf ("Lexer: Get successful: %1\n", describe_token (get_token()));
	return true;
}

// =============================================================================
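// Like get_next, but failing to get a token of the given type is a fatal
// error instead of a false return value.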
//
void lexer::must_get_next (e_token tok)
{
	if (!get_next())
		error ("unexpected EOF");

	must_be (tok);
}

// =============================================================================
// eugh..
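// Reads a token of the given type directly from the scanner, for use while a
// file is still being tokenized (e.g. when processing #include).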
//
void lexer::must_get_next_from_scanner (lexer_scanner& sc, e_token tok)
{
	if (!sc.get_next_token())
		error ("unexpected EOF");

	if (tok != tk_any && sc.get_e_token() != tok)
	{
		// Describe the token the scanner just read, not the lexer's current
		// token; the token position is not valid while a file is being scanned.
		token got;
		got.type = sc.get_e_token();
		got.text = sc.get_token_text();
		error ("expected %1, got %2", describe_e_token (tok),
			describe_token (&got));
	}
}

// =============================================================================
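// Reads the next token and errors out unless its type is one of those listed
// in toks.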
//
void lexer::must_get_any_of (const list<e_token>& toks)
{
	devf ("Lexer: need to get a token that is any of: %1\n", toks);

	if (!get_next())
		error ("unexpected EOF");

	for (e_token tok : toks)
		if (get_token()->type == tok)
			return;

	// None of the accepted types matched; build a readable list of them for
	// the error message.
	string toknames;

	for (const e_token& tok_type : toks)
	{
		if (toknames.is_empty() == false)
			toknames += (&tok_type == &toks.last()) ? " or " : ", ";

		toknames += describe_e_token (tok_type);
	}

	error ("expected %1, got %2", toknames, describe_token (get_token()));
}

// =============================================================================
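// Reads the next token, which must be a symbol matching one of the given
// strings; returns the index of the matching entry in syms.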
//
int lexer::get_one_symbol (const string_list& syms)
{
	if (!get_next())
		error ("unexpected EOF");

	if (get_token()->type == tk_symbol)
	{
		for (int i = 0; i < syms.size(); ++i)
		{
			if (syms[i] == get_token()->text)
				return i;
		}
	}

	error ("expected one of %1, got %2", syms, describe_token (get_token()));
	return -1;
}

// =============================================================================
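// Errors out unless the current token is of the given type.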
//
void lexer::must_be (e_token tok)
{
	if (get_token()->type != tok)
		error ("expected %1, got %2", describe_e_token (tok),
			describe_token (get_token()));
}

// =============================================================================
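// Renders a token type (and, when available, a concrete token) into a
// human-readable string for error messages.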
//
string lexer::describe_token_private (e_token tok_type, lexer::token* tok)
{
	if ((int) tok_type < (int) last_named_token)
		return "\"" + lexer_scanner::get_token_string (tok_type) + "\"";

	switch (tok_type)
	{
		case tk_symbol:
			return tok ? tok->text : "a symbol";

		case tk_number:
			return tok ? tok->text : "a number";

		case tk_string:
			return tok ? ("\"" + tok->text + "\"") : "a string";

		case tk_any:
			return tok ? tok->text : "any token";

		default:
			break;
	}

	return "";
}

// =============================================================================
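// Looks at the next token without consuming it: the token is copied into *tk
// (if given) and the read position is restored afterwards.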
//
bool lexer::peek_next (lexer::token* tk)
{
	iterator pos = m_token_position;
	bool r = get_next();

	if (r && tk != null)
		*tk = *m_token_position;

	m_token_position = pos;
	return r;
}

// =============================================================================
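// Returns the lexer instance registered by the constructor.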
//
lexer* lexer::get_main_lexer()
{
	return g_main_lexer;
}