src/parser.cc

changeset 75
bf8c57437231
child 77
ad17801b1a36
equal deleted inserted replaced
74:007fbadfa7f9 75:bf8c57437231
1 /*
2 Copyright (c) 2012-2014, Santeri Piippo
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 * Neither the name of the <organization> nor the
16 names of its contributors may be used to endorse or promote products
17 derived from this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
23 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "object_writer.h"
32 #include "parser.h"
33 #include "events.h"
34 #include "commands.h"
35 #include "stringtable.h"
36 #include "variables.h"
37 #include "containers.h"
38 #include "lexer.h"
39
// Accesses the scope stack entry that lies `n` levels below the current scope
// cursor; SCOPE (0) is the current scope. The argument is parenthesized so
// that compound expressions such as SCOPE (a - b) index the intended element.
#define SCOPE(n) scopestack[g_ScopeCursor - (n)]
41
42 // TODO: make these static
43 int g_NumStates = 0;
44 int g_NumEvents = 0;
45 parsermode_e g_CurMode = MODE_TOPLEVEL;
46 string g_CurState = "";
47 bool g_stateSpawnDefined = false;
48 bool g_GotMainLoop = false;
49 int g_ScopeCursor = 0;
50 data_buffer* g_IfExpression = null;
51 bool g_CanElse = false;
52 static string* g_undefined_labels[MAX_MARKS]; // TODO: make a list
53 list<constant_info> g_ConstInfo;
54
55 static botscript_parser* g_current_parser = null;
56
57 // ============================================================================
58 //
59 botscript_parser::botscript_parser() :
60 m_lx (new lexer) {}
61
62 // ============================================================================
63 //
64 botscript_parser::~botscript_parser()
65 {
66 delete m_lx;
67 }
68
69 // ============================================================================
70 //
71 void botscript_parser::check_toplevel()
72 {
73 if (g_CurMode != MODE_TOPLEVEL)
74 error ("%1-statements may only be defined at top level!", token_string().chars());
75 }
76
77 // ============================================================================
78 //
79 void botscript_parser::check_not_toplevel()
80 {
81 if (g_CurMode == MODE_TOPLEVEL)
82 error ("%1-statements must not be defined at top level!", token_string().chars());
83 }
84
85 // ============================================================================
86 // Main parser code. Begins read of the script file, checks the syntax of it
87 // and writes the data to the object file via Objwriter - which also takes care
88 // of necessary buffering so stuff is written in the correct order.
89 void botscript_parser::parse_botscript (string file_name, object_writer* w)
90 {
91 m_writer = w;
92
93 // Lex and preprocess the file
94 m_lx->process_file (file_name);
95
96 // Zero the entire block stack first
97 // TODO: this shouldn't be necessary
98 for (int i = 0; i < MAX_SCOPE; i++)
99 ZERO (scopestack[i]);
100
101 for (int i = 0; i < MAX_MARKS; i++)
102 g_undefined_labels[i] = null;
103
104 while (m_lx->get_next())
105 {
106 // Check if else is potentically valid
107 if (token_is (tk_else) && !g_CanElse)
108 error ("else without preceding if");
109
110 if (!token_is (tk_else))
111 g_CanElse = false;
112
113 switch (m_lx->get_token()->type)
114 {
115 case tk_state:
116 parse_state_block();
117 break;
118
119 case tk_event:
120 parse_event_block();
121 break;
122
123 case tk_mainloop:
124 parse_mainloop();
125 break;
126
127 case tk_onenter:
128 case tk_onexit:
129 parse_on_enter_exit();
130 break;
131
132 case tk_int:
133 case tk_str:
134 case tk_void:
135 parse_variable_declaration();
136 break;
137
138 case tk_goto:
139 parse_goto();
140 break;
141
142 case tk_if:
143 parse_if();
144 break;
145
146 case tk_else:
147 parse_else();
148 break;
149
150 case tk_while:
151 parse_while_block();
152 break;
153
154 case tk_for:
155 parse_for_block();
156 break;
157
158 case tk_do:
159 parse_do_block();
160 break;
161
162 case tk_switch:
163 parse_switch_block();
164 break;
165
166 case tk_case:
167 parse_switch_case();
168 break;
169
170 case tk_default:
171 parse_switch_default();
172 break;
173
174 case tk_break:
175 parse_break();
176 break;
177
178 case tk_continue:
179 parse_continue();
180 break;
181
182 case tk_brace_end:
183 parse_block_end();
184 break;
185
186 case tk_const:
187 parse_const();
188 break;
189
190 default:
191 {
192 // Check for labels
193 lexer::token next;
194
195 if (token_is (tk_symbol) &&
196 m_lx->peek_next (&next) &&
197 next.type == tk_colon)
198 {
199 parse_label();
200 break;
201 }
202
203 // Check if it's a command
204 command_info* comm = find_command_by_name (token_string());
205
206 if (comm)
207 {
208 m_writer->get_current_buffer()->merge (ParseCommand (comm));
209 m_lx->must_get_next (tk_semicolon);
210 continue;
211 }
212
213 // If nothing else, parse it as a statement
214 data_buffer* b = parse_statement (w);
215
216 if (!b)
217 error ("unknown token `%1`", token_string());
218
219 m_writer->write_buffer (b);
220 m_lx->must_get_next (tk_semicolon);
221 }
222 break;
223 }
224 }
225
226 // ===============================================================================
227 // Script file ended. Do some last checks and write the last things to main buffer
228 if (g_CurMode != MODE_TOPLEVEL)
229 error ("script did not end at top level; a `}` is missing somewhere");
230
231 // stateSpawn must be defined!
232 if (!g_stateSpawnDefined)
233 error ("script must have a state named `stateSpawn`!");
234
235 for (int i = 0; i < MAX_MARKS; i++)
236 if (g_undefined_labels[i])
237 error ("label `%s` is referenced via `goto` but isn't defined\n", g_undefined_labels[i]->chars());
238
239 // Dump the last state's onenter and mainloop
240 m_writer->write_member_buffers();
241
242 // String table
243 m_writer->write_string_table();
244 }
245
246 // ============================================================================
247 //
248 void botscript_parser::parse_state_block()
249 {
250 check_toplevel();
251 m_lx->must_get_next (tk_string);
252 string statename = token_string();
253
254 // State name must be a word.
255 if (statename.first (" ") != -1)
256 error ("state name must be a single word, got `%1`", statename);
257
258 // stateSpawn is special - it *must* be defined. If we
259 // encountered it, then mark down that we have it.
260 if (-statename == "statespawn")
261 g_stateSpawnDefined = true;
262
263 // Must end in a colon
264 m_lx->must_get_next (tk_colon);
265
266 // write the previous state's onenter and
267 // mainloop buffers to file now
268 if (g_CurState.is_empty() == false)
269 m_writer->write_member_buffers();
270
271 m_writer->write (dh_state_name);
272 m_writer->write_string (statename);
273 m_writer->write (dh_state_index);
274 m_writer->write (g_NumStates);
275
276 g_NumStates++;
277 g_CurState = statename;
278 g_GotMainLoop = false;
279 }
280
281 // ============================================================================
282 //
283 void botscript_parser::parse_event_block()
284 {
285 check_toplevel();
286 m_lx->must_get_next (tk_string);
287
288 event_info* e = find_event_by_name (token_string());
289
290 if (!e)
291 error ("bad event, got `%1`\n", token_string());
292
293 m_lx->must_get_next (tk_brace_start);
294 g_CurMode = MODE_EVENT;
295 m_writer->write (dh_event);
296 m_writer->write (e->number);
297 g_NumEvents++;
298 }
299
300 // ============================================================================
301 //
302 void botscript_parser::parse_mainloop()
303 {
304 check_toplevel();
305 m_lx->must_get_next (tk_brace_start);
306
307 // Mode must be set before dataheader is written here!
308 g_CurMode = MODE_MAINLOOP;
309 m_writer->write (dh_main_loop);
310 }
311
312 // ============================================================================
313 //
314 void botscript_parser::parse_on_enter_exit()
315 {
316 check_toplevel();
317 bool onenter = (token_is (tk_onenter));
318 m_lx->must_get_next (tk_brace_start);
319
320 // Mode must be set before dataheader is written here,
321 // because onenter goes to a separate buffer.
322 g_CurMode = onenter ? MODE_ONENTER : MODE_ONEXIT;
323 m_writer->write (onenter ? dh_on_enter : dh_on_exit);
324 }
325
326 // ============================================================================
327 //
328 void botscript_parser::parse_variable_declaration()
329 {
330 // For now, only globals are supported
331 if (g_CurMode != MODE_TOPLEVEL || g_CurState.is_empty() == false)
332 error ("variables must only be global for now");
333
334 type_e type = (token_is (tk_int)) ? TYPE_INT :
335 (token_is (tk_str)) ? TYPE_STRING :
336 TYPE_BOOL;
337
338 m_lx->must_get_next();
339 string varname = token_string();
340
341 // Var name must not be a number
342 if (varname.is_numeric())
343 error ("variable name must not be a number");
344
345 script_variable* var = declare_global_variable (type, varname);
346 (void) var;
347 m_lx->must_get_next (tk_semicolon);
348 }
349
350 // ============================================================================
351 //
352 void botscript_parser::parse_goto()
353 {
354 check_not_toplevel();
355
356 // Get the name of the label
357 m_lx->must_get_next();
358
359 // Find the mark this goto statement points to
360 string target = token_string();
361 int m = m_writer->find_byte_mark (target);
362
363 // If not set, define it
364 if (m == MAX_MARKS)
365 {
366 m = m_writer->add_mark (target);
367 g_undefined_labels[m] = new string (target);
368 }
369
370 // Add a reference to the mark.
371 m_writer->write (dh_goto);
372 m_writer->add_reference (m);
373 m_lx->must_get_next (tk_semicolon);
374 continue;
375 }
376
377 // ============================================================================
378 //
379 void botscript_parser::parse_if()
380 {
381 check_not_toplevel();
382 push_scope();
383
384 // Condition
385 m_lx->must_get_next (tk_paren_start);
386
387 // Read the expression and write it.
388 m_lx->must_get_next();
389 data_buffer* c = parse_expression (TYPE_INT);
390 m_writer->write_buffer (c);
391
392 m_lx->must_get_next (tk_paren_end);
393 m_lx->must_get_next (tk_brace_start);
394
395 // Add a mark - to here temporarily - and add a reference to it.
396 // Upon a closing brace, the mark will be adjusted.
397 int marknum = m_writer->add_mark ("");
398
399 // Use dh_if_not_goto - if the expression is not true, we goto the mark
400 // we just defined - and this mark will be at the end of the scope block.
401 m_writer->write (dh_if_not_goto);
402 m_writer->add_reference (marknum);
403
404 // Store it
405 SCOPE (0).mark1 = marknum;
406 SCOPE (0).type = e_if_scope;
407 }
408
409 // ============================================================================
410 //
411 void botscript_parser::parse_else()
412 {
413 check_not_toplevel();
414 m_lx->must_get_next (tk_brace_start);
415
416 // Don't use PushScope as it resets the scope
417 g_ScopeCursor++;
418
419 if (g_ScopeCursor >= MAX_SCOPE)
420 error ("too deep scope");
421
422 if (SCOPE (0).type != e_if_scope)
423 error ("else without preceding if");
424
425 // write down to jump to the end of the else statement
426 // Otherwise we have fall-throughs
427 SCOPE (0).mark2 = m_writer->add_mark ("");
428
429 // Instruction to jump to the end after if block is complete
430 m_writer->write (dh_goto);
431 m_writer->add_reference (SCOPE (0).mark2);
432
433 // Move the ifnot mark here and set type to else
434 m_writer->move_mark (SCOPE (0).mark1);
435 SCOPE (0).type = e_else_scope;
436 }
437
438 // ============================================================================
439 //
440 void botscript_parser::parse_while_block()
441 {
442 check_not_toplevel();
443 push_scope();
444
445 // While loops need two marks - one at the start of the loop and one at the
446 // end. The condition is checked at the very start of the loop, if it fails,
447 // we use goto to skip to the end of the loop. At the end, we loop back to
448 // the beginning with a go-to statement.
449 int mark1 = m_writer->add_mark (""); // start
450 int mark2 = m_writer->add_mark (""); // end
451
452 // Condition
453 m_lx->must_get_next (tk_paren_start);
454 m_lx->must_get_next();
455 data_buffer* expr = parse_expression (TYPE_INT);
456 m_lx->must_get_next (tk_paren_end);
457 m_lx->must_get_next (tk_brace_start);
458
459 // write condition
460 m_writer->write_buffer (expr);
461
462 // Instruction to go to the end if it fails
463 m_writer->write (dh_if_not_goto);
464 m_writer->add_reference (mark2);
465
466 // Store the needed stuff
467 SCOPE (0).mark1 = mark1;
468 SCOPE (0).mark2 = mark2;
469 SCOPE (0).type = e_while_scope;
470 }
471
472 // ============================================================================
473 //
474 void botscript_parser::parse_for_block()
475 {
476 check_not_toplevel();
477 push_scope();
478
479 // Initializer
480 m_lx->must_get_next (tk_paren_start);
481 m_lx->must_get_next();
482 data_buffer* init = parse_statement (w);
483
484 if (!init)
485 error ("bad statement for initializer of for");
486
487 m_lx->must_get_next (tk_semicolon);
488
489 // Condition
490 m_lx->must_get_next();
491 data_buffer* cond = parse_expression (TYPE_INT);
492
493 if (!cond)
494 error ("bad statement for condition of for");
495
496 m_lx->must_get_next (tk_semicolon);
497
498 // Incrementor
499 m_lx->must_get_next();
500 data_buffer* incr = parse_statement (w);
501
502 if (!incr)
503 error ("bad statement for incrementor of for");
504
505 m_lx->must_get_next (tk_paren_end);
506 m_lx->must_get_next (tk_brace_start);
507
508 // First, write out the initializer
509 m_writer->write_buffer (init);
510
511 // Init two marks
512 int mark1 = m_writer->add_mark ("");
513 int mark2 = m_writer->add_mark ("");
514
515 // Add the condition
516 m_writer->write_buffer (cond);
517 m_writer->write (dh_if_not_goto);
518 m_writer->add_reference (mark2);
519
520 // Store the marks and incrementor
521 SCOPE (0).mark1 = mark1;
522 SCOPE (0).mark2 = mark2;
523 SCOPE (0).buffer1 = incr;
524 SCOPE (0).type = e_for_scope;
525 }
526
527 // ============================================================================
528 //
529 void botscript_parser::parse_do_block()
530 {
531 check_not_toplevel();
532 push_scope();
533 m_lx->must_get_next (tk_brace_start);
534 SCOPE (0).mark1 = m_writer->add_mark ("");
535 SCOPE (0).type = e_do_scope;
536 }
537
538 // ============================================================================
539 //
540 void botscript_parser::parse_switch_block()
541 {
542 // This gets a bit tricky. switch is structured in the
543 // bytecode followingly:
544 //
545 // (expression)
546 // case a: goto casemark1
547 // case b: goto casemark2
548 // case c: goto casemark3
549 // goto mark1 // jump to end if no matches
550 // casemark1: ...
551 // casemark2: ...
552 // casemark3: ...
553 // mark1: // end mark
554
555 check_not_toplevel();
556 push_scope();
557 m_lx->must_get_next (tk_paren_start);
558 m_lx->must_get_next();
559 m_writer->write_buffer (parse_expression (TYPE_INT));
560 m_lx->must_get_next (tk_paren_end);
561 m_lx->must_get_next (tk_brace_start);
562 SCOPE (0).type = e_switch_scope;
563 SCOPE (0).mark1 = m_writer->add_mark (""); // end mark
564 SCOPE (0).buffer1 = null; // default header
565 }
566
567 // ============================================================================
568 //
569 void botscript_parser::parse_switch_case()
570 {
571 // case is only allowed inside switch
572 if (SCOPE (0).type != e_switch_scope)
573 error ("case label outside switch");
574
575 // Get the literal (Zandronum does not support expressions here)
576 m_lx->must_get_next (tk_number);
577 int num = m_lx->get_token()->text.to_long();
578 m_lx->must_get_next (tk_colon);
579
580 for (int i = 0; i < MAX_CASE; i++)
581 if (SCOPE (0).casenumbers[i] == num)
582 error ("multiple case %d labels in one switch", num);
583
584 // write down the expression and case-go-to. This builds
585 // the case tree. The closing event will write the actual
586 // blocks and move the marks appropriately.
587 // AddSwitchCase will add the reference to the mark
588 // for the case block that this heralds, and takes care
589 // of buffering setup and stuff like that.
590 // null the switch buffer for the case-go-to statement,
591 // we want it all under the switch, not into the case-buffers.
592 m_writer->SwitchBuffer = null;
593 m_writer->write (dh_case_goto);
594 m_writer->write (num);
595 add_switch_case (m_writer, null);
596 SCOPE (0).casenumbers[SCOPE (0).casecursor] = num;
597 }
598
599 // ============================================================================
600 //
601 void botscript_parser::parse_switch_default()
602 {
603 if (SCOPE (0).type != e_switch_scope)
604 error ("default label outside switch");
605
606 if (SCOPE (0).buffer1)
607 error ("multiple default labels in one switch");
608
609 m_lx->must_get_next (tk_colon);
610
611 // The default header is buffered into buffer1, since
612 // it has to be the last of the case headers
613 //
614 // Since the expression is pushed into the switch
615 // and is only popped when case succeeds, we have
616 // to pop it with dh_drop manually if we end up in
617 // a default.
618 data_buffer* b = new data_buffer;
619 SCOPE (0).buffer1 = b;
620 b->write (dh_drop);
621 b->write (dh_goto);
622 add_switch_case (m_writer, b);
623 }
624
625 // ============================================================================
626 //
627 void botscript_parser::parse_break()
628 {
629 if (!g_ScopeCursor)
630 error ("unexpected `break`");
631
632 m_writer->write (dh_goto);
633
634 // switch and if use mark1 for the closing point,
635 // for and while use mark2.
636 switch (SCOPE (0).type)
637 {
638 case e_if_scope:
639 case e_switch_scope:
640 {
641 m_writer->add_reference (SCOPE (0).mark1);
642 } break;
643
644 case e_for_scope:
645 case e_while_scope:
646 {
647 m_writer->add_reference (SCOPE (0).mark2);
648 } break;
649
650 default:
651 {
652 error ("unexpected `break`");
653 } break;
654 }
655
656 m_lx->must_get_next (tk_semicolon);
657 }
658
659 // ============================================================================
660 //
661 void botscript_parser::parse_continue()
662 {
663 m_lx->must_get_next (tk_semicolon);
664
665 int curs;
666 bool found = false;
667
668 // Fall through the scope until we find a loop block
669 for (curs = g_ScopeCursor; curs > 0 && !found; curs--)
670 {
671 switch (scopestack[curs].type)
672 {
673 case e_for_scope:
674 case e_while_scope:
675 case e_do_scope:
676 {
677 m_writer->write (dh_goto);
678 m_writer->add_reference (scopestack[curs].mark1);
679 found = true;
680 } break;
681
682 default:
683 break;
684 }
685 }
686
687 // No loop blocks
688 if (!found)
689 error ("`continue`-statement not inside a loop");
690 }
691
692 // ============================================================================
693 //
694 void botscript_parser::parse_block_end()
695 {
696 // Closing brace
697 // If we're in the block stack, we're descending down from it now
698 if (g_ScopeCursor > 0)
699 {
700 switch (SCOPE (0).type)
701 {
702 case e_if_scope:
703 // Adjust the closing mark.
704 m_writer->move_mark (SCOPE (0).mark1);
705
706 // We're returning from if, thus else can be next
707 g_CanElse = true;
708 break;
709
710 case e_else_scope:
711 // else instead uses mark1 for itself (so if expression
712 // fails, jump to else), mark2 means end of else
713 m_writer->move_mark (SCOPE (0).mark2);
714 break;
715
716 case e_for_scope:
717 // write the incrementor at the end of the loop block
718 m_writer->write_buffer (SCOPE (0).buffer1);
719
720 // fall-thru
721 case e_while_scope:
722 // write down the instruction to go back to the start of the loop
723 m_writer->write (dh_goto);
724 m_writer->add_reference (SCOPE (0).mark1);
725
726 // Move the closing mark here since we're at the end of the while loop
727 m_writer->move_mark (SCOPE (0).mark2);
728 break;
729
730 case e_do_scope:
731 {
732 m_lx->must_get_next (tk_while);
733 m_lx->must_get_next (tk_paren_start);
734 m_lx->must_get_next();
735 data_buffer* expr = parse_expression (TYPE_INT);
736 m_lx->must_get_next (tk_paren_end);
737 m_lx->must_get_next (tk_semicolon);
738
739 // If the condition runs true, go back to the start.
740 m_writer->write_buffer (expr);
741 m_writer->write (dh_if_goto);
742 m_writer->add_reference (SCOPE (0).mark1);
743 break;
744 }
745
746 case e_switch_scope:
747 {
748 // Switch closes. Move down to the record buffer of
749 // the lower block.
750 if (SCOPE (1).casecursor != -1)
751 m_writer->SwitchBuffer = SCOPE (1).casebuffers[SCOPE (1).casecursor];
752 else
753 m_writer->SwitchBuffer = null;
754
755 // If there was a default in the switch, write its header down now.
756 // If not, write instruction to jump to the end of switch after
757 // the headers (thus won't fall-through if no case matched)
758 if (SCOPE (0).buffer1)
759 m_writer->write_buffer (SCOPE (0).buffer1);
760 else
761 {
762 m_writer->write (dh_drop);
763 m_writer->write (dh_goto);
764 m_writer->add_reference (SCOPE (0).mark1);
765 }
766
767 // Go through all of the buffers we
768 // recorded down and write them.
769 for (int u = 0; u < MAX_CASE; u++)
770 {
771 if (!SCOPE (0).casebuffers[u])
772 continue;
773
774 m_writer->move_mark (SCOPE (0).casemarks[u]);
775 m_writer->write_buffer (SCOPE (0).casebuffers[u]);
776 }
777
778 // Move the closing mark here
779 m_writer->move_mark (SCOPE (0).mark1);
780 break;
781 }
782
783 case e_unknown_scope:
784 break;
785 }
786
787 // Descend down the stack
788 g_ScopeCursor--;
789 continue;
790 }
791
792 int dataheader = (g_CurMode == MODE_EVENT) ? dh_end_event :
793 (g_CurMode == MODE_MAINLOOP) ? dh_end_main_loop :
794 (g_CurMode == MODE_ONENTER) ? dh_end_on_enter :
795 (g_CurMode == MODE_ONEXIT) ? dh_end_on_exit : -1;
796
797 if (dataheader == -1)
798 error ("unexpected `}`");
799
800 // Data header must be written before mode is changed because
801 // onenter and mainloop go into special buffers, and we want
802 // the closing data headers into said buffers too.
803 m_writer->write (dataheader);
804 g_CurMode = MODE_TOPLEVEL;
805 m_lx->get_next (tk_semicolon);
806 }
807
808 // ============================================================================
809 //
810 void botscript_parser::parse_const()
811 {
812 constant_info info;
813
814 // Get the type
815 m_lx->must_get_next();
816 string typestring = token_string();
817 info.type = GetTypeByName (typestring);
818
819 if (info.type == TYPE_UNKNOWN || info.type == TYPE_VOID)
820 error ("unknown type `%1` for constant", typestring);
821
822 m_lx->must_get_next();
823 info.name = token_string();
824
825 m_lx->must_get_next (tk_assign);
826
827 switch (info.type)
828 {
829 case TYPE_BOOL:
830 case TYPE_INT:
831 {
832 m_lx->must_get_next (tk_number);
833 } break;
834
835 case TYPE_STRING:
836 {
837 m_lx->must_get_next (tk_string);
838 } break;
839
840 case TYPE_UNKNOWN:
841 case TYPE_VOID:
842 break;
843 }
844
845 info.val = m_lx->get_token()->text;
846 g_ConstInfo << info;
847
848 m_lx->must_get_next (tk_semicolon);
849 }
850
851 // ============================================================================
852 //
853 void botscript_parser::parse_label()
854 {
855 check_not_toplevel();
856 string label_name = token_string();
857
858 // want no conflicts..
859 if (find_command_by_name (label_name))
860 error ("label name `%s` conflicts with command name\n", label_name);
861
862 if (find_global_variable (label_name))
863 error ("label name `%s` conflicts with variable\n", label_name);
864
865 // See if a mark already exists for this label
866 int mark = -1;
867
868 for (int i = 0; i < MAX_MARKS; i++)
869 {
870 if (g_undefined_labels[i] && *g_undefined_labels[i] == label_name)
871 {
872 mark = i;
873 m_writer->move_mark (i);
874
875 // No longer undefinde
876 delete g_undefined_labels[i];
877 g_undefined_labels[i] = null;
878 }
879 }
880
881 // Not found in unmarked lists, define it now
882 if (mark == -1)
883 m_writer->add_mark (label_name);
884
885 m_lx->must_get_next (tk_colon);
886 }
887
888 // ============================================================================
889 // Parses a command call
890 data_buffer* botscript_parser::ParseCommand (command_info* comm)
891 {
892 data_buffer* r = new data_buffer (64);
893
894 if (g_CurMode == MODE_TOPLEVEL)
895 error ("command call at top level");
896
897 m_lx->must_get_next (tk_paren_start);
898 m_lx->must_get_next();
899
900 int curarg = 0;
901
902 while (1)
903 {
904 if (token_is (tk_paren_end))
905 {
906 if (curarg < comm->numargs)
907 error ("too few arguments passed to %s\n\tprototype: %s",
908 comm->name.chars(), get_command_signature (comm).chars());
909
910 break;
911 curarg++;
912 }
913
914 if (curarg >= comm->maxargs)
915 error ("too many arguments passed to %s\n\tprototype: %s",
916 comm->name.chars(), get_command_signature (comm).chars());
917
918 r->merge (parse_expression (comm->args[curarg].type));
919 m_lx->must_get_next();
920
921 if (curarg < comm->numargs - 1)
922 {
923 m_lx->must_be (tk_comma);
924 m_lx->must_get_next();
925 }
926 else if (curarg < comm->maxargs - 1)
927 {
928 // Can continue, but can terminate as well.
929 if (token_is (tk_paren_end))
930 {
931 curarg++;
932 break;
933 }
934 else
935 {
936 m_lx->must_be (tk_comma);
937 m_lx->must_get_next();
938 }
939 }
940
941 curarg++;
942 }
943
944 // If the script skipped any optional arguments, fill in defaults.
945 while (curarg < comm->maxargs)
946 {
947 r->write (dh_push_number);
948 r->write (comm->args[curarg].defvalue);
949 curarg++;
950 }
951
952 r->write (dh_command);
953 r->write (comm->number);
954 r->write (comm->maxargs);
955
956 return r;
957 }
958
959 // ============================================================================
960 // Is the given operator an assignment operator?
961 static bool is_assignment_operator (int oper)
962 {
963 switch (oper)
964 {
965 case OPER_ASSIGNADD:
966 case OPER_ASSIGNSUB:
967 case OPER_ASSIGNMUL:
968 case OPER_ASSIGNDIV:
969 case OPER_ASSIGNMOD:
970 case OPER_ASSIGNLEFTSHIFT:
971 case OPER_ASSIGNRIGHTSHIFT:
972 case OPER_ASSIGN:
973 return true;
974 }
975
976 return false;
977 }
978
979 // ============================================================================
980 // Finds an operator's corresponding dataheader
981 static word get_data_header_by_operator (script_variable* var, int oper)
982 {
983 if (is_assignment_operator (oper))
984 {
985 if (!var)
986 error ("operator %d requires left operand to be a variable\n", oper);
987
988 // TODO: At the moment, vars only are global
989 // OPER_ASSIGNLEFTSHIFT and OPER_ASSIGNRIGHTSHIFT do not
990 // have data headers, instead they are expanded out in
991 // the operator parser
992 switch (oper)
993 {
994 case OPER_ASSIGNADD: return dh_add_global_var;
995 case OPER_ASSIGNSUB: return dh_subtract_global_var;
996 case OPER_ASSIGNMUL: return dh_multiply_global_var;
997 case OPER_ASSIGNDIV: return dh_divide_global_var;
998 case OPER_ASSIGNMOD: return dh_mod_global_var;
999 case OPER_ASSIGN: return dh_assign_global_var;
1000
1001 default: error ("bad assignment operator!!\n");
1002 }
1003 }
1004
1005 switch (oper)
1006 {
1007 case OPER_ADD: return dh_add;
1008 case OPER_SUBTRACT: return dh_subtract;
1009 case OPER_MULTIPLY: return dh_multiply;
1010 case OPER_DIVIDE: return dh_divide;
1011 case OPER_MODULUS: return dh_modulus;
1012 case OPER_EQUALS: return dh_equals;
1013 case OPER_NOTEQUALS: return dh_not_equals;
1014 case OPER_LESSTHAN: return dh_less_than;
1015 case OPER_GREATERTHAN: return dh_greater_than;
1016 case OPER_LESSTHANEQUALS: return dh_at_most;
1017 case OPER_GREATERTHANEQUALS: return dh_at_least;
1018 case OPER_LEFTSHIFT: return dh_left_shift;
1019 case OPER_RIGHTSHIFT: return dh_right_shift;
1020 case OPER_OR: return dh_or_logical;
1021 case OPER_AND: return dh_and_logical;
1022 case OPER_BITWISEOR: return dh_or_bitwise;
1023 case OPER_BITWISEEOR: return dh_eor_bitwise;
1024 case OPER_BITWISEAND: return dh_and_bitwise;
1025 }
1026
1027 error ("DataHeaderByOperator: couldn't find dataheader for operator %d!\n", oper);
1028 return 0;
1029 }
1030
1031 // ============================================================================
1032 // Parses an expression, potentially recursively
1033 data_buffer* botscript_parser::parse_expression (type_e reqtype)
1034 {
1035 data_buffer* retbuf = new data_buffer (64);
1036
1037 // Parse first operand
1038 retbuf->merge (parse_expr_value (reqtype));
1039
1040 // Parse any and all operators we get
1041 int oper;
1042
1043 while ( (oper = parse_operator (true)) != -1)
1044 {
1045 // We peeked the operator, move forward now
1046 m_lx->skip();
1047
1048 // Can't be an assignement operator, those belong in assignments.
1049 if (is_assignment_operator (oper))
1050 error ("assignment operator inside expression");
1051
1052 // Parse the right operand.
1053 m_lx->must_get_next();
1054 data_buffer* rb = parse_expr_value (reqtype);
1055
1056 if (oper == OPER_TERNARY)
1057 {
1058 // Ternary operator requires - naturally - a third operand.
1059 m_lx->must_get_next (tk_colon);
1060 m_lx->must_get_next();
1061 data_buffer* tb = parse_expr_value (reqtype);
1062
1063 // It also is handled differently: there isn't a dataheader for ternary
1064 // operator. Instead, we abuse PUSHNUMBER and IFNOTGOTO for this.
1065 // Behold, big block of writing madness! :P
1066 int mark1 = retbuf->add_mark (""); // start of "else" case
1067 int mark2 = retbuf->add_mark (""); // end of expression
1068 retbuf->write (dh_if_not_goto); // if the first operand (condition)
1069 retbuf->add_reference (mark1); // didn't eval true, jump into mark1
1070 retbuf->merge (rb); // otherwise, perform second operand (true case)
1071 retbuf->write (dh_goto); // afterwards, jump to the end, which is
1072 retbuf->add_reference (mark2); // marked by mark2.
1073 retbuf->move_mark (mark1); // move mark1 at the end of the true case
1074 retbuf->merge (tb); // perform third operand (false case)
1075 retbuf->move_mark (mark2); // move the ending mark2 here
1076 }
1077 else
1078 {
1079 // write to buffer
1080 retbuf->merge (rb);
1081 retbuf->write (get_data_header_by_operator (null, oper));
1082 }
1083 }
1084
1085 return retbuf;
1086 }
1087
1088 // ============================================================================
1089 // Parses an operator string. Returns the operator number code.
1090 #define ISNEXT(C) (m_lx->peek_next_string (peek ? 1 : 0) == C)
1091 int botscript_parser::parse_operator (bool peek)
1092 {
1093 string oper;
1094
1095 if (peek)
1096 oper += m_lx->peek_next_string();
1097 else
1098 oper += token_string();
1099
1100 if (-oper == "strlen")
1101 return OPER_STRLEN;
1102
1103 // Check one-char operators
1104 bool equalsnext = ISNEXT ("=");
1105
1106 int o = (oper == "=" && !equalsnext) ? OPER_ASSIGN :
1107 (oper == ">" && !equalsnext && !ISNEXT (">")) ? OPER_GREATERTHAN :
1108 (oper == "<" && !equalsnext && !ISNEXT ("<")) ? OPER_LESSTHAN :
1109 (oper == "&" && !ISNEXT ("&")) ? OPER_BITWISEAND :
1110 (oper == "|" && !ISNEXT ("|")) ? OPER_BITWISEOR :
1111 (oper == "+" && !equalsnext) ? OPER_ADD :
1112 (oper == "-" && !equalsnext) ? OPER_SUBTRACT :
1113 (oper == "*" && !equalsnext) ? OPER_MULTIPLY :
1114 (oper == "/" && !equalsnext) ? OPER_DIVIDE :
1115 (oper == "%" && !equalsnext) ? OPER_MODULUS :
1116 (oper == "^") ? OPER_BITWISEEOR :
1117 (oper == "?") ? OPER_TERNARY :
1118 -1;
1119
1120 if (o != -1)
1121 {
1122 return o;
1123 }
1124
1125 // Two-char operators
1126 oper += m_lx->peek_next_string (peek ? 1 : 0);
1127 equalsnext = m_lx->peek_next_string (peek ? 2 : 1) == ("=");
1128
1129 o = (oper == "+=") ? OPER_ASSIGNADD :
1130 (oper == "-=") ? OPER_ASSIGNSUB :
1131 (oper == "*=") ? OPER_ASSIGNMUL :
1132 (oper == "/=") ? OPER_ASSIGNDIV :
1133 (oper == "%=") ? OPER_ASSIGNMOD :
1134 (oper == "==") ? OPER_EQUALS :
1135 (oper == "!=") ? OPER_NOTEQUALS :
1136 (oper == ">=") ? OPER_GREATERTHANEQUALS :
1137 (oper == "<=") ? OPER_LESSTHANEQUALS :
1138 (oper == "&&") ? OPER_AND :
1139 (oper == "||") ? OPER_OR :
1140 (oper == "<<" && !equalsnext) ? OPER_LEFTSHIFT :
1141 (oper == ">>" && !equalsnext) ? OPER_RIGHTSHIFT :
1142 -1;
1143
1144 if (o != -1)
1145 {
1146 m_lx->must_get_next();
1147 return o;
1148 }
1149
1150 // Three-char opers
1151 oper += m_lx->peek_next_string (peek ? 2 : 1);
1152 o = oper == "<<=" ? OPER_ASSIGNLEFTSHIFT :
1153 oper == ">>=" ? OPER_ASSIGNRIGHTSHIFT :
1154 -1;
1155
1156 if (o != -1)
1157 {
1158 m_lx->must_get_next();
1159 m_lx->must_get_next();
1160 }
1161
1162 return o;
1163 }
1164
1165 // ============================================================================
1166 string botscript_parser::parse_float()
1167 {
1168 m_lx->must_be (tk_number);
1169 string floatstring = token_string();
1170 lexer::token tok;
1171
1172 // Go after the decimal point
1173 if (m_lx->peek_next (&tok) && tok.type == tk_dot)
1174 {
1175 m_lx->skip();
1176 m_lx->must_get_next (tk_number);
1177 floatstring += ".";
1178 floatstring += token_string();
1179 }
1180
1181 return floatstring;
1182 }
1183
1184 // ============================================================================
1185 // Parses a value in the expression and returns the data needed to push
1186 // it, contained in a data buffer. A value can be either a variable, a command,
1187 // a literal or an expression.
1188 data_buffer* botscript_parser::parse_expr_value (type_e reqtype)
1189 {
1190 data_buffer* b = new data_buffer (16);
1191 script_variable* g;
1192
1193 // Prefixing "!" means negation.
1194 bool negate = token_is (tk_exclamation_mark);
1195
1196 if (negate) // Jump past the "!"
1197 m_lx->skip();
1198
1199 // Handle strlen
1200 /* if (token_string() == "strlen")
1201 {
1202 m_lx->must_get_next (tk_paren_start);
1203 m_lx->must_get_next();
1204
1205 // By this token we should get a string constant.
1206 constant_info* constant = find_constant (token_string());
1207
1208 if (!constant || constant->type != TYPE_STRING)
1209 error ("strlen only works with const str");
1210
1211 if (reqtype != TYPE_INT)
1212 error ("strlen returns int but %s is expected\n", GetTypeName (reqtype).c_str());
1213
1214 b->write (dh_push_number);
1215 b->write (constant->val.len());
1216
1217 m_lx->must_get_next (tk_paren_end);
1218 }
1219 else */
1220 if (token_is (tk_paren_start))
1221 {
1222 // Expression
1223 m_lx->must_get_next();
1224 data_buffer* c = parse_expression (reqtype);
1225 b->merge (c);
1226 m_lx->must_get_next (tk_paren_end);
1227 }
1228 else if (command_info* comm = find_command_by_name (token_string()))
1229 {
1230 delete b;
1231
1232 // Command
1233 if (reqtype && comm->returnvalue != reqtype)
1234 error ("%s returns an incompatible data type", comm->name.chars());
1235
1236 b = ParseCommand (comm);
1237 }
1238 else if (constant_info* constant = find_constant (token_string()))
1239 {
1240 // Type check
1241 if (reqtype != constant->type)
1242 error ("constant `%s` is %s, expression requires %s\n",
1243 constant->name.c_str(), GetTypeName (constant->type).c_str(),
1244 GetTypeName (reqtype).c_str());
1245
1246 switch (constant->type)
1247 {
1248 case TYPE_BOOL:
1249 case TYPE_INT:
1250 b->write (dh_push_number);
1251 b->write (atoi (constant->val));
1252 break;
1253
1254 case TYPE_STRING:
1255 b->write_string (constant->val);
1256 break;
1257
1258 case TYPE_VOID:
1259 case TYPE_UNKNOWN:
1260 break;
1261 }
1262 }
1263 else if ((g = find_global_variable (token_string())))
1264 {
1265 // Global variable
1266 b->write (dh_push_global_var);
1267 b->write (g->index);
1268 }
1269 else
1270 {
1271 // If nothing else, check for literal
1272 switch (reqtype)
1273 {
1274 case TYPE_VOID:
1275 case TYPE_UNKNOWN:
1276 error ("unknown identifier `%1` (expected keyword, function or variable)", token_string());
1277 break;
1278
1279 case TYPE_BOOL:
1280 case TYPE_INT:
1281 {
1282 m_lx->must_be (tk_number);
1283
1284 // All values are written unsigned - thus we need to write the value's
1285 // absolute value, followed by an unary minus for negatives.
1286 b->write (dh_push_number);
1287
1288 long v = token_string().to_long();
1289 b->write (static_cast<word> (abs (v)));
1290
1291 if (v < 0)
1292 b->write (dh_unary_minus);
1293
1294 break;
1295 }
1296
1297 case TYPE_STRING:
1298 // PushToStringTable either returns the string index of the
1299 // string if it finds it in the table, or writes it to the
1300 // table and returns it index if it doesn't find it there.
1301 m_lx->must_be (tk_string);
1302 b->write_string (token_string());
1303 break;
1304 }
1305 }
1306
1307 // Negate it now if desired
1308 if (negate)
1309 b->write (dh_negate_logical);
1310
1311 return b;
1312 }
1313
1314 // ============================================================================
1315 // Parses an assignment. An assignment starts with a variable name, followed
1316 // by an assignment operator, followed by an expression value. Expects current
1317 // token to be the name of the variable, and expects the variable to be given.
1318 data_buffer* botscript_parser::ParseAssignment (script_variable* var)
1319 {
1320 bool global = !var->statename.len();
1321
1322 // Get an operator
1323 m_lx->must_get_next();
1324 int oper = parse_operator();
1325
1326 if (!is_assignment_operator (oper))
1327 error ("expected assignment operator");
1328
1329 if (g_CurMode == MODE_TOPLEVEL)
1330 error ("can't alter variables at top level");
1331
1332 // Parse the right operand
1333 m_lx->must_get_next();
1334 data_buffer* retbuf = new data_buffer;
1335 data_buffer* expr = parse_expression (var->type);
1336
1337 // <<= and >>= do not have data headers. Solution: expand them.
1338 // a <<= b -> a = a << b
1339 // a >>= b -> a = a >> b
1340 if (oper == OPER_ASSIGNLEFTSHIFT || oper == OPER_ASSIGNRIGHTSHIFT)
1341 {
1342 retbuf->write (global ? dh_push_global_var : dh_push_local_var);
1343 retbuf->write (var->index);
1344 retbuf->merge (expr);
1345 retbuf->write ((oper == OPER_ASSIGNLEFTSHIFT) ? dh_left_shift : dh_right_shift);
1346 retbuf->write (global ? dh_assign_global_var : dh_assign_local_var);
1347 retbuf->write (var->index);
1348 }
1349 else
1350 {
1351 retbuf->merge (expr);
1352 long dh = get_data_header_by_operator (var, oper);
1353 retbuf->write (dh);
1354 retbuf->write (var->index);
1355 }
1356
1357 return retbuf;
1358 }
1359
1360 void botscript_parser::push_scope()
1361 {
1362 g_ScopeCursor++;
1363
1364 if (g_ScopeCursor >= MAX_SCOPE)
1365 error ("too deep scope");
1366
1367 ScopeInfo* info = &SCOPE (0);
1368 info->type = e_unknown_scope;
1369 info->mark1 = 0;
1370 info->mark2 = 0;
1371 info->buffer1 = null;
1372 info->casecursor = -1;
1373
1374 for (int i = 0; i < MAX_CASE; i++)
1375 {
1376 info->casemarks[i] = MAX_MARKS;
1377 info->casebuffers[i] = null;
1378 info->casenumbers[i] = -1;
1379 }
1380 }
1381
1382 data_buffer* botscript_parser::parse_statement (object_writer* w)
1383 {
1384 if (find_constant (token_string())) // There should not be constants here.
1385 error ("invalid use for constant\n");
1386
1387 // If it's a variable, expect assignment.
1388 if (script_variable* var = find_global_variable (token_string()))
1389 return ParseAssignment (var);
1390
1391 return null;
1392 }
1393
1394 void botscript_parser::add_switch_case (object_writer* w, data_buffer* b)
1395 {
1396 ScopeInfo* info = &SCOPE (0);
1397
1398 info->casecursor++;
1399
1400 if (info->casecursor >= MAX_CASE)
1401 error ("too many cases in one switch");
1402
1403 // Init a mark for the case buffer
1404 int m = m_writer->add_mark ("");
1405 info->casemarks[info->casecursor] = m;
1406
1407 // Add a reference to the mark. "case" and "default" both
1408 // add the necessary bytecode before the reference.
1409 if (b)
1410 b->add_reference (m);
1411 else
1412 m_writer->add_reference (m);
1413
1414 // Init a buffer for the case block and tell the object
1415 // writer to record all written data to it.
1416 info->casebuffers[info->casecursor] = m_writer->SwitchBuffer = new data_buffer;
1417 }
1418
1419 // ============================================================================
1420 //
1421 constant_info* find_constant (const string& tok)
1422 {
1423 for (int i = 0; i < g_ConstInfo.size(); i++)
1424 if (g_ConstInfo[i].name == tok)
1425 return &g_ConstInfo[i];
1426
1427 return null;
1428 }
1429
1430 // ============================================================================
1431 //
1432 bool botscript_parser::token_is (e_token a)
1433 {
1434 return (m_lx->get_token_type() == a);
1435 }
1436
1437 // ============================================================================
1438 //
1439 string botscript_parser::token_string()
1440 {
1441 return m_lx->get_token()->text;
1442 }
1443
1444 // ============================================================================
1445 //
1446 string botscript_parser::describe_position() const
1447 {
1448 lexer::token* tok = m_lx->get_token();
1449 return tok->file + ":" + string (tok->line) + ":" + string (tok->column);
1450 }

mercurial