Mon, 16 Feb 2004 18:50:28 +0100
trunk: changeset 1318
Switched to using spaces only for indentation. Simple automatic "\t"
-> " " conversion; may need tuning later.
0 | 1 | /* |
2 | * libtu/tokenizer.c | |
3 | * | |
36 | 4 | * Copyright (c) Tuomo Valkonen 1999-2002. |
53 | 5 | * |
6 | * You may distribute and modify this library under the terms of either | |
7 | * the Clarified Artistic License or the GNU LGPL, version 2.1 or later. | |
0 | 8 | */ |
9 | ||
10 | #include <errno.h> | |
11 | #include <stdio.h> | |
12 | #include <ctype.h> | |
13 | #include <limits.h> | |
14 | #include <assert.h> | |
15 | #include <math.h> | |
16 | #include <string.h> | |
17 | ||
5 | 18 | #include <libtu/tokenizer.h> |
19 | #include <libtu/misc.h> | |
20 | #include <libtu/output.h> | |
0 | 21 | |
22 | ||
23 | static const char *errors[]={ | |
62 | 24 | DUMMY_TR("(no error)"), |
25 | DUMMY_TR("Unexpected end of file"), /* E_TOKZ_UNEXPECTED_EOF */ | |
26 | DUMMY_TR("Unexpected end of line"), /* E_TOKZ_UNEXPECTED_EOL */ | |
27 | DUMMY_TR("End of line expected"), /* E_TOKZ_EOL_EXPECTED */ | |
28 | DUMMY_TR("Invalid character"), /* E_TOKZ_INVALID_CHAR*/ | |
29 | DUMMY_TR("Numeric constant too big"), /* E_TOKZ_TOOBIG */ | |
30 | DUMMY_TR("Invalid numberic format"), /* E_TOKZ_NUMFMT */ | |
31 | DUMMY_TR("Junk after numeric constant"), /* E_TOKZ_NUM_JUNK */ | |
32 | DUMMY_TR("Not an integer"), /* E_TOKZ_NOTINT */ | |
33 | DUMMY_TR("Numeric constant out of range"), /* E_TOKZ_RANGE */ | |
34 | DUMMY_TR("Multi-character character constant"), /* E_TOKZ_MULTICHAR */ | |
35 | DUMMY_TR("Token/statement limit reached"), /* E_TOKZ_TOKEN_LIMIT */ | |
36 | DUMMY_TR("Unknown option"), /* E_TOKZ_UNKONWN_OPTION */ | |
37 | DUMMY_TR("Syntax error"), /* E_TOKZ_SYNTAX */ | |
38 | DUMMY_TR("Invalid argument"), /* E_TOKZ_INVALID_ARGUMENT */ | |
39 | DUMMY_TR("End of statement expected"), /* E_TOKZ_EOS_EXPECTED */ | |
40 | DUMMY_TR("Too few arguments"), /* E_TOKZ_TOO_FEW_ARGS */ | |
41 | DUMMY_TR("Too many arguments"), /* E_TOKZ_TOO_MANY_ARGS */ | |
42 | DUMMY_TR("Maximum section nestin level exceeded"), /* E_TOK_Z_MAX_NEST */ | |
43 | DUMMY_TR("Identifier expected"), /* E_TOKZ_IDENTIFIER_EXPECTED */ | |
44 | DUMMY_TR("Starting brace ('{') expected"), /* E_TOKZ_LBRACE_EXPECTED */ | |
0 | 45 | }; |
46 | ||
47 | ||
48 | /* */ | |
49 | ||
/* Size of the on-stack staging buffer used while a string is accumulated. */
#define STRBLEN 32

/* STRING_DECL(X) declares, in the enclosing scope, the accumulator state
 * for a string called X: the heap pointer X, the staging buffer X_tmp,
 * its fill count X_tmpl and a shared status variable `err`.
 * STRING_DECL_P(X, P) does the same but pre-loads the staging buffer with
 * the string literal P (which must be shorter than STRBLEN).
 */
#define STRING_DECL(X) int err=0; char* X=NULL; char X##_tmp[STRBLEN]; int X##_tmpl=0
#define STRING_DECL_P(X, P) int err=0; char* X=NULL; char X##_tmp[STRBLEN]=P; int X##_tmpl=sizeof(P)-1

/* Append character C to string X; on allocation failure `err` is set to
 * -ENOMEM and reported later by STRING_FINISH. The macro argument is now
 * actually used instead of silently relying on a variable named `c`.
 */
#define STRING_APPEND(X, C) \
    do{ \
        if(!_string_append(&X, X##_tmp, &X##_tmpl, (C))) \
            err=-ENOMEM; \
    }while(0)

/* Release whatever has been accumulated on the heap for X. */
#define STRING_FREE(X) \
    do{ \
        if(X!=NULL) \
            free(X); \
    }while(0)

/* Flush the staging buffer into X. Returns from the *calling function*
 * with a negative errno value if an earlier append or the final flush
 * failed; in the former case any partially rebuilt heap data is freed
 * first so it does not leak.
 */
#define STRING_FINISH(X) \
    do{ \
        if(err!=0){ \
            STRING_FREE(X); \
            return err; \
        } \
        if(!_string_finish(&X, X##_tmp, X##_tmpl)) \
            return -ENOMEM; \
    }while(0)
0 | 57 | |
58 | ||
59 | static bool _string_append(char **p, char *tmp, int *tmplen, char c) | |
60 | { | |
62 | 61 | char *tmp2; |
62 | ||
63 | if(*tmplen==STRBLEN-1){ | |
64 | tmp[STRBLEN-1]='\0'; | |
65 | if(*p!=NULL){ | |
66 | tmp2=scat(*p, tmp); | |
67 | free(*p); | |
68 | *p=tmp2; | |
69 | }else{ | |
70 | *p=scopy(tmp); | |
71 | } | |
72 | *tmplen=1; | |
73 | tmp[0]=c; | |
74 | return *p!=NULL; | |
75 | }else{ | |
76 | tmp[(*tmplen)++]=c; | |
77 | return TRUE; | |
78 | } | |
0 | 79 | } |
80 | ||
81 | ||
82 | static bool _string_finish(char **p, char *tmp, int tmplen) | |
83 | { | |
62 | 84 | char *tmp2; |
85 | ||
86 | if(tmplen==0){ | |
87 | if(*p==NULL) | |
88 | *p=scopy(""); | |
89 | }else{ | |
90 | tmp[tmplen]='\0'; | |
91 | if(*p!=NULL){ | |
92 | tmp2=scat(*p, tmp); | |
93 | free(*p); | |
94 | *p=tmp2; | |
95 | }else{ | |
96 | *p=scopy(tmp); | |
97 | } | |
98 | } | |
99 | return *p!=NULL; | |
0 | 100 | } |
101 | ||
102 | ||
103 | /* */ | |
104 | ||
105 | ||
106 | #define INC_LINE() tokz->line++ | |
107 | #define GETCH() _getch(tokz) | |
108 | #define UNGETCH(C) _ungetch(tokz, C) | |
109 | ||
110 | static int _getch(Tokenizer *tokz) | |
111 | { | |
62 | 112 | int c; |
113 | ||
114 | if(tokz->ungetc!=-1){ | |
115 | c=tokz->ungetc; | |
116 | tokz->ungetc=-1; | |
117 | }else if (tokz->flags&TOKZ_READ_FROM_BUFFER) { | |
118 | assert(tokz->buffer.data!=NULL); | |
119 | if (tokz->buffer.pos==tokz->buffer.len) | |
120 | c=EOF; | |
121 | else | |
122 | c=tokz->buffer.data[tokz->buffer.pos++]; | |
123 | }else{ | |
124 | c=getc(tokz->file); | |
125 | } | |
20 | 126 | |
62 | 127 | return c; |
0 | 128 | } |
129 | ||
130 | ||
131 | static void _ungetch(Tokenizer *tokz, int c) | |
132 | { | |
62 | 133 | tokz->ungetc=c; |
0 | 134 | } |
135 | ||
136 | ||
137 | /* */ | |
138 | ||
139 | ||
140 | static int scan_line_comment(Token *tok, Tokenizer *tokz) | |
141 | { | |
62 | 142 | STRING_DECL_P(s, "#"); |
143 | int c; | |
0 | 144 | |
62 | 145 | c=GETCH(); |
146 | ||
147 | while(c!='\n' && c!=EOF){ | |
148 | STRING_APPEND(s, c); | |
149 | c=GETCH(); | |
150 | } | |
0 | 151 | |
62 | 152 | UNGETCH(c); |
0 | 153 | |
62 | 154 | STRING_FINISH(s); |
155 | ||
156 | TOK_SET_COMMENT(tok, s); | |
157 | ||
158 | return 0; | |
0 | 159 | } |
160 | ||
161 | ||
162 | static int skip_line_comment(Tokenizer *tokz) | |
163 | { | |
62 | 164 | int c; |
165 | ||
166 | do{ | |
167 | c=GETCH(); | |
168 | }while(c!='\n' && c!=EOF); | |
0 | 169 | |
62 | 170 | UNGETCH(c); |
171 | ||
172 | return 0; | |
0 | 173 | } |
174 | ||
175 | ||
176 | /* */ | |
177 | ||
178 | ||
179 | static int scan_c_comment(Token *tok, Tokenizer *tokz) | |
180 | { | |
62 | 181 | STRING_DECL_P(s, "/*"); |
182 | int c; | |
183 | int st=0; | |
184 | ||
185 | while(1){ | |
186 | c=GETCH(); | |
187 | ||
188 | if(c==EOF){ | |
189 | STRING_FREE(s); | |
190 | return E_TOKZ_UNEXPECTED_EOF; | |
191 | } | |
192 | ||
193 | STRING_APPEND(s, c); | |
194 | ||
195 | if(c=='\n'){ | |
196 | INC_LINE(); | |
197 | }else if(st==0 && c=='*'){ | |
198 | st=1; | |
199 | }else if(st==1){ | |
200 | if(c=='/') | |
201 | break; | |
202 | st=0; | |
203 | } | |
204 | } | |
0 | 205 | |
62 | 206 | STRING_FINISH(s); |
0 | 207 | |
62 | 208 | TOK_SET_COMMENT(tok, s); |
0 | 209 | |
62 | 210 | return 0; |
0 | 211 | } |
212 | ||
213 | ||
214 | static int skip_c_comment(Tokenizer *tokz) | |
215 | { | |
62 | 216 | int c; |
217 | int st=0; | |
218 | ||
219 | while(1){ | |
220 | c=GETCH(); | |
221 | ||
222 | if(c==EOF) | |
223 | return E_TOKZ_UNEXPECTED_EOF; | |
224 | ||
225 | if(c=='\n') | |
226 | INC_LINE(); | |
227 | else if(st==0 && c=='*') | |
228 | st=1; | |
229 | else if(st==1){ | |
230 | if(c=='/') | |
231 | break; | |
232 | st=0; | |
233 | } | |
234 | } | |
235 | ||
236 | return 0; | |
0 | 237 | } |
238 | ||
239 | ||
240 | /* */ | |
241 | ||
242 | ||
243 | static int scan_char_escape(Tokenizer *tokz) | |
244 | { | |
62 | 245 | static char* special_chars="nrtbae"; |
246 | static char* specials="\n\r\t\b\a\033"; | |
247 | int base, max; | |
248 | int i ,c; | |
249 | ||
250 | c=GETCH(); | |
251 | ||
252 | for(i=0;special_chars[i];i++){ | |
253 | if(special_chars[i]==c) | |
254 | return specials[c]; | |
255 | } | |
256 | ||
257 | if(c=='x' || c=='X'){ | |
258 | base=16;max=2;i=0; | |
259 | }else if(c=='d' || c=='D'){ | |
260 | base=10;max=3;i=0; | |
261 | }else if(c=='8' || c=='9'){ | |
262 | base=10;max=2;i=c-'0'; | |
263 | }else if('0'<=c && c<='7'){ | |
264 | base=8;max=2;i=c-'0'; | |
265 | }else if(c=='\n'){ | |
266 | UNGETCH(c); | |
267 | return -2; | |
268 | }else{ | |
269 | return c; | |
270 | } | |
271 | ||
272 | ||
273 | while(--max>=0){ | |
274 | c=GETCH(); | |
275 | ||
276 | if(c==EOF) | |
277 | return EOF; | |
278 | ||
279 | if(c=='\n'){ | |
280 | UNGETCH(c); | |
281 | return -2; | |
282 | } | |
283 | ||
284 | if(base==16){ | |
285 | if(!isxdigit(c)) | |
286 | break; | |
287 | ||
288 | i<<=4; | |
289 | ||
290 | if(isdigit(c)) | |
291 | i+=c-'0'; | |
292 | else if(i>='a') | |
293 | i+=0xa+c-'a'; | |
294 | else | |
295 | i+=0xa+c-'a'; | |
296 | ||
297 | }else if(base==10){ | |
298 | if(!isdigit(c)) | |
299 | break; | |
300 | i*=10; | |
301 | i+=c-'0'; | |
302 | }else{ | |
303 | if(c<'0' || c>'7') | |
304 | break; | |
305 | i<<=3; | |
306 | i+=c-'0'; | |
307 | } | |
308 | } | |
309 | ||
310 | if(max>=0) | |
311 | UNGETCH(c); | |
0 | 312 | |
62 | 313 | return i; |
0 | 314 | } |
315 | ||
316 | ||
317 | /* */ | |
318 | ||
319 | ||
320 | static int scan_string(Token *tok, Tokenizer *tokz, bool escapes) | |
321 | { | |
62 | 322 | STRING_DECL(s); |
323 | int c; | |
0 | 324 | |
62 | 325 | while(1){ |
326 | c=GETCH(); | |
327 | ||
328 | if(c=='"') | |
329 | break; | |
330 | ||
331 | if(c=='\n'){ | |
332 | UNGETCH(c); | |
333 | STRING_FREE(s); | |
334 | return E_TOKZ_UNEXPECTED_EOL; | |
335 | } | |
336 | ||
337 | if(c=='\\' && escapes){ | |
338 | c=scan_char_escape(tokz); | |
339 | if(c==-2){ | |
340 | STRING_FREE(s); | |
341 | return E_TOKZ_UNEXPECTED_EOL; | |
342 | } | |
343 | } | |
344 | ||
345 | if(c==EOF){ | |
346 | STRING_FREE(s); | |
347 | return E_TOKZ_UNEXPECTED_EOF; | |
348 | } | |
349 | ||
350 | STRING_APPEND(s, c); | |
351 | } | |
352 | ||
353 | STRING_FINISH(s); | |
354 | ||
355 | TOK_SET_STRING(tok, s); | |
0 | 356 | |
62 | 357 | return 0; |
0 | 358 | } |
359 | ||
360 | ||
361 | /* */ | |
362 | ||
363 | ||
364 | static int scan_char(Token *tok, Tokenizer *tokz) | |
365 | { | |
62 | 366 | int c, c2; |
367 | ||
368 | c=GETCH(); | |
369 | ||
370 | if(c==EOF) | |
371 | return E_TOKZ_UNEXPECTED_EOF; | |
372 | ||
373 | if(c=='\n') | |
374 | return E_TOKZ_UNEXPECTED_EOL; | |
0 | 375 | |
62 | 376 | if(c=='\\'){ |
377 | c=scan_char_escape(tokz); | |
378 | ||
379 | if(c==EOF) | |
380 | return E_TOKZ_UNEXPECTED_EOF; | |
381 | ||
382 | if(c==-2) | |
383 | return E_TOKZ_UNEXPECTED_EOL; | |
384 | } | |
385 | ||
386 | c2=GETCH(); | |
387 | ||
388 | if(c2!='\'') | |
389 | return E_TOKZ_MULTICHAR; | |
390 | ||
391 | TOK_SET_CHAR(tok, c); | |
392 | ||
393 | return 0; | |
0 | 394 | } |
395 | ||
396 | ||
397 | /* */ | |
398 | ||
399 | ||
400 | #define START_IDENT(X) (isalpha(X) || X=='_' || X=='$') | |
401 | ||
402 | ||
403 | static int scan_identifier(Token *tok, Tokenizer *tokz, int c) | |
404 | { | |
62 | 405 | STRING_DECL(s); |
406 | ||
407 | do{ | |
408 | STRING_APPEND(s, c); | |
409 | c=GETCH(); | |
410 | }while(isalnum(c) || c=='_' || c=='$'); | |
411 | ||
412 | UNGETCH(c); | |
413 | ||
414 | STRING_FINISH(s); | |
415 | ||
416 | TOK_SET_IDENT(tok, s); | |
0 | 417 | |
62 | 418 | return 0; |
0 | 419 | } |
420 | ||
3 | 421 | #define NP_SIMPLE_IMPL |
60 | 422 | #include "np/numparser2.h" |
423 | #include "np/np-conv.h" | |
0 | 424 | |
425 | ||
426 | static int scan_number(Token *tok, Tokenizer *tokz, int c) | |
427 | { | |
62 | 428 | NPNum num=NUM_INIT; |
429 | int e; | |
430 | ||
431 | if((e=parse_number(&num, tokz, c))) | |
432 | return e; | |
433 | ||
434 | if(num.type==NPNUM_INT){ | |
435 | long l; | |
436 | if((e=num_to_long(&l, &num, TRUE))) | |
437 | return e; | |
438 | ||
439 | TOK_SET_LONG(tok, l); | |
440 | }else if(num.type==NPNUM_FLOAT){ | |
441 | double d; | |
442 | if((e=num_to_double(&d, &num))) | |
443 | return e; | |
444 | ||
445 | TOK_SET_DOUBLE(tok, d); | |
446 | }else{ | |
447 | return E_TOKZ_NUMFMT; | |
448 | } | |
0 | 449 | |
62 | 450 | return 0; |
0 | 451 | } |
452 | ||
453 | ||
454 | /* */ | |
455 | ||
456 | ||
457 | static uchar op_map[]={ | |
62 | 458 | 0x00, /* ________ 0-7 */ |
459 | 0x00, /* ________ 8-15 */ | |
460 | 0x00, /* ________ 16-23 */ | |
461 | 0x00, /* ________ 24-31 */ | |
462 | 0x62, /* _!___%&_ 32-39 */ | |
463 | 0xff, /* ()*+,-./ 40-47 */ | |
464 | 0x00, /* ________ 48-55 */ | |
465 | 0xfc, /* __:;<=>? 56-63 */ | |
466 | 0x01, /* @_______ 64-71 */ | |
467 | 0x00, /* ________ 72-79 */ | |
468 | 0x00, /* ________ 80-87 */ | |
469 | 0x78, /* ___[_]^_ 88-95 */ | |
470 | 0x00, /* ________ 96-103 */ | |
471 | 0x00, /* ________ 104-111 */ | |
472 | 0x00, /* ________ 112-119 */ | |
473 | 0x38 /* ___{|}__ 120-127 */ | |
0 | 474 | }; |
475 | ||
476 | ||
477 | static bool map_isset(uchar *map, uint ch) | |
478 | { | |
62 | 479 | if(ch>127) |
480 | return FALSE; | |
0 | 481 | |
62 | 482 | return map[ch>>3]&(1<<(ch&7)); |
0 | 483 | } |
484 | ||
485 | ||
486 | static bool is_opch(uint ch) | |
487 | { | |
62 | 488 | return map_isset(op_map, ch); |
0 | 489 | } |
490 | ||
491 | ||
492 | static int scan_op(Token *tok, Tokenizer *tokz, int c) | |
493 | { | |
62 | 494 | int c2; |
495 | int op=-1; | |
496 | ||
497 | /* Quickly check it is an operator character */ | |
498 | if(!is_opch(c)) | |
499 | return E_TOKZ_INVALID_CHAR; | |
0 | 500 | |
62 | 501 | switch(c){ |
502 | case '+': | |
503 | case '-': | |
504 | case '*': | |
505 | /* case '/': Checked elsewhere */ | |
506 | case '%': | |
507 | case '^': | |
508 | case '!': | |
509 | case '=': | |
510 | case '<': | |
511 | case '>': | |
512 | c2=GETCH(); | |
513 | if(c2=='='){ | |
514 | op=c|(c2<<8); | |
515 | }else if(c2==c && (c2!='%' && c2!='!' && c2!='*')){ | |
516 | if(c=='<' || c=='>'){ | |
517 | int c3=GETCH(); | |
518 | if(c3=='='){ | |
519 | op=c|(c2<<8)|(c3<<16); | |
520 | }else{ | |
521 | UNGETCH(c3); | |
522 | op=c|(c2<<8); | |
523 | } | |
524 | }else{ | |
525 | op=c|(c2<<8); | |
526 | } | |
527 | }else{ | |
528 | UNGETCH(c2); | |
529 | op=c; | |
530 | } | |
531 | break; | |
532 | ||
533 | /* It is already known that it is a operator so these are not needed | |
534 | case ':': | |
535 | case '~': | |
536 | case '?': | |
537 | case '.': | |
538 | case ';'; | |
539 | case '{': | |
540 | case '}': | |
541 | case '@': | |
542 | case '|': | |
543 | case '&': | |
544 | */ | |
545 | default: | |
546 | op=c; | |
547 | } | |
548 | ||
549 | TOK_SET_OP(tok, op); | |
0 | 550 | |
62 | 551 | return 0; |
0 | 552 | } |
553 | ||
554 | ||
555 | /* */ | |
556 | ||
557 | ||
21 | 558 | void tokz_warn(const Tokenizer *tokz, int line, const char *fmt, ...) |
559 | { | |
62 | 560 | va_list args; |
561 | ||
562 | va_start(args, fmt); | |
563 | ||
564 | if(tokz!=NULL) | |
565 | warn_obj_line_v(tokz->name, line, fmt, args); | |
566 | else | |
567 | warn(fmt, args); | |
568 | ||
569 | va_end(args); | |
21 | 570 | } |
571 | ||
572 | ||
0 | 573 | void tokz_warn_error(const Tokenizer *tokz, int line, int e) |
574 | { | |
62 | 575 | if(e==E_TOKZ_UNEXPECTED_EOF) |
576 | line=0; | |
577 | ||
578 | if(e<0) | |
579 | tokz_warn(tokz, line, "%s", strerror(-e)); | |
580 | else | |
581 | tokz_warn(tokz, line, "%s", TR(errors[e])); | |
0 | 582 | } |
583 | ||
584 | ||
585 | bool tokz_get_token(Tokenizer *tokz, Token *tok) | |
586 | { | |
62 | 587 | int c, c2, e; |
588 | ||
589 | if (!(tokz->flags&TOKZ_READ_FROM_BUFFER)) | |
590 | assert(tokz->file!=NULL); | |
591 | ||
592 | tok_free(tok); | |
593 | ||
594 | if(!TOK_IS_INVALID(&(tokz->ungettok))){ | |
595 | *tok=tokz->ungettok; | |
596 | tokz->ungettok.type=TOK_INVALID; | |
597 | return TRUE; | |
598 | } | |
2 | 599 | |
62 | 600 | while(1){ |
601 | ||
602 | e=0; | |
603 | ||
604 | do{ | |
605 | c=GETCH(); | |
606 | }while(c!='\n' && c!=EOF && isspace(c)); | |
607 | ||
608 | tok->line=tokz->line; | |
609 | ||
610 | switch(c){ | |
611 | case EOF: | |
612 | TOK_SET_OP(tok, OP_EOF); | |
613 | return TRUE; | |
614 | ||
615 | case '\n': | |
616 | INC_LINE(); | |
617 | ||
618 | if(tokz->flags&TOKZ_IGNORE_NEXTLINE) | |
619 | continue; | |
620 | ||
621 | TOK_SET_OP(tok, OP_NEXTLINE); | |
622 | ||
623 | return TRUE; | |
624 | ||
625 | case '\\': | |
626 | do{ | |
627 | c=GETCH(); | |
628 | if(c==EOF){ | |
629 | TOK_SET_OP(tok, OP_EOF); | |
630 | return FALSE; | |
631 | } | |
632 | if(!isspace(c) && e==0){ | |
633 | e=E_TOKZ_EOL_EXPECTED; | |
634 | tokz_warn_error(tokz, tokz->line, e); | |
635 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
636 | return FALSE; | |
637 | } | |
638 | }while(c!='\n'); | |
639 | ||
640 | INC_LINE(); | |
641 | continue; | |
0 | 642 | |
62 | 643 | case '#': |
644 | if(tokz->flags&TOKZ_READ_COMMENTS){ | |
645 | e=scan_line_comment(tok, tokz); | |
646 | break; | |
647 | }else if((e=skip_line_comment(tokz))){ | |
648 | break; | |
649 | } | |
650 | ||
651 | continue; | |
652 | ||
653 | case '/': | |
654 | c2=GETCH(); | |
655 | ||
656 | if(c2=='='){ | |
657 | TOK_SET_OP(tok, OP_AS_DIV); | |
658 | return TRUE; | |
659 | } | |
660 | ||
661 | if(c2!='*'){ | |
662 | UNGETCH(c2); | |
663 | TOK_SET_OP(tok, OP_DIV); | |
664 | return TRUE; | |
665 | } | |
666 | ||
667 | if(tokz->flags&TOKZ_READ_COMMENTS){ | |
668 | e=scan_c_comment(tok, tokz); | |
669 | break; | |
670 | }else if((e=skip_c_comment(tokz))){ | |
671 | break; | |
672 | } | |
673 | ||
674 | continue; | |
675 | ||
676 | case '\"': | |
677 | e=scan_string(tok, tokz, TRUE); | |
678 | break; | |
0 | 679 | |
62 | 680 | case '\'': |
681 | e=scan_char(tok, tokz); | |
682 | break; | |
0 | 683 | |
62 | 684 | default: |
685 | if(('0'<=c && c<='9') || c=='-' || c=='+'){ | |
686 | e=scan_number(tok, tokz, c); | |
687 | break; | |
688 | } | |
0 | 689 | |
62 | 690 | if(START_IDENT(c)) |
691 | e=scan_identifier(tok, tokz, c); | |
692 | else | |
693 | e=scan_op(tok, tokz, c); | |
694 | } | |
695 | ||
696 | if(!e) | |
697 | return TRUE; | |
698 | ||
699 | tokz_warn_error(tokz, tokz->line, e); | |
700 | return FALSE; | |
701 | } | |
0 | 702 | } |
703 | ||
704 | ||
2 | 705 | void tokz_unget_token(Tokenizer *tokz, Token *tok) |
706 | { | |
62 | 707 | tok_free(&(tokz->ungettok)); |
708 | tokz->ungettok=*tok; | |
709 | tok->type=TOK_INVALID; | |
2 | 710 | } |
711 | ||
712 | ||
1 | 713 | /* |
714 | * File open | |
715 | */ | |
716 | ||
717 | static bool do_tokz_pushf(Tokenizer *tokz) | |
718 | { | |
62 | 719 | Tokenizer_FInfo *finfo; |
720 | ||
721 | finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, | |
722 | tokz->filestack_n, tokz->filestack_n+1); | |
723 | ||
724 | if(finfo==NULL) | |
725 | return FALSE; | |
1 | 726 | |
62 | 727 | tokz->filestack=finfo; |
728 | finfo=&(finfo[tokz->filestack_n++]); | |
729 | ||
730 | finfo->file=tokz->file; | |
731 | finfo->name=tokz->name; | |
732 | finfo->line=tokz->line; | |
733 | finfo->ungetc=tokz->ungetc; | |
734 | finfo->ungettok=tokz->ungettok; | |
735 | ||
736 | return TRUE; | |
1 | 737 | } |
738 | ||
739 | ||
14 | 740 | bool tokz_pushf_file(Tokenizer *tokz, FILE *file, const char *fname) |
1 | 741 | { |
62 | 742 | char *fname_copy=NULL; |
743 | ||
744 | if(file==NULL) | |
745 | return FALSE; | |
14 | 746 | |
62 | 747 | if(fname!=NULL){ |
748 | fname_copy=scopy(fname); | |
749 | if(fname_copy==NULL){ | |
750 | warn_err(); | |
751 | return FALSE; | |
752 | } | |
753 | } | |
754 | ||
755 | if(tokz->file!=NULL){ | |
756 | if(!do_tokz_pushf(tokz)){ | |
757 | warn_err(); | |
758 | if(fname_copy!=NULL) | |
759 | free(fname_copy); | |
760 | return FALSE; | |
761 | } | |
762 | } | |
763 | ||
764 | tokz->file=file; | |
765 | tokz->name=fname_copy; | |
766 | tokz->line=1; | |
767 | tokz->ungetc=-1; | |
768 | tokz->ungettok.type=TOK_INVALID; | |
769 | ||
770 | return TRUE; | |
1 | 771 | } |
772 | ||
773 | ||
774 | bool tokz_pushf(Tokenizer *tokz, const char *fname) | |
775 | { | |
62 | 776 | FILE *file; |
777 | ||
778 | file=fopen(fname, "r"); | |
779 | ||
780 | if(file==NULL){ | |
781 | warn_err_obj(fname); | |
782 | return FALSE; | |
783 | } | |
784 | ||
785 | if(!tokz_pushf_file(tokz, file, fname)){ | |
786 | fclose(file); | |
787 | return FALSE; | |
788 | } | |
1 | 789 | |
62 | 790 | return TRUE; |
0 | 791 | } |
792 | ||
793 | ||
62 | 794 | |
1 | 795 | static Tokenizer *tokz_create() |
0 | 796 | { |
62 | 797 | Tokenizer*tokz; |
798 | ||
799 | tokz=ALLOC(Tokenizer); | |
800 | ||
801 | if(tokz==NULL){ | |
802 | warn_err(); | |
803 | return NULL; | |
804 | } | |
805 | ||
806 | tokz->file=NULL; | |
807 | tokz->name=NULL; | |
808 | tokz->line=1; | |
809 | tokz->ungetc=-1; | |
810 | tokz->ungettok.type=TOK_INVALID; | |
811 | tokz->flags=0; | |
812 | tokz->optstack=NULL; | |
813 | tokz->nest_lvl=0; | |
814 | tokz->filestack_n=0; | |
815 | tokz->filestack=NULL; | |
816 | tokz->buffer.data=0; | |
817 | tokz->buffer.len=0; | |
818 | tokz->buffer.pos=0; | |
819 | ||
820 | return tokz; | |
1 | 821 | } |
822 | ||
62 | 823 | |
1 | 824 | Tokenizer *tokz_open(const char *fname) |
825 | { | |
62 | 826 | Tokenizer *tokz; |
827 | ||
828 | tokz=tokz_create(); | |
829 | ||
830 | if(!tokz_pushf(tokz, fname)){ | |
831 | free(tokz); | |
832 | return NULL; | |
833 | } | |
834 | ||
835 | return tokz; | |
1 | 836 | } |
837 | ||
838 | ||
14 | 839 | Tokenizer *tokz_open_file(FILE *file, const char *fname) |
1 | 840 | { |
62 | 841 | Tokenizer *tokz; |
842 | ||
843 | tokz=tokz_create(); | |
844 | ||
845 | if(!tokz_pushf_file(tokz, file, fname)){ | |
846 | free(tokz); | |
847 | return NULL; | |
848 | } | |
849 | ||
850 | return tokz; | |
0 | 851 | } |
852 | ||
35 | 853 | Tokenizer *tokz_prepare_buffer(char *buffer, int len) |
854 | { | |
62 | 855 | Tokenizer *tokz; |
856 | char old=0; | |
35 | 857 | |
62 | 858 | tokz=tokz_create(); |
859 | if(len>0){ | |
860 | old=buffer[len-1]; | |
861 | buffer[len-1]='\0'; | |
862 | } | |
35 | 863 | |
62 | 864 | tokz->flags|=TOKZ_READ_FROM_BUFFER; |
865 | tokz->buffer.data=scopy(buffer); | |
866 | tokz->buffer.len=(len>0 ? (uint)len : strlen(tokz->buffer.data)); | |
867 | tokz->buffer.pos=0; | |
35 | 868 | |
62 | 869 | if(old>0) |
870 | buffer[len-1]=old; | |
35 | 871 | |
62 | 872 | return tokz; |
35 | 873 | } |
0 | 874 | |
1 | 875 | /* |
876 | * File close | |
877 | */ | |
878 | ||
879 | static bool do_tokz_popf(Tokenizer *tokz, bool shrink) | |
880 | { | |
62 | 881 | Tokenizer_FInfo *finfo; |
882 | ||
883 | if(tokz->filestack_n<=0) | |
884 | return FALSE; | |
1 | 885 | |
62 | 886 | if(tokz->file!=NULL) |
887 | fclose(tokz->file); | |
888 | if(tokz->name!=NULL) | |
889 | free(tokz->name); | |
890 | ||
891 | finfo=&(tokz->filestack[--tokz->filestack_n]); | |
892 | ||
893 | tokz->file=finfo->file; | |
894 | tokz->name=finfo->name; | |
895 | tokz->line=finfo->line; | |
896 | tokz->ungetc=finfo->ungetc; | |
897 | tokz->ungettok=finfo->ungettok; | |
898 | ||
899 | if(tokz->filestack_n==0){ | |
900 | free(tokz->filestack); | |
901 | tokz->filestack=NULL; | |
902 | }else if(shrink){ | |
903 | finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, | |
904 | tokz->filestack_n+1, tokz->filestack_n); | |
905 | if(finfo==NULL) | |
906 | warn_err(); | |
907 | else | |
908 | tokz->filestack=finfo; | |
909 | } | |
910 | ||
911 | return TRUE; | |
1 | 912 | } |
913 | ||
914 | ||
915 | bool tokz_popf(Tokenizer *tokz) | |
916 | { | |
62 | 917 | return do_tokz_popf(tokz, TRUE); |
1 | 918 | } |
62 | 919 | |
1 | 920 | |
0 | 921 | void tokz_close(Tokenizer *tokz) |
922 | { | |
62 | 923 | while(tokz->filestack_n>0) |
924 | do_tokz_popf(tokz, FALSE); | |
925 | ||
926 | if(tokz->file!=NULL) | |
927 | fclose(tokz->file); | |
928 | if(tokz->name!=NULL) | |
929 | free(tokz->name); | |
930 | tok_free(&(tokz->ungettok)); | |
931 | ||
932 | free(tokz); | |
0 | 933 | } |
934 | ||
935 | ||
1 | 936 | |
0 | 937 | /* */ |
938 | ||
939 | ||
940 | void tok_free(Token *tok) | |
941 | { | |
62 | 942 | if(TOK_IS_STRING(tok) || TOK_IS_IDENT(tok) || TOK_IS_COMMENT(tok)){ |
943 | if(TOK_STRING_VAL(tok)!=NULL) | |
944 | free(TOK_STRING_VAL(tok)); | |
945 | } | |
946 | ||
947 | tok->type=TOK_INVALID; | |
0 | 948 | } |
949 | ||
950 | ||
951 | void tok_init(Token *tok) | |
952 | { | |
62 | 953 | static Token dummy=TOK_INIT; |
954 | ||
955 | memcpy(tok, &dummy, sizeof(*tok)); | |
0 | 956 | } |
957 |