Sat, 19 Jan 2002 19:31:04 +0100
trunk: changeset 39
- The tokenizer also supports reading from a buffer now.
- 2002 update
0 | 1 | /* |
2 | * libtu/tokenizer.c | |
3 | * | |
36 | 4 | * Copyright (c) Tuomo Valkonen 1999-2002. |
0 | 5 | * See the included file LICENSE for details. |
6 | */ | |
7 | ||
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <assert.h>
#include <math.h>
#include <string.h>
15 | ||
5 | 16 | #include <libtu/tokenizer.h> |
17 | #include <libtu/misc.h> | |
18 | #include <libtu/output.h> | |
0 | 19 | |
20 | ||
21 | static const char *errors[]={ | |
22 | DUMMY_TR("(no error)"), | |
23 | DUMMY_TR("Unexpected end of file"), /* E_TOKZ_UNEXPECTED_EOF */ | |
24 | DUMMY_TR("Unexpected end of line"), /* E_TOKZ_UNEXPECTED_EOL */ | |
25 | DUMMY_TR("End of line expected"), /* E_TOKZ_EOL_EXPECTED */ | |
26 | DUMMY_TR("Invalid character"), /* E_TOKZ_INVALID_CHAR*/ | |
27 | DUMMY_TR("Numeric constant too big"), /* E_TOKZ_TOOBIG */ | |
28 | DUMMY_TR("Invalid numberic format"), /* E_TOKZ_NUMFMT */ | |
29 | DUMMY_TR("Junk after numeric constant"), /* E_TOKZ_NUM_JUNK */ | |
30 | DUMMY_TR("Not an integer"), /* E_TOKZ_NOTINT */ | |
31 | DUMMY_TR("Numeric constant out of range"), /* E_TOKZ_RANGE */ | |
32 | DUMMY_TR("Multi-character character constant"), /* E_TOKZ_MULTICHAR */ | |
33 | DUMMY_TR("Token/statement limit reached"), /* E_TOKZ_TOKEN_LIMIT */ | |
34 | DUMMY_TR("Unknown option"), /* E_TOKZ_UNKONWN_OPTION */ | |
35 | DUMMY_TR("Syntax error"), /* E_TOKZ_SYNTAX */ | |
36 | DUMMY_TR("Invalid argument"), /* E_TOKZ_INVALID_ARGUMENT */ | |
37 | DUMMY_TR("End of statement expected"), /* E_TOKZ_EOS_EXPECTED */ | |
38 | DUMMY_TR("Too few arguments"), /* E_TOKZ_TOO_FEW_ARGS */ | |
39 | DUMMY_TR("Too many arguments"), /* E_TOKZ_TOO_MANY_ARGS */ | |
40 | DUMMY_TR("Maximum section nestin level exceeded"), /* E_TOK_Z_MAX_NEST */ | |
41 | DUMMY_TR("Identifier expected"), /* E_TOKZ_IDENTIFIER_EXPECTED */ | |
2 | 42 | DUMMY_TR("Starting brace ('{') expected"), /* E_TOKZ_LBRACE_EXPECTED */ |
0 | 43 | }; |
44 | ||
45 | ||
46 | /* */ | |
47 | ||
/*
 * Helpers for accumulating a heap-allocated string one character at a
 * time. Characters are first collected in a STRBLEN-byte scratch array
 * (X_tmp, fill level X_tmpl) and moved to the heap string X by
 * _string_append()/_string_finish().
 *
 * STRING_DECL/STRING_DECL_P must appear among the declarations of the
 * enclosing function; they also declare the local `err` that the other
 * macros use. P must be a string literal shorter than STRBLEN.
 */
#define STRBLEN 32

#define STRING_DECL(X) int err=0; char* X=NULL; char X##_tmp[STRBLEN]; int X##_tmpl=0
#define STRING_DECL_P(X, P) int err=0; char* X=NULL; char X##_tmp[STRBLEN]=P; int X##_tmpl=sizeof(P)-1

/* Append character C; an allocation failure is recorded in `err`. */
#define STRING_APPEND(X, C) \
    do{ \
        if(!_string_append(&X, X##_tmp, &X##_tmpl, (C))) \
            err=-ENOMEM; \
    }while(0)

/* Release the heap part of the string (free(NULL) is a no-op). */
#define STRING_FREE(X) free(X)

/*
 * Flush the scratch array into X. If any earlier append or the final
 * flush failed, release whatever was accumulated and return the negative
 * errno value from the enclosing function.
 */
#define STRING_FINISH(X) \
    do{ \
        if(err==0 && !_string_finish(&X, X##_tmp, X##_tmpl)) \
            err=-ENOMEM; \
        if(err!=0){ \
            free(X); \
            return err; \
        } \
    }while(0)
0 | 55 | |
56 | ||
57 | static bool _string_append(char **p, char *tmp, int *tmplen, char c) | |
58 | { | |
59 | char *tmp2; | |
60 | ||
61 | if(*tmplen==STRBLEN-1){ | |
62 | tmp[STRBLEN-1]='\0'; | |
63 | if(*p!=NULL){ | |
64 | tmp2=scat(*p, tmp); | |
65 | free(*p); | |
66 | *p=tmp2; | |
67 | }else{ | |
68 | *p=scopy(tmp); | |
69 | } | |
70 | *tmplen=1; | |
71 | tmp[0]=c; | |
72 | return *p!=NULL; | |
73 | }else{ | |
74 | tmp[(*tmplen)++]=c; | |
75 | return TRUE; | |
76 | } | |
77 | } | |
78 | ||
79 | ||
80 | static bool _string_finish(char **p, char *tmp, int tmplen) | |
81 | { | |
82 | char *tmp2; | |
83 | ||
84 | if(tmplen==0){ | |
85 | if(*p==NULL) | |
86 | *p=scopy(""); | |
87 | }else{ | |
88 | tmp[tmplen]='\0'; | |
89 | if(*p!=NULL){ | |
90 | tmp2=scat(*p, tmp); | |
91 | free(*p); | |
92 | *p=tmp2; | |
93 | }else{ | |
94 | *p=scopy(tmp); | |
95 | } | |
96 | } | |
97 | return *p!=NULL; | |
98 | } | |
99 | ||
100 | ||
101 | /* */ | |
102 | ||
103 | ||
/* Shorthands; each expects a Tokenizer pointer named `tokz` in scope. */
#define INC_LINE()  (tokz->line++)
#define GETCH()     (_getch(tokz))
#define UNGETCH(C)  (_ungetch(tokz, (C)))
107 | ||
108 | static int _getch(Tokenizer *tokz) | |
109 | { | |
110 | int c; | |
111 | ||
112 | if(tokz->ungetc!=-1){ | |
113 | c=tokz->ungetc; | |
114 | tokz->ungetc=-1; | |
35 | 115 | }else if (tokz->flags&TOKZ_READ_FROM_BUFFER) { |
116 | assert(tokz->buffer.data!=NULL); | |
117 | if (tokz->buffer.pos==tokz->buffer.len) | |
118 | c=EOF; | |
119 | else | |
120 | c=tokz->buffer.data[tokz->buffer.pos++]; | |
0 | 121 | }else{ |
122 | c=getc(tokz->file); | |
123 | } | |
20 | 124 | |
0 | 125 | return c; |
126 | } | |
127 | ||
128 | ||
129 | static void _ungetch(Tokenizer *tokz, int c) | |
130 | { | |
131 | tokz->ungetc=c; | |
132 | } | |
133 | ||
134 | ||
135 | /* */ | |
136 | ||
137 | ||
138 | static int scan_line_comment(Token *tok, Tokenizer *tokz) | |
139 | { | |
140 | STRING_DECL_P(s, "#"); | |
141 | int c; | |
142 | ||
143 | c=GETCH(); | |
144 | ||
145 | while(c!='\n' && c!=EOF){ | |
146 | STRING_APPEND(s, c); | |
147 | c=GETCH(); | |
148 | } | |
149 | ||
150 | UNGETCH(c); | |
151 | ||
152 | STRING_FINISH(s); | |
153 | ||
154 | TOK_SET_COMMENT(tok, s); | |
155 | ||
156 | return 0; | |
157 | } | |
158 | ||
159 | ||
160 | static int skip_line_comment(Tokenizer *tokz) | |
161 | { | |
162 | int c; | |
163 | ||
164 | do{ | |
165 | c=GETCH(); | |
166 | }while(c!='\n' && c!=EOF); | |
167 | ||
168 | UNGETCH(c); | |
169 | ||
170 | return 0; | |
171 | } | |
172 | ||
173 | ||
174 | /* */ | |
175 | ||
176 | ||
177 | static int scan_c_comment(Token *tok, Tokenizer *tokz) | |
178 | { | |
179 | STRING_DECL_P(s, "/*"); | |
180 | int c; | |
181 | int st=0; | |
182 | ||
183 | while(1){ | |
184 | c=GETCH(); | |
185 | ||
186 | if(c==EOF){ | |
187 | STRING_FREE(s); | |
188 | return E_TOKZ_UNEXPECTED_EOF; | |
189 | } | |
190 | ||
191 | STRING_APPEND(s, c); | |
192 | ||
193 | if(c=='\n'){ | |
194 | INC_LINE(); | |
195 | }else if(st==0 && c=='*'){ | |
196 | st=1; | |
197 | }else if(st==1){ | |
198 | if(c=='/') | |
199 | break; | |
200 | st=0; | |
201 | } | |
202 | } | |
203 | ||
204 | STRING_FINISH(s); | |
205 | ||
206 | TOK_SET_COMMENT(tok, s); | |
207 | ||
208 | return 0; | |
209 | } | |
210 | ||
211 | ||
212 | static int skip_c_comment(Tokenizer *tokz) | |
213 | { | |
214 | int c; | |
215 | int st=0; | |
216 | ||
217 | while(1){ | |
218 | c=GETCH(); | |
219 | ||
220 | if(c==EOF) | |
221 | return E_TOKZ_UNEXPECTED_EOF; | |
222 | ||
223 | if(c=='\n') | |
224 | INC_LINE(); | |
225 | else if(st==0 && c=='*') | |
226 | st=1; | |
227 | else if(st==1){ | |
228 | if(c=='/') | |
229 | break; | |
230 | st=0; | |
231 | } | |
232 | } | |
233 | ||
234 | return 0; | |
235 | } | |
236 | ||
237 | ||
238 | /* */ | |
239 | ||
240 | ||
241 | static int scan_char_escape(Tokenizer *tokz) | |
242 | { | |
243 | static char* special_chars="nrtbae"; | |
244 | static char* specials="\n\r\t\b\a\033"; | |
245 | int base, max; | |
246 | int i ,c; | |
247 | ||
248 | c=GETCH(); | |
249 | ||
250 | for(i=0;special_chars[i];i++){ | |
251 | if(special_chars[i]==c) | |
252 | return specials[c]; | |
253 | } | |
254 | ||
255 | if(c=='x' || c=='X'){ | |
256 | base=16;max=2;i=0; | |
257 | }else if(c=='d' || c=='D'){ | |
258 | base=10;max=3;i=0; | |
259 | }else if(c=='8' || c=='9'){ | |
260 | base=10;max=2;i=c-'0'; | |
261 | }else if('0'<=c && c<='7'){ | |
262 | base=8;max=2;i=c-'0'; | |
263 | }else if(c=='\n'){ | |
264 | UNGETCH(c); | |
265 | return -2; | |
266 | }else{ | |
267 | return c; | |
268 | } | |
269 | ||
270 | ||
271 | while(--max>=0){ | |
272 | c=GETCH(); | |
273 | ||
274 | if(c==EOF) | |
275 | return EOF; | |
276 | ||
277 | if(c=='\n'){ | |
278 | UNGETCH(c); | |
279 | return -2; | |
280 | } | |
281 | ||
282 | if(base==16){ | |
283 | if(!isxdigit(c)) | |
284 | break; | |
285 | ||
286 | i<<=4; | |
287 | ||
288 | if(isdigit(c)) | |
289 | i+=c-'0'; | |
290 | else if(i>='a') | |
291 | i+=0xa+c-'a'; | |
292 | else | |
293 | i+=0xa+c-'a'; | |
294 | ||
295 | }else if(base==10){ | |
296 | if(!isdigit(c)) | |
297 | break; | |
298 | i*=10; | |
299 | i+=c-'0'; | |
300 | }else{ | |
301 | if(c<'0' || c>'7') | |
302 | break; | |
303 | i<<=3; | |
304 | i+=c-'0'; | |
305 | } | |
306 | } | |
307 | ||
308 | if(max>=0) | |
309 | UNGETCH(c); | |
310 | ||
311 | return i; | |
312 | } | |
313 | ||
314 | ||
315 | /* */ | |
316 | ||
317 | ||
318 | static int scan_string(Token *tok, Tokenizer *tokz, bool escapes) | |
319 | { | |
320 | STRING_DECL(s); | |
321 | int c; | |
322 | ||
323 | while(1){ | |
324 | c=GETCH(); | |
325 | ||
326 | if(c=='"') | |
327 | break; | |
328 | ||
329 | if(c=='\n'){ | |
330 | UNGETCH(c); | |
331 | STRING_FREE(s); | |
332 | return E_TOKZ_UNEXPECTED_EOL; | |
333 | } | |
334 | ||
335 | if(c=='\\' && escapes){ | |
336 | c=scan_char_escape(tokz); | |
337 | if(c==-2){ | |
338 | STRING_FREE(s); | |
339 | return E_TOKZ_UNEXPECTED_EOL; | |
340 | } | |
341 | } | |
342 | ||
343 | if(c==EOF){ | |
344 | STRING_FREE(s); | |
345 | return E_TOKZ_UNEXPECTED_EOF; | |
346 | } | |
347 | ||
348 | STRING_APPEND(s, c); | |
349 | } | |
350 | ||
351 | STRING_FINISH(s); | |
352 | ||
353 | TOK_SET_STRING(tok, s); | |
354 | ||
355 | return 0; | |
356 | } | |
357 | ||
358 | ||
359 | /* */ | |
360 | ||
361 | ||
362 | static int scan_char(Token *tok, Tokenizer *tokz) | |
363 | { | |
364 | int c, c2; | |
365 | ||
366 | c=GETCH(); | |
367 | ||
368 | if(c==EOF) | |
369 | return E_TOKZ_UNEXPECTED_EOF; | |
370 | ||
371 | if(c=='\n') | |
372 | return E_TOKZ_UNEXPECTED_EOL; | |
373 | ||
374 | if(c=='\\'){ | |
375 | c=scan_char_escape(tokz); | |
376 | ||
377 | if(c==EOF) | |
378 | return E_TOKZ_UNEXPECTED_EOF; | |
379 | ||
380 | if(c==-2) | |
381 | return E_TOKZ_UNEXPECTED_EOL; | |
382 | } | |
383 | ||
384 | c2=GETCH(); | |
385 | ||
386 | if(c2!='\'') | |
387 | return E_TOKZ_MULTICHAR; | |
388 | ||
389 | TOK_SET_CHAR(tok, c); | |
390 | ||
391 | return 0; | |
392 | } | |
393 | ||
394 | ||
395 | /* */ | |
396 | ||
397 | ||
398 | #define START_IDENT(X) (isalpha(X) || X=='_' || X=='$') | |
399 | ||
400 | ||
401 | static int scan_identifier(Token *tok, Tokenizer *tokz, int c) | |
402 | { | |
403 | STRING_DECL(s); | |
404 | ||
405 | do{ | |
406 | STRING_APPEND(s, c); | |
407 | c=GETCH(); | |
408 | }while(isalnum(c) || c=='_' || c=='$'); | |
409 | ||
410 | UNGETCH(c); | |
411 | ||
412 | STRING_FINISH(s); | |
413 | ||
414 | TOK_SET_IDENT(tok, s); | |
415 | ||
416 | return 0; | |
417 | } | |
418 | ||
3 | 419 | #define NP_SIMPLE_IMPL |
0 | 420 | #include "numparser2.h" |
421 | #include "np-conv.h" | |
422 | ||
423 | ||
424 | static int scan_number(Token *tok, Tokenizer *tokz, int c) | |
425 | { | |
426 | NPNum num=NUM_INIT; | |
427 | int e; | |
428 | ||
429 | if((e=parse_number(&num, tokz, c))) | |
430 | return e; | |
431 | ||
432 | if(num.type==NPNUM_INT){ | |
433 | long l; | |
434 | if((e=num_to_long(&l, &num, TRUE))) | |
435 | return e; | |
436 | ||
437 | TOK_SET_LONG(tok, l); | |
438 | }else if(num.type==NPNUM_FLOAT){ | |
439 | double d; | |
440 | if((e=num_to_double(&d, &num))) | |
441 | return e; | |
442 | ||
443 | TOK_SET_DOUBLE(tok, d); | |
444 | }else{ | |
445 | return E_TOKZ_NUMFMT; | |
446 | } | |
447 | ||
448 | return 0; | |
449 | } | |
450 | ||
451 | ||
452 | /* */ | |
453 | ||
454 | ||
455 | static uchar op_map[]={ | |
456 | 0x00, /* ________ 0-7 */ | |
457 | 0x00, /* ________ 8-15 */ | |
458 | 0x00, /* ________ 16-23 */ | |
459 | 0x00, /* ________ 24-31 */ | |
460 | 0x62, /* _!___%&_ 32-39 */ | |
461 | 0xff, /* ()*+,-./ 40-47 */ | |
462 | 0x00, /* ________ 48-55 */ | |
463 | 0xfc, /* __:;<=>? 56-63 */ | |
464 | 0x01, /* @_______ 64-71 */ | |
465 | 0x00, /* ________ 72-79 */ | |
466 | 0x00, /* ________ 80-87 */ | |
467 | 0x78, /* ___[_]^_ 88-95 */ | |
468 | 0x00, /* ________ 96-103 */ | |
469 | 0x00, /* ________ 104-111 */ | |
470 | 0x00, /* ________ 112-119 */ | |
471 | 0x38 /* ___{|}__ 120-127 */ | |
472 | }; | |
473 | ||
474 | ||
475 | static bool map_isset(uchar *map, uint ch) | |
476 | { | |
477 | if(ch>127) | |
478 | return FALSE; | |
479 | ||
480 | return map[ch>>3]&(1<<(ch&7)); | |
481 | } | |
482 | ||
483 | ||
484 | static bool is_opch(uint ch) | |
485 | { | |
486 | return map_isset(op_map, ch); | |
487 | } | |
488 | ||
489 | ||
490 | static int scan_op(Token *tok, Tokenizer *tokz, int c) | |
491 | { | |
492 | int c2; | |
493 | int op=-1; | |
494 | ||
495 | /* Quickly check it is an operator character */ | |
496 | if(!is_opch(c)) | |
497 | return E_TOKZ_INVALID_CHAR; | |
498 | ||
499 | switch(c){ | |
500 | case '+': | |
501 | case '-': | |
502 | case '*': | |
503 | /* case '/': Checked elsewhere */ | |
504 | case '%': | |
505 | case '^': | |
506 | case '!': | |
507 | case '=': | |
508 | case '<': | |
509 | case '>': | |
510 | c2=GETCH(); | |
511 | if(c2=='='){ | |
512 | op=c|(c2<<8); | |
513 | }else if(c2==c && (c2!='%' && c2!='!' && c2!='*')){ | |
514 | if(c=='<' || c=='>'){ | |
515 | int c3=GETCH(); | |
516 | if(c3=='='){ | |
517 | op=c|(c2<<8)|(c3<<16); | |
518 | }else{ | |
519 | UNGETCH(c3); | |
520 | op=c|(c2<<8); | |
521 | } | |
522 | }else{ | |
523 | op=c|(c2<<8); | |
524 | } | |
525 | }else{ | |
526 | UNGETCH(c2); | |
527 | op=c; | |
528 | } | |
529 | break; | |
530 | ||
531 | /* It is already known that it is a operator so these are not needed | |
532 | case ':': | |
533 | case '~': | |
534 | case '?': | |
535 | case '.': | |
536 | case ';'; | |
537 | case '{': | |
538 | case '}': | |
539 | case '@': | |
540 | case '|': | |
541 | case '&': | |
542 | */ | |
543 | default: | |
544 | op=c; | |
545 | } | |
546 | ||
547 | TOK_SET_OP(tok, op); | |
548 | ||
549 | return 0; | |
550 | } | |
551 | ||
552 | ||
553 | /* */ | |
554 | ||
555 | ||
21 | 556 | void tokz_warn(const Tokenizer *tokz, int line, const char *fmt, ...) |
557 | { | |
558 | va_list args; | |
559 | ||
560 | va_start(args, fmt); | |
561 | ||
562 | if(tokz!=NULL) | |
563 | warn_obj_line_v(tokz->name, line, fmt, args); | |
564 | else | |
565 | warn(fmt, args); | |
566 | ||
567 | va_end(args); | |
568 | } | |
569 | ||
570 | ||
0 | 571 | void tokz_warn_error(const Tokenizer *tokz, int line, int e) |
572 | { | |
573 | if(e==E_TOKZ_UNEXPECTED_EOF) | |
574 | line=0; | |
575 | ||
21 | 576 | if(e<0) |
577 | tokz_warn(tokz, line, "%s", strerror(-e)); | |
578 | else | |
579 | tokz_warn(tokz, line, "%s", TR(errors[e])); | |
0 | 580 | } |
581 | ||
582 | ||
583 | bool tokz_get_token(Tokenizer *tokz, Token *tok) | |
584 | { | |
585 | int c, c2, e; | |
586 | ||
35 | 587 | if (!(tokz->flags&TOKZ_READ_FROM_BUFFER)) |
1 | 588 | assert(tokz->file!=NULL); |
0 | 589 | |
590 | tok_free(tok); | |
591 | ||
2 | 592 | if(!TOK_IS_INVALID(&(tokz->ungettok))){ |
593 | *tok=tokz->ungettok; | |
594 | tokz->ungettok.type=TOK_INVALID; | |
595 | return TRUE; | |
596 | } | |
597 | ||
0 | 598 | while(1){ |
599 | ||
600 | e=0; | |
601 | ||
602 | do{ | |
603 | c=GETCH(); | |
604 | }while(c!='\n' && c!=EOF && isspace(c)); | |
605 | ||
606 | tok->line=tokz->line; | |
607 | ||
608 | switch(c){ | |
609 | case EOF: | |
610 | TOK_SET_OP(tok, OP_EOF); | |
611 | return TRUE; | |
612 | ||
613 | case '\n': | |
614 | INC_LINE(); | |
615 | ||
616 | if(tokz->flags&TOKZ_IGNORE_NEXTLINE) | |
617 | continue; | |
618 | ||
619 | TOK_SET_OP(tok, OP_NEXTLINE); | |
620 | ||
621 | return TRUE; | |
622 | ||
623 | case '\\': | |
624 | do{ | |
625 | c=GETCH(); | |
626 | if(c==EOF){ | |
627 | TOK_SET_OP(tok, OP_EOF); | |
628 | return FALSE; | |
629 | } | |
2 | 630 | if(!isspace(c) && e==0){ |
631 | e=E_TOKZ_EOL_EXPECTED; | |
632 | tokz_warn_error(tokz, tokz->line, e); | |
633 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
634 | return FALSE; | |
0 | 635 | } |
636 | }while(c!='\n'); | |
637 | ||
638 | INC_LINE(); | |
639 | continue; | |
640 | ||
641 | case '#': | |
642 | if(tokz->flags&TOKZ_READ_COMMENTS){ | |
643 | e=scan_line_comment(tok, tokz); | |
644 | break; | |
645 | }else if((e=skip_line_comment(tokz))){ | |
646 | break; | |
647 | } | |
648 | ||
649 | continue; | |
650 | ||
651 | case '/': | |
2 | 652 | c2=GETCH(); |
653 | ||
654 | if(c2=='='){ | |
655 | TOK_SET_OP(tok, OP_AS_DIV); | |
656 | return TRUE; | |
0 | 657 | } |
658 | ||
2 | 659 | if(c2!='*'){ |
660 | UNGETCH(c2); | |
661 | TOK_SET_OP(tok, OP_DIV); | |
662 | return TRUE; | |
663 | } | |
664 | ||
665 | if(tokz->flags&TOKZ_READ_COMMENTS){ | |
666 | e=scan_c_comment(tok, tokz); | |
667 | break; | |
668 | }else if((e=skip_c_comment(tokz))){ | |
669 | break; | |
670 | } | |
671 | ||
672 | continue; | |
673 | ||
0 | 674 | case '\"': |
675 | e=scan_string(tok, tokz, TRUE); | |
676 | break; | |
677 | ||
678 | case '\'': | |
679 | e=scan_char(tok, tokz); | |
680 | break; | |
681 | ||
682 | default: | |
683 | if(('0'<=c && c<='9') || c=='-' || c=='+'){ | |
684 | e=scan_number(tok, tokz, c); | |
685 | break; | |
686 | } | |
687 | ||
688 | if(START_IDENT(c)) | |
689 | e=scan_identifier(tok, tokz, c); | |
690 | else | |
691 | e=scan_op(tok, tokz, c); | |
692 | } | |
693 | ||
694 | if(!e) | |
695 | return TRUE; | |
696 | ||
697 | tokz_warn_error(tokz, tokz->line, e); | |
698 | return FALSE; | |
699 | } | |
700 | } | |
701 | ||
702 | ||
2 | 703 | void tokz_unget_token(Tokenizer *tokz, Token *tok) |
704 | { | |
705 | tok_free(&(tokz->ungettok)); | |
706 | tokz->ungettok=*tok; | |
707 | tok->type=TOK_INVALID; | |
708 | } | |
709 | ||
710 | ||
1 | 711 | /* |
712 | * File open | |
713 | */ | |
714 | ||
715 | static bool do_tokz_pushf(Tokenizer *tokz) | |
716 | { | |
717 | Tokenizer_FInfo *finfo; | |
718 | ||
719 | finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, | |
720 | tokz->filestack_n, tokz->filestack_n+1); | |
721 | ||
722 | if(finfo==NULL) | |
723 | return FALSE; | |
724 | ||
725 | tokz->filestack=finfo; | |
726 | finfo=&(finfo[tokz->filestack_n++]); | |
727 | ||
728 | finfo->file=tokz->file; | |
729 | finfo->name=tokz->name; | |
730 | finfo->line=tokz->line; | |
731 | finfo->ungetc=tokz->ungetc; | |
2 | 732 | finfo->ungettok=tokz->ungettok; |
733 | ||
1 | 734 | return TRUE; |
735 | } | |
736 | ||
737 | ||
14 | 738 | bool tokz_pushf_file(Tokenizer *tokz, FILE *file, const char *fname) |
1 | 739 | { |
14 | 740 | char *fname_copy=NULL; |
741 | ||
1 | 742 | if(file==NULL) |
743 | return FALSE; | |
14 | 744 | |
745 | if(fname!=NULL){ | |
746 | fname_copy=scopy(fname); | |
747 | if(fname_copy==NULL){ | |
1 | 748 | warn_err(); |
749 | return FALSE; | |
750 | } | |
751 | } | |
752 | ||
14 | 753 | if(tokz->file!=NULL){ |
754 | if(!do_tokz_pushf(tokz)){ | |
755 | warn_err(); | |
756 | if(fname_copy!=NULL) | |
757 | free(fname_copy); | |
758 | return FALSE; | |
759 | } | |
760 | } | |
761 | ||
1 | 762 | tokz->file=file; |
14 | 763 | tokz->name=fname_copy; |
1 | 764 | tokz->line=1; |
2 | 765 | tokz->ungetc=-1; |
766 | tokz->ungettok.type=TOK_INVALID; | |
1 | 767 | |
768 | return TRUE; | |
769 | } | |
770 | ||
771 | ||
772 | bool tokz_pushf(Tokenizer *tokz, const char *fname) | |
773 | { | |
774 | FILE *file; | |
0 | 775 | |
776 | file=fopen(fname, "r"); | |
777 | ||
778 | if(file==NULL){ | |
779 | warn_err_obj(fname); | |
1 | 780 | return FALSE; |
0 | 781 | } |
782 | ||
14 | 783 | if(!tokz_pushf_file(tokz, file, fname)){ |
784 | fclose(file); | |
785 | return FALSE; | |
1 | 786 | } |
787 | ||
788 | return TRUE; | |
0 | 789 | } |
790 | ||
791 | ||
1 | 792 | |
793 | static Tokenizer *tokz_create() | |
0 | 794 | { |
795 | Tokenizer*tokz; | |
796 | ||
797 | tokz=ALLOC(Tokenizer); | |
798 | ||
799 | if(tokz==NULL){ | |
800 | warn_err(); | |
801 | return NULL; | |
802 | } | |
803 | ||
1 | 804 | tokz->file=NULL; |
0 | 805 | tokz->name=NULL; |
806 | tokz->line=1; | |
2 | 807 | tokz->ungetc=-1; |
808 | tokz->ungettok.type=TOK_INVALID; | |
0 | 809 | tokz->flags=0; |
810 | tokz->optstack=NULL; | |
811 | tokz->nest_lvl=0; | |
1 | 812 | tokz->filestack_n=0; |
813 | tokz->filestack=NULL; | |
35 | 814 | tokz->buffer.data=0; |
815 | tokz->buffer.len=0; | |
816 | tokz->buffer.pos=0; | |
1 | 817 | |
818 | return tokz; | |
819 | } | |
820 | ||
821 | ||
822 | Tokenizer *tokz_open(const char *fname) | |
823 | { | |
824 | Tokenizer *tokz; | |
825 | ||
826 | tokz=tokz_create(); | |
827 | ||
828 | if(!tokz_pushf(tokz, fname)){ | |
829 | free(tokz); | |
830 | return NULL; | |
831 | } | |
832 | ||
833 | return tokz; | |
834 | } | |
835 | ||
836 | ||
14 | 837 | Tokenizer *tokz_open_file(FILE *file, const char *fname) |
1 | 838 | { |
839 | Tokenizer *tokz; | |
840 | ||
841 | tokz=tokz_create(); | |
842 | ||
14 | 843 | if(!tokz_pushf_file(tokz, file, fname)){ |
1 | 844 | free(tokz); |
845 | return NULL; | |
846 | } | |
0 | 847 | |
848 | return tokz; | |
849 | } | |
850 | ||
35 | 851 | Tokenizer *tokz_prepare_buffer(char *buffer, int len) |
852 | { | |
853 | Tokenizer *tokz; | |
854 | char old=0; | |
855 | ||
856 | tokz=tokz_create(); | |
857 | if (len>0) { | |
858 | old=buffer[len-1]; | |
859 | buffer[len-1]='\0'; | |
860 | } | |
861 | ||
862 | tokz->flags|=TOKZ_READ_FROM_BUFFER; | |
863 | tokz->buffer.data=scopy(buffer); | |
864 | tokz->buffer.len=len>0 ? len : strlen(tokz->buffer.data); | |
865 | tokz->buffer.pos=0; | |
866 | ||
867 | if (old>0) | |
868 | buffer[len-1]=old; | |
869 | ||
870 | return tokz; | |
871 | } | |
0 | 872 | |
1 | 873 | /* |
874 | * File close | |
875 | */ | |
876 | ||
877 | static bool do_tokz_popf(Tokenizer *tokz, bool shrink) | |
878 | { | |
879 | Tokenizer_FInfo *finfo; | |
880 | ||
881 | if(tokz->filestack_n<=0) | |
882 | return FALSE; | |
883 | ||
884 | if(tokz->file!=NULL) | |
885 | fclose(tokz->file); | |
886 | if(tokz->name!=NULL) | |
887 | free(tokz->name); | |
888 | ||
889 | finfo=&(tokz->filestack[--tokz->filestack_n]); | |
890 | ||
891 | tokz->file=finfo->file; | |
892 | tokz->name=finfo->name; | |
893 | tokz->line=finfo->line; | |
894 | tokz->ungetc=finfo->ungetc; | |
2 | 895 | tokz->ungettok=finfo->ungettok; |
896 | ||
1 | 897 | if(tokz->filestack_n==0){ |
898 | free(tokz->filestack); | |
899 | tokz->filestack=NULL; | |
900 | }else if(shrink){ | |
901 | finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, | |
902 | tokz->filestack_n+1, tokz->filestack_n); | |
903 | if(finfo==NULL) | |
904 | warn_err(); | |
905 | else | |
906 | tokz->filestack=finfo; | |
907 | } | |
908 | ||
909 | return TRUE; | |
910 | } | |
911 | ||
912 | ||
913 | bool tokz_popf(Tokenizer *tokz) | |
914 | { | |
915 | return do_tokz_popf(tokz, TRUE); | |
916 | } | |
917 | ||
918 | ||
0 | 919 | void tokz_close(Tokenizer *tokz) |
920 | { | |
1 | 921 | while(tokz->filestack_n>0) |
922 | do_tokz_popf(tokz, FALSE); | |
923 | ||
0 | 924 | if(tokz->file!=NULL) |
925 | fclose(tokz->file); | |
1 | 926 | if(tokz->name!=NULL) |
927 | free(tokz->name); | |
2 | 928 | tok_free(&(tokz->ungettok)); |
929 | ||
0 | 930 | free(tokz); |
931 | } | |
932 | ||
933 | ||
1 | 934 | |
0 | 935 | /* */ |
936 | ||
937 | ||
938 | void tok_free(Token *tok) | |
939 | { | |
2 | 940 | if(TOK_IS_STRING(tok) || TOK_IS_IDENT(tok) || TOK_IS_COMMENT(tok)){ |
941 | if(TOK_STRING_VAL(tok)!=NULL) | |
942 | free(TOK_STRING_VAL(tok)); | |
943 | } | |
0 | 944 | |
945 | tok->type=TOK_INVALID; | |
946 | } | |
947 | ||
948 | ||
949 | void tok_init(Token *tok) | |
950 | { | |
951 | static Token dummy=TOK_INIT; | |
952 | ||
953 | memcpy(tok, &dummy, sizeof(*tok)); | |
954 | } | |
955 |