Mon, 16 Feb 2004 00:44:56 +0100
trunk: changeset 1303
Moved header files to top level directory.
0 | 1 | /* |
2 | * libtu/tokenizer.c | |
3 | * | |
36 | 4 | * Copyright (c) Tuomo Valkonen 1999-2002. |
53 | 5 | * |
6 | * You may distribute and modify this library under the terms of either | |
7 | * the Clarified Artistic License or the GNU LGPL, version 2.1 or later. | |
0 | 8 | */ |
9 | ||
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <libtu/tokenizer.h>
#include <libtu/misc.h>
#include <libtu/output.h>
0 | 21 | |
22 | ||
23 | static const char *errors[]={ | |
24 | DUMMY_TR("(no error)"), | |
25 | DUMMY_TR("Unexpected end of file"), /* E_TOKZ_UNEXPECTED_EOF */ | |
26 | DUMMY_TR("Unexpected end of line"), /* E_TOKZ_UNEXPECTED_EOL */ | |
27 | DUMMY_TR("End of line expected"), /* E_TOKZ_EOL_EXPECTED */ | |
28 | DUMMY_TR("Invalid character"), /* E_TOKZ_INVALID_CHAR*/ | |
29 | DUMMY_TR("Numeric constant too big"), /* E_TOKZ_TOOBIG */ | |
30 | DUMMY_TR("Invalid numberic format"), /* E_TOKZ_NUMFMT */ | |
31 | DUMMY_TR("Junk after numeric constant"), /* E_TOKZ_NUM_JUNK */ | |
32 | DUMMY_TR("Not an integer"), /* E_TOKZ_NOTINT */ | |
33 | DUMMY_TR("Numeric constant out of range"), /* E_TOKZ_RANGE */ | |
34 | DUMMY_TR("Multi-character character constant"), /* E_TOKZ_MULTICHAR */ | |
35 | DUMMY_TR("Token/statement limit reached"), /* E_TOKZ_TOKEN_LIMIT */ | |
36 | DUMMY_TR("Unknown option"), /* E_TOKZ_UNKONWN_OPTION */ | |
37 | DUMMY_TR("Syntax error"), /* E_TOKZ_SYNTAX */ | |
38 | DUMMY_TR("Invalid argument"), /* E_TOKZ_INVALID_ARGUMENT */ | |
39 | DUMMY_TR("End of statement expected"), /* E_TOKZ_EOS_EXPECTED */ | |
40 | DUMMY_TR("Too few arguments"), /* E_TOKZ_TOO_FEW_ARGS */ | |
41 | DUMMY_TR("Too many arguments"), /* E_TOKZ_TOO_MANY_ARGS */ | |
42 | DUMMY_TR("Maximum section nestin level exceeded"), /* E_TOK_Z_MAX_NEST */ | |
43 | DUMMY_TR("Identifier expected"), /* E_TOKZ_IDENTIFIER_EXPECTED */ | |
2 | 44 | DUMMY_TR("Starting brace ('{') expected"), /* E_TOKZ_LBRACE_EXPECTED */ |
0 | 45 | }; |
46 | ||
47 | ||
48 | /* */ | |
49 | ||
/* Size of the on-stack staging buffer used while collecting token text. */
#define STRBLEN 32

/*
 * Helpers for building a heap string one character at a time.
 *
 * STRING_DECL(X) / STRING_DECL_P(X, P) declare, in the current scope:
 *   err       - sticky error status (0, or -ENOMEM once an allocation failed)
 *   X         - the heap string built so far (NULL until the first flush)
 *   X_tmp     - staging buffer of STRBLEN bytes
 *   X_tmpl    - number of characters currently staged
 * STRING_DECL_P additionally pre-loads the staging buffer with the string
 * literal P, which must be shorter than STRBLEN.
 *
 * STRING_APPEND(X, C) stages character C, recording -ENOMEM in err on
 * allocation failure.
 * STRING_FREE(X) releases X and resets it to NULL.
 * STRING_FINISH(X) flushes the staged characters into X.  If any append or
 * the final flush failed, it frees X and makes the *enclosing function*
 * return the negative error code.
 *
 * The statement-like macros are wrapped in do{}while(0) so that they behave
 * as a single statement after an unbraced if/else.
 */
#define STRING_DECL(X) int err=0; char* X=NULL; char X##_tmp[STRBLEN]; int X##_tmpl=0
#define STRING_DECL_P(X, P) int err=0; char* X=NULL; char X##_tmp[STRBLEN]=P; int X##_tmpl=sizeof(P)-1
#define STRING_APPEND(X, C) \
    do{ \
        if(!_string_append(&X, X##_tmp, &X##_tmpl, (C))) \
            err=-ENOMEM; \
    }while(0)
#define STRING_FREE(X) \
    do{ \
        if(X!=NULL){ \
            free(X); \
            X=NULL; \
        } \
    }while(0)
#define STRING_FINISH(X) \
    do{ \
        if(err==0 && !_string_finish(&X, X##_tmp, X##_tmpl)) \
            err=-ENOMEM; \
        if(err!=0){ \
            STRING_FREE(X); \
            return err; \
        } \
    }while(0)
0 | 57 | |
58 | ||
59 | static bool _string_append(char **p, char *tmp, int *tmplen, char c) | |
60 | { | |
61 | char *tmp2; | |
62 | ||
63 | if(*tmplen==STRBLEN-1){ | |
64 | tmp[STRBLEN-1]='\0'; | |
65 | if(*p!=NULL){ | |
66 | tmp2=scat(*p, tmp); | |
67 | free(*p); | |
68 | *p=tmp2; | |
69 | }else{ | |
70 | *p=scopy(tmp); | |
71 | } | |
72 | *tmplen=1; | |
73 | tmp[0]=c; | |
74 | return *p!=NULL; | |
75 | }else{ | |
76 | tmp[(*tmplen)++]=c; | |
77 | return TRUE; | |
78 | } | |
79 | } | |
80 | ||
81 | ||
82 | static bool _string_finish(char **p, char *tmp, int tmplen) | |
83 | { | |
84 | char *tmp2; | |
85 | ||
86 | if(tmplen==0){ | |
87 | if(*p==NULL) | |
88 | *p=scopy(""); | |
89 | }else{ | |
90 | tmp[tmplen]='\0'; | |
91 | if(*p!=NULL){ | |
92 | tmp2=scat(*p, tmp); | |
93 | free(*p); | |
94 | *p=tmp2; | |
95 | }else{ | |
96 | *p=scopy(tmp); | |
97 | } | |
98 | } | |
99 | return *p!=NULL; | |
100 | } | |
101 | ||
102 | ||
103 | /* */ | |
104 | ||
105 | ||
/*
 * Shorthands used by the scanners below.  They all expect a variable named
 * `tokz` (the current Tokenizer) to be in scope at the point of use.
 */
#define INC_LINE() (tokz->line++)
#define GETCH() (_getch(tokz))
#define UNGETCH(C) (_ungetch(tokz, (C)))
109 | ||
110 | static int _getch(Tokenizer *tokz) | |
111 | { | |
112 | int c; | |
113 | ||
114 | if(tokz->ungetc!=-1){ | |
115 | c=tokz->ungetc; | |
116 | tokz->ungetc=-1; | |
35 | 117 | }else if (tokz->flags&TOKZ_READ_FROM_BUFFER) { |
118 | assert(tokz->buffer.data!=NULL); | |
119 | if (tokz->buffer.pos==tokz->buffer.len) | |
120 | c=EOF; | |
121 | else | |
122 | c=tokz->buffer.data[tokz->buffer.pos++]; | |
0 | 123 | }else{ |
124 | c=getc(tokz->file); | |
125 | } | |
20 | 126 | |
0 | 127 | return c; |
128 | } | |
129 | ||
130 | ||
131 | static void _ungetch(Tokenizer *tokz, int c) | |
132 | { | |
133 | tokz->ungetc=c; | |
134 | } | |
135 | ||
136 | ||
137 | /* */ | |
138 | ||
139 | ||
140 | static int scan_line_comment(Token *tok, Tokenizer *tokz) | |
141 | { | |
142 | STRING_DECL_P(s, "#"); | |
143 | int c; | |
144 | ||
145 | c=GETCH(); | |
146 | ||
147 | while(c!='\n' && c!=EOF){ | |
148 | STRING_APPEND(s, c); | |
149 | c=GETCH(); | |
150 | } | |
151 | ||
152 | UNGETCH(c); | |
153 | ||
154 | STRING_FINISH(s); | |
155 | ||
156 | TOK_SET_COMMENT(tok, s); | |
157 | ||
158 | return 0; | |
159 | } | |
160 | ||
161 | ||
162 | static int skip_line_comment(Tokenizer *tokz) | |
163 | { | |
164 | int c; | |
165 | ||
166 | do{ | |
167 | c=GETCH(); | |
168 | }while(c!='\n' && c!=EOF); | |
169 | ||
170 | UNGETCH(c); | |
171 | ||
172 | return 0; | |
173 | } | |
174 | ||
175 | ||
176 | /* */ | |
177 | ||
178 | ||
179 | static int scan_c_comment(Token *tok, Tokenizer *tokz) | |
180 | { | |
181 | STRING_DECL_P(s, "/*"); | |
182 | int c; | |
183 | int st=0; | |
184 | ||
185 | while(1){ | |
186 | c=GETCH(); | |
187 | ||
188 | if(c==EOF){ | |
189 | STRING_FREE(s); | |
190 | return E_TOKZ_UNEXPECTED_EOF; | |
191 | } | |
192 | ||
193 | STRING_APPEND(s, c); | |
194 | ||
195 | if(c=='\n'){ | |
196 | INC_LINE(); | |
197 | }else if(st==0 && c=='*'){ | |
198 | st=1; | |
199 | }else if(st==1){ | |
200 | if(c=='/') | |
201 | break; | |
202 | st=0; | |
203 | } | |
204 | } | |
205 | ||
206 | STRING_FINISH(s); | |
207 | ||
208 | TOK_SET_COMMENT(tok, s); | |
209 | ||
210 | return 0; | |
211 | } | |
212 | ||
213 | ||
214 | static int skip_c_comment(Tokenizer *tokz) | |
215 | { | |
216 | int c; | |
217 | int st=0; | |
218 | ||
219 | while(1){ | |
220 | c=GETCH(); | |
221 | ||
222 | if(c==EOF) | |
223 | return E_TOKZ_UNEXPECTED_EOF; | |
224 | ||
225 | if(c=='\n') | |
226 | INC_LINE(); | |
227 | else if(st==0 && c=='*') | |
228 | st=1; | |
229 | else if(st==1){ | |
230 | if(c=='/') | |
231 | break; | |
232 | st=0; | |
233 | } | |
234 | } | |
235 | ||
236 | return 0; | |
237 | } | |
238 | ||
239 | ||
240 | /* */ | |
241 | ||
242 | ||
243 | static int scan_char_escape(Tokenizer *tokz) | |
244 | { | |
245 | static char* special_chars="nrtbae"; | |
246 | static char* specials="\n\r\t\b\a\033"; | |
247 | int base, max; | |
248 | int i ,c; | |
249 | ||
250 | c=GETCH(); | |
251 | ||
252 | for(i=0;special_chars[i];i++){ | |
253 | if(special_chars[i]==c) | |
254 | return specials[c]; | |
255 | } | |
256 | ||
257 | if(c=='x' || c=='X'){ | |
258 | base=16;max=2;i=0; | |
259 | }else if(c=='d' || c=='D'){ | |
260 | base=10;max=3;i=0; | |
261 | }else if(c=='8' || c=='9'){ | |
262 | base=10;max=2;i=c-'0'; | |
263 | }else if('0'<=c && c<='7'){ | |
264 | base=8;max=2;i=c-'0'; | |
265 | }else if(c=='\n'){ | |
266 | UNGETCH(c); | |
267 | return -2; | |
268 | }else{ | |
269 | return c; | |
270 | } | |
271 | ||
272 | ||
273 | while(--max>=0){ | |
274 | c=GETCH(); | |
275 | ||
276 | if(c==EOF) | |
277 | return EOF; | |
278 | ||
279 | if(c=='\n'){ | |
280 | UNGETCH(c); | |
281 | return -2; | |
282 | } | |
283 | ||
284 | if(base==16){ | |
285 | if(!isxdigit(c)) | |
286 | break; | |
287 | ||
288 | i<<=4; | |
289 | ||
290 | if(isdigit(c)) | |
291 | i+=c-'0'; | |
292 | else if(i>='a') | |
293 | i+=0xa+c-'a'; | |
294 | else | |
295 | i+=0xa+c-'a'; | |
296 | ||
297 | }else if(base==10){ | |
298 | if(!isdigit(c)) | |
299 | break; | |
300 | i*=10; | |
301 | i+=c-'0'; | |
302 | }else{ | |
303 | if(c<'0' || c>'7') | |
304 | break; | |
305 | i<<=3; | |
306 | i+=c-'0'; | |
307 | } | |
308 | } | |
309 | ||
310 | if(max>=0) | |
311 | UNGETCH(c); | |
312 | ||
313 | return i; | |
314 | } | |
315 | ||
316 | ||
317 | /* */ | |
318 | ||
319 | ||
320 | static int scan_string(Token *tok, Tokenizer *tokz, bool escapes) | |
321 | { | |
322 | STRING_DECL(s); | |
323 | int c; | |
324 | ||
325 | while(1){ | |
326 | c=GETCH(); | |
327 | ||
328 | if(c=='"') | |
329 | break; | |
330 | ||
331 | if(c=='\n'){ | |
332 | UNGETCH(c); | |
333 | STRING_FREE(s); | |
334 | return E_TOKZ_UNEXPECTED_EOL; | |
335 | } | |
336 | ||
337 | if(c=='\\' && escapes){ | |
338 | c=scan_char_escape(tokz); | |
339 | if(c==-2){ | |
340 | STRING_FREE(s); | |
341 | return E_TOKZ_UNEXPECTED_EOL; | |
342 | } | |
343 | } | |
344 | ||
345 | if(c==EOF){ | |
346 | STRING_FREE(s); | |
347 | return E_TOKZ_UNEXPECTED_EOF; | |
348 | } | |
349 | ||
350 | STRING_APPEND(s, c); | |
351 | } | |
352 | ||
353 | STRING_FINISH(s); | |
354 | ||
355 | TOK_SET_STRING(tok, s); | |
356 | ||
357 | return 0; | |
358 | } | |
359 | ||
360 | ||
361 | /* */ | |
362 | ||
363 | ||
364 | static int scan_char(Token *tok, Tokenizer *tokz) | |
365 | { | |
366 | int c, c2; | |
367 | ||
368 | c=GETCH(); | |
369 | ||
370 | if(c==EOF) | |
371 | return E_TOKZ_UNEXPECTED_EOF; | |
372 | ||
373 | if(c=='\n') | |
374 | return E_TOKZ_UNEXPECTED_EOL; | |
375 | ||
376 | if(c=='\\'){ | |
377 | c=scan_char_escape(tokz); | |
378 | ||
379 | if(c==EOF) | |
380 | return E_TOKZ_UNEXPECTED_EOF; | |
381 | ||
382 | if(c==-2) | |
383 | return E_TOKZ_UNEXPECTED_EOL; | |
384 | } | |
385 | ||
386 | c2=GETCH(); | |
387 | ||
388 | if(c2!='\'') | |
389 | return E_TOKZ_MULTICHAR; | |
390 | ||
391 | TOK_SET_CHAR(tok, c); | |
392 | ||
393 | return 0; | |
394 | } | |
395 | ||
396 | ||
397 | /* */ | |
398 | ||
399 | ||
/* True if X may begin an identifier: a letter, underscore or dollar sign. */
#define START_IDENT(X) (isalpha(X) || (X)=='_' || (X)=='$')
401 | ||
402 | ||
403 | static int scan_identifier(Token *tok, Tokenizer *tokz, int c) | |
404 | { | |
405 | STRING_DECL(s); | |
406 | ||
407 | do{ | |
408 | STRING_APPEND(s, c); | |
409 | c=GETCH(); | |
410 | }while(isalnum(c) || c=='_' || c=='$'); | |
411 | ||
412 | UNGETCH(c); | |
413 | ||
414 | STRING_FINISH(s); | |
415 | ||
416 | TOK_SET_IDENT(tok, s); | |
417 | ||
418 | return 0; | |
419 | } | |
420 | ||
3 | 421 | #define NP_SIMPLE_IMPL |
0 | 422 | #include "numparser2.h" |
423 | #include "np-conv.h" | |
424 | ||
425 | ||
426 | static int scan_number(Token *tok, Tokenizer *tokz, int c) | |
427 | { | |
428 | NPNum num=NUM_INIT; | |
429 | int e; | |
430 | ||
431 | if((e=parse_number(&num, tokz, c))) | |
432 | return e; | |
433 | ||
434 | if(num.type==NPNUM_INT){ | |
435 | long l; | |
436 | if((e=num_to_long(&l, &num, TRUE))) | |
437 | return e; | |
438 | ||
439 | TOK_SET_LONG(tok, l); | |
440 | }else if(num.type==NPNUM_FLOAT){ | |
441 | double d; | |
442 | if((e=num_to_double(&d, &num))) | |
443 | return e; | |
444 | ||
445 | TOK_SET_DOUBLE(tok, d); | |
446 | }else{ | |
447 | return E_TOKZ_NUMFMT; | |
448 | } | |
449 | ||
450 | return 0; | |
451 | } | |
452 | ||
453 | ||
454 | /* */ | |
455 | ||
456 | ||
457 | static uchar op_map[]={ | |
458 | 0x00, /* ________ 0-7 */ | |
459 | 0x00, /* ________ 8-15 */ | |
460 | 0x00, /* ________ 16-23 */ | |
461 | 0x00, /* ________ 24-31 */ | |
462 | 0x62, /* _!___%&_ 32-39 */ | |
463 | 0xff, /* ()*+,-./ 40-47 */ | |
464 | 0x00, /* ________ 48-55 */ | |
465 | 0xfc, /* __:;<=>? 56-63 */ | |
466 | 0x01, /* @_______ 64-71 */ | |
467 | 0x00, /* ________ 72-79 */ | |
468 | 0x00, /* ________ 80-87 */ | |
469 | 0x78, /* ___[_]^_ 88-95 */ | |
470 | 0x00, /* ________ 96-103 */ | |
471 | 0x00, /* ________ 104-111 */ | |
472 | 0x00, /* ________ 112-119 */ | |
473 | 0x38 /* ___{|}__ 120-127 */ | |
474 | }; | |
475 | ||
476 | ||
477 | static bool map_isset(uchar *map, uint ch) | |
478 | { | |
479 | if(ch>127) | |
480 | return FALSE; | |
481 | ||
482 | return map[ch>>3]&(1<<(ch&7)); | |
483 | } | |
484 | ||
485 | ||
486 | static bool is_opch(uint ch) | |
487 | { | |
488 | return map_isset(op_map, ch); | |
489 | } | |
490 | ||
491 | ||
492 | static int scan_op(Token *tok, Tokenizer *tokz, int c) | |
493 | { | |
494 | int c2; | |
495 | int op=-1; | |
496 | ||
497 | /* Quickly check it is an operator character */ | |
498 | if(!is_opch(c)) | |
499 | return E_TOKZ_INVALID_CHAR; | |
500 | ||
501 | switch(c){ | |
502 | case '+': | |
503 | case '-': | |
504 | case '*': | |
505 | /* case '/': Checked elsewhere */ | |
506 | case '%': | |
507 | case '^': | |
508 | case '!': | |
509 | case '=': | |
510 | case '<': | |
511 | case '>': | |
512 | c2=GETCH(); | |
513 | if(c2=='='){ | |
514 | op=c|(c2<<8); | |
515 | }else if(c2==c && (c2!='%' && c2!='!' && c2!='*')){ | |
516 | if(c=='<' || c=='>'){ | |
517 | int c3=GETCH(); | |
518 | if(c3=='='){ | |
519 | op=c|(c2<<8)|(c3<<16); | |
520 | }else{ | |
521 | UNGETCH(c3); | |
522 | op=c|(c2<<8); | |
523 | } | |
524 | }else{ | |
525 | op=c|(c2<<8); | |
526 | } | |
527 | }else{ | |
528 | UNGETCH(c2); | |
529 | op=c; | |
530 | } | |
531 | break; | |
532 | ||
533 | /* It is already known that it is a operator so these are not needed | |
534 | case ':': | |
535 | case '~': | |
536 | case '?': | |
537 | case '.': | |
538 | case ';'; | |
539 | case '{': | |
540 | case '}': | |
541 | case '@': | |
542 | case '|': | |
543 | case '&': | |
544 | */ | |
545 | default: | |
546 | op=c; | |
547 | } | |
548 | ||
549 | TOK_SET_OP(tok, op); | |
550 | ||
551 | return 0; | |
552 | } | |
553 | ||
554 | ||
555 | /* */ | |
556 | ||
557 | ||
21 | 558 | void tokz_warn(const Tokenizer *tokz, int line, const char *fmt, ...) |
559 | { | |
560 | va_list args; | |
561 | ||
562 | va_start(args, fmt); | |
563 | ||
564 | if(tokz!=NULL) | |
565 | warn_obj_line_v(tokz->name, line, fmt, args); | |
566 | else | |
567 | warn(fmt, args); | |
568 | ||
569 | va_end(args); | |
570 | } | |
571 | ||
572 | ||
0 | 573 | void tokz_warn_error(const Tokenizer *tokz, int line, int e) |
574 | { | |
575 | if(e==E_TOKZ_UNEXPECTED_EOF) | |
576 | line=0; | |
577 | ||
21 | 578 | if(e<0) |
579 | tokz_warn(tokz, line, "%s", strerror(-e)); | |
580 | else | |
581 | tokz_warn(tokz, line, "%s", TR(errors[e])); | |
0 | 582 | } |
583 | ||
584 | ||
585 | bool tokz_get_token(Tokenizer *tokz, Token *tok) | |
586 | { | |
587 | int c, c2, e; | |
588 | ||
35 | 589 | if (!(tokz->flags&TOKZ_READ_FROM_BUFFER)) |
1 | 590 | assert(tokz->file!=NULL); |
0 | 591 | |
592 | tok_free(tok); | |
593 | ||
2 | 594 | if(!TOK_IS_INVALID(&(tokz->ungettok))){ |
595 | *tok=tokz->ungettok; | |
596 | tokz->ungettok.type=TOK_INVALID; | |
597 | return TRUE; | |
598 | } | |
599 | ||
0 | 600 | while(1){ |
601 | ||
602 | e=0; | |
603 | ||
604 | do{ | |
605 | c=GETCH(); | |
606 | }while(c!='\n' && c!=EOF && isspace(c)); | |
607 | ||
608 | tok->line=tokz->line; | |
609 | ||
610 | switch(c){ | |
611 | case EOF: | |
612 | TOK_SET_OP(tok, OP_EOF); | |
613 | return TRUE; | |
614 | ||
615 | case '\n': | |
616 | INC_LINE(); | |
617 | ||
618 | if(tokz->flags&TOKZ_IGNORE_NEXTLINE) | |
619 | continue; | |
620 | ||
621 | TOK_SET_OP(tok, OP_NEXTLINE); | |
622 | ||
623 | return TRUE; | |
624 | ||
625 | case '\\': | |
626 | do{ | |
627 | c=GETCH(); | |
628 | if(c==EOF){ | |
629 | TOK_SET_OP(tok, OP_EOF); | |
630 | return FALSE; | |
631 | } | |
2 | 632 | if(!isspace(c) && e==0){ |
633 | e=E_TOKZ_EOL_EXPECTED; | |
634 | tokz_warn_error(tokz, tokz->line, e); | |
635 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
636 | return FALSE; | |
0 | 637 | } |
638 | }while(c!='\n'); | |
639 | ||
640 | INC_LINE(); | |
641 | continue; | |
642 | ||
643 | case '#': | |
644 | if(tokz->flags&TOKZ_READ_COMMENTS){ | |
645 | e=scan_line_comment(tok, tokz); | |
646 | break; | |
647 | }else if((e=skip_line_comment(tokz))){ | |
648 | break; | |
649 | } | |
650 | ||
651 | continue; | |
652 | ||
653 | case '/': | |
2 | 654 | c2=GETCH(); |
655 | ||
656 | if(c2=='='){ | |
657 | TOK_SET_OP(tok, OP_AS_DIV); | |
658 | return TRUE; | |
0 | 659 | } |
660 | ||
2 | 661 | if(c2!='*'){ |
662 | UNGETCH(c2); | |
663 | TOK_SET_OP(tok, OP_DIV); | |
664 | return TRUE; | |
665 | } | |
666 | ||
667 | if(tokz->flags&TOKZ_READ_COMMENTS){ | |
668 | e=scan_c_comment(tok, tokz); | |
669 | break; | |
670 | }else if((e=skip_c_comment(tokz))){ | |
671 | break; | |
672 | } | |
673 | ||
674 | continue; | |
675 | ||
0 | 676 | case '\"': |
677 | e=scan_string(tok, tokz, TRUE); | |
678 | break; | |
679 | ||
680 | case '\'': | |
681 | e=scan_char(tok, tokz); | |
682 | break; | |
683 | ||
684 | default: | |
685 | if(('0'<=c && c<='9') || c=='-' || c=='+'){ | |
686 | e=scan_number(tok, tokz, c); | |
687 | break; | |
688 | } | |
689 | ||
690 | if(START_IDENT(c)) | |
691 | e=scan_identifier(tok, tokz, c); | |
692 | else | |
693 | e=scan_op(tok, tokz, c); | |
694 | } | |
695 | ||
696 | if(!e) | |
697 | return TRUE; | |
698 | ||
699 | tokz_warn_error(tokz, tokz->line, e); | |
700 | return FALSE; | |
701 | } | |
702 | } | |
703 | ||
704 | ||
2 | 705 | void tokz_unget_token(Tokenizer *tokz, Token *tok) |
706 | { | |
707 | tok_free(&(tokz->ungettok)); | |
708 | tokz->ungettok=*tok; | |
709 | tok->type=TOK_INVALID; | |
710 | } | |
711 | ||
712 | ||
1 | 713 | /* |
714 | * File open | |
715 | */ | |
716 | ||
717 | static bool do_tokz_pushf(Tokenizer *tokz) | |
718 | { | |
719 | Tokenizer_FInfo *finfo; | |
720 | ||
721 | finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, | |
722 | tokz->filestack_n, tokz->filestack_n+1); | |
723 | ||
724 | if(finfo==NULL) | |
725 | return FALSE; | |
726 | ||
727 | tokz->filestack=finfo; | |
728 | finfo=&(finfo[tokz->filestack_n++]); | |
729 | ||
730 | finfo->file=tokz->file; | |
731 | finfo->name=tokz->name; | |
732 | finfo->line=tokz->line; | |
733 | finfo->ungetc=tokz->ungetc; | |
2 | 734 | finfo->ungettok=tokz->ungettok; |
735 | ||
1 | 736 | return TRUE; |
737 | } | |
738 | ||
739 | ||
14 | 740 | bool tokz_pushf_file(Tokenizer *tokz, FILE *file, const char *fname) |
1 | 741 | { |
14 | 742 | char *fname_copy=NULL; |
743 | ||
1 | 744 | if(file==NULL) |
745 | return FALSE; | |
14 | 746 | |
747 | if(fname!=NULL){ | |
748 | fname_copy=scopy(fname); | |
749 | if(fname_copy==NULL){ | |
1 | 750 | warn_err(); |
751 | return FALSE; | |
752 | } | |
753 | } | |
754 | ||
14 | 755 | if(tokz->file!=NULL){ |
756 | if(!do_tokz_pushf(tokz)){ | |
757 | warn_err(); | |
758 | if(fname_copy!=NULL) | |
759 | free(fname_copy); | |
760 | return FALSE; | |
761 | } | |
762 | } | |
763 | ||
1 | 764 | tokz->file=file; |
14 | 765 | tokz->name=fname_copy; |
1 | 766 | tokz->line=1; |
2 | 767 | tokz->ungetc=-1; |
768 | tokz->ungettok.type=TOK_INVALID; | |
1 | 769 | |
770 | return TRUE; | |
771 | } | |
772 | ||
773 | ||
774 | bool tokz_pushf(Tokenizer *tokz, const char *fname) | |
775 | { | |
776 | FILE *file; | |
0 | 777 | |
778 | file=fopen(fname, "r"); | |
779 | ||
780 | if(file==NULL){ | |
781 | warn_err_obj(fname); | |
1 | 782 | return FALSE; |
0 | 783 | } |
784 | ||
14 | 785 | if(!tokz_pushf_file(tokz, file, fname)){ |
786 | fclose(file); | |
787 | return FALSE; | |
1 | 788 | } |
789 | ||
790 | return TRUE; | |
0 | 791 | } |
792 | ||
793 | ||
1 | 794 | |
795 | static Tokenizer *tokz_create() | |
0 | 796 | { |
797 | Tokenizer*tokz; | |
798 | ||
799 | tokz=ALLOC(Tokenizer); | |
800 | ||
801 | if(tokz==NULL){ | |
802 | warn_err(); | |
803 | return NULL; | |
804 | } | |
805 | ||
1 | 806 | tokz->file=NULL; |
0 | 807 | tokz->name=NULL; |
808 | tokz->line=1; | |
2 | 809 | tokz->ungetc=-1; |
810 | tokz->ungettok.type=TOK_INVALID; | |
0 | 811 | tokz->flags=0; |
812 | tokz->optstack=NULL; | |
813 | tokz->nest_lvl=0; | |
1 | 814 | tokz->filestack_n=0; |
815 | tokz->filestack=NULL; | |
35 | 816 | tokz->buffer.data=0; |
817 | tokz->buffer.len=0; | |
818 | tokz->buffer.pos=0; | |
1 | 819 | |
820 | return tokz; | |
821 | } | |
822 | ||
823 | ||
824 | Tokenizer *tokz_open(const char *fname) | |
825 | { | |
826 | Tokenizer *tokz; | |
827 | ||
828 | tokz=tokz_create(); | |
829 | ||
830 | if(!tokz_pushf(tokz, fname)){ | |
831 | free(tokz); | |
832 | return NULL; | |
833 | } | |
834 | ||
835 | return tokz; | |
836 | } | |
837 | ||
838 | ||
14 | 839 | Tokenizer *tokz_open_file(FILE *file, const char *fname) |
1 | 840 | { |
841 | Tokenizer *tokz; | |
842 | ||
843 | tokz=tokz_create(); | |
844 | ||
14 | 845 | if(!tokz_pushf_file(tokz, file, fname)){ |
1 | 846 | free(tokz); |
847 | return NULL; | |
848 | } | |
0 | 849 | |
850 | return tokz; | |
851 | } | |
852 | ||
35 | 853 | Tokenizer *tokz_prepare_buffer(char *buffer, int len) |
854 | { | |
855 | Tokenizer *tokz; | |
856 | char old=0; | |
857 | ||
858 | tokz=tokz_create(); | |
47 | 859 | if(len>0){ |
35 | 860 | old=buffer[len-1]; |
861 | buffer[len-1]='\0'; | |
862 | } | |
863 | ||
864 | tokz->flags|=TOKZ_READ_FROM_BUFFER; | |
865 | tokz->buffer.data=scopy(buffer); | |
47 | 866 | tokz->buffer.len=(len>0 ? (uint)len : strlen(tokz->buffer.data)); |
35 | 867 | tokz->buffer.pos=0; |
868 | ||
47 | 869 | if(old>0) |
35 | 870 | buffer[len-1]=old; |
871 | ||
872 | return tokz; | |
873 | } | |
0 | 874 | |
1 | 875 | /* |
876 | * File close | |
877 | */ | |
878 | ||
879 | static bool do_tokz_popf(Tokenizer *tokz, bool shrink) | |
880 | { | |
881 | Tokenizer_FInfo *finfo; | |
882 | ||
883 | if(tokz->filestack_n<=0) | |
884 | return FALSE; | |
885 | ||
886 | if(tokz->file!=NULL) | |
887 | fclose(tokz->file); | |
888 | if(tokz->name!=NULL) | |
889 | free(tokz->name); | |
890 | ||
891 | finfo=&(tokz->filestack[--tokz->filestack_n]); | |
892 | ||
893 | tokz->file=finfo->file; | |
894 | tokz->name=finfo->name; | |
895 | tokz->line=finfo->line; | |
896 | tokz->ungetc=finfo->ungetc; | |
2 | 897 | tokz->ungettok=finfo->ungettok; |
898 | ||
1 | 899 | if(tokz->filestack_n==0){ |
900 | free(tokz->filestack); | |
901 | tokz->filestack=NULL; | |
902 | }else if(shrink){ | |
903 | finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, | |
904 | tokz->filestack_n+1, tokz->filestack_n); | |
905 | if(finfo==NULL) | |
906 | warn_err(); | |
907 | else | |
908 | tokz->filestack=finfo; | |
909 | } | |
910 | ||
911 | return TRUE; | |
912 | } | |
913 | ||
914 | ||
915 | bool tokz_popf(Tokenizer *tokz) | |
916 | { | |
917 | return do_tokz_popf(tokz, TRUE); | |
918 | } | |
919 | ||
920 | ||
0 | 921 | void tokz_close(Tokenizer *tokz) |
922 | { | |
1 | 923 | while(tokz->filestack_n>0) |
924 | do_tokz_popf(tokz, FALSE); | |
925 | ||
0 | 926 | if(tokz->file!=NULL) |
927 | fclose(tokz->file); | |
1 | 928 | if(tokz->name!=NULL) |
929 | free(tokz->name); | |
2 | 930 | tok_free(&(tokz->ungettok)); |
931 | ||
0 | 932 | free(tokz); |
933 | } | |
934 | ||
935 | ||
1 | 936 | |
0 | 937 | /* */ |
938 | ||
939 | ||
940 | void tok_free(Token *tok) | |
941 | { | |
2 | 942 | if(TOK_IS_STRING(tok) || TOK_IS_IDENT(tok) || TOK_IS_COMMENT(tok)){ |
943 | if(TOK_STRING_VAL(tok)!=NULL) | |
944 | free(TOK_STRING_VAL(tok)); | |
945 | } | |
0 | 946 | |
947 | tok->type=TOK_INVALID; | |
948 | } | |
949 | ||
950 | ||
951 | void tok_init(Token *tok) | |
952 | { | |
953 | static Token dummy=TOK_INIT; | |
954 | ||
955 | memcpy(tok, &dummy, sizeof(*tok)); | |
956 | } | |
957 |