Mon, 20 Apr 2020 10:14:32 -0500
Convert README to markdown
0 | 1 | /* |
2 | * libtu/tokenizer.c | |
3 | * | |
36 | 4 | * Copyright (c) Tuomo Valkonen 1999-2002. |
53 | 5 | * |
6 | * You may distribute and modify this library under the terms of either | |
7 | * the Clarified Artistic License or the GNU LGPL, version 2.1 or later. | |
0 | 8 | */ |
9 | ||
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
17 | ||
70 | 18 | #include "tokenizer.h" |
19 | #include "misc.h" | |
20 | #include "output.h" | |
79 | 21 | #include "private.h" |
0 | 22 | |
23 | ||
24 | static const char *errors[]={ | |
62 | 25 | DUMMY_TR("(no error)"), |
26 | DUMMY_TR("Unexpected end of file"), /* E_TOKZ_UNEXPECTED_EOF */ | |
27 | DUMMY_TR("Unexpected end of line"), /* E_TOKZ_UNEXPECTED_EOL */ | |
28 | DUMMY_TR("End of line expected"), /* E_TOKZ_EOL_EXPECTED */ | |
29 | DUMMY_TR("Invalid character"), /* E_TOKZ_INVALID_CHAR*/ | |
30 | DUMMY_TR("Numeric constant too big"), /* E_TOKZ_TOOBIG */ | |
31 | DUMMY_TR("Invalid numberic format"), /* E_TOKZ_NUMFMT */ | |
32 | DUMMY_TR("Junk after numeric constant"), /* E_TOKZ_NUM_JUNK */ | |
33 | DUMMY_TR("Not an integer"), /* E_TOKZ_NOTINT */ | |
34 | DUMMY_TR("Numeric constant out of range"), /* E_TOKZ_RANGE */ | |
35 | DUMMY_TR("Multi-character character constant"), /* E_TOKZ_MULTICHAR */ | |
36 | DUMMY_TR("Token/statement limit reached"), /* E_TOKZ_TOKEN_LIMIT */ | |
37 | DUMMY_TR("Unknown option"), /* E_TOKZ_UNKONWN_OPTION */ | |
38 | DUMMY_TR("Syntax error"), /* E_TOKZ_SYNTAX */ | |
39 | DUMMY_TR("Invalid argument"), /* E_TOKZ_INVALID_ARGUMENT */ | |
40 | DUMMY_TR("End of statement expected"), /* E_TOKZ_EOS_EXPECTED */ | |
41 | DUMMY_TR("Too few arguments"), /* E_TOKZ_TOO_FEW_ARGS */ | |
42 | DUMMY_TR("Too many arguments"), /* E_TOKZ_TOO_MANY_ARGS */ | |
43 | DUMMY_TR("Maximum section nestin level exceeded"), /* E_TOK_Z_MAX_NEST */ | |
44 | DUMMY_TR("Identifier expected"), /* E_TOKZ_IDENTIFIER_EXPECTED */ | |
45 | DUMMY_TR("Starting brace ('{') expected"), /* E_TOKZ_LBRACE_EXPECTED */ | |
0 | 46 | }; |
47 | ||
48 | ||
49 | /* */ | |
50 | ||
/* Size of the on-stack staging buffer used while a token's text is being
 * accumulated; the last byte is reserved for the terminating NUL. */
#define STRBLEN 32

/* Declare the accumulator state for a string called X: the error flag
 * `err`, the heap string X, the staging buffer X_tmp and its fill level
 * X_tmpl. Only one accumulator can be declared per scope (shared `err`). */
#define STRING_DECL(X) int err=0; char* X=NULL; char X##_tmp[STRBLEN]; int X##_tmpl=0
/* As STRING_DECL, but the staging buffer starts out holding the literal P. */
#define STRING_DECL_P(X, P) int err=0; char* X=NULL; char X##_tmp[STRBLEN]=P; int X##_tmpl=sizeof(P)-1
/* Append character C to X; an allocation failure is recorded in `err`.
 * (The macro now uses its own argument instead of a caller variable that
 * happens to be called `c`.) */
#define STRING_APPEND(X, C) do{if(!_string_append(&X, X##_tmp, &X##_tmpl, (C))) err=-ENOMEM;}while(0)
/* Release whatever has been accumulated in X so far. */
#define STRING_FREE(X) do{if(X!=NULL) free(X);}while(0)
/* Flush the staging buffer into X. If an earlier append or the flush
 * itself failed, free X and return the (negative errno) error from the
 * ENCLOSING function, so a NULL or partial string is never handed on. */
#define STRING_FINISH(X) do{ \
        if(err==0 && !_string_finish(&X, X##_tmp, X##_tmpl)) \
            err=-ENOMEM; \
        if(err!=0){ \
            if(X!=NULL) \
                free(X); \
            return err; \
        } \
    }while(0)
0 | 58 | |
59 | ||
60 | static bool _string_append(char **p, char *tmp, int *tmplen, char c) | |
61 | { | |
62 | 62 | char *tmp2; |
63 | ||
64 | if(*tmplen==STRBLEN-1){ | |
65 | tmp[STRBLEN-1]='\0'; | |
66 | if(*p!=NULL){ | |
67 | tmp2=scat(*p, tmp); | |
68 | free(*p); | |
69 | *p=tmp2; | |
70 | }else{ | |
71 | *p=scopy(tmp); | |
72 | } | |
73 | *tmplen=1; | |
74 | tmp[0]=c; | |
75 | return *p!=NULL; | |
76 | }else{ | |
77 | tmp[(*tmplen)++]=c; | |
78 | return TRUE; | |
79 | } | |
0 | 80 | } |
81 | ||
82 | ||
83 | static bool _string_finish(char **p, char *tmp, int tmplen) | |
84 | { | |
62 | 85 | char *tmp2; |
86 | ||
87 | if(tmplen==0){ | |
88 | if(*p==NULL) | |
89 | *p=scopy(""); | |
90 | }else{ | |
91 | tmp[tmplen]='\0'; | |
92 | if(*p!=NULL){ | |
93 | tmp2=scat(*p, tmp); | |
94 | free(*p); | |
95 | *p=tmp2; | |
96 | }else{ | |
97 | *p=scopy(tmp); | |
98 | } | |
99 | } | |
100 | return *p!=NULL; | |
0 | 101 | } |
102 | ||
103 | ||
104 | /* */ | |
105 | ||
106 | ||
107 | #define INC_LINE() tokz->line++ | |
108 | #define GETCH() _getch(tokz) | |
109 | #define UNGETCH(C) _ungetch(tokz, C) | |
110 | ||
111 | static int _getch(Tokenizer *tokz) | |
112 | { | |
62 | 113 | int c; |
114 | ||
115 | if(tokz->ungetc!=-1){ | |
116 | c=tokz->ungetc; | |
117 | tokz->ungetc=-1; | |
118 | }else if (tokz->flags&TOKZ_READ_FROM_BUFFER) { | |
119 | assert(tokz->buffer.data!=NULL); | |
120 | if (tokz->buffer.pos==tokz->buffer.len) | |
121 | c=EOF; | |
122 | else | |
123 | c=tokz->buffer.data[tokz->buffer.pos++]; | |
124 | }else{ | |
125 | c=getc(tokz->file); | |
126 | } | |
20 | 127 | |
62 | 128 | return c; |
0 | 129 | } |
130 | ||
131 | ||
132 | static void _ungetch(Tokenizer *tokz, int c) | |
133 | { | |
62 | 134 | tokz->ungetc=c; |
0 | 135 | } |
136 | ||
137 | ||
138 | /* */ | |
139 | ||
140 | ||
141 | static int scan_line_comment(Token *tok, Tokenizer *tokz) | |
142 | { | |
62 | 143 | STRING_DECL_P(s, "#"); |
144 | int c; | |
0 | 145 | |
62 | 146 | c=GETCH(); |
147 | ||
148 | while(c!='\n' && c!=EOF){ | |
149 | STRING_APPEND(s, c); | |
150 | c=GETCH(); | |
151 | } | |
0 | 152 | |
62 | 153 | UNGETCH(c); |
0 | 154 | |
62 | 155 | STRING_FINISH(s); |
156 | ||
157 | TOK_SET_COMMENT(tok, s); | |
158 | ||
159 | return 0; | |
0 | 160 | } |
161 | ||
162 | ||
163 | static int skip_line_comment(Tokenizer *tokz) | |
164 | { | |
62 | 165 | int c; |
166 | ||
167 | do{ | |
168 | c=GETCH(); | |
169 | }while(c!='\n' && c!=EOF); | |
0 | 170 | |
62 | 171 | UNGETCH(c); |
172 | ||
173 | return 0; | |
0 | 174 | } |
175 | ||
176 | ||
177 | /* */ | |
178 | ||
179 | ||
180 | static int scan_c_comment(Token *tok, Tokenizer *tokz) | |
181 | { | |
62 | 182 | STRING_DECL_P(s, "/*"); |
183 | int c; | |
184 | int st=0; | |
185 | ||
186 | while(1){ | |
187 | c=GETCH(); | |
188 | ||
189 | if(c==EOF){ | |
190 | STRING_FREE(s); | |
191 | return E_TOKZ_UNEXPECTED_EOF; | |
192 | } | |
193 | ||
194 | STRING_APPEND(s, c); | |
195 | ||
196 | if(c=='\n'){ | |
197 | INC_LINE(); | |
198 | }else if(st==0 && c=='*'){ | |
199 | st=1; | |
200 | }else if(st==1){ | |
201 | if(c=='/') | |
202 | break; | |
203 | st=0; | |
204 | } | |
205 | } | |
0 | 206 | |
62 | 207 | STRING_FINISH(s); |
0 | 208 | |
62 | 209 | TOK_SET_COMMENT(tok, s); |
0 | 210 | |
62 | 211 | return 0; |
0 | 212 | } |
213 | ||
214 | ||
215 | static int skip_c_comment(Tokenizer *tokz) | |
216 | { | |
62 | 217 | int c; |
218 | int st=0; | |
219 | ||
220 | while(1){ | |
221 | c=GETCH(); | |
222 | ||
223 | if(c==EOF) | |
224 | return E_TOKZ_UNEXPECTED_EOF; | |
225 | ||
226 | if(c=='\n') | |
227 | INC_LINE(); | |
228 | else if(st==0 && c=='*') | |
229 | st=1; | |
230 | else if(st==1){ | |
231 | if(c=='/') | |
232 | break; | |
233 | st=0; | |
234 | } | |
235 | } | |
236 | ||
237 | return 0; | |
0 | 238 | } |
239 | ||
240 | ||
241 | /* */ | |
242 | ||
243 | ||
244 | static int scan_char_escape(Tokenizer *tokz) | |
245 | { | |
62 | 246 | static char* special_chars="nrtbae"; |
247 | static char* specials="\n\r\t\b\a\033"; | |
248 | int base, max; | |
249 | int i ,c; | |
250 | ||
251 | c=GETCH(); | |
252 | ||
253 | for(i=0;special_chars[i];i++){ | |
254 | if(special_chars[i]==c) | |
255 | return specials[c]; | |
256 | } | |
257 | ||
258 | if(c=='x' || c=='X'){ | |
259 | base=16;max=2;i=0; | |
260 | }else if(c=='d' || c=='D'){ | |
261 | base=10;max=3;i=0; | |
262 | }else if(c=='8' || c=='9'){ | |
263 | base=10;max=2;i=c-'0'; | |
264 | }else if('0'<=c && c<='7'){ | |
265 | base=8;max=2;i=c-'0'; | |
266 | }else if(c=='\n'){ | |
267 | UNGETCH(c); | |
268 | return -2; | |
269 | }else{ | |
270 | return c; | |
271 | } | |
272 | ||
273 | ||
274 | while(--max>=0){ | |
275 | c=GETCH(); | |
276 | ||
277 | if(c==EOF) | |
278 | return EOF; | |
279 | ||
280 | if(c=='\n'){ | |
281 | UNGETCH(c); | |
282 | return -2; | |
283 | } | |
284 | ||
285 | if(base==16){ | |
286 | if(!isxdigit(c)) | |
287 | break; | |
288 | ||
289 | i<<=4; | |
290 | ||
291 | if(isdigit(c)) | |
292 | i+=c-'0'; | |
293 | else if(i>='a') | |
294 | i+=0xa+c-'a'; | |
295 | else | |
296 | i+=0xa+c-'a'; | |
297 | ||
298 | }else if(base==10){ | |
299 | if(!isdigit(c)) | |
300 | break; | |
301 | i*=10; | |
302 | i+=c-'0'; | |
303 | }else{ | |
304 | if(c<'0' || c>'7') | |
305 | break; | |
306 | i<<=3; | |
307 | i+=c-'0'; | |
308 | } | |
309 | } | |
310 | ||
311 | if(max>=0) | |
312 | UNGETCH(c); | |
0 | 313 | |
62 | 314 | return i; |
0 | 315 | } |
316 | ||
317 | ||
318 | /* */ | |
319 | ||
320 | ||
321 | static int scan_string(Token *tok, Tokenizer *tokz, bool escapes) | |
322 | { | |
62 | 323 | STRING_DECL(s); |
324 | int c; | |
0 | 325 | |
62 | 326 | while(1){ |
327 | c=GETCH(); | |
328 | ||
329 | if(c=='"') | |
330 | break; | |
331 | ||
332 | if(c=='\n'){ | |
333 | UNGETCH(c); | |
334 | STRING_FREE(s); | |
335 | return E_TOKZ_UNEXPECTED_EOL; | |
336 | } | |
337 | ||
338 | if(c=='\\' && escapes){ | |
339 | c=scan_char_escape(tokz); | |
340 | if(c==-2){ | |
341 | STRING_FREE(s); | |
342 | return E_TOKZ_UNEXPECTED_EOL; | |
343 | } | |
344 | } | |
345 | ||
346 | if(c==EOF){ | |
347 | STRING_FREE(s); | |
348 | return E_TOKZ_UNEXPECTED_EOF; | |
349 | } | |
350 | ||
351 | STRING_APPEND(s, c); | |
352 | } | |
353 | ||
354 | STRING_FINISH(s); | |
355 | ||
356 | TOK_SET_STRING(tok, s); | |
0 | 357 | |
62 | 358 | return 0; |
0 | 359 | } |
360 | ||
361 | ||
362 | /* */ | |
363 | ||
364 | ||
365 | static int scan_char(Token *tok, Tokenizer *tokz) | |
366 | { | |
62 | 367 | int c, c2; |
368 | ||
369 | c=GETCH(); | |
370 | ||
371 | if(c==EOF) | |
372 | return E_TOKZ_UNEXPECTED_EOF; | |
373 | ||
374 | if(c=='\n') | |
375 | return E_TOKZ_UNEXPECTED_EOL; | |
0 | 376 | |
62 | 377 | if(c=='\\'){ |
378 | c=scan_char_escape(tokz); | |
379 | ||
380 | if(c==EOF) | |
381 | return E_TOKZ_UNEXPECTED_EOF; | |
382 | ||
383 | if(c==-2) | |
384 | return E_TOKZ_UNEXPECTED_EOL; | |
385 | } | |
386 | ||
387 | c2=GETCH(); | |
388 | ||
389 | if(c2!='\'') | |
390 | return E_TOKZ_MULTICHAR; | |
391 | ||
392 | TOK_SET_CHAR(tok, c); | |
393 | ||
394 | return 0; | |
0 | 395 | } |
396 | ||
397 | ||
398 | /* */ | |
399 | ||
400 | ||
401 | #define START_IDENT(X) (isalpha(X) || X=='_' || X=='$') | |
402 | ||
403 | ||
404 | static int scan_identifier(Token *tok, Tokenizer *tokz, int c) | |
405 | { | |
62 | 406 | STRING_DECL(s); |
407 | ||
408 | do{ | |
409 | STRING_APPEND(s, c); | |
410 | c=GETCH(); | |
411 | }while(isalnum(c) || c=='_' || c=='$'); | |
412 | ||
413 | UNGETCH(c); | |
414 | ||
415 | STRING_FINISH(s); | |
416 | ||
417 | TOK_SET_IDENT(tok, s); | |
0 | 418 | |
62 | 419 | return 0; |
0 | 420 | } |
421 | ||
3 | 422 | #define NP_SIMPLE_IMPL |
60 | 423 | #include "np/numparser2.h" |
424 | #include "np/np-conv.h" | |
0 | 425 | |
426 | ||
427 | static int scan_number(Token *tok, Tokenizer *tokz, int c) | |
428 | { | |
62 | 429 | NPNum num=NUM_INIT; |
430 | int e; | |
431 | ||
432 | if((e=parse_number(&num, tokz, c))) | |
433 | return e; | |
434 | ||
435 | if(num.type==NPNUM_INT){ | |
436 | long l; | |
437 | if((e=num_to_long(&l, &num, TRUE))) | |
438 | return e; | |
439 | ||
440 | TOK_SET_LONG(tok, l); | |
441 | }else if(num.type==NPNUM_FLOAT){ | |
442 | double d; | |
443 | if((e=num_to_double(&d, &num))) | |
444 | return e; | |
445 | ||
446 | TOK_SET_DOUBLE(tok, d); | |
447 | }else{ | |
448 | return E_TOKZ_NUMFMT; | |
449 | } | |
0 | 450 | |
62 | 451 | return 0; |
0 | 452 | } |
453 | ||
454 | ||
455 | /* */ | |
456 | ||
457 | ||
458 | static uchar op_map[]={ | |
62 | 459 | 0x00, /* ________ 0-7 */ |
460 | 0x00, /* ________ 8-15 */ | |
461 | 0x00, /* ________ 16-23 */ | |
462 | 0x00, /* ________ 24-31 */ | |
463 | 0x62, /* _!___%&_ 32-39 */ | |
464 | 0xff, /* ()*+,-./ 40-47 */ | |
465 | 0x00, /* ________ 48-55 */ | |
466 | 0xfc, /* __:;<=>? 56-63 */ | |
467 | 0x01, /* @_______ 64-71 */ | |
468 | 0x00, /* ________ 72-79 */ | |
469 | 0x00, /* ________ 80-87 */ | |
470 | 0x78, /* ___[_]^_ 88-95 */ | |
471 | 0x00, /* ________ 96-103 */ | |
472 | 0x00, /* ________ 104-111 */ | |
473 | 0x00, /* ________ 112-119 */ | |
474 | 0x38 /* ___{|}__ 120-127 */ | |
0 | 475 | }; |
476 | ||
477 | ||
478 | static bool map_isset(uchar *map, uint ch) | |
479 | { | |
62 | 480 | if(ch>127) |
481 | return FALSE; | |
0 | 482 | |
62 | 483 | return map[ch>>3]&(1<<(ch&7)); |
0 | 484 | } |
485 | ||
486 | ||
487 | static bool is_opch(uint ch) | |
488 | { | |
62 | 489 | return map_isset(op_map, ch); |
0 | 490 | } |
491 | ||
492 | ||
493 | static int scan_op(Token *tok, Tokenizer *tokz, int c) | |
494 | { | |
62 | 495 | int c2; |
496 | int op=-1; | |
497 | ||
498 | /* Quickly check it is an operator character */ | |
499 | if(!is_opch(c)) | |
500 | return E_TOKZ_INVALID_CHAR; | |
0 | 501 | |
62 | 502 | switch(c){ |
503 | case '+': | |
504 | case '-': | |
505 | case '*': | |
506 | /* case '/': Checked elsewhere */ | |
507 | case '%': | |
508 | case '^': | |
509 | case '!': | |
510 | case '=': | |
511 | case '<': | |
512 | case '>': | |
513 | c2=GETCH(); | |
514 | if(c2=='='){ | |
515 | op=c|(c2<<8); | |
516 | }else if(c2==c && (c2!='%' && c2!='!' && c2!='*')){ | |
517 | if(c=='<' || c=='>'){ | |
518 | int c3=GETCH(); | |
519 | if(c3=='='){ | |
520 | op=c|(c2<<8)|(c3<<16); | |
521 | }else{ | |
522 | UNGETCH(c3); | |
523 | op=c|(c2<<8); | |
524 | } | |
525 | }else{ | |
526 | op=c|(c2<<8); | |
527 | } | |
528 | }else{ | |
529 | UNGETCH(c2); | |
530 | op=c; | |
531 | } | |
532 | break; | |
533 | ||
534 | /* It is already known that it is a operator so these are not needed | |
535 | case ':': | |
536 | case '~': | |
537 | case '?': | |
538 | case '.': | |
539 | case ';'; | |
540 | case '{': | |
541 | case '}': | |
542 | case '@': | |
543 | case '|': | |
544 | case '&': | |
545 | */ | |
546 | default: | |
547 | op=c; | |
548 | } | |
549 | ||
550 | TOK_SET_OP(tok, op); | |
0 | 551 | |
62 | 552 | return 0; |
0 | 553 | } |
554 | ||
555 | ||
556 | /* */ | |
557 | ||
558 | ||
21 | 559 | void tokz_warn(const Tokenizer *tokz, int line, const char *fmt, ...) |
560 | { | |
62 | 561 | va_list args; |
562 | ||
563 | va_start(args, fmt); | |
564 | ||
565 | if(tokz!=NULL) | |
566 | warn_obj_line_v(tokz->name, line, fmt, args); | |
567 | else | |
568 | warn(fmt, args); | |
569 | ||
570 | va_end(args); | |
21 | 571 | } |
572 | ||
573 | ||
0 | 574 | void tokz_warn_error(const Tokenizer *tokz, int line, int e) |
575 | { | |
62 | 576 | if(e==E_TOKZ_UNEXPECTED_EOF) |
577 | line=0; | |
578 | ||
579 | if(e<0) | |
580 | tokz_warn(tokz, line, "%s", strerror(-e)); | |
581 | else | |
582 | tokz_warn(tokz, line, "%s", TR(errors[e])); | |
0 | 583 | } |
584 | ||
585 | ||
586 | bool tokz_get_token(Tokenizer *tokz, Token *tok) | |
587 | { | |
62 | 588 | int c, c2, e; |
589 | ||
590 | if (!(tokz->flags&TOKZ_READ_FROM_BUFFER)) | |
591 | assert(tokz->file!=NULL); | |
592 | ||
593 | tok_free(tok); | |
594 | ||
595 | if(!TOK_IS_INVALID(&(tokz->ungettok))){ | |
596 | *tok=tokz->ungettok; | |
597 | tokz->ungettok.type=TOK_INVALID; | |
598 | return TRUE; | |
599 | } | |
2 | 600 | |
62 | 601 | while(1){ |
602 | ||
603 | e=0; | |
604 | ||
605 | do{ | |
606 | c=GETCH(); | |
607 | }while(c!='\n' && c!=EOF && isspace(c)); | |
608 | ||
609 | tok->line=tokz->line; | |
610 | ||
611 | switch(c){ | |
612 | case EOF: | |
613 | TOK_SET_OP(tok, OP_EOF); | |
614 | return TRUE; | |
615 | ||
616 | case '\n': | |
617 | INC_LINE(); | |
618 | ||
619 | if(tokz->flags&TOKZ_IGNORE_NEXTLINE) | |
620 | continue; | |
621 | ||
622 | TOK_SET_OP(tok, OP_NEXTLINE); | |
623 | ||
624 | return TRUE; | |
625 | ||
626 | case '\\': | |
627 | do{ | |
628 | c=GETCH(); | |
629 | if(c==EOF){ | |
630 | TOK_SET_OP(tok, OP_EOF); | |
631 | return FALSE; | |
632 | } | |
633 | if(!isspace(c) && e==0){ | |
634 | e=E_TOKZ_EOL_EXPECTED; | |
635 | tokz_warn_error(tokz, tokz->line, e); | |
636 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
637 | return FALSE; | |
638 | } | |
639 | }while(c!='\n'); | |
640 | ||
641 | INC_LINE(); | |
642 | continue; | |
0 | 643 | |
62 | 644 | case '#': |
645 | if(tokz->flags&TOKZ_READ_COMMENTS){ | |
646 | e=scan_line_comment(tok, tokz); | |
647 | break; | |
648 | }else if((e=skip_line_comment(tokz))){ | |
649 | break; | |
650 | } | |
651 | ||
652 | continue; | |
653 | ||
654 | case '/': | |
655 | c2=GETCH(); | |
656 | ||
657 | if(c2=='='){ | |
658 | TOK_SET_OP(tok, OP_AS_DIV); | |
659 | return TRUE; | |
660 | } | |
661 | ||
662 | if(c2!='*'){ | |
663 | UNGETCH(c2); | |
664 | TOK_SET_OP(tok, OP_DIV); | |
665 | return TRUE; | |
666 | } | |
667 | ||
668 | if(tokz->flags&TOKZ_READ_COMMENTS){ | |
669 | e=scan_c_comment(tok, tokz); | |
670 | break; | |
671 | }else if((e=skip_c_comment(tokz))){ | |
672 | break; | |
673 | } | |
674 | ||
675 | continue; | |
676 | ||
677 | case '\"': | |
678 | e=scan_string(tok, tokz, TRUE); | |
679 | break; | |
0 | 680 | |
62 | 681 | case '\'': |
682 | e=scan_char(tok, tokz); | |
683 | break; | |
0 | 684 | |
62 | 685 | default: |
686 | if(('0'<=c && c<='9') || c=='-' || c=='+'){ | |
687 | e=scan_number(tok, tokz, c); | |
688 | break; | |
689 | } | |
0 | 690 | |
62 | 691 | if(START_IDENT(c)) |
692 | e=scan_identifier(tok, tokz, c); | |
693 | else | |
694 | e=scan_op(tok, tokz, c); | |
695 | } | |
696 | ||
697 | if(!e) | |
698 | return TRUE; | |
699 | ||
700 | tokz_warn_error(tokz, tokz->line, e); | |
701 | return FALSE; | |
702 | } | |
0 | 703 | } |
704 | ||
705 | ||
2 | 706 | void tokz_unget_token(Tokenizer *tokz, Token *tok) |
707 | { | |
62 | 708 | tok_free(&(tokz->ungettok)); |
709 | tokz->ungettok=*tok; | |
710 | tok->type=TOK_INVALID; | |
2 | 711 | } |
712 | ||
713 | ||
1 | 714 | /* |
715 | * File open | |
716 | */ | |
717 | ||
718 | static bool do_tokz_pushf(Tokenizer *tokz) | |
719 | { | |
62 | 720 | Tokenizer_FInfo *finfo; |
721 | ||
722 | finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, | |
723 | tokz->filestack_n, tokz->filestack_n+1); | |
724 | ||
725 | if(finfo==NULL) | |
726 | return FALSE; | |
1 | 727 | |
62 | 728 | tokz->filestack=finfo; |
729 | finfo=&(finfo[tokz->filestack_n++]); | |
730 | ||
731 | finfo->file=tokz->file; | |
732 | finfo->name=tokz->name; | |
733 | finfo->line=tokz->line; | |
734 | finfo->ungetc=tokz->ungetc; | |
735 | finfo->ungettok=tokz->ungettok; | |
736 | ||
737 | return TRUE; | |
1 | 738 | } |
739 | ||
740 | ||
14 | 741 | bool tokz_pushf_file(Tokenizer *tokz, FILE *file, const char *fname) |
1 | 742 | { |
62 | 743 | char *fname_copy=NULL; |
744 | ||
745 | if(file==NULL) | |
746 | return FALSE; | |
14 | 747 | |
62 | 748 | if(fname!=NULL){ |
749 | fname_copy=scopy(fname); | |
750 | if(fname_copy==NULL){ | |
751 | warn_err(); | |
752 | return FALSE; | |
753 | } | |
754 | } | |
755 | ||
756 | if(tokz->file!=NULL){ | |
757 | if(!do_tokz_pushf(tokz)){ | |
758 | warn_err(); | |
759 | if(fname_copy!=NULL) | |
760 | free(fname_copy); | |
761 | return FALSE; | |
762 | } | |
763 | } | |
764 | ||
765 | tokz->file=file; | |
766 | tokz->name=fname_copy; | |
767 | tokz->line=1; | |
768 | tokz->ungetc=-1; | |
769 | tokz->ungettok.type=TOK_INVALID; | |
770 | ||
771 | return TRUE; | |
1 | 772 | } |
773 | ||
774 | ||
775 | bool tokz_pushf(Tokenizer *tokz, const char *fname) | |
776 | { | |
62 | 777 | FILE *file; |
778 | ||
779 | file=fopen(fname, "r"); | |
780 | ||
781 | if(file==NULL){ | |
782 | warn_err_obj(fname); | |
783 | return FALSE; | |
784 | } | |
785 | ||
786 | if(!tokz_pushf_file(tokz, file, fname)){ | |
787 | fclose(file); | |
788 | return FALSE; | |
789 | } | |
1 | 790 | |
62 | 791 | return TRUE; |
0 | 792 | } |
793 | ||
794 | ||
62 | 795 | |
1 | 796 | static Tokenizer *tokz_create() |
0 | 797 | { |
62 | 798 | Tokenizer*tokz; |
799 | ||
800 | tokz=ALLOC(Tokenizer); | |
801 | ||
802 | if(tokz==NULL){ | |
803 | warn_err(); | |
804 | return NULL; | |
805 | } | |
806 | ||
807 | tokz->file=NULL; | |
808 | tokz->name=NULL; | |
809 | tokz->line=1; | |
810 | tokz->ungetc=-1; | |
811 | tokz->ungettok.type=TOK_INVALID; | |
812 | tokz->flags=0; | |
813 | tokz->optstack=NULL; | |
814 | tokz->nest_lvl=0; | |
815 | tokz->filestack_n=0; | |
816 | tokz->filestack=NULL; | |
817 | tokz->buffer.data=0; | |
818 | tokz->buffer.len=0; | |
819 | tokz->buffer.pos=0; | |
820 | ||
821 | return tokz; | |
1 | 822 | } |
823 | ||
62 | 824 | |
1 | 825 | Tokenizer *tokz_open(const char *fname) |
826 | { | |
62 | 827 | Tokenizer *tokz; |
828 | ||
829 | tokz=tokz_create(); | |
830 | ||
831 | if(!tokz_pushf(tokz, fname)){ | |
832 | free(tokz); | |
833 | return NULL; | |
834 | } | |
835 | ||
836 | return tokz; | |
1 | 837 | } |
838 | ||
839 | ||
14 | 840 | Tokenizer *tokz_open_file(FILE *file, const char *fname) |
1 | 841 | { |
62 | 842 | Tokenizer *tokz; |
843 | ||
844 | tokz=tokz_create(); | |
845 | ||
846 | if(!tokz_pushf_file(tokz, file, fname)){ | |
847 | free(tokz); | |
848 | return NULL; | |
849 | } | |
850 | ||
851 | return tokz; | |
0 | 852 | } |
853 | ||
35 | 854 | Tokenizer *tokz_prepare_buffer(char *buffer, int len) |
855 | { | |
62 | 856 | Tokenizer *tokz; |
857 | char old=0; | |
35 | 858 | |
62 | 859 | tokz=tokz_create(); |
860 | if(len>0){ | |
861 | old=buffer[len-1]; | |
862 | buffer[len-1]='\0'; | |
863 | } | |
35 | 864 | |
62 | 865 | tokz->flags|=TOKZ_READ_FROM_BUFFER; |
866 | tokz->buffer.data=scopy(buffer); | |
867 | tokz->buffer.len=(len>0 ? (uint)len : strlen(tokz->buffer.data)); | |
868 | tokz->buffer.pos=0; | |
35 | 869 | |
62 | 870 | if(old>0) |
871 | buffer[len-1]=old; | |
35 | 872 | |
62 | 873 | return tokz; |
35 | 874 | } |
0 | 875 | |
1 | 876 | /* |
877 | * File close | |
878 | */ | |
879 | ||
880 | static bool do_tokz_popf(Tokenizer *tokz, bool shrink) | |
881 | { | |
62 | 882 | Tokenizer_FInfo *finfo; |
883 | ||
884 | if(tokz->filestack_n<=0) | |
885 | return FALSE; | |
1 | 886 | |
62 | 887 | if(tokz->file!=NULL) |
888 | fclose(tokz->file); | |
889 | if(tokz->name!=NULL) | |
890 | free(tokz->name); | |
891 | ||
892 | finfo=&(tokz->filestack[--tokz->filestack_n]); | |
893 | ||
894 | tokz->file=finfo->file; | |
895 | tokz->name=finfo->name; | |
896 | tokz->line=finfo->line; | |
897 | tokz->ungetc=finfo->ungetc; | |
898 | tokz->ungettok=finfo->ungettok; | |
899 | ||
900 | if(tokz->filestack_n==0){ | |
901 | free(tokz->filestack); | |
902 | tokz->filestack=NULL; | |
903 | }else if(shrink){ | |
904 | finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, | |
905 | tokz->filestack_n+1, tokz->filestack_n); | |
906 | if(finfo==NULL) | |
907 | warn_err(); | |
908 | else | |
909 | tokz->filestack=finfo; | |
910 | } | |
911 | ||
912 | return TRUE; | |
1 | 913 | } |
914 | ||
915 | ||
916 | bool tokz_popf(Tokenizer *tokz) | |
917 | { | |
62 | 918 | return do_tokz_popf(tokz, TRUE); |
1 | 919 | } |
62 | 920 | |
1 | 921 | |
0 | 922 | void tokz_close(Tokenizer *tokz) |
923 | { | |
62 | 924 | while(tokz->filestack_n>0) |
925 | do_tokz_popf(tokz, FALSE); | |
926 | ||
927 | if(tokz->file!=NULL) | |
928 | fclose(tokz->file); | |
929 | if(tokz->name!=NULL) | |
930 | free(tokz->name); | |
931 | tok_free(&(tokz->ungettok)); | |
932 | ||
933 | free(tokz); | |
0 | 934 | } |
935 | ||
936 | ||
1 | 937 | |
0 | 938 | /* */ |
939 | ||
940 | ||
941 | void tok_free(Token *tok) | |
942 | { | |
62 | 943 | if(TOK_IS_STRING(tok) || TOK_IS_IDENT(tok) || TOK_IS_COMMENT(tok)){ |
944 | if(TOK_STRING_VAL(tok)!=NULL) | |
945 | free(TOK_STRING_VAL(tok)); | |
946 | } | |
947 | ||
948 | tok->type=TOK_INVALID; | |
0 | 949 | } |
950 | ||
951 | ||
952 | void tok_init(Token *tok) | |
953 | { | |
62 | 954 | static Token dummy=TOK_INIT; |
955 | ||
956 | memcpy(tok, &dummy, sizeof(*tok)); | |
0 | 957 | } |
958 |