Mon, 28 Aug 2000 12:24:32 +0200
trunk: changeset 24
Added tokz_warn
0 | 1 | /* |
2 | * libtu/parser.c | |
3 | * | |
9 | 4 | * Copyright (c) Tuomo Valkonen 1999-2000. |
0 | 5 | * See the included file LICENSE for details. |
6 | */ | |
7 | ||
8 | #include <string.h> | |
14 | 9 | #include <errno.h> |
0 | 10 | |
5 | 11 | #include <libtu/parser.h> |
12 | #include <libtu/misc.h> | |
13 | #include <libtu/output.h> | |
0 | 14 | |
/* Number of elements in the per-statement token array. */
#define MAX_TOKENS 256
/* Number of entries in the option-table stack (tokz->optstack). */
#define MAX_NEST 256


/*
 * Statement kinds produced by read_statement(). The numbering starts
 * at 1 because read_statement() negates the kind to signal an error,
 * which only works for non-zero values.
 */
enum{
    P_NONE=1,       /* terminator seen with no tokens before it */
    P_EOF=2,        /* OP_EOF with no tokens pending */
    P_STMT=3,       /* tokens ended by OP_NEXTLINE */
    P_STMT_NS=4,    /* tokens ended by OP_SCOLON, OP_R_BRC or OP_EOF */
    P_STMT_SECT=5,  /* tokens ended by OP_L_BRC */
    P_BEG_SECT=6,   /* OP_L_BRC with no tokens before it */
    P_END_SECT=7    /* OP_R_BRC with no tokens before it */
};
28 | ||
29 | ||
20 | 30 | /* */ |
0 | 31 | |
32 | ||
1 | 33 | static bool opt_include(Tokenizer *tokz, int n, Token *toks); |
34 | ||
35 | ||
36 | static ConfOpt common_opts[]={ | |
37 | {"include", "s", opt_include, NULL}, | |
38 | {NULL, NULL, NULL, NULL} | |
39 | }; | |
40 | ||
41 | ||
42 | /* */ | |
43 | ||
0 | 44 | |
2 | 45 | static int read_statement(Tokenizer *tokz, Token *tokens, int *ntok_ret) |
0 | 46 | { |
47 | int ntokens=0; | |
48 | Token *tok=NULL; | |
2 | 49 | int had_comma=0; /* 0 - no, 1 - yes, 2 - not had, not expected */ |
50 | int retval=0; | |
0 | 51 | int e=0; |
52 | ||
53 | while(1){ | |
2 | 54 | tok=&tokens[ntokens]; |
55 | ||
56 | if(!tokz_get_token(tokz, tok)){ | |
57 | e=1; | |
58 | continue; | |
59 | } | |
60 | ||
0 | 61 | if(ntokens==MAX_TOKENS-1){ |
62 | e=E_TOKZ_TOKEN_LIMIT; | |
2 | 63 | tokz_warn_error(tokz, tok->line, e); |
64 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
65 | break; | |
66 | }else{ | |
67 | ntokens++; | |
0 | 68 | } |
69 | ||
70 | if(!TOK_IS_OP(tok)){ | |
71 | if(ntokens==1 && !had_comma){ | |
2 | 72 | /*if(!TOK_IS_IDENT(tok)){ |
0 | 73 | e=E_TOKZ_IDENTIFIER_EXPECTED; |
2 | 74 | goto handle_error; |
75 | }*/ | |
0 | 76 | |
77 | had_comma=2; | |
78 | }else{ | |
2 | 79 | if(had_comma==0) |
80 | goto syntax; | |
0 | 81 | |
82 | had_comma=0; | |
83 | } | |
84 | continue; | |
85 | } | |
86 | ||
87 | /* It is an operator */ | |
2 | 88 | ntokens--; |
0 | 89 | |
90 | switch(TOK_OP_VAL(tok)){ | |
91 | case OP_SCOLON: | |
2 | 92 | retval=(ntokens==0 ? P_NONE : P_STMT_NS); |
0 | 93 | break; |
94 | ||
95 | case OP_NEXTLINE: | |
2 | 96 | retval=(ntokens==0 ? P_NONE : P_STMT); |
97 | break; | |
0 | 98 | |
2 | 99 | case OP_L_BRC: |
100 | retval=(ntokens==0 ? P_BEG_SECT : P_STMT_SECT); | |
0 | 101 | break; |
102 | ||
2 | 103 | case OP_R_BRC: |
104 | if(ntokens==0){ | |
105 | retval=P_END_SECT; | |
106 | }else{ | |
107 | tokz_unget_token(tokz, tok); | |
108 | retval=P_STMT_NS; | |
109 | } | |
110 | break; | |
111 | ||
0 | 112 | case OP_EOF: |
2 | 113 | retval=(ntokens==0 ? P_EOF : P_STMT_NS); |
114 | ||
0 | 115 | if(had_comma==1){ |
116 | e=E_TOKZ_UNEXPECTED_EOF; | |
2 | 117 | goto handle_error; |
0 | 118 | } |
119 | ||
2 | 120 | goto end; |
0 | 121 | |
122 | case OP_COMMA: | |
2 | 123 | if(had_comma!=0) |
124 | goto syntax; | |
125 | ||
0 | 126 | had_comma=1; |
2 | 127 | continue; |
0 | 128 | |
129 | default: | |
2 | 130 | goto syntax; |
0 | 131 | } |
132 | ||
2 | 133 | if(had_comma!=1) |
134 | break; | |
135 | ||
136 | syntax: | |
137 | e=E_TOKZ_SYNTAX; | |
138 | handle_error: | |
139 | tokz_warn_error(tokz, tok->line, e); | |
140 | ||
141 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT) || retval!=0) | |
142 | break; | |
0 | 143 | } |
144 | ||
2 | 145 | end: |
146 | if(e!=0) | |
147 | retval=-retval; | |
148 | ||
0 | 149 | *ntok_ret=ntokens; |
150 | ||
151 | return retval; | |
152 | } | |
153 | ||
154 | ||
2 | 155 | static bool find_beg_sect(Tokenizer *tokz) |
156 | { | |
157 | Token tok; | |
158 | ||
159 | while(tokz_get_token(tokz, &tok)){ | |
160 | if(TOK_IS_OP(&tok)){ | |
161 | if(TOK_OP_VAL(&tok)==OP_NEXTLINE) | |
162 | continue; | |
163 | ||
164 | if(TOK_OP_VAL(&tok)==OP_SCOLON) | |
165 | return FALSE; | |
166 | ||
167 | if(TOK_OP_VAL(&tok)==OP_L_BRC) | |
168 | return TRUE; | |
169 | } | |
170 | ||
171 | tokz_unget_token(tokz, &tok); | |
172 | break; | |
173 | } | |
174 | return FALSE; | |
175 | } | |
176 | ||
177 | ||
0 | 178 | /* */ |
179 | ||
180 | ||
2 | 181 | static const ConfOpt* lookup_option(const ConfOpt *opts, const char *name) |
182 | { | |
183 | while(opts->optname!=NULL){ | |
184 | if(strcmp(opts->optname, name)==0) | |
185 | return opts; | |
186 | opts++; | |
187 | } | |
188 | return NULL; | |
189 | } | |
190 | ||
0 | 191 | |
2 | 192 | static bool call_end_sect(Tokenizer *tokz, const ConfOpt *opts) |
193 | { | |
194 | opts=lookup_option(opts, "#end"); | |
195 | if(opts!=NULL) | |
196 | return opts->fn(tokz, 0, NULL); | |
0 | 197 | |
2 | 198 | return TRUE; |
0 | 199 | } |
200 | ||
201 | ||
2 | 202 | static bool call_cancel_sect(Tokenizer *tokz, const ConfOpt *opts) |
203 | { | |
204 | opts=lookup_option(opts, "#cancel"); | |
205 | if(opts!=NULL) | |
206 | return opts->fn(tokz, 0, NULL); | |
207 | ||
208 | return TRUE; | |
0 | 209 | } |
210 | ||
211 | ||
212 | /* */ | |
213 | ||
214 | ||
215 | bool parse_config_tokz(Tokenizer *tokz, const ConfOpt *options) | |
216 | { | |
217 | Token tokens[MAX_TOKENS]; | |
218 | bool alloced_optstack=FALSE; | |
2 | 219 | int i, t, ntokens=0; |
0 | 220 | int init_nest_lvl; |
2 | 221 | bool had_error; |
222 | int errornest=0; | |
0 | 223 | |
224 | /* Allocate tokz->optstack if it does not yet exist (if it does, | |
225 | * we have been called from an option handler) | |
226 | */ | |
227 | if(!tokz->optstack){ | |
228 | tokz->optstack=ALLOC_N(const ConfOpt*, MAX_NEST); | |
229 | if(!tokz->optstack){ | |
230 | warn_err(); | |
231 | return FALSE; | |
232 | } | |
233 | ||
234 | memset(tokz->optstack, 0, sizeof(ConfOpt*)*MAX_NEST); | |
235 | init_nest_lvl=tokz->nest_lvl=0; | |
236 | alloced_optstack=TRUE; | |
237 | }else{ | |
238 | init_nest_lvl=tokz->nest_lvl; | |
239 | } | |
240 | ||
241 | tokz->optstack[init_nest_lvl]=options; | |
242 | ||
2 | 243 | for(i=0; i<MAX_TOKENS; i++) |
0 | 244 | tok_init(&tokens[i]); |
245 | ||
246 | ||
247 | /* The loop | |
248 | */ | |
249 | while(1){ | |
2 | 250 | had_error=FALSE; |
0 | 251 | |
252 | /* free the tokens */ | |
253 | while(ntokens--) | |
254 | tok_free(&tokens[ntokens]); | |
255 | ||
2 | 256 | /* read the tokens */ |
257 | t=read_statement(tokz, tokens, &ntokens); | |
258 | ||
259 | if((had_error=t<0)) | |
260 | t=-t; | |
261 | ||
262 | switch(t){ | |
263 | case P_STMT: | |
264 | case P_STMT_NS: | |
265 | case P_STMT_SECT: | |
266 | ||
267 | if(errornest) | |
268 | had_error=TRUE; | |
269 | else if(tokz->flags&TOKZ_PARSER_INDENT_MODE) | |
270 | verbose_indent(tokz->nest_lvl); | |
271 | ||
272 | if(!TOK_IS_IDENT(tokens+0)){ | |
273 | had_error=TRUE; | |
274 | tokz_warn_error(tokz, tokens->line, | |
275 | E_TOKZ_IDENTIFIER_EXPECTED); | |
276 | } | |
277 | ||
278 | if(had_error) | |
279 | break; | |
280 | ||
281 | if(t==P_STMT){ | |
282 | if(find_beg_sect(tokz)) | |
283 | t=P_STMT_SECT; | |
284 | } | |
285 | ||
286 | options=lookup_option(tokz->optstack[tokz->nest_lvl], | |
287 | TOK_IDENT_VAL(tokens+0)); | |
288 | if(options==NULL) | |
289 | options=lookup_option(common_opts, TOK_IDENT_VAL(tokens+0)); | |
290 | ||
291 | if(options==NULL){ | |
292 | had_error=TRUE; | |
293 | tokz_warn_error(tokz, tokens->line, E_TOKZ_UNKNOWN_OPTION); | |
294 | }else{ | |
295 | had_error=!check_args(tokz, tokens, ntokens, options->argfmt); | |
296 | } | |
297 | ||
298 | if(had_error) | |
299 | break; | |
300 | ||
301 | if(options->opts!=NULL){ | |
302 | if(t!=P_STMT_SECT){ | |
303 | had_error=TRUE; | |
304 | tokz_warn_error(tokz, tokz->line, E_TOKZ_LBRACE_EXPECTED); | |
305 | }else if(tokz->nest_lvl==MAX_NEST-1){ | |
306 | tokz_warn_error(tokz, tokz->line, E_TOKZ_MAX_NEST); | |
307 | had_error=TRUE; | |
308 | }else{ | |
309 | tokz->optstack[++tokz->nest_lvl]=options->opts; | |
310 | } | |
311 | }else if(t==P_STMT_SECT){ | |
312 | had_error=TRUE; | |
313 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
314 | } | |
315 | ||
316 | if(!had_error && options->fn!=NULL){ | |
317 | had_error=!options->fn(tokz, ntokens, tokens); | |
318 | if(t==P_STMT_SECT && had_error) | |
319 | tokz->nest_lvl--; | |
320 | } | |
0 | 321 | break; |
2 | 322 | |
0 | 323 | case P_EOF: |
1 | 324 | if(tokz_popf(tokz)){ |
2 | 325 | break; |
326 | }else if(tokz->nest_lvl>0 || errornest>0){ | |
0 | 327 | tokz_warn_error(tokz, 0, E_TOKZ_UNEXPECTED_EOF); |
328 | had_error=TRUE; | |
329 | } | |
2 | 330 | goto eof; |
331 | ||
332 | case P_BEG_SECT: | |
333 | had_error=TRUE; | |
334 | errornest++; | |
335 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
0 | 336 | break; |
2 | 337 | |
0 | 338 | case P_END_SECT: |
339 | if(tokz->nest_lvl==0){ | |
340 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
341 | had_error=TRUE; | |
342 | break; | |
343 | } | |
344 | ||
20 | 345 | if(errornest!=0){ |
346 | if(errornest==1){ | |
347 | call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
348 | tokz->nest_lvl--; | |
349 | } | |
350 | errornest--; | |
351 | }else{ | |
352 | if(!had_error) | |
353 | had_error=!call_end_sect(tokz, | |
354 | tokz->optstack[tokz->nest_lvl]); | |
0 | 355 | |
20 | 356 | tokz->nest_lvl--; |
357 | } | |
0 | 358 | |
359 | if(tokz->nest_lvl<init_nest_lvl) | |
2 | 360 | goto eof; |
361 | } | |
362 | ||
363 | if(!had_error) | |
364 | continue; | |
365 | ||
20 | 366 | if(t==P_STMT_SECT || t==P_END_SECT){ |
2 | 367 | errornest++; |
20 | 368 | /*if(t==P_END_SECT) |
369 | tokz->nest_lvl--;*/ | |
370 | } | |
2 | 371 | |
372 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
373 | break; | |
374 | } | |
0 | 375 | |
2 | 376 | eof: |
377 | /* free the tokens */ | |
378 | while(ntokens--) | |
379 | tok_free(&tokens[ntokens]); | |
380 | ||
381 | while(tokz->nest_lvl>=init_nest_lvl){ | |
382 | if(tokz->flags&TOKZ_ERROR_TOLERANT || !had_error) | |
383 | call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
384 | else | |
385 | call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
0 | 386 | tokz->nest_lvl--; |
387 | } | |
388 | ||
389 | /* Free optstack if it was alloced by this call */ | |
390 | if(alloced_optstack){ | |
391 | free(tokz->optstack); | |
392 | tokz->optstack=NULL; | |
393 | tokz->nest_lvl=0; | |
394 | } | |
395 | ||
396 | if(tokz->flags&TOKZ_PARSER_INDENT_MODE) | |
397 | verbose_indent(init_nest_lvl); | |
398 | ||
399 | return !had_error; | |
400 | } | |
401 | ||
402 | ||
403 | /* */ | |
404 | ||
405 | ||
2 | 406 | bool parse_config(const char *fname, const ConfOpt *options, int flags) |
0 | 407 | { |
408 | Tokenizer *tokz; | |
409 | bool ret; | |
410 | ||
411 | tokz=tokz_open(fname); | |
412 | ||
413 | if(tokz==NULL) | |
414 | return FALSE; | |
2 | 415 | |
416 | tokz->flags|=flags&~TOKZ_READ_COMMENTS; | |
0 | 417 | |
418 | ret=parse_config_tokz(tokz, options); | |
419 | ||
420 | tokz_close(tokz); | |
421 | ||
422 | return ret; | |
423 | } | |
424 | ||
425 | ||
2 | 426 | bool parse_config_file(FILE *file, const ConfOpt *options, int flags) |
0 | 427 | { |
428 | Tokenizer *tokz; | |
429 | bool ret; | |
430 | ||
14 | 431 | tokz=tokz_open_file(file, NULL); |
0 | 432 | |
433 | if(tokz==NULL) | |
434 | return FALSE; | |
435 | ||
2 | 436 | tokz->flags|=flags&~TOKZ_READ_COMMENTS; |
437 | ||
0 | 438 | ret=parse_config_tokz(tokz, options); |
439 | ||
440 | tokz_close(tokz); | |
441 | ||
442 | return ret; | |
443 | } | |
444 | ||
445 | ||
446 | /* | |
447 | * Argument validity checking stuff | |
448 | */ | |
449 | ||
450 | ||
2 | 451 | static int arg_match(Token *tok, char c) |
0 | 452 | { |
20 | 453 | char c2=tok->type; |
0 | 454 | |
455 | if(c=='.' || c=='*') | |
2 | 456 | return 0; |
0 | 457 | |
458 | if(c2==c) | |
2 | 459 | return 0; |
0 | 460 | |
461 | if(c2=='c' && c=='l'){ | |
462 | TOK_SET_LONG(tok, TOK_CHAR_VAL(tok)); | |
2 | 463 | return 0; |
0 | 464 | } |
465 | ||
466 | if(c2=='l' && c=='c'){ | |
467 | TOK_SET_CHAR(tok, TOK_LONG_VAL(tok)); | |
2 | 468 | return 0; |
0 | 469 | } |
470 | ||
471 | if(c2=='l' && c=='d'){ | |
472 | TOK_SET_DOUBLE(tok, TOK_LONG_VAL(tok)); | |
2 | 473 | return 0; |
0 | 474 | } |
475 | ||
17 | 476 | if(c=='b'){ |
477 | if(c2=='l'){ | |
478 | TOK_SET_BOOL(tok, TOK_LONG_VAL(tok)); | |
479 | return 0; | |
480 | }else if(c2=='i'){ | |
481 | if(strcmp(TOK_IDENT_VAL(tok), "TRUE")==0){ | |
482 | tok_free(tok); | |
483 | TOK_SET_BOOL(tok, TRUE); | |
484 | return 0; | |
485 | }else if(strcmp(TOK_IDENT_VAL(tok), "FALSE")==0){ | |
486 | tok_free(tok); | |
487 | TOK_SET_BOOL(tok, FALSE); | |
488 | return 0; | |
489 | } | |
490 | } | |
491 | } | |
492 | ||
2 | 493 | return E_TOKZ_INVALID_ARGUMENT; |
0 | 494 | } |
495 | ||
496 | ||
2 | 497 | static int check_argument(const char **pret, Token *tok, const char *p) |
0 | 498 | { |
2 | 499 | int mode; |
500 | int e=E_TOKZ_TOO_MANY_ARGS; | |
0 | 501 | |
2 | 502 | again: |
503 | mode=0; | |
504 | ||
0 | 505 | if(*p=='*'){ |
506 | *pret=p; | |
2 | 507 | return 0; |
0 | 508 | }else if(*p=='?'){ |
509 | mode=1; | |
510 | p++; | |
511 | }else if(*p==':'){ | |
512 | mode=2; | |
513 | p++; | |
514 | }else if(*p=='+'){ | |
515 | *pret=p; | |
516 | return arg_match(tok, *(p-1)); | |
517 | } | |
518 | ||
519 | while(*p!='\0'){ | |
2 | 520 | e=arg_match(tok, *p); |
521 | if(e==0){ | |
0 | 522 | p++; |
523 | while(mode==2 && *p==':'){ | |
524 | if(*++p=='\0') | |
2 | 525 | break; /* Invalid argument format string, though... */ |
0 | 526 | p++; |
527 | } | |
528 | *pret=p; | |
2 | 529 | return 0; |
0 | 530 | } |
531 | ||
532 | if(mode==0) | |
533 | break; | |
534 | ||
535 | p++; | |
536 | ||
2 | 537 | if(mode==1) |
538 | goto again; | |
539 | ||
540 | /* mode==2 */ | |
0 | 541 | |
542 | if(*p!=':') | |
543 | break; | |
544 | p++; | |
2 | 545 | e=E_TOKZ_TOO_MANY_ARGS; |
0 | 546 | } |
547 | ||
548 | *pret=p; | |
2 | 549 | return e; |
0 | 550 | } |
551 | ||
552 | ||
/*
 * Tell whether the rest of an argument format string can be left
 * unmatched, i.e. whether it holds only '*', '+' and optional
 * ('?' plus type character) specifications.
 *
 * Returns true for a NULL or empty p and for a format consisting only
 * of such specifications, false as soon as anything else is found.
 */
static bool args_at_end(const char *p)
{
    while(p!=NULL && *p!='\0'){
        if(*p=='*' || *p=='+'){
            p++;
        }else if(*p=='?'){
            /* Skip the '?' together with its type character. A '?'
             * that ends the string has no type character; advance by
             * one only, so that p never moves past the terminator.
             */
            p+=(p[1]!='\0' ? 2 : 1);
        }else{
            break;
        }
    }

    return p==NULL || *p=='\0';
}
569 | ||
570 | ||
20 | 571 | bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens, |
572 | const char *fmt) | |
0 | 573 | { |
574 | int i; | |
2 | 575 | int e; |
576 | ||
577 | if(fmt==NULL){ | |
578 | if(ntokens!=1) | |
579 | tokz_warn_error(tokz, tokens[0].line, E_TOKZ_TOO_MANY_ARGS); | |
580 | return ntokens==1; | |
581 | } | |
0 | 582 | |
2 | 583 | for(i=1; i<ntokens; i++){ |
584 | e=check_argument(&fmt, &tokens[i], fmt); | |
585 | if(e!=0){ | |
586 | tokz_warn_error(tokz, tokens[i].line, e); | |
0 | 587 | return FALSE; |
588 | } | |
589 | } | |
590 | ||
591 | if(!args_at_end(fmt)){ | |
592 | tokz_warn_error(tokz, tokens[i].line, E_TOKZ_TOO_FEW_ARGS); | |
593 | return FALSE; | |
594 | } | |
595 | ||
596 | return TRUE; | |
597 | } | |
598 | ||
1 | 599 | |
600 | /* */ | |
601 | ||
602 | ||
14 | 603 | static bool try_include(Tokenizer *tokz, const char *fname) |
1 | 604 | { |
14 | 605 | FILE *f; |
606 | ||
607 | f=fopen(fname, "r"); | |
608 | ||
609 | if(f==NULL) | |
610 | return FALSE; | |
611 | ||
612 | if(!tokz_pushf_file(tokz, f, fname)){ | |
613 | fclose(f); | |
614 | return FALSE; | |
615 | } | |
616 | ||
617 | return TRUE; | |
618 | } | |
619 | ||
620 | ||
621 | static bool try_include_dir(Tokenizer *tokz, const char *dir, int dlen, | |
622 | const char *file) | |
623 | { | |
1 | 624 | char *tmpname; |
625 | bool retval; | |
626 | ||
14 | 627 | tmpname=scatn(dir, dlen, file, -1); |
1 | 628 | |
629 | if(tmpname==NULL){ | |
630 | warn_err(); | |
631 | return FALSE; | |
632 | } | |
633 | ||
14 | 634 | retval=try_include(tokz, tmpname); |
635 | ||
1 | 636 | free(tmpname); |
14 | 637 | |
1 | 638 | return retval; |
639 | } | |
640 | ||
14 | 641 | |
642 | static bool opt_include(Tokenizer *tokz, int n, Token *toks) | |
643 | { | |
644 | const char *fname=TOK_STRING_VAL(toks+1); | |
645 | const char *lastndx=NULL; | |
646 | bool retval, e; | |
647 | int i=0; | |
648 | ||
649 | if(fname[0]!='/' && tokz->name!=NULL) | |
650 | lastndx=strrchr(tokz->name, '/'); | |
651 | ||
652 | if(lastndx==NULL) | |
653 | retval=try_include(tokz, fname); | |
654 | else | |
655 | retval=try_include_dir(tokz, tokz->name, lastndx-tokz->name+1, fname); | |
656 | ||
657 | if(retval==TRUE) | |
658 | return TRUE; | |
659 | ||
660 | e=errno; | |
661 | ||
662 | if(tokz->includepaths!=NULL){ | |
663 | while(tokz->includepaths[i]!=NULL){ | |
664 | if(try_include_dir(tokz, tokz->includepaths[i], -1, fname)) | |
665 | return TRUE; | |
666 | i++; | |
667 | } | |
668 | } | |
669 | ||
670 | warn_obj(fname, "%s", strerror(e)); | |
671 | ||
672 | return FALSE; | |
673 | } | |
674 | ||
675 | ||
676 | extern void tokz_set_includepaths(Tokenizer *tokz, char **paths) | |
677 | { | |
678 | tokz->includepaths=paths; | |
679 | } | |
680 |