Tue, 02 Jan 2001 01:45:46 +0100
trunk: changeset 34
Added libtu_(v)asprintf functions.
0 | 1 | /* |
2 | * libtu/parser.c | |
3 | * | |
9 | 4 | * Copyright (c) Tuomo Valkonen 1999-2000. |
0 | 5 | * See the included file LICENSE for details. |
6 | */ | |
7 | ||
8 | #include <string.h> | |
14 | 9 | #include <errno.h> |
0 | 10 | |
5 | 11 | #include <libtu/parser.h> |
12 | #include <libtu/misc.h> | |
13 | #include <libtu/output.h> | |
0 | 14 | |
13 | 15 | #define MAX_TOKENS 256 |
16 | #define MAX_NEST 256 | |
0 | 17 | |
18 | ||
/* Statement kinds reported by read_statement(). All values are non-zero;
 * read_statement() negates the value when the statement was read with
 * errors.
 */
enum{
    P_NONE=1,       /* ';' or newline with no tokens before it */
    P_EOF=2,        /* end of input with no tokens before it */
    P_STMT=3,       /* tokens terminated by a newline */
    P_STMT_NS=4,    /* tokens terminated by ';', or by '}' or end of input */
    P_STMT_SECT=5,  /* tokens followed by '{' */
    P_BEG_SECT=6,   /* '{' with no tokens before it */
    P_END_SECT=7    /* '}' with no tokens before it */
};
28 | ||
29 | ||
20 | 30 | /* */ |
0 | 31 | |
32 | ||
1 | 33 | static bool opt_include(Tokenizer *tokz, int n, Token *toks); |
34 | ||
35 | ||
36 | static ConfOpt common_opts[]={ | |
37 | {"include", "s", opt_include, NULL}, | |
38 | {NULL, NULL, NULL, NULL} | |
39 | }; | |
40 | ||
41 | ||
42 | /* */ | |
43 | ||
0 | 44 | |
2 | 45 | static int read_statement(Tokenizer *tokz, Token *tokens, int *ntok_ret) |
0 | 46 | { |
47 | int ntokens=0; | |
48 | Token *tok=NULL; | |
2 | 49 | int had_comma=0; /* 0 - no, 1 - yes, 2 - not had, not expected */ |
50 | int retval=0; | |
0 | 51 | int e=0; |
52 | ||
53 | while(1){ | |
2 | 54 | tok=&tokens[ntokens]; |
55 | ||
56 | if(!tokz_get_token(tokz, tok)){ | |
57 | e=1; | |
58 | continue; | |
59 | } | |
60 | ||
0 | 61 | if(ntokens==MAX_TOKENS-1){ |
62 | e=E_TOKZ_TOKEN_LIMIT; | |
2 | 63 | tokz_warn_error(tokz, tok->line, e); |
64 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
65 | break; | |
66 | }else{ | |
67 | ntokens++; | |
0 | 68 | } |
69 | ||
70 | if(!TOK_IS_OP(tok)){ | |
71 | if(ntokens==1 && !had_comma){ | |
22 | 72 | /* first token */ |
0 | 73 | had_comma=2; |
74 | }else{ | |
2 | 75 | if(had_comma==0) |
76 | goto syntax; | |
0 | 77 | |
78 | had_comma=0; | |
79 | } | |
80 | continue; | |
81 | } | |
82 | ||
83 | /* It is an operator */ | |
2 | 84 | ntokens--; |
0 | 85 | |
86 | switch(TOK_OP_VAL(tok)){ | |
87 | case OP_SCOLON: | |
2 | 88 | retval=(ntokens==0 ? P_NONE : P_STMT_NS); |
0 | 89 | break; |
90 | ||
91 | case OP_NEXTLINE: | |
2 | 92 | retval=(ntokens==0 ? P_NONE : P_STMT); |
93 | break; | |
0 | 94 | |
2 | 95 | case OP_L_BRC: |
96 | retval=(ntokens==0 ? P_BEG_SECT : P_STMT_SECT); | |
0 | 97 | break; |
98 | ||
2 | 99 | case OP_R_BRC: |
100 | if(ntokens==0){ | |
101 | retval=P_END_SECT; | |
102 | }else{ | |
103 | tokz_unget_token(tokz, tok); | |
104 | retval=P_STMT_NS; | |
105 | } | |
106 | break; | |
107 | ||
0 | 108 | case OP_EOF: |
2 | 109 | retval=(ntokens==0 ? P_EOF : P_STMT_NS); |
110 | ||
0 | 111 | if(had_comma==1){ |
112 | e=E_TOKZ_UNEXPECTED_EOF; | |
2 | 113 | goto handle_error; |
0 | 114 | } |
115 | ||
2 | 116 | goto end; |
0 | 117 | |
118 | case OP_COMMA: | |
2 | 119 | if(had_comma!=0) |
120 | goto syntax; | |
121 | ||
0 | 122 | had_comma=1; |
2 | 123 | continue; |
0 | 124 | |
125 | default: | |
2 | 126 | goto syntax; |
0 | 127 | } |
128 | ||
2 | 129 | if(had_comma!=1) |
130 | break; | |
131 | ||
132 | syntax: | |
133 | e=E_TOKZ_SYNTAX; | |
134 | handle_error: | |
135 | tokz_warn_error(tokz, tok->line, e); | |
136 | ||
137 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT) || retval!=0) | |
138 | break; | |
0 | 139 | } |
140 | ||
2 | 141 | end: |
142 | if(e!=0) | |
143 | retval=-retval; | |
144 | ||
0 | 145 | *ntok_ret=ntokens; |
146 | ||
147 | return retval; | |
148 | } | |
149 | ||
150 | ||
2 | 151 | static bool find_beg_sect(Tokenizer *tokz) |
152 | { | |
24 | 153 | Token tok=TOK_INIT; |
2 | 154 | |
155 | while(tokz_get_token(tokz, &tok)){ | |
156 | if(TOK_IS_OP(&tok)){ | |
157 | if(TOK_OP_VAL(&tok)==OP_NEXTLINE) | |
158 | continue; | |
159 | ||
160 | if(TOK_OP_VAL(&tok)==OP_SCOLON) | |
161 | return FALSE; | |
162 | ||
163 | if(TOK_OP_VAL(&tok)==OP_L_BRC) | |
164 | return TRUE; | |
165 | } | |
166 | ||
167 | tokz_unget_token(tokz, &tok); | |
168 | break; | |
169 | } | |
170 | return FALSE; | |
171 | } | |
172 | ||
173 | ||
0 | 174 | /* */ |
175 | ||
176 | ||
2 | 177 | static const ConfOpt* lookup_option(const ConfOpt *opts, const char *name) |
178 | { | |
179 | while(opts->optname!=NULL){ | |
180 | if(strcmp(opts->optname, name)==0) | |
181 | return opts; | |
182 | opts++; | |
183 | } | |
184 | return NULL; | |
185 | } | |
186 | ||
0 | 187 | |
2 | 188 | static bool call_end_sect(Tokenizer *tokz, const ConfOpt *opts) |
189 | { | |
190 | opts=lookup_option(opts, "#end"); | |
191 | if(opts!=NULL) | |
192 | return opts->fn(tokz, 0, NULL); | |
0 | 193 | |
2 | 194 | return TRUE; |
0 | 195 | } |
196 | ||
197 | ||
2 | 198 | static bool call_cancel_sect(Tokenizer *tokz, const ConfOpt *opts) |
199 | { | |
200 | opts=lookup_option(opts, "#cancel"); | |
201 | if(opts!=NULL) | |
202 | return opts->fn(tokz, 0, NULL); | |
203 | ||
204 | return TRUE; | |
0 | 205 | } |
206 | ||
207 | ||
208 | /* */ | |
209 | ||
210 | ||
211 | bool parse_config_tokz(Tokenizer *tokz, const ConfOpt *options) | |
212 | { | |
213 | Token tokens[MAX_TOKENS]; | |
214 | bool alloced_optstack=FALSE; | |
2 | 215 | int i, t, ntokens=0; |
0 | 216 | int init_nest_lvl; |
2 | 217 | bool had_error; |
218 | int errornest=0; | |
0 | 219 | |
220 | /* Allocate tokz->optstack if it does not yet exist (if it does, | |
221 | * we have been called from an option handler) | |
222 | */ | |
223 | if(!tokz->optstack){ | |
224 | tokz->optstack=ALLOC_N(const ConfOpt*, MAX_NEST); | |
225 | if(!tokz->optstack){ | |
226 | warn_err(); | |
227 | return FALSE; | |
228 | } | |
229 | ||
230 | memset(tokz->optstack, 0, sizeof(ConfOpt*)*MAX_NEST); | |
231 | init_nest_lvl=tokz->nest_lvl=0; | |
232 | alloced_optstack=TRUE; | |
233 | }else{ | |
234 | init_nest_lvl=tokz->nest_lvl; | |
235 | } | |
236 | ||
237 | tokz->optstack[init_nest_lvl]=options; | |
238 | ||
2 | 239 | for(i=0; i<MAX_TOKENS; i++) |
0 | 240 | tok_init(&tokens[i]); |
241 | ||
242 | ||
243 | while(1){ | |
2 | 244 | had_error=FALSE; |
0 | 245 | |
246 | /* free the tokens */ | |
247 | while(ntokens--) | |
248 | tok_free(&tokens[ntokens]); | |
249 | ||
2 | 250 | /* read the tokens */ |
251 | t=read_statement(tokz, tokens, &ntokens); | |
252 | ||
253 | if((had_error=t<0)) | |
254 | t=-t; | |
255 | ||
256 | switch(t){ | |
257 | case P_STMT: | |
258 | case P_STMT_NS: | |
259 | case P_STMT_SECT: | |
260 | ||
261 | if(errornest) | |
262 | had_error=TRUE; | |
263 | else if(tokz->flags&TOKZ_PARSER_INDENT_MODE) | |
264 | verbose_indent(tokz->nest_lvl); | |
265 | ||
266 | if(!TOK_IS_IDENT(tokens+0)){ | |
267 | had_error=TRUE; | |
268 | tokz_warn_error(tokz, tokens->line, | |
269 | E_TOKZ_IDENTIFIER_EXPECTED); | |
270 | } | |
271 | ||
272 | if(t==P_STMT){ | |
273 | if(find_beg_sect(tokz)) | |
274 | t=P_STMT_SECT; | |
275 | } | |
276 | ||
22 | 277 | if(had_error) |
278 | break; | |
279 | ||
280 | /* Got the statement and its type */ | |
281 | ||
2 | 282 | options=lookup_option(tokz->optstack[tokz->nest_lvl], |
283 | TOK_IDENT_VAL(tokens+0)); | |
284 | if(options==NULL) | |
285 | options=lookup_option(common_opts, TOK_IDENT_VAL(tokens+0)); | |
286 | ||
287 | if(options==NULL){ | |
288 | had_error=TRUE; | |
289 | tokz_warn_error(tokz, tokens->line, E_TOKZ_UNKNOWN_OPTION); | |
290 | }else{ | |
291 | had_error=!check_args(tokz, tokens, ntokens, options->argfmt); | |
292 | } | |
293 | ||
294 | if(had_error) | |
295 | break; | |
296 | ||
22 | 297 | /* Found the option and arguments are ok */ |
298 | ||
2 | 299 | if(options->opts!=NULL){ |
300 | if(t!=P_STMT_SECT){ | |
301 | had_error=TRUE; | |
302 | tokz_warn_error(tokz, tokz->line, E_TOKZ_LBRACE_EXPECTED); | |
303 | }else if(tokz->nest_lvl==MAX_NEST-1){ | |
304 | tokz_warn_error(tokz, tokz->line, E_TOKZ_MAX_NEST); | |
305 | had_error=TRUE; | |
306 | }else{ | |
22 | 307 | tokz->nest_lvl++; |
308 | tokz->optstack[tokz->nest_lvl]=options->opts; | |
2 | 309 | } |
310 | }else if(t==P_STMT_SECT){ | |
311 | had_error=TRUE; | |
312 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
313 | } | |
314 | ||
315 | if(!had_error && options->fn!=NULL){ | |
316 | had_error=!options->fn(tokz, ntokens, tokens); | |
317 | if(t==P_STMT_SECT && had_error) | |
318 | tokz->nest_lvl--; | |
319 | } | |
0 | 320 | break; |
2 | 321 | |
0 | 322 | case P_EOF: |
1 | 323 | if(tokz_popf(tokz)){ |
2 | 324 | break; |
325 | }else if(tokz->nest_lvl>0 || errornest>0){ | |
0 | 326 | tokz_warn_error(tokz, 0, E_TOKZ_UNEXPECTED_EOF); |
327 | had_error=TRUE; | |
328 | } | |
2 | 329 | goto eof; |
330 | ||
331 | case P_BEG_SECT: | |
332 | had_error=TRUE; | |
333 | errornest++; | |
334 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
0 | 335 | break; |
2 | 336 | |
0 | 337 | case P_END_SECT: |
22 | 338 | if(tokz->nest_lvl+errornest==0){ |
0 | 339 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); |
340 | had_error=TRUE; | |
22 | 341 | } |
342 | ||
343 | if(had_error) | |
0 | 344 | break; |
345 | ||
20 | 346 | if(errornest!=0){ |
347 | errornest--; | |
348 | }else{ | |
22 | 349 | had_error=!call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]); |
20 | 350 | tokz->nest_lvl--; |
351 | } | |
0 | 352 | |
353 | if(tokz->nest_lvl<init_nest_lvl) | |
2 | 354 | goto eof; |
355 | } | |
356 | ||
357 | if(!had_error) | |
358 | continue; | |
359 | ||
22 | 360 | if(t==P_STMT_SECT) |
2 | 361 | errornest++; |
362 | ||
363 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
364 | break; | |
365 | } | |
0 | 366 | |
2 | 367 | eof: |
368 | /* free the tokens */ | |
369 | while(ntokens--) | |
370 | tok_free(&tokens[ntokens]); | |
371 | ||
372 | while(tokz->nest_lvl>=init_nest_lvl){ | |
373 | if(tokz->flags&TOKZ_ERROR_TOLERANT || !had_error) | |
374 | call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
375 | else | |
376 | call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
0 | 377 | tokz->nest_lvl--; |
378 | } | |
379 | ||
380 | /* Free optstack if it was alloced by this call */ | |
381 | if(alloced_optstack){ | |
382 | free(tokz->optstack); | |
383 | tokz->optstack=NULL; | |
384 | tokz->nest_lvl=0; | |
385 | } | |
386 | ||
387 | if(tokz->flags&TOKZ_PARSER_INDENT_MODE) | |
388 | verbose_indent(init_nest_lvl); | |
389 | ||
390 | return !had_error; | |
391 | } | |
392 | ||
393 | ||
394 | /* */ | |
395 | ||
396 | ||
2 | 397 | bool parse_config(const char *fname, const ConfOpt *options, int flags) |
0 | 398 | { |
399 | Tokenizer *tokz; | |
400 | bool ret; | |
401 | ||
402 | tokz=tokz_open(fname); | |
403 | ||
404 | if(tokz==NULL) | |
405 | return FALSE; | |
2 | 406 | |
407 | tokz->flags|=flags&~TOKZ_READ_COMMENTS; | |
0 | 408 | |
409 | ret=parse_config_tokz(tokz, options); | |
410 | ||
411 | tokz_close(tokz); | |
412 | ||
413 | return ret; | |
414 | } | |
415 | ||
416 | ||
2 | 417 | bool parse_config_file(FILE *file, const ConfOpt *options, int flags) |
0 | 418 | { |
419 | Tokenizer *tokz; | |
420 | bool ret; | |
421 | ||
14 | 422 | tokz=tokz_open_file(file, NULL); |
0 | 423 | |
424 | if(tokz==NULL) | |
425 | return FALSE; | |
426 | ||
2 | 427 | tokz->flags|=flags&~TOKZ_READ_COMMENTS; |
428 | ||
0 | 429 | ret=parse_config_tokz(tokz, options); |
430 | ||
431 | tokz_close(tokz); | |
432 | ||
433 | return ret; | |
434 | } | |
435 | ||
436 | ||
437 | /* | |
438 | * Argument validity checking stuff | |
439 | */ | |
440 | ||
441 | ||
2 | 442 | static int arg_match(Token *tok, char c) |
0 | 443 | { |
20 | 444 | char c2=tok->type; |
0 | 445 | |
446 | if(c=='.' || c=='*') | |
2 | 447 | return 0; |
0 | 448 | |
449 | if(c2==c) | |
2 | 450 | return 0; |
0 | 451 | |
452 | if(c2=='c' && c=='l'){ | |
453 | TOK_SET_LONG(tok, TOK_CHAR_VAL(tok)); | |
2 | 454 | return 0; |
0 | 455 | } |
456 | ||
457 | if(c2=='l' && c=='c'){ | |
458 | TOK_SET_CHAR(tok, TOK_LONG_VAL(tok)); | |
2 | 459 | return 0; |
0 | 460 | } |
461 | ||
462 | if(c2=='l' && c=='d'){ | |
463 | TOK_SET_DOUBLE(tok, TOK_LONG_VAL(tok)); | |
2 | 464 | return 0; |
0 | 465 | } |
466 | ||
17 | 467 | if(c=='b'){ |
468 | if(c2=='l'){ | |
469 | TOK_SET_BOOL(tok, TOK_LONG_VAL(tok)); | |
470 | return 0; | |
471 | }else if(c2=='i'){ | |
472 | if(strcmp(TOK_IDENT_VAL(tok), "TRUE")==0){ | |
473 | tok_free(tok); | |
474 | TOK_SET_BOOL(tok, TRUE); | |
475 | return 0; | |
476 | }else if(strcmp(TOK_IDENT_VAL(tok), "FALSE")==0){ | |
477 | tok_free(tok); | |
478 | TOK_SET_BOOL(tok, FALSE); | |
479 | return 0; | |
480 | } | |
481 | } | |
482 | } | |
483 | ||
2 | 484 | return E_TOKZ_INVALID_ARGUMENT; |
0 | 485 | } |
486 | ||
487 | ||
2 | 488 | static int check_argument(const char **pret, Token *tok, const char *p) |
0 | 489 | { |
2 | 490 | int mode; |
491 | int e=E_TOKZ_TOO_MANY_ARGS; | |
0 | 492 | |
2 | 493 | again: |
494 | mode=0; | |
495 | ||
0 | 496 | if(*p=='*'){ |
497 | *pret=p; | |
2 | 498 | return 0; |
0 | 499 | }else if(*p=='?'){ |
500 | mode=1; | |
501 | p++; | |
502 | }else if(*p==':'){ | |
503 | mode=2; | |
504 | p++; | |
505 | }else if(*p=='+'){ | |
506 | *pret=p; | |
507 | return arg_match(tok, *(p-1)); | |
508 | } | |
509 | ||
510 | while(*p!='\0'){ | |
2 | 511 | e=arg_match(tok, *p); |
512 | if(e==0){ | |
0 | 513 | p++; |
514 | while(mode==2 && *p==':'){ | |
515 | if(*++p=='\0') | |
2 | 516 | break; /* Invalid argument format string, though... */ |
0 | 517 | p++; |
518 | } | |
519 | *pret=p; | |
2 | 520 | return 0; |
0 | 521 | } |
522 | ||
523 | if(mode==0) | |
524 | break; | |
525 | ||
526 | p++; | |
527 | ||
2 | 528 | if(mode==1) |
529 | goto again; | |
530 | ||
531 | /* mode==2 */ | |
0 | 532 | |
533 | if(*p!=':') | |
534 | break; | |
535 | p++; | |
2 | 536 | e=E_TOKZ_TOO_MANY_ARGS; |
0 | 537 | } |
538 | ||
539 | *pret=p; | |
2 | 540 | return e; |
0 | 541 | } |
542 | ||
543 | ||
/*
 * Tell whether the rest of the argument format `p` can be satisfied
 * without any further tokens.
 *
 * That is the case when `p` is NULL or when everything left in it is
 * '*', '+' or optional items ('?' followed by a type character).
 *
 * A '?' at the very end of the format has no type character after it.
 * Only the '?' itself is skipped then, so the scan never moves past the
 * terminating NUL.
 *
 * Returns non-zero if no more arguments are required, zero otherwise.
 */
static bool args_at_end(const char *p)
{
    if(p!=NULL){
        while(*p!='\0'){
            if(*p=='*' || *p=='+'){
                p++;
            }else if(*p=='?'){
                p++;
                /* Skip the type character only if there is one */
                if(*p!='\0')
                    p++;
            }else{
                /* A mandatory item is left */
                break;
            }
        }
    }

    return (p==NULL || *p=='\0');
}
560 | ||
561 | ||
20 | 562 | bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens, |
563 | const char *fmt) | |
0 | 564 | { |
565 | int i; | |
2 | 566 | int e; |
567 | ||
568 | if(fmt==NULL){ | |
569 | if(ntokens!=1) | |
570 | tokz_warn_error(tokz, tokens[0].line, E_TOKZ_TOO_MANY_ARGS); | |
571 | return ntokens==1; | |
572 | } | |
0 | 573 | |
2 | 574 | for(i=1; i<ntokens; i++){ |
575 | e=check_argument(&fmt, &tokens[i], fmt); | |
576 | if(e!=0){ | |
577 | tokz_warn_error(tokz, tokens[i].line, e); | |
0 | 578 | return FALSE; |
579 | } | |
580 | } | |
581 | ||
582 | if(!args_at_end(fmt)){ | |
583 | tokz_warn_error(tokz, tokens[i].line, E_TOKZ_TOO_FEW_ARGS); | |
584 | return FALSE; | |
585 | } | |
586 | ||
587 | return TRUE; | |
588 | } | |
589 | ||
1 | 590 | |
591 | /* */ | |
592 | ||
593 | ||
14 | 594 | static bool try_include(Tokenizer *tokz, const char *fname) |
1 | 595 | { |
14 | 596 | FILE *f; |
597 | ||
598 | f=fopen(fname, "r"); | |
599 | ||
600 | if(f==NULL) | |
601 | return FALSE; | |
602 | ||
603 | if(!tokz_pushf_file(tokz, f, fname)){ | |
604 | fclose(f); | |
605 | return FALSE; | |
606 | } | |
607 | ||
608 | return TRUE; | |
609 | } | |
610 | ||
611 | ||
612 | static bool try_include_dir(Tokenizer *tokz, const char *dir, int dlen, | |
613 | const char *file) | |
614 | { | |
1 | 615 | char *tmpname; |
616 | bool retval; | |
617 | ||
14 | 618 | tmpname=scatn(dir, dlen, file, -1); |
1 | 619 | |
620 | if(tmpname==NULL){ | |
621 | warn_err(); | |
622 | return FALSE; | |
623 | } | |
624 | ||
14 | 625 | retval=try_include(tokz, tmpname); |
626 | ||
1 | 627 | free(tmpname); |
14 | 628 | |
1 | 629 | return retval; |
630 | } | |
631 | ||
14 | 632 | |
633 | static bool opt_include(Tokenizer *tokz, int n, Token *toks) | |
634 | { | |
635 | const char *fname=TOK_STRING_VAL(toks+1); | |
636 | const char *lastndx=NULL; | |
637 | bool retval, e; | |
638 | int i=0; | |
639 | ||
640 | if(fname[0]!='/' && tokz->name!=NULL) | |
641 | lastndx=strrchr(tokz->name, '/'); | |
642 | ||
643 | if(lastndx==NULL) | |
644 | retval=try_include(tokz, fname); | |
645 | else | |
646 | retval=try_include_dir(tokz, tokz->name, lastndx-tokz->name+1, fname); | |
647 | ||
648 | if(retval==TRUE) | |
649 | return TRUE; | |
650 | ||
651 | e=errno; | |
652 | ||
653 | if(tokz->includepaths!=NULL){ | |
654 | while(tokz->includepaths[i]!=NULL){ | |
655 | if(try_include_dir(tokz, tokz->includepaths[i], -1, fname)) | |
656 | return TRUE; | |
657 | i++; | |
658 | } | |
659 | } | |
660 | ||
661 | warn_obj(fname, "%s", strerror(e)); | |
662 | ||
663 | return FALSE; | |
664 | } | |
665 | ||
666 | ||
667 | extern void tokz_set_includepaths(Tokenizer *tokz, char **paths) | |
668 | { | |
669 | tokz->includepaths=paths; | |
670 | } | |
671 |