Wed, 19 Apr 2000 22:03:38 +0200
trunk: changeset 6
- Simpler optparser
- New Makefile system
/*
 * libtu/parser.c
 *
 * Copyright (c) Tuomo Valkonen 1999-2000.
 *
 * This file is distributed under the terms of the "Artistic License".
 * See the included file LICENSE for details.
 */
9 | ||
10 | #include <string.h> | |
11 | ||
12 | #include "include/parser.h" | |
13 | #include "include/misc.h" | |
14 | #include "include/output.h" | |
15 | ||
/* Number of Token slots in the per-statement buffer used by the parser. */
#define MAX_TOKENS 32

/* Number of entries in the option-table stack (section nesting limit). */
#define MAX_NEST 16
18 | ||
19 | ||
/*
 * Statement kinds returned by read_statement().  The values start at 1
 * because read_statement() negates the kind to signal an error.
 */
enum{
    P_NONE=1,       /* terminator seen with no tokens before it */
    P_EOF,          /* end of file with no tokens before it */
    P_STMT,         /* statement terminated by a newline */
    P_STMT_NS,      /* statement terminated by ';', '}' or end of file */
    P_STMT_SECT,    /* statement followed by '{' */
    P_BEG_SECT,     /* '{' with no tokens before it */
    P_END_SECT      /* '}' with no tokens before it */
};
29 | ||
30 | ||
31 | static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens, | |
32 | const char *fmt); | |
33 | ||
34 | ||
35 | /* */ | |
36 | ||
1 | 37 | static bool opt_include(Tokenizer *tokz, int n, Token *toks); |
38 | ||
39 | ||
40 | static ConfOpt common_opts[]={ | |
41 | {"include", "s", opt_include, NULL}, | |
42 | {NULL, NULL, NULL, NULL} | |
43 | }; | |
44 | ||
45 | ||
46 | /* */ | |
47 | ||
0 | 48 | |
2 | 49 | static int read_statement(Tokenizer *tokz, Token *tokens, int *ntok_ret) |
0 | 50 | { |
51 | int ntokens=0; | |
52 | Token *tok=NULL; | |
2 | 53 | int had_comma=0; /* 0 - no, 1 - yes, 2 - not had, not expected */ |
54 | int retval=0; | |
0 | 55 | int e=0; |
56 | ||
57 | while(1){ | |
2 | 58 | tok=&tokens[ntokens]; |
59 | ||
60 | if(!tokz_get_token(tokz, tok)){ | |
61 | e=1; | |
62 | continue; | |
63 | } | |
64 | ||
0 | 65 | if(ntokens==MAX_TOKENS-1){ |
66 | e=E_TOKZ_TOKEN_LIMIT; | |
2 | 67 | tokz_warn_error(tokz, tok->line, e); |
68 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
69 | break; | |
70 | }else{ | |
71 | ntokens++; | |
0 | 72 | } |
73 | ||
74 | if(!TOK_IS_OP(tok)){ | |
75 | if(ntokens==1 && !had_comma){ | |
2 | 76 | /*if(!TOK_IS_IDENT(tok)){ |
0 | 77 | e=E_TOKZ_IDENTIFIER_EXPECTED; |
2 | 78 | goto handle_error; |
79 | }*/ | |
0 | 80 | |
81 | had_comma=2; | |
82 | }else{ | |
2 | 83 | if(had_comma==0) |
84 | goto syntax; | |
0 | 85 | |
86 | had_comma=0; | |
87 | } | |
88 | continue; | |
89 | } | |
90 | ||
91 | /* It is an operator */ | |
2 | 92 | ntokens--; |
0 | 93 | |
94 | switch(TOK_OP_VAL(tok)){ | |
95 | case OP_SCOLON: | |
2 | 96 | retval=(ntokens==0 ? P_NONE : P_STMT_NS); |
0 | 97 | break; |
98 | ||
99 | case OP_NEXTLINE: | |
2 | 100 | retval=(ntokens==0 ? P_NONE : P_STMT); |
101 | break; | |
0 | 102 | |
2 | 103 | case OP_L_BRC: |
104 | retval=(ntokens==0 ? P_BEG_SECT : P_STMT_SECT); | |
0 | 105 | break; |
106 | ||
2 | 107 | case OP_R_BRC: |
108 | if(ntokens==0){ | |
109 | retval=P_END_SECT; | |
110 | }else{ | |
111 | tokz_unget_token(tokz, tok); | |
112 | retval=P_STMT_NS; | |
113 | } | |
114 | break; | |
115 | ||
0 | 116 | case OP_EOF: |
2 | 117 | retval=(ntokens==0 ? P_EOF : P_STMT_NS); |
118 | ||
0 | 119 | if(had_comma==1){ |
120 | e=E_TOKZ_UNEXPECTED_EOF; | |
2 | 121 | goto handle_error; |
0 | 122 | } |
123 | ||
2 | 124 | goto end; |
0 | 125 | |
126 | case OP_COMMA: | |
2 | 127 | if(had_comma!=0) |
128 | goto syntax; | |
129 | ||
0 | 130 | had_comma=1; |
2 | 131 | continue; |
0 | 132 | |
133 | default: | |
2 | 134 | goto syntax; |
0 | 135 | } |
136 | ||
2 | 137 | if(had_comma!=1) |
138 | break; | |
139 | ||
140 | syntax: | |
141 | e=E_TOKZ_SYNTAX; | |
142 | handle_error: | |
143 | tokz_warn_error(tokz, tok->line, e); | |
144 | ||
145 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT) || retval!=0) | |
146 | break; | |
0 | 147 | } |
148 | ||
2 | 149 | end: |
150 | if(e!=0) | |
151 | retval=-retval; | |
152 | ||
0 | 153 | *ntok_ret=ntokens; |
154 | ||
155 | return retval; | |
156 | } | |
157 | ||
158 | ||
2 | 159 | static bool find_beg_sect(Tokenizer *tokz) |
160 | { | |
161 | Token tok; | |
162 | ||
163 | while(tokz_get_token(tokz, &tok)){ | |
164 | if(TOK_IS_OP(&tok)){ | |
165 | if(TOK_OP_VAL(&tok)==OP_NEXTLINE) | |
166 | continue; | |
167 | ||
168 | if(TOK_OP_VAL(&tok)==OP_SCOLON) | |
169 | return FALSE; | |
170 | ||
171 | if(TOK_OP_VAL(&tok)==OP_L_BRC) | |
172 | return TRUE; | |
173 | } | |
174 | ||
175 | tokz_unget_token(tokz, &tok); | |
176 | break; | |
177 | } | |
178 | return FALSE; | |
179 | } | |
180 | ||
181 | ||
0 | 182 | /* */ |
183 | ||
184 | ||
2 | 185 | static const ConfOpt* lookup_option(const ConfOpt *opts, const char *name) |
186 | { | |
187 | while(opts->optname!=NULL){ | |
188 | if(strcmp(opts->optname, name)==0) | |
189 | return opts; | |
190 | opts++; | |
191 | } | |
192 | return NULL; | |
193 | } | |
194 | ||
0 | 195 | |
2 | 196 | static bool call_end_sect(Tokenizer *tokz, const ConfOpt *opts) |
197 | { | |
198 | opts=lookup_option(opts, "#end"); | |
199 | if(opts!=NULL) | |
200 | return opts->fn(tokz, 0, NULL); | |
0 | 201 | |
2 | 202 | return TRUE; |
0 | 203 | } |
204 | ||
205 | ||
2 | 206 | static bool call_cancel_sect(Tokenizer *tokz, const ConfOpt *opts) |
207 | { | |
208 | opts=lookup_option(opts, "#cancel"); | |
209 | if(opts!=NULL) | |
210 | return opts->fn(tokz, 0, NULL); | |
211 | ||
212 | return TRUE; | |
0 | 213 | } |
214 | ||
215 | ||
216 | /* */ | |
217 | ||
218 | ||
219 | bool parse_config_tokz(Tokenizer *tokz, const ConfOpt *options) | |
220 | { | |
221 | Token tokens[MAX_TOKENS]; | |
222 | bool alloced_optstack=FALSE; | |
2 | 223 | int i, t, ntokens=0; |
0 | 224 | int init_nest_lvl; |
2 | 225 | bool had_error; |
226 | int errornest=0; | |
0 | 227 | |
228 | /* Allocate tokz->optstack if it does not yet exist (if it does, | |
229 | * we have been called from an option handler) | |
230 | */ | |
231 | if(!tokz->optstack){ | |
232 | tokz->optstack=ALLOC_N(const ConfOpt*, MAX_NEST); | |
233 | if(!tokz->optstack){ | |
234 | warn_err(); | |
235 | return FALSE; | |
236 | } | |
237 | ||
238 | memset(tokz->optstack, 0, sizeof(ConfOpt*)*MAX_NEST); | |
239 | init_nest_lvl=tokz->nest_lvl=0; | |
240 | alloced_optstack=TRUE; | |
241 | }else{ | |
242 | init_nest_lvl=tokz->nest_lvl; | |
243 | } | |
244 | ||
245 | tokz->optstack[init_nest_lvl]=options; | |
246 | ||
2 | 247 | for(i=0; i<MAX_TOKENS; i++) |
0 | 248 | tok_init(&tokens[i]); |
249 | ||
250 | ||
251 | /* The loop | |
252 | */ | |
253 | while(1){ | |
2 | 254 | had_error=FALSE; |
0 | 255 | |
256 | /* free the tokens */ | |
257 | while(ntokens--) | |
258 | tok_free(&tokens[ntokens]); | |
259 | ||
2 | 260 | /* read the tokens */ |
261 | t=read_statement(tokz, tokens, &ntokens); | |
262 | ||
263 | if((had_error=t<0)) | |
264 | t=-t; | |
265 | ||
266 | switch(t){ | |
267 | case P_STMT: | |
268 | case P_STMT_NS: | |
269 | case P_STMT_SECT: | |
270 | ||
271 | if(errornest) | |
272 | had_error=TRUE; | |
273 | else if(tokz->flags&TOKZ_PARSER_INDENT_MODE) | |
274 | verbose_indent(tokz->nest_lvl); | |
275 | ||
276 | if(!TOK_IS_IDENT(tokens+0)){ | |
277 | had_error=TRUE; | |
278 | tokz_warn_error(tokz, tokens->line, | |
279 | E_TOKZ_IDENTIFIER_EXPECTED); | |
280 | } | |
281 | ||
282 | if(had_error) | |
283 | break; | |
284 | ||
285 | if(t==P_STMT){ | |
286 | if(find_beg_sect(tokz)) | |
287 | t=P_STMT_SECT; | |
288 | } | |
289 | ||
290 | options=lookup_option(tokz->optstack[tokz->nest_lvl], | |
291 | TOK_IDENT_VAL(tokens+0)); | |
292 | if(options==NULL) | |
293 | options=lookup_option(common_opts, TOK_IDENT_VAL(tokens+0)); | |
294 | ||
295 | if(options==NULL){ | |
296 | had_error=TRUE; | |
297 | tokz_warn_error(tokz, tokens->line, E_TOKZ_UNKNOWN_OPTION); | |
298 | }else{ | |
299 | had_error=!check_args(tokz, tokens, ntokens, options->argfmt); | |
300 | } | |
301 | ||
302 | if(had_error) | |
303 | break; | |
304 | ||
305 | if(options->opts!=NULL){ | |
306 | if(t!=P_STMT_SECT){ | |
307 | had_error=TRUE; | |
308 | tokz_warn_error(tokz, tokz->line, E_TOKZ_LBRACE_EXPECTED); | |
309 | }else if(tokz->nest_lvl==MAX_NEST-1){ | |
310 | tokz_warn_error(tokz, tokz->line, E_TOKZ_MAX_NEST); | |
311 | had_error=TRUE; | |
312 | }else{ | |
313 | tokz->optstack[++tokz->nest_lvl]=options->opts; | |
314 | } | |
315 | }else if(t==P_STMT_SECT){ | |
316 | had_error=TRUE; | |
317 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
318 | } | |
319 | ||
320 | if(!had_error && options->fn!=NULL){ | |
321 | had_error=!options->fn(tokz, ntokens, tokens); | |
322 | if(t==P_STMT_SECT && had_error) | |
323 | tokz->nest_lvl--; | |
324 | } | |
0 | 325 | break; |
2 | 326 | |
0 | 327 | case P_EOF: |
1 | 328 | if(tokz_popf(tokz)){ |
2 | 329 | break; |
330 | }else if(tokz->nest_lvl>0 || errornest>0){ | |
0 | 331 | tokz_warn_error(tokz, 0, E_TOKZ_UNEXPECTED_EOF); |
332 | had_error=TRUE; | |
333 | } | |
2 | 334 | goto eof; |
335 | ||
336 | case P_BEG_SECT: | |
337 | had_error=TRUE; | |
338 | errornest++; | |
339 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
0 | 340 | break; |
2 | 341 | |
0 | 342 | case P_END_SECT: |
2 | 343 | if(errornest!=0){ |
344 | errornest--; | |
345 | break; | |
346 | } | |
347 | ||
0 | 348 | if(tokz->nest_lvl==0){ |
349 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
350 | had_error=TRUE; | |
351 | break; | |
352 | } | |
353 | ||
354 | if(!had_error) | |
355 | had_error=!call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
356 | ||
357 | tokz->nest_lvl--; | |
358 | ||
359 | if(tokz->nest_lvl<init_nest_lvl) | |
2 | 360 | goto eof; |
361 | } | |
362 | ||
363 | if(!had_error) | |
364 | continue; | |
365 | ||
366 | if(t==P_STMT_SECT) | |
367 | errornest++; | |
368 | ||
369 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
370 | break; | |
371 | } | |
0 | 372 | |
2 | 373 | eof: |
374 | /* free the tokens */ | |
375 | while(ntokens--) | |
376 | tok_free(&tokens[ntokens]); | |
377 | ||
378 | while(tokz->nest_lvl>=init_nest_lvl){ | |
379 | if(tokz->flags&TOKZ_ERROR_TOLERANT || !had_error) | |
380 | call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
381 | else | |
382 | call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
0 | 383 | tokz->nest_lvl--; |
384 | } | |
385 | ||
386 | /* Free optstack if it was alloced by this call */ | |
387 | if(alloced_optstack){ | |
388 | free(tokz->optstack); | |
389 | tokz->optstack=NULL; | |
390 | tokz->nest_lvl=0; | |
391 | } | |
392 | ||
393 | if(tokz->flags&TOKZ_PARSER_INDENT_MODE) | |
394 | verbose_indent(init_nest_lvl); | |
395 | ||
396 | return !had_error; | |
397 | } | |
398 | ||
399 | ||
400 | /* */ | |
401 | ||
402 | ||
2 | 403 | bool parse_config(const char *fname, const ConfOpt *options, int flags) |
0 | 404 | { |
405 | Tokenizer *tokz; | |
406 | bool ret; | |
407 | ||
408 | tokz=tokz_open(fname); | |
409 | ||
410 | if(tokz==NULL) | |
411 | return FALSE; | |
2 | 412 | |
413 | tokz->flags|=flags&~TOKZ_READ_COMMENTS; | |
0 | 414 | |
415 | ret=parse_config_tokz(tokz, options); | |
416 | ||
417 | tokz_close(tokz); | |
418 | ||
419 | return ret; | |
420 | } | |
421 | ||
422 | ||
2 | 423 | bool parse_config_file(FILE *file, const ConfOpt *options, int flags) |
0 | 424 | { |
425 | Tokenizer *tokz; | |
426 | bool ret; | |
427 | ||
428 | tokz=tokz_open_file(file); | |
429 | ||
430 | if(tokz==NULL) | |
431 | return FALSE; | |
432 | ||
2 | 433 | tokz->flags|=flags&~TOKZ_READ_COMMENTS; |
434 | ||
0 | 435 | ret=parse_config_tokz(tokz, options); |
436 | ||
437 | tokz_close(tokz); | |
438 | ||
439 | return ret; | |
440 | } | |
441 | ||
442 | ||
443 | /* | |
444 | * Argument validity checking stuff | |
445 | */ | |
446 | ||
447 | ||
2 | 448 | static int arg_match(Token *tok, char c) |
0 | 449 | { |
450 | static const char chs[]={0, 'l', 'd', 'c', 's', 'i', 0, 0}; | |
451 | char c2; | |
452 | ||
453 | if(c=='.' || c=='*') | |
2 | 454 | return 0; |
0 | 455 | |
456 | c2=chs[tok->type]; | |
457 | ||
458 | if(c2==c) | |
2 | 459 | return 0; |
0 | 460 | |
461 | if(c2=='c' && c=='l'){ | |
462 | TOK_SET_LONG(tok, TOK_CHAR_VAL(tok)); | |
2 | 463 | return 0; |
0 | 464 | } |
465 | ||
466 | if(c2=='l' && c=='c'){ | |
467 | TOK_SET_CHAR(tok, TOK_LONG_VAL(tok)); | |
2 | 468 | return 0; |
0 | 469 | } |
470 | ||
471 | if(c2=='l' && c=='d'){ | |
472 | TOK_SET_DOUBLE(tok, TOK_LONG_VAL(tok)); | |
2 | 473 | return 0; |
0 | 474 | } |
475 | ||
2 | 476 | return E_TOKZ_INVALID_ARGUMENT; |
0 | 477 | } |
478 | ||
479 | ||
2 | 480 | static int check_argument(const char **pret, Token *tok, const char *p) |
0 | 481 | { |
2 | 482 | int mode; |
483 | int e=E_TOKZ_TOO_MANY_ARGS; | |
0 | 484 | |
2 | 485 | again: |
486 | mode=0; | |
487 | ||
0 | 488 | if(*p=='*'){ |
489 | *pret=p; | |
2 | 490 | return 0; |
0 | 491 | }else if(*p=='?'){ |
492 | mode=1; | |
493 | p++; | |
494 | }else if(*p==':'){ | |
495 | mode=2; | |
496 | p++; | |
497 | }else if(*p=='+'){ | |
498 | *pret=p; | |
499 | return arg_match(tok, *(p-1)); | |
500 | } | |
501 | ||
502 | while(*p!='\0'){ | |
2 | 503 | e=arg_match(tok, *p); |
504 | if(e==0){ | |
0 | 505 | p++; |
506 | while(mode==2 && *p==':'){ | |
507 | if(*++p=='\0') | |
2 | 508 | break; /* Invalid argument format string, though... */ |
0 | 509 | p++; |
510 | } | |
511 | *pret=p; | |
2 | 512 | return 0; |
0 | 513 | } |
514 | ||
515 | if(mode==0) | |
516 | break; | |
517 | ||
518 | p++; | |
519 | ||
2 | 520 | if(mode==1) |
521 | goto again; | |
522 | ||
523 | /* mode==2 */ | |
0 | 524 | |
525 | if(*p!=':') | |
526 | break; | |
527 | p++; | |
2 | 528 | e=E_TOKZ_TOO_MANY_ARGS; |
0 | 529 | } |
530 | ||
531 | *pret=p; | |
2 | 532 | return e; |
0 | 533 | } |
534 | ||
535 | ||
/*
 * Tell whether the rest of the argument format string `p` can be satisfied
 * without any further arguments.
 *
 * That is the case when `p` is NULL or consists only of '*', '+' and
 * optional elements ("?x").  A '?' at the very end of the string has no
 * letter after it; it is skipped on its own so that the scan never steps
 * over the terminating NUL.
 *
 * Returns TRUE if no more arguments are required, FALSE otherwise.
 */
static bool args_at_end(const char *p)
{
    if(p!=NULL){
        while(*p=='*' || *p=='+' || *p=='?'){
            if(*p=='?' && p[1]!='\0')
                p+=2;   /* skip the '?' and the letter it applies to */
            else
                p++;
        }
    }

    /* Anything left over is an element that still needs an argument. */
    return p==NULL || *p=='\0';
}
552 | ||
553 | ||
554 | static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens, | |
555 | const char *fmt) | |
556 | { | |
557 | int i; | |
2 | 558 | int e; |
559 | ||
560 | if(fmt==NULL){ | |
561 | if(ntokens!=1) | |
562 | tokz_warn_error(tokz, tokens[0].line, E_TOKZ_TOO_MANY_ARGS); | |
563 | return ntokens==1; | |
564 | } | |
0 | 565 | |
2 | 566 | for(i=1; i<ntokens; i++){ |
567 | e=check_argument(&fmt, &tokens[i], fmt); | |
568 | if(e!=0){ | |
569 | tokz_warn_error(tokz, tokens[i].line, e); | |
0 | 570 | return FALSE; |
571 | } | |
572 | } | |
573 | ||
574 | if(!args_at_end(fmt)){ | |
575 | tokz_warn_error(tokz, tokens[i].line, E_TOKZ_TOO_FEW_ARGS); | |
576 | return FALSE; | |
577 | } | |
578 | ||
579 | return TRUE; | |
580 | } | |
581 | ||
1 | 582 | |
583 | /* */ | |
584 | ||
585 | ||
586 | static bool opt_include(Tokenizer *tokz, int n, Token *toks) | |
587 | { | |
588 | const char *fname=TOK_STRING_VAL(toks+1); | |
589 | const char *lastndx; | |
590 | char *tmpname; | |
591 | bool retval; | |
592 | ||
593 | if(fname[0]=='/' || tokz->name==NULL) | |
594 | goto thisdir; | |
595 | ||
596 | lastndx=strrchr(tokz->name, '/'); | |
597 | ||
598 | if(lastndx==NULL) | |
599 | goto thisdir; | |
600 | ||
601 | tmpname=scatn(tokz->name, lastndx-tokz->name+1, fname, -1); | |
602 | ||
603 | if(tmpname==NULL){ | |
604 | warn_err(); | |
605 | return FALSE; | |
606 | } | |
607 | ||
608 | retval=tokz_pushf(tokz, tmpname); | |
609 | ||
610 | free(tmpname); | |
611 | ||
612 | return retval; | |
613 | ||
614 | thisdir: | |
615 | return tokz_pushf(tokz, fname); | |
616 | } | |
617 |