Sat, 29 Apr 2000 15:35:45 +0200
trunk: changeset 12
Changed the copyright notice in headers
0 | 1 | /* |
2 | * libtu/parser.c | |
3 | * | |
9 | 4 | * Copyright (c) Tuomo Valkonen 1999-2000. |
0 | 5 | * See the included file LICENSE for details. |
6 | */ | |
7 | ||
8 | #include <string.h> | |
9 | ||
5 | 10 | #include <libtu/parser.h> |
11 | #include <libtu/misc.h> | |
12 | #include <libtu/output.h> | |
0 | 13 | |
/* Size of the per-statement token buffer used by parse_config_tokz(). */
#define MAX_TOKENS 32
/* Number of slots in the option-table stack (tokz->optstack), i.e. the
 * deepest section nesting the parser accepts. */
#define MAX_NEST 16
16 | ||
17 | ||
/*
 * Statement kinds reported by read_statement().  Numbering starts at 1
 * because read_statement() negates the value to flag an error and the
 * caller tests for a negative result.
 */
enum{
    P_NONE=1,       /* terminator (';' or newline) with no tokens before it */
    P_EOF,          /* end of input with no tokens before it */
    P_STMT,         /* statement terminated by OP_NEXTLINE */
    P_STMT_NS,      /* statement terminated by OP_SCOLON, OP_R_BRC or OP_EOF */
    P_STMT_SECT,    /* statement terminated by OP_L_BRC */
    P_BEG_SECT,     /* OP_L_BRC with no tokens before it */
    P_END_SECT      /* OP_R_BRC with no tokens before it */
};
27 | ||
28 | ||
29 | static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens, | |
30 | const char *fmt); | |
31 | ||
32 | ||
33 | /* */ | |
34 | ||
1 | 35 | static bool opt_include(Tokenizer *tokz, int n, Token *toks); |
36 | ||
37 | ||
38 | static ConfOpt common_opts[]={ | |
39 | {"include", "s", opt_include, NULL}, | |
40 | {NULL, NULL, NULL, NULL} | |
41 | }; | |
42 | ||
43 | ||
44 | /* */ | |
45 | ||
0 | 46 | |
2 | 47 | static int read_statement(Tokenizer *tokz, Token *tokens, int *ntok_ret) |
0 | 48 | { |
49 | int ntokens=0; | |
50 | Token *tok=NULL; | |
2 | 51 | int had_comma=0; /* 0 - no, 1 - yes, 2 - not had, not expected */ |
52 | int retval=0; | |
0 | 53 | int e=0; |
54 | ||
55 | while(1){ | |
2 | 56 | tok=&tokens[ntokens]; |
57 | ||
58 | if(!tokz_get_token(tokz, tok)){ | |
59 | e=1; | |
60 | continue; | |
61 | } | |
62 | ||
0 | 63 | if(ntokens==MAX_TOKENS-1){ |
64 | e=E_TOKZ_TOKEN_LIMIT; | |
2 | 65 | tokz_warn_error(tokz, tok->line, e); |
66 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
67 | break; | |
68 | }else{ | |
69 | ntokens++; | |
0 | 70 | } |
71 | ||
72 | if(!TOK_IS_OP(tok)){ | |
73 | if(ntokens==1 && !had_comma){ | |
2 | 74 | /*if(!TOK_IS_IDENT(tok)){ |
0 | 75 | e=E_TOKZ_IDENTIFIER_EXPECTED; |
2 | 76 | goto handle_error; |
77 | }*/ | |
0 | 78 | |
79 | had_comma=2; | |
80 | }else{ | |
2 | 81 | if(had_comma==0) |
82 | goto syntax; | |
0 | 83 | |
84 | had_comma=0; | |
85 | } | |
86 | continue; | |
87 | } | |
88 | ||
89 | /* It is an operator */ | |
2 | 90 | ntokens--; |
0 | 91 | |
92 | switch(TOK_OP_VAL(tok)){ | |
93 | case OP_SCOLON: | |
2 | 94 | retval=(ntokens==0 ? P_NONE : P_STMT_NS); |
0 | 95 | break; |
96 | ||
97 | case OP_NEXTLINE: | |
2 | 98 | retval=(ntokens==0 ? P_NONE : P_STMT); |
99 | break; | |
0 | 100 | |
2 | 101 | case OP_L_BRC: |
102 | retval=(ntokens==0 ? P_BEG_SECT : P_STMT_SECT); | |
0 | 103 | break; |
104 | ||
2 | 105 | case OP_R_BRC: |
106 | if(ntokens==0){ | |
107 | retval=P_END_SECT; | |
108 | }else{ | |
109 | tokz_unget_token(tokz, tok); | |
110 | retval=P_STMT_NS; | |
111 | } | |
112 | break; | |
113 | ||
0 | 114 | case OP_EOF: |
2 | 115 | retval=(ntokens==0 ? P_EOF : P_STMT_NS); |
116 | ||
0 | 117 | if(had_comma==1){ |
118 | e=E_TOKZ_UNEXPECTED_EOF; | |
2 | 119 | goto handle_error; |
0 | 120 | } |
121 | ||
2 | 122 | goto end; |
0 | 123 | |
124 | case OP_COMMA: | |
2 | 125 | if(had_comma!=0) |
126 | goto syntax; | |
127 | ||
0 | 128 | had_comma=1; |
2 | 129 | continue; |
0 | 130 | |
131 | default: | |
2 | 132 | goto syntax; |
0 | 133 | } |
134 | ||
2 | 135 | if(had_comma!=1) |
136 | break; | |
137 | ||
138 | syntax: | |
139 | e=E_TOKZ_SYNTAX; | |
140 | handle_error: | |
141 | tokz_warn_error(tokz, tok->line, e); | |
142 | ||
143 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT) || retval!=0) | |
144 | break; | |
0 | 145 | } |
146 | ||
2 | 147 | end: |
148 | if(e!=0) | |
149 | retval=-retval; | |
150 | ||
0 | 151 | *ntok_ret=ntokens; |
152 | ||
153 | return retval; | |
154 | } | |
155 | ||
156 | ||
2 | 157 | static bool find_beg_sect(Tokenizer *tokz) |
158 | { | |
159 | Token tok; | |
160 | ||
161 | while(tokz_get_token(tokz, &tok)){ | |
162 | if(TOK_IS_OP(&tok)){ | |
163 | if(TOK_OP_VAL(&tok)==OP_NEXTLINE) | |
164 | continue; | |
165 | ||
166 | if(TOK_OP_VAL(&tok)==OP_SCOLON) | |
167 | return FALSE; | |
168 | ||
169 | if(TOK_OP_VAL(&tok)==OP_L_BRC) | |
170 | return TRUE; | |
171 | } | |
172 | ||
173 | tokz_unget_token(tokz, &tok); | |
174 | break; | |
175 | } | |
176 | return FALSE; | |
177 | } | |
178 | ||
179 | ||
0 | 180 | /* */ |
181 | ||
182 | ||
2 | 183 | static const ConfOpt* lookup_option(const ConfOpt *opts, const char *name) |
184 | { | |
185 | while(opts->optname!=NULL){ | |
186 | if(strcmp(opts->optname, name)==0) | |
187 | return opts; | |
188 | opts++; | |
189 | } | |
190 | return NULL; | |
191 | } | |
192 | ||
0 | 193 | |
2 | 194 | static bool call_end_sect(Tokenizer *tokz, const ConfOpt *opts) |
195 | { | |
196 | opts=lookup_option(opts, "#end"); | |
197 | if(opts!=NULL) | |
198 | return opts->fn(tokz, 0, NULL); | |
0 | 199 | |
2 | 200 | return TRUE; |
0 | 201 | } |
202 | ||
203 | ||
2 | 204 | static bool call_cancel_sect(Tokenizer *tokz, const ConfOpt *opts) |
205 | { | |
206 | opts=lookup_option(opts, "#cancel"); | |
207 | if(opts!=NULL) | |
208 | return opts->fn(tokz, 0, NULL); | |
209 | ||
210 | return TRUE; | |
0 | 211 | } |
212 | ||
213 | ||
214 | /* */ | |
215 | ||
216 | ||
217 | bool parse_config_tokz(Tokenizer *tokz, const ConfOpt *options) | |
218 | { | |
219 | Token tokens[MAX_TOKENS]; | |
220 | bool alloced_optstack=FALSE; | |
2 | 221 | int i, t, ntokens=0; |
0 | 222 | int init_nest_lvl; |
2 | 223 | bool had_error; |
224 | int errornest=0; | |
0 | 225 | |
226 | /* Allocate tokz->optstack if it does not yet exist (if it does, | |
227 | * we have been called from an option handler) | |
228 | */ | |
229 | if(!tokz->optstack){ | |
230 | tokz->optstack=ALLOC_N(const ConfOpt*, MAX_NEST); | |
231 | if(!tokz->optstack){ | |
232 | warn_err(); | |
233 | return FALSE; | |
234 | } | |
235 | ||
236 | memset(tokz->optstack, 0, sizeof(ConfOpt*)*MAX_NEST); | |
237 | init_nest_lvl=tokz->nest_lvl=0; | |
238 | alloced_optstack=TRUE; | |
239 | }else{ | |
240 | init_nest_lvl=tokz->nest_lvl; | |
241 | } | |
242 | ||
243 | tokz->optstack[init_nest_lvl]=options; | |
244 | ||
2 | 245 | for(i=0; i<MAX_TOKENS; i++) |
0 | 246 | tok_init(&tokens[i]); |
247 | ||
248 | ||
249 | /* The loop | |
250 | */ | |
251 | while(1){ | |
2 | 252 | had_error=FALSE; |
0 | 253 | |
254 | /* free the tokens */ | |
255 | while(ntokens--) | |
256 | tok_free(&tokens[ntokens]); | |
257 | ||
2 | 258 | /* read the tokens */ |
259 | t=read_statement(tokz, tokens, &ntokens); | |
260 | ||
261 | if((had_error=t<0)) | |
262 | t=-t; | |
263 | ||
264 | switch(t){ | |
265 | case P_STMT: | |
266 | case P_STMT_NS: | |
267 | case P_STMT_SECT: | |
268 | ||
269 | if(errornest) | |
270 | had_error=TRUE; | |
271 | else if(tokz->flags&TOKZ_PARSER_INDENT_MODE) | |
272 | verbose_indent(tokz->nest_lvl); | |
273 | ||
274 | if(!TOK_IS_IDENT(tokens+0)){ | |
275 | had_error=TRUE; | |
276 | tokz_warn_error(tokz, tokens->line, | |
277 | E_TOKZ_IDENTIFIER_EXPECTED); | |
278 | } | |
279 | ||
280 | if(had_error) | |
281 | break; | |
282 | ||
283 | if(t==P_STMT){ | |
284 | if(find_beg_sect(tokz)) | |
285 | t=P_STMT_SECT; | |
286 | } | |
287 | ||
288 | options=lookup_option(tokz->optstack[tokz->nest_lvl], | |
289 | TOK_IDENT_VAL(tokens+0)); | |
290 | if(options==NULL) | |
291 | options=lookup_option(common_opts, TOK_IDENT_VAL(tokens+0)); | |
292 | ||
293 | if(options==NULL){ | |
294 | had_error=TRUE; | |
295 | tokz_warn_error(tokz, tokens->line, E_TOKZ_UNKNOWN_OPTION); | |
296 | }else{ | |
297 | had_error=!check_args(tokz, tokens, ntokens, options->argfmt); | |
298 | } | |
299 | ||
300 | if(had_error) | |
301 | break; | |
302 | ||
303 | if(options->opts!=NULL){ | |
304 | if(t!=P_STMT_SECT){ | |
305 | had_error=TRUE; | |
306 | tokz_warn_error(tokz, tokz->line, E_TOKZ_LBRACE_EXPECTED); | |
307 | }else if(tokz->nest_lvl==MAX_NEST-1){ | |
308 | tokz_warn_error(tokz, tokz->line, E_TOKZ_MAX_NEST); | |
309 | had_error=TRUE; | |
310 | }else{ | |
311 | tokz->optstack[++tokz->nest_lvl]=options->opts; | |
312 | } | |
313 | }else if(t==P_STMT_SECT){ | |
314 | had_error=TRUE; | |
315 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
316 | } | |
317 | ||
318 | if(!had_error && options->fn!=NULL){ | |
319 | had_error=!options->fn(tokz, ntokens, tokens); | |
320 | if(t==P_STMT_SECT && had_error) | |
321 | tokz->nest_lvl--; | |
322 | } | |
0 | 323 | break; |
2 | 324 | |
0 | 325 | case P_EOF: |
1 | 326 | if(tokz_popf(tokz)){ |
2 | 327 | break; |
328 | }else if(tokz->nest_lvl>0 || errornest>0){ | |
0 | 329 | tokz_warn_error(tokz, 0, E_TOKZ_UNEXPECTED_EOF); |
330 | had_error=TRUE; | |
331 | } | |
2 | 332 | goto eof; |
333 | ||
334 | case P_BEG_SECT: | |
335 | had_error=TRUE; | |
336 | errornest++; | |
337 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
0 | 338 | break; |
2 | 339 | |
0 | 340 | case P_END_SECT: |
2 | 341 | if(errornest!=0){ |
342 | errornest--; | |
343 | break; | |
344 | } | |
345 | ||
0 | 346 | if(tokz->nest_lvl==0){ |
347 | tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); | |
348 | had_error=TRUE; | |
349 | break; | |
350 | } | |
351 | ||
352 | if(!had_error) | |
353 | had_error=!call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
354 | ||
355 | tokz->nest_lvl--; | |
356 | ||
357 | if(tokz->nest_lvl<init_nest_lvl) | |
2 | 358 | goto eof; |
359 | } | |
360 | ||
361 | if(!had_error) | |
362 | continue; | |
363 | ||
364 | if(t==P_STMT_SECT) | |
365 | errornest++; | |
366 | ||
367 | if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) | |
368 | break; | |
369 | } | |
0 | 370 | |
2 | 371 | eof: |
372 | /* free the tokens */ | |
373 | while(ntokens--) | |
374 | tok_free(&tokens[ntokens]); | |
375 | ||
376 | while(tokz->nest_lvl>=init_nest_lvl){ | |
377 | if(tokz->flags&TOKZ_ERROR_TOLERANT || !had_error) | |
378 | call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
379 | else | |
380 | call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]); | |
0 | 381 | tokz->nest_lvl--; |
382 | } | |
383 | ||
384 | /* Free optstack if it was alloced by this call */ | |
385 | if(alloced_optstack){ | |
386 | free(tokz->optstack); | |
387 | tokz->optstack=NULL; | |
388 | tokz->nest_lvl=0; | |
389 | } | |
390 | ||
391 | if(tokz->flags&TOKZ_PARSER_INDENT_MODE) | |
392 | verbose_indent(init_nest_lvl); | |
393 | ||
394 | return !had_error; | |
395 | } | |
396 | ||
397 | ||
398 | /* */ | |
399 | ||
400 | ||
2 | 401 | bool parse_config(const char *fname, const ConfOpt *options, int flags) |
0 | 402 | { |
403 | Tokenizer *tokz; | |
404 | bool ret; | |
405 | ||
406 | tokz=tokz_open(fname); | |
407 | ||
408 | if(tokz==NULL) | |
409 | return FALSE; | |
2 | 410 | |
411 | tokz->flags|=flags&~TOKZ_READ_COMMENTS; | |
0 | 412 | |
413 | ret=parse_config_tokz(tokz, options); | |
414 | ||
415 | tokz_close(tokz); | |
416 | ||
417 | return ret; | |
418 | } | |
419 | ||
420 | ||
2 | 421 | bool parse_config_file(FILE *file, const ConfOpt *options, int flags) |
0 | 422 | { |
423 | Tokenizer *tokz; | |
424 | bool ret; | |
425 | ||
426 | tokz=tokz_open_file(file); | |
427 | ||
428 | if(tokz==NULL) | |
429 | return FALSE; | |
430 | ||
2 | 431 | tokz->flags|=flags&~TOKZ_READ_COMMENTS; |
432 | ||
0 | 433 | ret=parse_config_tokz(tokz, options); |
434 | ||
435 | tokz_close(tokz); | |
436 | ||
437 | return ret; | |
438 | } | |
439 | ||
440 | ||
441 | /* | |
442 | * Argument validity checking stuff | |
443 | */ | |
444 | ||
445 | ||
2 | 446 | static int arg_match(Token *tok, char c) |
0 | 447 | { |
448 | static const char chs[]={0, 'l', 'd', 'c', 's', 'i', 0, 0}; | |
449 | char c2; | |
450 | ||
451 | if(c=='.' || c=='*') | |
2 | 452 | return 0; |
0 | 453 | |
454 | c2=chs[tok->type]; | |
455 | ||
456 | if(c2==c) | |
2 | 457 | return 0; |
0 | 458 | |
459 | if(c2=='c' && c=='l'){ | |
460 | TOK_SET_LONG(tok, TOK_CHAR_VAL(tok)); | |
2 | 461 | return 0; |
0 | 462 | } |
463 | ||
464 | if(c2=='l' && c=='c'){ | |
465 | TOK_SET_CHAR(tok, TOK_LONG_VAL(tok)); | |
2 | 466 | return 0; |
0 | 467 | } |
468 | ||
469 | if(c2=='l' && c=='d'){ | |
470 | TOK_SET_DOUBLE(tok, TOK_LONG_VAL(tok)); | |
2 | 471 | return 0; |
0 | 472 | } |
473 | ||
2 | 474 | return E_TOKZ_INVALID_ARGUMENT; |
0 | 475 | } |
476 | ||
477 | ||
2 | 478 | static int check_argument(const char **pret, Token *tok, const char *p) |
0 | 479 | { |
2 | 480 | int mode; |
481 | int e=E_TOKZ_TOO_MANY_ARGS; | |
0 | 482 | |
2 | 483 | again: |
484 | mode=0; | |
485 | ||
0 | 486 | if(*p=='*'){ |
487 | *pret=p; | |
2 | 488 | return 0; |
0 | 489 | }else if(*p=='?'){ |
490 | mode=1; | |
491 | p++; | |
492 | }else if(*p==':'){ | |
493 | mode=2; | |
494 | p++; | |
495 | }else if(*p=='+'){ | |
496 | *pret=p; | |
497 | return arg_match(tok, *(p-1)); | |
498 | } | |
499 | ||
500 | while(*p!='\0'){ | |
2 | 501 | e=arg_match(tok, *p); |
502 | if(e==0){ | |
0 | 503 | p++; |
504 | while(mode==2 && *p==':'){ | |
505 | if(*++p=='\0') | |
2 | 506 | break; /* Invalid argument format string, though... */ |
0 | 507 | p++; |
508 | } | |
509 | *pret=p; | |
2 | 510 | return 0; |
0 | 511 | } |
512 | ||
513 | if(mode==0) | |
514 | break; | |
515 | ||
516 | p++; | |
517 | ||
2 | 518 | if(mode==1) |
519 | goto again; | |
520 | ||
521 | /* mode==2 */ | |
0 | 522 | |
523 | if(*p!=':') | |
524 | break; | |
525 | p++; | |
2 | 526 | e=E_TOKZ_TOO_MANY_ARGS; |
0 | 527 | } |
528 | ||
529 | *pret=p; | |
2 | 530 | return e; |
0 | 531 | } |
532 | ||
533 | ||
/*
 * Tell whether the rest of an argument format string requires no more
 * arguments.
 *
 * '*' and '+' are skipped, and '?' is skipped together with the
 * character it makes optional.  Anything else means an argument is
 * still required.
 *
 * Returns nonzero if `p' is NULL or only such specifiers remain,
 * zero otherwise.
 */
static bool args_at_end(const char *p)
{
    if(p!=NULL){
        while(*p=='*' || *p=='+' || *p=='?'){
            /* Step over "?x" as a pair, but never past the terminating
             * NUL when a malformed format ends in a lone '?'. */
            if(*p=='?' && p[1]!='\0')
                p+=2;
            else
                p++;
        }
    }

    return (p==NULL || *p=='\0');
}
550 | ||
551 | ||
552 | static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens, | |
553 | const char *fmt) | |
554 | { | |
555 | int i; | |
2 | 556 | int e; |
557 | ||
558 | if(fmt==NULL){ | |
559 | if(ntokens!=1) | |
560 | tokz_warn_error(tokz, tokens[0].line, E_TOKZ_TOO_MANY_ARGS); | |
561 | return ntokens==1; | |
562 | } | |
0 | 563 | |
2 | 564 | for(i=1; i<ntokens; i++){ |
565 | e=check_argument(&fmt, &tokens[i], fmt); | |
566 | if(e!=0){ | |
567 | tokz_warn_error(tokz, tokens[i].line, e); | |
0 | 568 | return FALSE; |
569 | } | |
570 | } | |
571 | ||
572 | if(!args_at_end(fmt)){ | |
573 | tokz_warn_error(tokz, tokens[i].line, E_TOKZ_TOO_FEW_ARGS); | |
574 | return FALSE; | |
575 | } | |
576 | ||
577 | return TRUE; | |
578 | } | |
579 | ||
1 | 580 | |
581 | /* */ | |
582 | ||
583 | ||
584 | static bool opt_include(Tokenizer *tokz, int n, Token *toks) | |
585 | { | |
586 | const char *fname=TOK_STRING_VAL(toks+1); | |
587 | const char *lastndx; | |
588 | char *tmpname; | |
589 | bool retval; | |
590 | ||
591 | if(fname[0]=='/' || tokz->name==NULL) | |
592 | goto thisdir; | |
593 | ||
594 | lastndx=strrchr(tokz->name, '/'); | |
595 | ||
596 | if(lastndx==NULL) | |
597 | goto thisdir; | |
598 | ||
599 | tmpname=scatn(tokz->name, lastndx-tokz->name+1, fname, -1); | |
600 | ||
601 | if(tmpname==NULL){ | |
602 | warn_err(); | |
603 | return FALSE; | |
604 | } | |
605 | ||
606 | retval=tokz_pushf(tokz, tmpname); | |
607 | ||
608 | free(tmpname); | |
609 | ||
610 | return retval; | |
611 | ||
612 | thisdir: | |
613 | return tokz_pushf(tokz, fname); | |
614 | } | |
615 |