# HG changeset patch # User tuomov # Date 951292079 -3600 # Node ID e14a1aba4c56e4348cb1b6dba207835e0dad9c67 # Parent 6e704fc09528b70de1fa899fab685947a0ce53b5 trunk: changeset 5 Error tolerant config file parsing mode diff -r 6e704fc09528 -r e14a1aba4c56 include/parser.h --- a/include/parser.h Sat Feb 19 23:23:29 2000 +0100 +++ b/include/parser.h Wed Feb 23 08:47:59 2000 +0100 @@ -39,7 +39,7 @@ extern bool parse_config_tokz(Tokenizer *tokz, const ConfOpt *options); -extern bool parse_config(const char *fname, const ConfOpt *options); -extern bool parse_config_file(FILE *file, const ConfOpt *options); +extern bool parse_config(const char *fname, const ConfOpt *options, int flags); +extern bool parse_config_file(FILE *file, const ConfOpt *options, int flags); #endif /* __LIBTU_PARSER_H */ diff -r 6e704fc09528 -r e14a1aba4c56 include/tokenizer.h --- a/include/tokenizer.h Sat Feb 19 23:23:29 2000 +0100 +++ b/include/tokenizer.h Wed Feb 23 08:47:59 2000 +0100 @@ -31,6 +31,7 @@ #define TOK_COMMENT_VAL(TOK) ((TOK)->u.sval) #define TOK_OP_VAL(TOK) ((TOK)->u.opval) +#define TOK_IS_INVALID(TOK) ((TOK)->type==TOK_INVALID) #define TOK_IS_LONG(TOK) ((TOK)->type==TOK_LONG) #define TOK_IS_DOUBLE(TOK) ((TOK)->type==TOK_DOUBLE) #define TOK_IS_CHAR(TOK) ((TOK)->type==TOK_CHAR) @@ -118,7 +119,8 @@ enum{ TOKZ_IGNORE_NEXTLINE=0x1, TOKZ_READ_COMMENTS=0x2, - TOKZ_PARSER_INDENT_MODE=0x4 + TOKZ_PARSER_INDENT_MODE=0x04, + TOKZ_ERROR_TOLERANT=0x8 }; @@ -142,8 +144,9 @@ E_TOKZ_TOO_FEW_ARGS, E_TOKZ_TOO_MANY_ARGS, E_TOKZ_MAX_NEST, - E_TOKZ_UNEXPECTED_EOS, - E_TOKZ_IDENTIFIER_EXPECTED + E_TOKZ_IDENTIFIER_EXPECTED, + + E_TOKZ_LBRACE_EXPECTED }; @@ -154,6 +157,7 @@ char *name; int line; int ungetc; + Token ungettok; } Tokenizer_FInfo; typedef struct _Tokenizer{ @@ -161,6 +165,7 @@ char *name; int line; int ungetc; + Token ungettok; int flags; const struct _ConfOpt **optstack; @@ -176,6 +181,7 @@ extern Tokenizer *tokz_open_file(FILE *file); extern void tokz_close(Tokenizer *tokz); extern bool tokz_get_token(Tokenizer *tokz, Token *tok); +extern void tokz_unget_token(Tokenizer *tokz, Token *tok); extern void tokz_warn_error(const Tokenizer *tokz, int line, int e); extern bool tokz_pushf(Tokenizer *tokz, const char *fname); diff -r 6e704fc09528 -r e14a1aba4c56 numparser2.h --- a/numparser2.h Sat Feb 19 23:23:29 2000 +0100 +++ b/numparser2.h Wed Feb 23 08:47:59 2000 +0100 @@ -189,7 +189,9 @@ if(c>=base) err=E_TOKZ_NUMFMT; - npnum_mulbase_add(num, base, c); + tmp=npnum_mulbase_add(num, base, c); + if(err==0) + err=tmp; if(dm==1) dm=2; diff -r 6e704fc09528 -r e14a1aba4c56 parser.c --- a/parser.c Sat Feb 19 23:23:29 2000 +0100 +++ b/parser.c Wed Feb 23 08:47:59 2000 +0100 @@ -18,9 +18,11 @@ enum{ + P_NONE=1, P_EOF, - P_OK, - P_ERROR, + P_STMT, + P_STMT_NS, + P_STMT_SECT, P_BEG_SECT, P_END_SECT }; @@ -44,63 +46,42 @@ /* */ -static int read_statement(const ConfOpt **opt_ret, Token *tokens, - int *ntok_ret, Tokenizer *tokz, - const ConfOpt *options) +static int read_statement(Tokenizer *tokz, Token *tokens, int *ntok_ret) { int ntokens=0; Token *tok=NULL; - const ConfOpt *opt=NULL; - int had_comma=0; - int retval=P_OK; + int had_comma=0; /* 0 - no, 1 - yes, 2 - not had, not expected */ + int retval=0; int e=0; - int e_line=0; while(1){ + tok=&tokens[ntokens]; + + if(!tokz_get_token(tokz, tok)){ + e=1; + continue; + } + if(ntokens==MAX_TOKENS-1){ e=E_TOKZ_TOKEN_LIMIT; - goto ret_err; + tokz_warn_error(tokz, tok->line, e); + if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) + break; + }else{ + ntokens++; } - tok=&tokens[ntokens]; - - if(!tokz_get_token(tokz, tok)) - goto ret_err; - - ntokens++; - if(!TOK_IS_OP(tok)){ if(ntokens==1 && !had_comma){ - if(!TOK_IS_IDENT(tok)){ + /*if(!TOK_IS_IDENT(tok)){ e=E_TOKZ_IDENTIFIER_EXPECTED; - goto ret_err; - } + goto handle_error; + }*/ - /* find the option */ - for(opt=options; opt->optname; opt++){ - if(strcmp(opt->optname, TOK_IDENT_VAL(tok))==0) - break; - } - - if(opt->optname==NULL){ - /* common opt? include, etc. */ - for(opt=common_opts; opt->optname; opt++){ - if(strcmp(opt->optname, TOK_IDENT_VAL(tok))==0) - break; - } - if(opt->optname==NULL){ - e=E_TOKZ_UNKNOWN_OPTION; - e_line=tok->line; - retval=P_ERROR; - } - } - had_comma=2; }else{ - if(!had_comma){ - e=E_TOKZ_SYNTAX; - goto ret_err; - } + if(had_comma==0) + goto syntax; had_comma=0; } @@ -108,141 +89,141 @@ } /* It is an operator */ + ntokens--; switch(TOK_OP_VAL(tok)){ case OP_SCOLON: - if(opt){ - if(had_comma || opt->opts){ - e=E_TOKZ_SYNTAX; - goto ret_err; - } - goto ret_success; - } + retval=(ntokens==0 ? P_NONE : P_STMT_NS); break; case OP_NEXTLINE: - if(had_comma==1){ - e=E_TOKZ_SYNTAX; - e_line=tok->line-1; - goto ret_err2; - } + retval=(ntokens==0 ? P_NONE : P_STMT); + break; - if(opt && !opt->opts) - goto ret_success; + case OP_L_BRC: + retval=(ntokens==0 ? P_BEG_SECT : P_STMT_SECT); break; + case OP_R_BRC: + if(ntokens==0){ + retval=P_END_SECT; + }else{ + tokz_unget_token(tokz, tok); + retval=P_STMT_NS; + } + break; + case OP_EOF: + retval=(ntokens==0 ? P_EOF : P_STMT_NS); + if(had_comma==1){ e=E_TOKZ_UNEXPECTED_EOF; - goto ret_err; - } - - if(opt && opt->opts){ - e=E_TOKZ_UNEXPECTED_EOF; - goto ret_err; - } - - retval=P_EOF; - goto ret_success; - - case OP_R_BRC: - if(had_comma==1){ - e=E_TOKZ_SYNTAX; - goto ret_err; - } - - if(opt && opt->opts){ - e=E_TOKZ_SYNTAX; - goto ret_err; + goto handle_error; } - retval=P_END_SECT; - goto ret_success; - - case OP_L_BRC: - if(had_comma==1 || !opt || !opt->opts){ - e=E_TOKZ_SYNTAX; - goto ret_err; - } - - retval=P_BEG_SECT; - goto ret_success; + goto end; case OP_COMMA: - if(had_comma){ - e=E_TOKZ_SYNTAX; - goto ret_err; - } + if(had_comma!=0) + goto syntax; + had_comma=1; - break; + continue; default: - e=E_TOKZ_SYNTAX; - goto ret_err; + goto syntax; } - ntokens--; + if(had_comma!=1) + break; + + syntax: + e=E_TOKZ_SYNTAX; + handle_error: + tokz_warn_error(tokz, tok->line, e); + + if(!(tokz->flags&TOKZ_ERROR_TOLERANT) || retval!=0) + break; } - -ret_err: - e_line=tok->line; -ret_err2: - retval=P_ERROR; - -ret_success: - if(retval==P_ERROR && e!=0) - tokz_warn_error(tokz, e_line, e); - *opt_ret=opt; +end: + if(e!=0) + retval=-retval; + *ntok_ret=ntokens; return retval; } +static bool find_beg_sect(Tokenizer *tokz) +{ + Token tok; + + while(tokz_get_token(tokz, &tok)){ + if(TOK_IS_OP(&tok)){ + if(TOK_OP_VAL(&tok)==OP_NEXTLINE) + continue; + + if(TOK_OP_VAL(&tok)==OP_SCOLON) + return FALSE; + + if(TOK_OP_VAL(&tok)==OP_L_BRC) + return TRUE; + } + + tokz_unget_token(tokz, &tok); + break; + } + return FALSE; +} + + /* */ -static bool call_end_sect(Tokenizer *tokz, const ConfOpt *options) -{ - bool retval=TRUE; +static const ConfOpt* lookup_option(const ConfOpt *opts, const char *name) +{ + while(opts->optname!=NULL){ + if(strcmp(opts->optname, name)==0) + return opts; + opts++; + } + return NULL; +} + - while(options->optname){ - if(strcmp(options->optname, "#end")==0){ - retval=options->fn(tokz, 0, NULL); - break; - } - options++; - } +static bool call_end_sect(Tokenizer *tokz, const ConfOpt *opts) +{ + opts=lookup_option(opts, "#end"); + if(opts!=NULL) + return opts->fn(tokz, 0, NULL); - return retval; + return TRUE; } -static void call_cancel_sect(Tokenizer *tokz, const ConfOpt *options) -{ - while(options->optname){ - if(strcmp(options->optname, "#cancel")==0){ - options->fn(tokz, 0, NULL); - break; - } - options++; - } +static bool call_cancel_sect(Tokenizer *tokz, const ConfOpt *opts) +{ + opts=lookup_option(opts, "#cancel"); + if(opts!=NULL) + return opts->fn(tokz, 0, NULL); + + return TRUE; } /* */ -/* Does the parsing work - */ bool parse_config_tokz(Tokenizer *tokz, const ConfOpt *options) { Token tokens[MAX_TOKENS]; bool alloced_optstack=FALSE; - int i, t, ntokens; + int i, t, ntokens=0; int init_nest_lvl; - bool had_error=FALSE; + bool had_error; + int errornest=0; /* Allocate tokz->optstack if it does not yet exist (if it does, * we have been called from an option handler) @@ -263,68 +244,107 @@ tokz->optstack[init_nest_lvl]=options; - for(i=0;ioptstack[tokz->nest_lvl]); - - had_error=(t==P_ERROR); - - /* Check that arguments are ok */ - if(!had_error && options) - had_error=!check_args(tokz, tokens, ntokens, options->argfmt); + had_error=FALSE; - if(tokz->flags&TOKZ_PARSER_INDENT_MODE) - verbose_indent(tokz->nest_lvl); - - /* New section? */ - if(t==P_BEG_SECT){ - if(tokz->nest_lvl==MAX_NEST-1){ - tokz_warn_error(tokz, tokz->line, E_TOKZ_MAX_NEST); - had_error=TRUE; - while(ntokens--) - tok_free(&tokens[ntokens]); - break; - }else{ - tokz->optstack[++tokz->nest_lvl]=options->opts; - } - } - - /* call the handler */ - if(!had_error && options && options->fn) - had_error=!options->fn(tokz, ntokens-1, tokens); - /* free the tokens */ while(ntokens--) tok_free(&tokens[ntokens]); - switch(t){ - case P_BEG_SECT: - if(!had_error) - continue; - /* #cancel handler should not be called when - * error occured in section begin handler */ - tokz->nest_lvl--; + /* read the tokens */ + t=read_statement(tokz, tokens, &ntokens); + + if((had_error=t<0)) + t=-t; + + switch(t){ + case P_STMT: + case P_STMT_NS: + case P_STMT_SECT: + + if(errornest) + had_error=TRUE; + else if(tokz->flags&TOKZ_PARSER_INDENT_MODE) + verbose_indent(tokz->nest_lvl); + + if(!TOK_IS_IDENT(tokens+0)){ + had_error=TRUE; + tokz_warn_error(tokz, tokens->line, + E_TOKZ_IDENTIFIER_EXPECTED); + } + + if(had_error) + break; + + if(t==P_STMT){ + if(find_beg_sect(tokz)) + t=P_STMT_SECT; + } + + options=lookup_option(tokz->optstack[tokz->nest_lvl], + TOK_IDENT_VAL(tokens+0)); + if(options==NULL) + options=lookup_option(common_opts, TOK_IDENT_VAL(tokens+0)); + + if(options==NULL){ + had_error=TRUE; + tokz_warn_error(tokz, tokens->line, E_TOKZ_UNKNOWN_OPTION); + }else{ + had_error=!check_args(tokz, tokens, ntokens, options->argfmt); + } + + if(had_error) + break; + + if(options->opts!=NULL){ + if(t!=P_STMT_SECT){ + had_error=TRUE; + tokz_warn_error(tokz, tokz->line, E_TOKZ_LBRACE_EXPECTED); + }else if(tokz->nest_lvl==MAX_NEST-1){ + tokz_warn_error(tokz, tokz->line, E_TOKZ_MAX_NEST); + had_error=TRUE; + }else{ + tokz->optstack[++tokz->nest_lvl]=options->opts; + } + }else if(t==P_STMT_SECT){ + had_error=TRUE; + tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); + } + + if(!had_error && options->fn!=NULL){ + had_error=!options->fn(tokz, ntokens, tokens); + if(t==P_STMT_SECT && had_error) + tokz->nest_lvl--; + } break; - + case P_EOF: if(tokz_popf(tokz)){ - if(!had_error) - continue; - }else if(tokz->nest_lvl>0){ + break; + }else if(tokz->nest_lvl>0 || errornest>0){ tokz_warn_error(tokz, 0, E_TOKZ_UNEXPECTED_EOF); had_error=TRUE; - }else if(!had_error){ - had_error=!call_end_sect(tokz, tokz->optstack[0]); } + goto eof; + + case P_BEG_SECT: + had_error=TRUE; + errornest++; + tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); break; - + case P_END_SECT: + if(errornest!=0){ + errornest--; + break; + } + if(tokz->nest_lvl==0){ tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX); had_error=TRUE; @@ -337,20 +357,29 @@ tokz->nest_lvl--; if(tokz->nest_lvlflags&TOKZ_ERROR_TOLERANT)) + break; + } - default: - if(!had_error) - continue; - } - break; - } - - /* On error, call all the #cancel-handlers */ - while(had_error && tokz->nest_lvl>=init_nest_lvl){ - call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]); +eof: + /* free the tokens */ + while(ntokens--) + tok_free(&tokens[ntokens]); + + while(tokz->nest_lvl>=init_nest_lvl){ + if(tokz->flags&TOKZ_ERROR_TOLERANT || !had_error) + call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]); + else + call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]); tokz->nest_lvl--; } @@ -371,7 +400,7 @@ /* */ -bool parse_config(const char *fname, const ConfOpt *options) +bool parse_config(const char *fname, const ConfOpt *options, int flags) { Tokenizer *tokz; bool ret; @@ -380,6 +409,8 @@ if(tokz==NULL) return FALSE; + + tokz->flags|=flags&~TOKZ_READ_COMMENTS; ret=parse_config_tokz(tokz, options); @@ -389,7 +420,7 @@ } -bool parse_config_file(FILE *file, const ConfOpt *options) +bool parse_config_file(FILE *file, const ConfOpt *options, int flags) { Tokenizer *tokz; bool ret; @@ -399,6 +430,8 @@ if(tokz==NULL) return FALSE; + tokz->flags|=flags&~TOKZ_READ_COMMENTS; + ret=parse_config_tokz(tokz, options); tokz_close(tokz); @@ -412,45 +445,49 @@ */ -static bool arg_match(Token *tok, char c) +static int arg_match(Token *tok, char c) { static const char chs[]={0, 'l', 'd', 'c', 's', 'i', 0, 0}; char c2; if(c=='.' || c=='*') - return TRUE; + return 0; c2=chs[tok->type]; if(c2==c) - return TRUE; + return 0; if(c2=='c' && c=='l'){ TOK_SET_LONG(tok, TOK_CHAR_VAL(tok)); - return TRUE; + return 0; } if(c2=='l' && c=='c'){ TOK_SET_CHAR(tok, TOK_LONG_VAL(tok)); - return TRUE; + return 0; } if(c2=='l' && c=='d'){ TOK_SET_DOUBLE(tok, TOK_LONG_VAL(tok)); - return TRUE; + return 0; } - return FALSE; + return E_TOKZ_INVALID_ARGUMENT; } -static bool check_argument(const char **pret, Token *tok, const char *p) +static int check_argument(const char **pret, Token *tok, const char *p) { - int mode=0; + int mode; + int e=E_TOKZ_TOO_MANY_ARGS; +again: + mode=0; + if(*p=='*'){ *pret=p; - return TRUE; + return 0; }else if(*p=='?'){ mode=1; p++; @@ -463,15 +500,16 @@ } while(*p!='\0'){ - if(arg_match(tok, *p)){ + e=arg_match(tok, *p); + if(e==0){ p++; while(mode==2 && *p==':'){ if(*++p=='\0') - break; /* invalid argument format string it is... */ + break; /* Invalid argument format string, though... */ p++; } *pret=p; - return TRUE; + return 0; } if(mode==0) @@ -479,18 +517,19 @@ p++; - if(mode==1){ - *pret=p; - return TRUE; - } + if(mode==1) + goto again; + + /* mode==2 */ if(*p!=':') break; p++; + e=E_TOKZ_TOO_MANY_ARGS; } *pret=p; - return FALSE; + return e; } @@ -516,15 +555,18 @@ const char *fmt) { int i; - - if(fmt==NULL) - return ntokens==2; + int e; + + if(fmt==NULL){ + if(ntokens!=1) + tokz_warn_error(tokz, tokens[0].line, E_TOKZ_TOO_MANY_ARGS); + return ntokens==1; + } - for(i=1; iungettok))){ + *tok=tokz->ungettok; + tokz->ungettok.type=TOK_INVALID; + return TRUE; + } + while(1){ e=0; @@ -607,9 +613,11 @@ TOK_SET_OP(tok, OP_EOF); return FALSE; } - if(!isspace(c)){ - tokz_warn_error(tokz, tokz->line, E_TOKZ_EOL_EXPECTED); - return FALSE; + if(!isspace(c) && e==0){ + e=E_TOKZ_EOL_EXPECTED; + tokz_warn_error(tokz, tokz->line, e); + if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) + return FALSE; } }while(c!='\n'); @@ -627,30 +635,28 @@ continue; case '/': - { - c2=GETCH(); - - if(c2=='='){ - TOK_SET_OP(tok, OP_AS_DIV); - return TRUE; - } - - if(c2!='*'){ - UNGETCH(c2); - TOK_SET_OP(tok, OP_DIV); - return TRUE; - } - - if(tokz->flags&TOKZ_READ_COMMENTS){ - e=scan_c_comment(tok, tokz); - break; - }else if((e=skip_c_comment(tokz))){ - break; - } - - continue; + c2=GETCH(); + + if(c2=='='){ + TOK_SET_OP(tok, OP_AS_DIV); + return TRUE; } + if(c2!='*'){ + UNGETCH(c2); + TOK_SET_OP(tok, OP_DIV); + return TRUE; + } + + if(tokz->flags&TOKZ_READ_COMMENTS){ + e=scan_c_comment(tok, tokz); + break; + }else if((e=skip_c_comment(tokz))){ + break; + } + + continue; + case '\"': e=scan_string(tok, tokz, TRUE); break; @@ -680,6 +686,14 @@ } +void tokz_unget_token(Tokenizer *tokz, Token *tok) +{ + tok_free(&(tokz->ungettok)); + tokz->ungettok=*tok; + tok->type=TOK_INVALID; +} + + /* * File open */ @@ -701,7 +715,8 @@ finfo->name=tokz->name; finfo->line=tokz->line; finfo->ungetc=tokz->ungetc; - + finfo->ungettok=tokz->ungettok; + return TRUE; } @@ -721,7 +736,8 @@ tokz->file=file; tokz->name=NULL; tokz->line=1; - tokz->ungetc=-1; + tokz->ungetc=-1; + tokz->ungettok.type=TOK_INVALID; return TRUE; } @@ -776,7 +792,8 @@ tokz->file=NULL; tokz->name=NULL; tokz->line=1; - tokz->ungetc=-1; + tokz->ungetc=-1; + tokz->ungettok.type=TOK_INVALID; tokz->flags=0; tokz->optstack=NULL; tokz->nest_lvl=0; @@ -839,7 +856,8 @@ tokz->name=finfo->name; tokz->line=finfo->line; tokz->ungetc=finfo->ungetc; - + tokz->ungettok=finfo->ungettok; + if(tokz->filestack_n==0){ free(tokz->filestack); tokz->filestack=NULL; @@ -871,7 +889,8 @@ fclose(tokz->file); if(tokz->name!=NULL) free(tokz->name); - + tok_free(&(tokz->ungettok)); + free(tokz); } @@ -882,8 +901,10 @@ void tok_free(Token *tok) { - if(TOK_IS_STRING(tok)) - free(TOK_STRING_VAL(tok)); + if(TOK_IS_STRING(tok) || TOK_IS_IDENT(tok) || TOK_IS_COMMENT(tok)){ + if(TOK_STRING_VAL(tok)!=NULL) + free(TOK_STRING_VAL(tok)); + } tok->type=TOK_INVALID; }