parser.c

changeset 0
86b7f6f9c5c0
child 1
6e704fc09528
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/parser.c	Tue Feb 15 18:57:52 2005 +0100
@@ -0,0 +1,518 @@
+/*
+ * libtu/parser.c
+ *
+ * Copyright (c) Tuomo Valkonen 1999-2000.
+ * 
+ * This file is distributed under the terms of the "Artistic License".
+ * See the included file LICENSE for details.
+ */
+
+#include <string.h>
+
+#include "include/parser.h"
+#include "include/misc.h"
+#include "include/output.h"
+
+/* Number of entries in the token array used to hold one statement */
+#define MAX_TOKENS 	32
+/* Number of entries allocated for tokz->optstack (section nesting limit) */
+#define MAX_NEST	16
+
+
+/* Result codes returned by read_statement() */
+enum{
+	/* OP_EOF was read */
+	P_EOF,
+	/* A statement was read (ended by OP_SCOLON or OP_NEXTLINE) */
+	P_OK,
+	/* An error was met while reading the statement */
+	P_ERROR,
+	/* The statement was ended by OP_L_BRC */
+	P_BEG_SECT,
+	/* OP_R_BRC was read */
+	P_END_SECT
+};
+
+
+static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens,
+					   const char *fmt);
+
+
+/* */
+
+
+/* Read one statement from `tokz` into `tokens`.
+ *
+ * The first non-operator token of a statement must be an identifier that
+ * names an entry of `options` (an array terminated by an entry whose
+ * optname is NULL). The first argument follows the name directly and
+ * further arguments must be separated by commas. The statement is ended
+ * by an operator token.
+ *
+ * opt_ret   Set to the entry found in `options`, to the terminating entry
+ *           if the name was unknown, or to NULL if no name was read.
+ * tokens    Array of MAX_TOKENS tokens that receives the statement.
+ * ntok_ret  Set to the number of entries of `tokens` that were filled in.
+ *           When a statement is returned this includes the operator
+ *           token that ended it.
+ *
+ * Returns one of the P_* codes. Errors are reported through
+ * tokz_warn_error() before returning P_ERROR.
+ */
+static int read_statement(const ConfOpt **opt_ret, Token *tokens,
+						  int *ntok_ret, Tokenizer *tokz,
+						  const ConfOpt *options)
+{
+	int ntokens=0;
+	Token *tok=NULL;
+	const ConfOpt *opt=NULL;
+	/* 0: last token was an argument, a comma must precede the next one
+	 * 1: a comma was just read, an argument must follow
+	 * 2: the option name was just read, an argument may follow directly
+	 */
+	int had_comma=0;
+	int retval=P_OK;
+	int e=0;
+	int e_line=0;
+
+	while(1){
+		if(ntokens==MAX_TOKENS-1){
+			e=E_TOKZ_TOKEN_LIMIT;
+			goto ret_err;
+		}
+
+		tok=&tokens[ntokens];
+
+		if(!tokz_get_token(tokz, tok))
+			goto ret_err;
+
+		ntokens++;
+
+		if(!TOK_IS_OP(tok)){
+			if(ntokens==1 && !had_comma){
+				if(!TOK_IS_IDENT(tok)){
+					e=E_TOKZ_IDENTIFIER_EXPECTED;
+					goto ret_err;
+				}
+
+				/* find the option */
+				for(opt=options; opt->optname; opt++){
+					if(strcmp(opt->optname, TOK_IDENT_VAL(tok))==0)
+						break;
+				}
+
+				/* Unknown option: remember the error but keep reading
+				 * so that the rest of the statement is consumed.
+				 */
+				if(!opt->optname){
+					e=E_TOKZ_UNKNOWN_OPTION;
+					e_line=tok->line;
+					retval=P_ERROR;
+				}
+
+				had_comma=2;
+			}else{
+				if(!had_comma){
+					e=E_TOKZ_SYNTAX;
+					goto ret_err;
+				}
+
+				had_comma=0;
+			}
+			continue;
+		}
+
+		/* It is an operator */
+
+		switch(TOK_OP_VAL(tok)){
+		case OP_SCOLON:
+			if(opt){
+				/* NOTE(review): had_comma==2 (name without arguments)
+				 * is rejected here too, unlike the other cases that
+				 * test had_comma==1 -- confirm this is intended.
+				 */
+				if(had_comma || opt->opts){
+					e=E_TOKZ_SYNTAX;
+					goto ret_err;
+				}
+				goto ret_success;
+			}
+			break;
+
+		case OP_NEXTLINE:
+			if(had_comma==1){
+				e=E_TOKZ_SYNTAX;
+				e_line=tok->line-1;
+				goto ret_err2;
+			}
+
+			if(opt && !opt->opts)
+				goto ret_success;
+			break;
+
+		case OP_EOF:
+			if(had_comma==1){
+				e=E_TOKZ_UNEXPECTED_EOF;
+				goto ret_err;
+			}
+
+			if(opt && opt->opts){
+				e=E_TOKZ_UNEXPECTED_EOF;
+				goto ret_err;
+			}
+
+			/* Do not let EOF hide an error recorded earlier in this
+			 * statement (unknown option).
+			 */
+			if(retval!=P_ERROR)
+				retval=P_EOF;
+			goto ret_success;
+
+		case OP_R_BRC:
+			if(had_comma==1){
+				e=E_TOKZ_SYNTAX;
+				goto ret_err;
+			}
+
+			if(opt && opt->opts){
+				e=E_TOKZ_SYNTAX;
+				goto ret_err;
+			}
+
+			/* Likewise keep an earlier error instead of ending the
+			 * section as if nothing had happened.
+			 */
+			if(retval!=P_ERROR)
+				retval=P_END_SECT;
+			goto ret_success;
+
+		case OP_L_BRC:
+			if(had_comma==1 || !opt || !opt->opts){
+				e=E_TOKZ_SYNTAX;
+				goto ret_err;
+			}
+
+			retval=P_BEG_SECT;
+			goto ret_success;
+
+		case OP_COMMA:
+			if(had_comma){
+				e=E_TOKZ_SYNTAX;
+				goto ret_err;
+			}
+			had_comma=1;
+			break;
+
+		default:
+			e=E_TOKZ_SYNTAX;
+			goto ret_err;
+		}
+
+		/* The operator did not end the statement; reuse its slot */
+		ntokens--;
+	}
+
+ret_err:
+	e_line=tok->line;
+ret_err2:
+	retval=P_ERROR;
+
+ret_success:
+	if(retval==P_ERROR && e!=0)
+		tokz_warn_error(tokz, e_line, e);
+
+ 	*opt_ret=opt;
+	*ntok_ret=ntokens;
+
+	return retval;
+}
+
+
+/* */
+
+
+/* Call the handler of the "#end" entry of `options`, if there is one.
+ * Returns the handler's result, or TRUE when no such entry exists.
+ */
+static bool call_end_sect(Tokenizer *tokz, const ConfOpt *options)
+{
+	const ConfOpt *o;
+
+	for(o=options; o->optname; o++){
+		if(strcmp(o->optname, "#end")!=0)
+			continue;
+		return o->fn(tokz, 0, NULL);
+	}
+
+	return TRUE;
+}
+
+
+/* Call the handler of the "#cancel" entry of `options`, if there is one.
+ * The handler's return value is ignored.
+ */
+static void call_cancel_sect(Tokenizer *tokz, const ConfOpt *options)
+{
+	const ConfOpt *o;
+
+	for(o=options; o->optname; o++){
+		if(strcmp(o->optname, "#cancel")!=0)
+			continue;
+		o->fn(tokz, 0, NULL);
+		return;
+	}
+}
+			
+
+/* */
+
+
+/* Does the parsing work
+ *
+ * Reads statements from `tokz` with read_statement(), checks their
+ * arguments against the option's argfmt and calls the option's handler.
+ * `options` is the option table used at the nesting level the call
+ * starts from; sections use the table given in the `opts` field of the
+ * option that begins them.
+ *
+ * Returns TRUE if no error was met, FALSE otherwise.
+ */
+bool parse_config_tokz(Tokenizer *tokz, const ConfOpt *options)
+{
+	Token tokens[MAX_TOKENS];
+	bool alloced_optstack=FALSE;
+	int i, t, ntokens;
+	int init_nest_lvl;
+	bool had_error=FALSE;
+
+	/* Allocate tokz->optstack if it does not yet exist (if it does,
+	 * we have been called from an option handler)
+	 */
+	if(!tokz->optstack){
+		tokz->optstack=ALLOC_N(const ConfOpt*, MAX_NEST);
+		if(!tokz->optstack){
+			warn_err();
+			return FALSE;
+		}
+		
+		memset(tokz->optstack, 0, sizeof(ConfOpt*)*MAX_NEST);
+		init_nest_lvl=tokz->nest_lvl=0;
+		alloced_optstack=TRUE;
+	}else{
+		init_nest_lvl=tokz->nest_lvl;
+	}
+	
+	/* The table passed in replaces whatever was at the starting level */
+	tokz->optstack[init_nest_lvl]=options;
+	
+	for(i=0;i<MAX_TOKENS;i++)
+		tok_init(&tokens[i]);
+
+	
+	/* The loop: one statement per iteration. From here on `options`
+	 * is reused to hold the option of the statement just read.
+	 */
+	while(1){
+		t=read_statement(&options, tokens, &ntokens,
+						 tokz, tokz->optstack[tokz->nest_lvl]);
+		
+		had_error=(t==P_ERROR);
+		
+		/* Check that arguments are ok */
+		if(!had_error && options)
+			had_error=!check_args(tokz, tokens, ntokens, options->argfmt);
+
+		if(tokz->flags&TOKZ_PARSER_INDENT_MODE)
+			verbose_indent(tokz->nest_lvl);
+		
+		/* New section? */
+		if(t==P_BEG_SECT){
+			if(tokz->nest_lvl==MAX_NEST-1){
+				tokz_warn_error(tokz, tokz->line, E_TOKZ_MAX_NEST);
+				had_error=TRUE;
+				/* Leaving the loop here skips the normal token
+				 * freeing below, so do it now.
+				 */
+				while(ntokens--)
+					tok_free(&tokens[ntokens]);
+				break;
+			}else{
+				/* The level is pushed before the handler is called */
+				tokz->optstack[++tokz->nest_lvl]=options->opts;
+			}
+		}
+		
+		/* call the handler; the count passed is ntokens-1 */
+		if(!had_error && options && options->fn)
+			had_error=!options->fn(tokz, ntokens-1, tokens);
+		
+		/* free the tokens */
+		while(ntokens--)
+			tok_free(&tokens[ntokens]);
+		
+		/* `continue` below goes on to the next statement; leaving the
+		 * switch with `break` reaches the `break` after it and ends
+		 * the loop.
+		 */
+		switch(t){		
+		case P_BEG_SECT:
+			if(!had_error)
+				continue;
+			/* #cancel handler should not be called when
+			 * error occurred in section begin handler */
+			tokz->nest_lvl--;
+			break;
+
+		case P_EOF:
+			if(tokz->nest_lvl>0){
+				tokz_warn_error(tokz, 0, E_TOKZ_UNEXPECTED_EOF);
+				had_error=TRUE;
+			}else if(!had_error){
+				had_error=!call_end_sect(tokz, tokz->optstack[0]);
+			}
+			break;
+							
+		case P_END_SECT:
+			/* A section end at the outermost level is an error */
+			if(tokz->nest_lvl==0){
+				tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX);
+				had_error=TRUE;
+				break;
+			}
+			
+			if(!had_error)
+				had_error=!call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]);
+
+			tokz->nest_lvl--;
+			
+			/* The level this call started from was closed: done */
+			if(tokz->nest_lvl<init_nest_lvl)
+				break;
+			   
+			/* fall thru */
+
+		default:
+			if(!had_error)
+				continue;			
+		}
+		break;
+	}
+	
+	/* On error, call all the #cancel-handlers */
+	while(had_error && tokz->nest_lvl>=init_nest_lvl){
+		call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]);
+		tokz->nest_lvl--;
+	}
+	
+	/* Free optstack if it was alloced by this call */
+	if(alloced_optstack){
+		free(tokz->optstack);
+		tokz->optstack=NULL;
+		tokz->nest_lvl=0;
+	}
+	
+	if(tokz->flags&TOKZ_PARSER_INDENT_MODE)
+		verbose_indent(init_nest_lvl);
+	
+	return !had_error;
+}
+
+
+/* */
+
+
+/* Parse the file named `fname` using the option table `options`.
+ * Returns FALSE if the tokenizer could not be opened, otherwise the
+ * result of parse_config_tokz().
+ */
+bool parse_config(const char *fname, const ConfOpt *options)
+{
+	bool ok=FALSE;
+	Tokenizer *tokz=tokz_open(fname);
+
+	if(tokz!=NULL){
+		ok=parse_config_tokz(tokz, options);
+		tokz_close(tokz);
+	}
+
+	return ok;
+}
+
+
+/* Parse the stream `file` using the option table `options`.
+ * Returns FALSE if the tokenizer could not be opened, otherwise the
+ * result of parse_config_tokz().
+ */
+bool parse_config_file(FILE *file, const ConfOpt *options)
+{
+	bool ok=FALSE;
+	Tokenizer *tokz=tokz_open_file(file);
+
+	if(tokz!=NULL){
+		ok=parse_config_tokz(tokz, options);
+		tokz_close(tokz);
+	}
+
+	return ok;
+}
+
+
+/*
+ * Argument validity checking stuff
+ */
+
+
+/* Test whether the token `tok` is acceptable for the format character
+ * `c`. '.' and '*' accept any token. Otherwise the token's type is
+ * mapped to a format character and compared with `c`; a char token is
+ * converted when a long is wanted, and a long token when a char or a
+ * double is wanted.
+ */
+static bool arg_match(Token *tok, char c)
+{
+	/* Format character for each token type, 0 where there is none */
+	static const char chs[]={0, 'l', 'd', 'c', 's', 'i', 0, 0};
+	char have;
+
+	if(c=='.' || c=='*')
+		return TRUE;
+
+	have=chs[tok->type];
+
+	if(have==c)
+		return TRUE;
+
+	/* No exact match; try the conversions */
+	switch(have){
+	case 'c':
+		if(c=='l'){
+			TOK_SET_LONG(tok, TOK_CHAR_VAL(tok));
+			return TRUE;
+		}
+		break;
+
+	case 'l':
+		if(c=='c'){
+			TOK_SET_CHAR(tok, TOK_LONG_VAL(tok));
+			return TRUE;
+		}
+		if(c=='d'){
+			TOK_SET_DOUBLE(tok, TOK_LONG_VAL(tok));
+			return TRUE;
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	return FALSE;
+}
+
+
+/* Check the token `tok` against the argument format at `p` and store in
+ * `*pret` the position in the format where checking of the next token
+ * should continue.
+ *
+ * What the format at `p` may start with:
+ *   '*'   any token is accepted; the position is not advanced
+ *   '+'   the token is matched against the character preceding the
+ *         '+'; the position is not advanced
+ *   '?'   mode 1: the following character is tried and the position is
+ *         moved past it
+ *   ':'   mode 2: alternatives written as ":a:b:c" are tried in order
+ *   other mode 0: the character itself must match
+ *
+ * Returns TRUE if the token was accepted.
+ */
+static bool check_argument(const char **pret, Token *tok, const char *p)
+{
+	int mode=0;
+
+	if(*p=='*'){
+		*pret=p;
+		return TRUE;
+	}else if(*p=='?'){
+		mode=1;
+		p++;
+	}else if(*p==':'){
+		mode=2;
+		p++;
+	}else if(*p=='+'){
+		*pret=p;
+		/* NOTE(review): reads the character before `p`; presumably a
+		 * format never begins with '+' -- confirm.
+		 */
+		return arg_match(tok, *(p-1));
+	}
+	
+	while(*p!='\0'){
+		if(arg_match(tok, *p)){
+			p++;
+			/* In mode 2 skip the alternatives that were not needed */
+			while(mode==2 && *p==':'){
+				if(*++p=='\0')
+					break; /* invalid argument format string it is... */
+				p++;
+			}
+			*pret=p;
+			return TRUE;
+		}
+		
+		/* No match: in mode 0 that is a failure */
+		if(mode==0)
+			break;
+		
+		p++;
+		
+		/* NOTE(review): in mode 1 the token is reported as accepted
+		 * even though it did not match -- confirm this is intended.
+		 */
+		if(mode==1){
+			*pret=p;
+			return TRUE;
+		}
+		
+		/* Mode 2: go on to the next alternative, if there is one */
+		if(*p!=':')
+			break;
+		p++;
+	}
+	
+	*pret=p;
+	return FALSE;
+}
+
+						   
+/* Test whether the rest of the argument format `p` can be left unused,
+ * i.e. whether it is NULL, empty or consists only of '*', '+' and
+ * '?'-prefixed entries.
+ *
+ * Returns TRUE if no further argument is required, FALSE otherwise.
+ */
+static bool args_at_end(const char *p)
+{
+	if(p==NULL)
+		return TRUE;
+
+	while(*p!='\0'){
+		if(*p=='*' || *p=='+'){
+			p++;
+		}else if(*p=='?'){
+			p++;
+			/* Skip the type character after '?', but do not step
+			 * over the terminator if the format ends with a bare '?'.
+			 */
+			if(*p!='\0')
+				p++;
+		}else{
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+
+/* Check the arguments of a statement against the format `fmt`.
+ *
+ * tokens[0] is the option name, tokens[1] .. tokens[ntokens-2] are the
+ * arguments and tokens[ntokens-1] is the operator that ended the
+ * statement. A NULL `fmt` means that no arguments are accepted.
+ *
+ * Returns TRUE if the arguments are acceptable. Otherwise an error is
+ * reported through tokz_warn_error() and FALSE is returned.
+ */
+static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens,
+					   const char *fmt)
+{
+	int i;
+
+	if(fmt==NULL){
+		if(ntokens==2)
+			return TRUE;
+		/* Report the surplus argument instead of failing silently */
+		if(ntokens>2)
+			tokz_warn_error(tokz, tokens[1].line, E_TOKZ_TOO_MANY_ARGS);
+		return FALSE;
+	}
+
+	for(i=1; i<ntokens-1; i++){
+		if(!check_argument(&fmt, &tokens[i], fmt)){
+			/* Format used up means there were too many arguments */
+			tokz_warn_error(tokz, tokens[i].line,
+							*fmt!='\0' ? E_TOKZ_INVALID_ARGUMENT
+									   : E_TOKZ_TOO_MANY_ARGS);
+			return FALSE;
+		}
+	}
+
+	/* Here i is the index of the token that ended the statement */
+	if(!args_at_end(fmt)){
+		tokz_warn_error(tokz, tokens[i].line, E_TOKZ_TOO_FEW_ARGS);
+		return FALSE;
+	}
+
+	return TRUE;
+}
+

mercurial