parser.c

Tue, 15 Feb 2005 18:57:52 +0100

author
tailor@f281.ttorni.ton.tut.fi
date
Tue, 15 Feb 2005 18:57:52 +0100
changeset 0
86b7f6f9c5c0
child 1
6e704fc09528
permissions
-rw-r--r--

Tailorization of trunk
Import of the upstream sources from the repository

http://tao.uab.es/ion/svn/libtu/trunk

as of revision 2

/*
 * libtu/parser.c
 *
 * Copyright (c) Tuomo Valkonen 1999-2000.
 * 
 * This file is distributed under the terms of the "Artistic License".
 * See the included file LICENSE for details.
 */

#include <string.h>

#include "include/parser.h"
#include "include/misc.h"
#include "include/output.h"

/* Size of the per-statement token array used by parse_config_tokz();
 * read_statement() reports E_TOKZ_TOKEN_LIMIT once MAX_TOKENS-1 tokens
 * have been stored for a single statement.
 */
#define MAX_TOKENS 	32
/* Number of slots in tokz->optstack (one option table per nesting level);
 * opening a section at level MAX_NEST-1 is reported as E_TOKZ_MAX_NEST.
 */
#define MAX_NEST	16


/* Result codes returned by read_statement() and dispatched on by
 * parse_config_tokz().
 */
enum{
	P_EOF,		/* statement was terminated by end of input */
	P_OK,		/* statement was terminated by ';' or a line break */
	P_ERROR,	/* tokenizer failure, syntax error or unknown option */
	P_BEG_SECT,	/* statement was terminated by '{' (opens a subsection) */
	P_END_SECT	/* statement was terminated by '}' (closes a section) */
};


/* Forward declaration: parse_config_tokz() calls check_args() to validate
 * a statement's tokens against the matched option's argument format.
 */
static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens,
					   const char *fmt);


/* */


/* Read one statement from `tokz` into `tokens`.
 *
 * A statement is an option name (an identifier looked up in the
 * NULL-optname-terminated table `options`), followed by arguments and
 * ended by an operator token.  Operator tokens that do not end the
 * statement (commas, and line breaks/semicolons seen before any option
 * name) are dropped from `tokens`; the terminating operator is kept as
 * the last token.
 *
 * Out parameters:
 *   *opt_ret   the matched table entry, the table's terminating entry if
 *              the option name was unknown, or NULL if no name was read.
 *   *ntok_ret  number of tokens stored in `tokens`.
 *
 * Returns one of P_OK, P_EOF, P_BEG_SECT, P_END_SECT or P_ERROR.  When an
 * error code was recorded here it is reported with tokz_warn_error();
 * a failing tokz_get_token() yields P_ERROR without a further report.
 *
 * Once an unknown option has been seen the result stays P_ERROR even if
 * the statement is ended by EOF or '}'; previously those terminators
 * overwrote the error and the unknown option was silently accepted.
 */
static int read_statement(const ConfOpt **opt_ret, Token *tokens,
						  int *ntok_ret, Tokenizer *tokz,
						  const ConfOpt *options)
{
	int ntokens=0;
	Token *tok=NULL;
	const ConfOpt *opt=NULL;
	/* 0: an argument was just read, a comma must precede the next one
	 * 1: a comma was just read, an argument must follow
	 * 2: the option name was just read, an argument may follow directly
	 */
	int had_comma=0;
	int retval=P_OK;
	int e=0;
	int e_line=0;

	while(1){
		/* The last slot of `tokens` is never filled. */
		if(ntokens==MAX_TOKENS-1){
			e=E_TOKZ_TOKEN_LIMIT;
			goto ret_err;
		}
		
		tok=&tokens[ntokens];
		
		if(!tokz_get_token(tokz, tok))
			goto ret_err;
		
		ntokens++;
		
		if(!TOK_IS_OP(tok)){
			if(ntokens==1 && !had_comma){
				/* First token of the statement: must name an option. */
				if(!TOK_IS_IDENT(tok)){
					e=E_TOKZ_IDENTIFIER_EXPECTED;
					goto ret_err;
				}
		
				/* find the option */
				for(opt=options; opt->optname; opt++){
					if(strcmp(opt->optname, TOK_IDENT_VAL(tok))==0)
						break;
				}
				
				if(!opt->optname){
					/* Unknown option: remember the error but keep
					 * reading up to the end of the statement.
					 */
					e=E_TOKZ_UNKNOWN_OPTION;
					e_line=tok->line;
					retval=P_ERROR;
				}

				had_comma=2;
			}else{
				/* An argument; two arguments need a comma in between. */
				if(!had_comma){
					e=E_TOKZ_SYNTAX;
					goto ret_err;
				}
			
				had_comma=0;
			}
			continue;
		}
		
		/* It is an operator */
		
		switch(TOK_OP_VAL(tok)){
		case OP_SCOLON:
			if(opt){
				if(had_comma || opt->opts){
					e=E_TOKZ_SYNTAX;
					goto ret_err;
				}
				goto ret_success;
			}
			break;
			
		case OP_NEXTLINE:
			if(had_comma==1){
				/* Dangling comma: report it on the line it was on. */
				e=E_TOKZ_SYNTAX;
				e_line=tok->line-1;
				goto ret_err2;
			}
			
			if(opt && !opt->opts)
				goto ret_success;
			break;
			
		case OP_EOF:
			if(had_comma==1){
				e=E_TOKZ_UNEXPECTED_EOF;
				goto ret_err;
			}
			
			if(opt && opt->opts){
				e=E_TOKZ_UNEXPECTED_EOF;
				goto ret_err;
			}
			
			/* Do not let EOF hide a pending unknown-option error. */
			if(retval!=P_ERROR)
				retval=P_EOF;
			goto ret_success;
			
		case OP_R_BRC:
			if(had_comma==1){
				e=E_TOKZ_SYNTAX;
				goto ret_err;
			}
			
			if(opt && opt->opts){
				e=E_TOKZ_SYNTAX;
				goto ret_err;
			}
			
			/* Do not let '}' hide a pending unknown-option error. */
			if(retval!=P_ERROR)
				retval=P_END_SECT;
			goto ret_success;
			
		case OP_L_BRC:
			if(had_comma==1 || !opt || !opt->opts){
				e=E_TOKZ_SYNTAX;
				goto ret_err;
			}
			
			retval=P_BEG_SECT;
			goto ret_success;
			
		case OP_COMMA:
			if(had_comma){
				e=E_TOKZ_SYNTAX;
				goto ret_err;
			}
			had_comma=1;
			break;
			
		default:
			e=E_TOKZ_SYNTAX;
			goto ret_err;
		}
		
		/* The operator did not end the statement: drop it. */
		ntokens--;
	}

ret_err:
	e_line=tok->line;
ret_err2:
	retval=P_ERROR;

ret_success:
	if(retval==P_ERROR && e!=0)
		tokz_warn_error(tokz, e_line, e);
	
	*opt_ret=opt;
	*ntok_ret=ntokens;
	
	return retval;
}


/* */


/* Invoke the handler of the first entry named "#end" in `options`
 * (a table terminated by an entry with a NULL optname), passing it no
 * tokens.  Returns that handler's result, or TRUE when the table has no
 * "#end" entry.
 */
static bool call_end_sect(Tokenizer *tokz, const ConfOpt *options)
{
	const ConfOpt *entry;

	for(entry=options; entry->optname!=NULL; entry++){
		if(strcmp(entry->optname, "#end")!=0)
			continue;
		return entry->fn(tokz, 0, NULL);
	}

	return TRUE;
}


/* Invoke the handler of the first entry named "#cancel" in `options`
 * (a table terminated by an entry with a NULL optname), passing it no
 * tokens.  The handler's result is ignored; nothing happens when the
 * table has no "#cancel" entry.
 */
static void call_cancel_sect(Tokenizer *tokz, const ConfOpt *options)
{
	const ConfOpt *entry;

	for(entry=options; entry->optname!=NULL; entry++){
		if(strcmp(entry->optname, "#cancel")!=0)
			continue;
		entry->fn(tokz, 0, NULL);
		return;
	}
}
			

/* */


/* Does the parsing work.
 *
 * Reads statements from `tokz` with read_statement(), looking option
 * names up in the table on top of tokz->optstack (initially `options`).
 * For every statement the arguments are validated with check_args() and
 * the option's handler, if any, is called with the statement's tokens
 * minus the terminating operator.  A '{' pushes the option's sub-table
 * onto the stack; a '}' calls the current table's "#end" handler and
 * pops it; end of input calls the "#end" handler of optstack[0].
 *
 * If an error occurs, the "#cancel" handlers of the tables from the
 * current nesting level down to the level this call started at are
 * called.
 *
 * Returns TRUE if no error occurred, FALSE otherwise.
 */
bool parse_config_tokz(Tokenizer *tokz, const ConfOpt *options)
{
	Token tokens[MAX_TOKENS];
	bool alloced_optstack=FALSE;
	int i, t, ntokens;
	int init_nest_lvl;
	bool had_error=FALSE;

	/* Allocate tokz->optstack if it does not yet exist (if it does,
	 * we have been called from an option handler)
	 */
	if(!tokz->optstack){
		tokz->optstack=ALLOC_N(const ConfOpt*, MAX_NEST);
		if(!tokz->optstack){
			warn_err();
			return FALSE;
		}
		
		memset(tokz->optstack, 0, sizeof(ConfOpt*)*MAX_NEST);
		init_nest_lvl=tokz->nest_lvl=0;
		alloced_optstack=TRUE;
	}else{
		init_nest_lvl=tokz->nest_lvl;
	}
	
	/* The table for the level this call starts at */
	tokz->optstack[init_nest_lvl]=options;
	
	for(i=0;i<MAX_TOKENS;i++)
		tok_init(&tokens[i]);

	
	/* The loop
	 */
	while(1){
		/* From here on `options` is the entry read_statement() matched
		 * for this statement (NULL if no option name was read).
		 */
		t=read_statement(&options, tokens, &ntokens,
						 tokz, tokz->optstack[tokz->nest_lvl]);
		
		had_error=(t==P_ERROR);
		
		/* Check that arguments are ok */
		if(!had_error && options)
			had_error=!check_args(tokz, tokens, ntokens, options->argfmt);

		if(tokz->flags&TOKZ_PARSER_INDENT_MODE)
			verbose_indent(tokz->nest_lvl);
		
		/* New section? */
		if(t==P_BEG_SECT){
			if(tokz->nest_lvl==MAX_NEST-1){
				/* optstack is full: give up on the whole parse */
				tokz_warn_error(tokz, tokz->line, E_TOKZ_MAX_NEST);
				had_error=TRUE;
				while(ntokens--)
					tok_free(&tokens[ntokens]);
				break;
			}else{
				/* Push the sub-table before the handler runs */
				tokz->optstack[++tokz->nest_lvl]=options->opts;
			}
		}
		
		/* call the handler; the last token is the terminating operator
		 * and is not counted
		 */
		if(!had_error && options && options->fn)
			had_error=!options->fn(tokz, ntokens-1, tokens);
		
		/* free the tokens */
		while(ntokens--)
			tok_free(&tokens[ntokens]);
		
		switch(t){		
		case P_BEG_SECT:
			if(!had_error)
				continue;
			/* #cancel handler should not be called when
			 * error occurred in section begin handler */
			tokz->nest_lvl--;
			break;

		case P_EOF:
			if(tokz->nest_lvl>0){
				/* Input ended inside an open section */
				tokz_warn_error(tokz, 0, E_TOKZ_UNEXPECTED_EOF);
				had_error=TRUE;
			}else if(!had_error){
				had_error=!call_end_sect(tokz, tokz->optstack[0]);
			}
			break;
							
		case P_END_SECT:
			if(tokz->nest_lvl==0){
				/* '}' without a section to close */
				tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX);
				had_error=TRUE;
				break;
			}
			
			if(!had_error)
				had_error=!call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]);

			tokz->nest_lvl--;
			
			/* The section this call was started in has been closed */
			if(tokz->nest_lvl<init_nest_lvl)
				break;
			   
			/* fall thru */

		default:
			if(!had_error)
				continue;			
		}
		break;
	}
	
	/* On error, call all the #cancel-handlers */
	while(had_error && tokz->nest_lvl>=init_nest_lvl){
		call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]);
		tokz->nest_lvl--;
	}
	
	/* Free optstack if it was alloced by this call */
	if(alloced_optstack){
		free(tokz->optstack);
		tokz->optstack=NULL;
		tokz->nest_lvl=0;
	}
	
	if(tokz->flags&TOKZ_PARSER_INDENT_MODE)
		verbose_indent(init_nest_lvl);
	
	return !had_error;
}


/* */


/* Parse the configuration file named `fname` against the option table
 * `options`.  Opens a tokenizer with tokz_open(), runs
 * parse_config_tokz() on it and closes it again.  Returns FALSE if the
 * tokenizer could not be opened, otherwise the parser's result.
 */
bool parse_config(const char *fname, const ConfOpt *options)
{
	Tokenizer *tokz=tokz_open(fname);
	bool ok=FALSE;

	if(tokz!=NULL){
		ok=parse_config_tokz(tokz, options);
		tokz_close(tokz);
	}

	return ok;
}


/* Parse configuration data read from the stream `file` against the
 * option table `options`.  Opens a tokenizer with tokz_open_file(), runs
 * parse_config_tokz() on it and closes the tokenizer again.  Returns
 * FALSE if the tokenizer could not be opened, otherwise the parser's
 * result.
 */
bool parse_config_file(FILE *file, const ConfOpt *options)
{
	Tokenizer *tokz=tokz_open_file(file);
	bool ok=FALSE;

	if(tokz!=NULL){
		ok=parse_config_tokz(tokz, options);
		tokz_close(tokz);
	}

	return ok;
}


/*
 * Argument validity checking stuff
 */


/* Test whether token `tok` satisfies the format character `c`.
 *
 * '.' and '*' accept any token.  Otherwise the token's type is mapped to
 * a type letter through a small table and compared with `c`.  On a
 * mismatch three conversions are attempted, rewriting the token in
 * place: char -> long, long -> char and long -> double.
 *
 * Returns TRUE if the token matches (possibly after conversion), FALSE
 * otherwise.
 */
static bool arg_match(Token *tok, char c)
{
	/* Type letter for each tok->type value, 0 where there is none.
	 * NOTE(review): presumably 's' and 'i' stand for string and
	 * identifier tokens -- confirm against the token type enum.
	 */
	static const char type_codes[]={0, 'l', 'd', 'c', 's', 'i', 0, 0};
	char have;

	if(c=='.' || c=='*')
		return TRUE;

	have=type_codes[tok->type];

	if(have==c)
		return TRUE;

	switch(have){
	case 'c':
		if(c=='l'){
			TOK_SET_LONG(tok, TOK_CHAR_VAL(tok));
			return TRUE;
		}
		break;

	case 'l':
		if(c=='c'){
			TOK_SET_CHAR(tok, TOK_LONG_VAL(tok));
			return TRUE;
		}
		if(c=='d'){
			TOK_SET_DOUBLE(tok, TOK_LONG_VAL(tok));
			return TRUE;
		}
		break;

	default:
		break;
	}

	return FALSE;
}


/* Match one argument token against the argument format at `p`.
 *
 * Format syntax handled here (each `x` is a character understood by
 * arg_match()):
 *   x        a required argument of type x
 *   ?x       an optional argument of type x
 *   :x:y...  one argument that may be any of the listed types
 *   *        any number of further arguments of any type
 *   +        any number of further arguments of the type given by the
 *            format character preceding the '+'
 *
 * *pret receives the format position to use for the next token.
 * Returns TRUE if `tok` is acceptable, FALSE otherwise.
 *
 * If the token does not match an optional spec, that spec is skipped and
 * the same token is tried against the rest of the format.  (Previously
 * the mismatching token was accepted and consumed, so a wrongly typed
 * argument could reach the option handler.)
 */
static bool check_argument(const char **pret, Token *tok, const char *p)
{
	int mode;

again:
	mode=0;

	if(*p=='*'){
		*pret=p;
		return TRUE;
	}else if(*p=='?'){
		mode=1;
		p++;
	}else if(*p==':'){
		mode=2;
		p++;
	}else if(*p=='+'){
		/* Stay on the '+' so that later tokens are checked here too. */
		*pret=p;
		return arg_match(tok, *(p-1));
	}
	
	while(*p!='\0'){
		if(arg_match(tok, *p)){
			p++;
			/* Skip the remaining alternatives of a ':' list. */
			while(mode==2 && *p==':'){
				if(*++p=='\0')
					break; /* invalid argument format string it is... */
				p++;
			}
			*pret=p;
			return TRUE;
		}
		
		/* A required argument of the wrong type. */
		if(mode==0)
			break;
		
		p++;
		
		/* The optional argument is absent: try the next spec with the
		 * same token.
		 */
		if(mode==1)
			goto again;
		
		/* mode==2: move on to the next alternative, if there is one. */
		if(*p!=':')
			break;
		p++;
	}
	
	*pret=p;
	return FALSE;
}

						   
/* Tell whether the remaining argument format `p` can be satisfied
 * without any further arguments, i.e. it is NULL, empty, or consists
 * only of '*', '+' and optional ("?x") specifications.
 *
 * A trailing '?' with no type character after it is tolerated: the scan
 * stops at the string terminator instead of stepping past it.
 */
static bool args_at_end(const char *p)
{
	if(p!=NULL){
		while(*p!='\0'){
			if(*p=='*' || *p=='+'){
				p++;
			}else if(*p=='?'){
				/* Skip the '?' and its type character, but never the
				 * terminator.
				 */
				p++;
				if(*p!='\0')
					p++;
			}else{
				/* A required argument is still outstanding. */
				break;
			}
		}
	}
	
	return (p==NULL || *p=='\0');
}


/* Validate the arguments of one statement against the format `fmt`.
 *
 * `tokens` holds the whole statement: tokens[0] is the option name,
 * tokens[ntokens-1] the terminating operator, and everything in between
 * is an argument.  A NULL `fmt` means the option takes no arguments.
 *
 * Every failure is reported through tokz_warn_error(), including the
 * case of arguments given to an option with a NULL format, which was
 * previously rejected without any message.
 *
 * Returns TRUE if the arguments are acceptable, FALSE otherwise.
 */
static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens,
					   const char *fmt)
{
	int i;
		
	if(fmt==NULL){
		/* Only the option name and the terminator are allowed. */
		if(ntokens!=2){
			tokz_warn_error(tokz, tokens[0].line, E_TOKZ_TOO_MANY_ARGS);
			return FALSE;
		}
		return TRUE;
	}

	for(i=1; i<ntokens-1; i++){
		if(!check_argument(&fmt, &tokens[i], fmt)){
			/* An exhausted format means there was an extra argument;
			 * otherwise the argument had the wrong type.
			 */
			tokz_warn_error(tokz, tokens[i].line,
							*fmt!='\0' ? E_TOKZ_INVALID_ARGUMENT 
									   : E_TOKZ_TOO_MANY_ARGS);
			return FALSE;
		}
	}

	/* Whatever is left of the format must be optional. */
	if(!args_at_end(fmt)){
		tokz_warn_error(tokz, tokens[i].line, E_TOKZ_TOO_FEW_ARGS);
		return FALSE;
	}
	
	return TRUE;
}

mercurial