parser.c

Wed, 19 Apr 2000 22:03:38 +0200

author
tuomov
date
Wed, 19 Apr 2000 22:03:38 +0200
changeset 3
b1fbfab67908
parent 2
e14a1aba4c56
child 5
f878a9ffa3e0
permissions
-rw-r--r--

trunk: changeset 6
- Simpler optparser

- New Makefile system

/*
 * libtu/parser.c
 *
 * Copyright (c) Tuomo Valkonen 1999-2000.
 * 
 * This file is distributed under the terms of the "Artistic License".
 * See the included file LICENSE for details.
 */

#include <string.h>

#include "include/parser.h"
#include "include/misc.h"
#include "include/output.h"

#define MAX_TOKENS 	32
#define MAX_NEST	16


enum{
	P_NONE=1,
	P_EOF,
	P_STMT,
	P_STMT_NS,
	P_STMT_SECT,
	P_BEG_SECT,
	P_END_SECT
};


static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens,
					   const char *fmt);


/* */

static bool opt_include(Tokenizer *tokz, int n, Token *toks);


static ConfOpt common_opts[]={
	{"include", "s", opt_include, NULL},
	{NULL, NULL, NULL, NULL}
};


/* */


static int read_statement(Tokenizer *tokz, Token *tokens, int *ntok_ret)
{
	int ntokens=0;
	Token *tok=NULL;
	int had_comma=0; /* 0 - no, 1 - yes, 2 - not had, not expected */
	int retval=0;
	int e=0;

	while(1){
		tok=&tokens[ntokens];
		
		if(!tokz_get_token(tokz, tok)){
			e=1;
			continue;
		}

		if(ntokens==MAX_TOKENS-1){
			e=E_TOKZ_TOKEN_LIMIT;
			tokz_warn_error(tokz, tok->line, e);
			if(!(tokz->flags&TOKZ_ERROR_TOLERANT))
				break;
		}else{
			ntokens++;
		}
		
		if(!TOK_IS_OP(tok)){
			if(ntokens==1 && !had_comma){
				/*if(!TOK_IS_IDENT(tok)){
					e=E_TOKZ_IDENTIFIER_EXPECTED;
					goto handle_error;
				}*/
		
				had_comma=2;
			}else{
				if(had_comma==0)
					goto syntax;
			
				had_comma=0;
			}
			continue;
		}
		
		/* It is an operator */
		ntokens--;
		
		switch(TOK_OP_VAL(tok)){
		case OP_SCOLON:
			retval=(ntokens==0 ? P_NONE : P_STMT_NS);
			break;
			
		case OP_NEXTLINE:
			retval=(ntokens==0 ? P_NONE : P_STMT);
			break;
			
		case OP_L_BRC:
			retval=(ntokens==0 ? P_BEG_SECT : P_STMT_SECT);
			break;
			
		case OP_R_BRC:
			if(ntokens==0){
				retval=P_END_SECT;
			}else{
				tokz_unget_token(tokz, tok);
				retval=P_STMT_NS;
			}
			break;

		case OP_EOF:
			retval=(ntokens==0 ? P_EOF : P_STMT_NS);
			
			if(had_comma==1){
				e=E_TOKZ_UNEXPECTED_EOF;
				goto handle_error;
			}
			
			goto end;
			
		case OP_COMMA:
			if(had_comma!=0)
				goto syntax;

			had_comma=1;
			continue;
			
		default:
			goto syntax;
		}
		
		if(had_comma!=1)
			break;
		
	syntax:
		e=E_TOKZ_SYNTAX;
	handle_error:
		tokz_warn_error(tokz, tok->line, e);
		
		if(!(tokz->flags&TOKZ_ERROR_TOLERANT) || retval!=0)
			break;
	}
	
end:
	if(e!=0)
		retval=-retval;
	
	*ntok_ret=ntokens;
	
	return retval;
}


static bool find_beg_sect(Tokenizer *tokz)
{
	Token tok;

	while(tokz_get_token(tokz, &tok)){
		if(TOK_IS_OP(&tok)){
			if(TOK_OP_VAL(&tok)==OP_NEXTLINE)
				continue;

			if(TOK_OP_VAL(&tok)==OP_SCOLON)
				return FALSE;
		
			if(TOK_OP_VAL(&tok)==OP_L_BRC)
				return TRUE;
		}
		
		tokz_unget_token(tokz, &tok);
		break;
	}
	return FALSE;
}


/* */


static const ConfOpt* lookup_option(const ConfOpt *opts, const char *name)
{
	while(opts->optname!=NULL){
		if(strcmp(opts->optname, name)==0)
			return opts;
		opts++;
	}
	return NULL;
}

	
static bool call_end_sect(Tokenizer *tokz, const ConfOpt *opts)
{	
	opts=lookup_option(opts, "#end");
	if(opts!=NULL)
		return opts->fn(tokz, 0, NULL);
	
	return TRUE;
}


static bool call_cancel_sect(Tokenizer *tokz, const ConfOpt *opts)
{
	opts=lookup_option(opts, "#cancel");
	if(opts!=NULL)
		return opts->fn(tokz, 0, NULL);
	
	return TRUE;
}
			

/* */


bool parse_config_tokz(Tokenizer *tokz, const ConfOpt *options)
{
	Token tokens[MAX_TOKENS];
	bool alloced_optstack=FALSE;
	int i, t, ntokens=0;
	int init_nest_lvl;
	bool had_error;
	int errornest=0;

	/* Allocate tokz->optstack if it does not yet exist (if it does,
	 * we have been called from an option handler)
	 */
	if(!tokz->optstack){
		tokz->optstack=ALLOC_N(const ConfOpt*, MAX_NEST);
		if(!tokz->optstack){
			warn_err();
			return FALSE;
		}
		
		memset(tokz->optstack, 0, sizeof(ConfOpt*)*MAX_NEST);
		init_nest_lvl=tokz->nest_lvl=0;
		alloced_optstack=TRUE;
	}else{
		init_nest_lvl=tokz->nest_lvl;
	}
	
	tokz->optstack[init_nest_lvl]=options;
	
	for(i=0; i<MAX_TOKENS; i++)
		tok_init(&tokens[i]);

	
	/* The loop
	 */
	while(1){
		had_error=FALSE;

		/* free the tokens */
		while(ntokens--)
			tok_free(&tokens[ntokens]);
		
		/* read the tokens */
		t=read_statement(tokz, tokens, &ntokens);
		
		if((had_error=t<0))
			t=-t;
		
		switch(t){
		case P_STMT:
		case P_STMT_NS:
		case P_STMT_SECT:

			if(errornest)
				had_error=TRUE;
			else if(tokz->flags&TOKZ_PARSER_INDENT_MODE)
				verbose_indent(tokz->nest_lvl);
			
			if(!TOK_IS_IDENT(tokens+0)){
				had_error=TRUE;
				tokz_warn_error(tokz, tokens->line,
								E_TOKZ_IDENTIFIER_EXPECTED);
			}
			
			if(had_error)
				break;

			if(t==P_STMT){
				if(find_beg_sect(tokz))
					t=P_STMT_SECT;
			}
			
			options=lookup_option(tokz->optstack[tokz->nest_lvl],
								  TOK_IDENT_VAL(tokens+0));
			if(options==NULL)
				options=lookup_option(common_opts, TOK_IDENT_VAL(tokens+0));

			if(options==NULL){
				had_error=TRUE;
				tokz_warn_error(tokz, tokens->line, E_TOKZ_UNKNOWN_OPTION);
			}else{			
				had_error=!check_args(tokz, tokens, ntokens, options->argfmt);
			}
			
			if(had_error)
				break;
			
			if(options->opts!=NULL){
				if(t!=P_STMT_SECT){
					had_error=TRUE;
					tokz_warn_error(tokz, tokz->line, E_TOKZ_LBRACE_EXPECTED);
				}else if(tokz->nest_lvl==MAX_NEST-1){
					tokz_warn_error(tokz, tokz->line, E_TOKZ_MAX_NEST);
					had_error=TRUE;
				}else{
					tokz->optstack[++tokz->nest_lvl]=options->opts;
				}
			}else if(t==P_STMT_SECT){
				had_error=TRUE;
				tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX);
			}
			
			if(!had_error && options->fn!=NULL){
				had_error=!options->fn(tokz, ntokens, tokens);
				if(t==P_STMT_SECT && had_error)
					tokz->nest_lvl--;
			}
			break;
			
		case P_EOF:
			if(tokz_popf(tokz)){
				break;
			}else if(tokz->nest_lvl>0 || errornest>0){
				tokz_warn_error(tokz, 0, E_TOKZ_UNEXPECTED_EOF);
				had_error=TRUE;
			}
			goto eof;
			
		case P_BEG_SECT:
			had_error=TRUE;
			errornest++;
			tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX);
			break;
			
		case P_END_SECT:
			if(errornest!=0){
				errornest--;
				break;
			}
			
			if(tokz->nest_lvl==0){
				tokz_warn_error(tokz, tokz->line, E_TOKZ_SYNTAX);
				had_error=TRUE;
				break;
			}
			
			if(!had_error)
				had_error=!call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]);

			tokz->nest_lvl--;
			
			if(tokz->nest_lvl<init_nest_lvl)
				goto eof;
		}
			
		if(!had_error)
			continue;
		
		if(t==P_STMT_SECT)
			errornest++;
		
		if(!(tokz->flags&TOKZ_ERROR_TOLERANT))
			break;
	}

eof:
	/* free the tokens */
	while(ntokens--)
		tok_free(&tokens[ntokens]);

	while(tokz->nest_lvl>=init_nest_lvl){
		if(tokz->flags&TOKZ_ERROR_TOLERANT || !had_error)
			call_end_sect(tokz, tokz->optstack[tokz->nest_lvl]);
		else
			call_cancel_sect(tokz, tokz->optstack[tokz->nest_lvl]);
		tokz->nest_lvl--;
	}
	
	/* Free optstack if it was alloced by this call */
	if(alloced_optstack){
		free(tokz->optstack);
		tokz->optstack=NULL;
		tokz->nest_lvl=0;
	}
	
	if(tokz->flags&TOKZ_PARSER_INDENT_MODE)
		verbose_indent(init_nest_lvl);
	
	return !had_error;
}


/* */


bool parse_config(const char *fname, const ConfOpt *options, int flags)
{
	Tokenizer *tokz;
	bool ret;
	
	tokz=tokz_open(fname);
	
	if(tokz==NULL)
		return FALSE;

	tokz->flags|=flags&~TOKZ_READ_COMMENTS;
	
	ret=parse_config_tokz(tokz, options);
	
	tokz_close(tokz);
	
	return ret;
}


bool parse_config_file(FILE *file, const ConfOpt *options, int flags)
{
	Tokenizer *tokz;
	bool ret;
	
	tokz=tokz_open_file(file);
	
	if(tokz==NULL)
		return FALSE;
	
	tokz->flags|=flags&~TOKZ_READ_COMMENTS;
	
	ret=parse_config_tokz(tokz, options);
	
	tokz_close(tokz);
	
	return ret;
}


/*
 * Argument validity checking stuff
 */


static int arg_match(Token *tok, char c)
{
	static const char chs[]={0, 'l', 'd', 'c', 's', 'i', 0, 0};
	char c2;
	
	if(c=='.' || c=='*')
		return 0;
	
	c2=chs[tok->type];
	
	if(c2==c)
		return 0;
	
	if(c2=='c' && c=='l'){
		TOK_SET_LONG(tok, TOK_CHAR_VAL(tok));
		return 0;
	}
	
	if(c2=='l' && c=='c'){
		TOK_SET_CHAR(tok, TOK_LONG_VAL(tok));
		return 0;
	}
	
	if(c2=='l' && c=='d'){
		TOK_SET_DOUBLE(tok, TOK_LONG_VAL(tok));
		return 0;
	}
	   
	return E_TOKZ_INVALID_ARGUMENT;
}


static int check_argument(const char **pret, Token *tok, const char *p)
{
	int mode;
	int e=E_TOKZ_TOO_MANY_ARGS;

again:
	mode=0;
	
	if(*p=='*'){
		*pret=p;
		return 0;
	}else if(*p=='?'){
		mode=1;
		p++;
	}else if(*p==':'){
		mode=2;
		p++;
	}else if(*p=='+'){
		*pret=p;
		return arg_match(tok, *(p-1));
	}
	
	while(*p!='\0'){
		e=arg_match(tok, *p);
		if(e==0){
			p++;
			while(mode==2 && *p==':'){
				if(*++p=='\0')
					break; /* Invalid argument format string, though... */
				p++;
			}
			*pret=p;
			return 0;
		}
		
		if(mode==0)
			break;
		
		p++;
		
		if(mode==1)
			goto again;
		
		/* mode==2 */
		
		if(*p!=':')
			break;
		p++;
		e=E_TOKZ_TOO_MANY_ARGS;
	}
	
	*pret=p;
	return e;
}

						   
static bool args_at_end(const char *p)
{
	if(p==NULL)
		return TRUE;
	
	while(*p!='\0'){
		if(*p=='*' || *p=='+')
			p++;
		else if(*p=='?')
			p+=2;
		else
			return FALSE;
	}
	
	return TRUE;
}


static bool check_args(const Tokenizer *tokz, Token *tokens, int ntokens,
					   const char *fmt)
{
	int i;
	int e;
	
	if(fmt==NULL){
		if(ntokens!=1)
			tokz_warn_error(tokz, tokens[0].line, E_TOKZ_TOO_MANY_ARGS);
		return ntokens==1;
	}

	for(i=1; i<ntokens; i++){
		e=check_argument(&fmt, &tokens[i], fmt);
		if(e!=0){
			tokz_warn_error(tokz, tokens[i].line, e);
			return FALSE;
		}
	}

	if(!args_at_end(fmt)){
		tokz_warn_error(tokz, tokens[i].line, E_TOKZ_TOO_FEW_ARGS);
		return FALSE;
	}
	
	return TRUE;
}


/* */


static bool opt_include(Tokenizer *tokz, int n, Token *toks)
{
	const char *fname=TOK_STRING_VAL(toks+1);
	const char *lastndx;
	char *tmpname;
	bool retval;
	
	if(fname[0]=='/' || tokz->name==NULL)
		goto thisdir;
	
	lastndx=strrchr(tokz->name, '/');
	
	if(lastndx==NULL)
		goto thisdir;
	
	tmpname=scatn(tokz->name, lastndx-tokz->name+1, fname, -1);
	
	if(tmpname==NULL){
		warn_err();
		return FALSE;
	}
	
	retval=tokz_pushf(tokz, tmpname);
	
	free(tmpname);
	
	return retval;

thisdir:
	return tokz_pushf(tokz, fname);
}

mercurial