tokenizer.c

changeset 62
aae5facf9fc5
parent 60
a4033700e35c
child 70
e866dc825481
equal deleted inserted replaced
61:fc585645ad05 62:aae5facf9fc5
19 #include <libtu/misc.h> 19 #include <libtu/misc.h>
20 #include <libtu/output.h> 20 #include <libtu/output.h>
21 21
22 22
23 static const char *errors[]={ 23 static const char *errors[]={
24 DUMMY_TR("(no error)"), 24 DUMMY_TR("(no error)"),
25 DUMMY_TR("Unexpected end of file"), /* E_TOKZ_UNEXPECTED_EOF */ 25 DUMMY_TR("Unexpected end of file"), /* E_TOKZ_UNEXPECTED_EOF */
26 DUMMY_TR("Unexpected end of line"), /* E_TOKZ_UNEXPECTED_EOL */ 26 DUMMY_TR("Unexpected end of line"), /* E_TOKZ_UNEXPECTED_EOL */
27 DUMMY_TR("End of line expected"), /* E_TOKZ_EOL_EXPECTED */ 27 DUMMY_TR("End of line expected"), /* E_TOKZ_EOL_EXPECTED */
28 DUMMY_TR("Invalid character"), /* E_TOKZ_INVALID_CHAR*/ 28 DUMMY_TR("Invalid character"), /* E_TOKZ_INVALID_CHAR*/
29 DUMMY_TR("Numeric constant too big"), /* E_TOKZ_TOOBIG */ 29 DUMMY_TR("Numeric constant too big"), /* E_TOKZ_TOOBIG */
30 DUMMY_TR("Invalid numberic format"), /* E_TOKZ_NUMFMT */ 30 DUMMY_TR("Invalid numberic format"), /* E_TOKZ_NUMFMT */
31 DUMMY_TR("Junk after numeric constant"), /* E_TOKZ_NUM_JUNK */ 31 DUMMY_TR("Junk after numeric constant"), /* E_TOKZ_NUM_JUNK */
32 DUMMY_TR("Not an integer"), /* E_TOKZ_NOTINT */ 32 DUMMY_TR("Not an integer"), /* E_TOKZ_NOTINT */
33 DUMMY_TR("Numeric constant out of range"), /* E_TOKZ_RANGE */ 33 DUMMY_TR("Numeric constant out of range"), /* E_TOKZ_RANGE */
34 DUMMY_TR("Multi-character character constant"), /* E_TOKZ_MULTICHAR */ 34 DUMMY_TR("Multi-character character constant"), /* E_TOKZ_MULTICHAR */
35 DUMMY_TR("Token/statement limit reached"), /* E_TOKZ_TOKEN_LIMIT */ 35 DUMMY_TR("Token/statement limit reached"), /* E_TOKZ_TOKEN_LIMIT */
36 DUMMY_TR("Unknown option"), /* E_TOKZ_UNKONWN_OPTION */ 36 DUMMY_TR("Unknown option"), /* E_TOKZ_UNKONWN_OPTION */
37 DUMMY_TR("Syntax error"), /* E_TOKZ_SYNTAX */ 37 DUMMY_TR("Syntax error"), /* E_TOKZ_SYNTAX */
38 DUMMY_TR("Invalid argument"), /* E_TOKZ_INVALID_ARGUMENT */ 38 DUMMY_TR("Invalid argument"), /* E_TOKZ_INVALID_ARGUMENT */
39 DUMMY_TR("End of statement expected"), /* E_TOKZ_EOS_EXPECTED */ 39 DUMMY_TR("End of statement expected"), /* E_TOKZ_EOS_EXPECTED */
40 DUMMY_TR("Too few arguments"), /* E_TOKZ_TOO_FEW_ARGS */ 40 DUMMY_TR("Too few arguments"), /* E_TOKZ_TOO_FEW_ARGS */
41 DUMMY_TR("Too many arguments"), /* E_TOKZ_TOO_MANY_ARGS */ 41 DUMMY_TR("Too many arguments"), /* E_TOKZ_TOO_MANY_ARGS */
42 DUMMY_TR("Maximum section nestin level exceeded"), /* E_TOK_Z_MAX_NEST */ 42 DUMMY_TR("Maximum section nestin level exceeded"), /* E_TOK_Z_MAX_NEST */
43 DUMMY_TR("Identifier expected"), /* E_TOKZ_IDENTIFIER_EXPECTED */ 43 DUMMY_TR("Identifier expected"), /* E_TOKZ_IDENTIFIER_EXPECTED */
44 DUMMY_TR("Starting brace ('{') expected"), /* E_TOKZ_LBRACE_EXPECTED */ 44 DUMMY_TR("Starting brace ('{') expected"), /* E_TOKZ_LBRACE_EXPECTED */
45 }; 45 };
46 46
47 47
48 /* */ 48 /* */
49 49
/* Completes the string being built in X. Expects `err`, X_tmp and X_tmpl to
 * be in scope (presumably declared by STRING_DECL/STRING_DECL_P - confirm).
 * If an earlier step recorded an error in `err`, the enclosing function
 * returns it. If the final flush fails, the enclosing function returns
 * -ENOMEM immediately; merely recording the code would let the caller go on
 * to store a NULL string in the token and report success.
 */
#define STRING_FINISH(X) {if(err!=0) return err; if(!_string_finish(&X, X##_tmp, X##_tmpl)) return -ENOMEM;}
57 57
58 58
59 static bool _string_append(char **p, char *tmp, int *tmplen, char c) 59 static bool _string_append(char **p, char *tmp, int *tmplen, char c)
60 { 60 {
61 char *tmp2; 61 char *tmp2;
62 62
63 if(*tmplen==STRBLEN-1){ 63 if(*tmplen==STRBLEN-1){
64 tmp[STRBLEN-1]='\0'; 64 tmp[STRBLEN-1]='\0';
65 if(*p!=NULL){ 65 if(*p!=NULL){
66 tmp2=scat(*p, tmp); 66 tmp2=scat(*p, tmp);
67 free(*p); 67 free(*p);
68 *p=tmp2; 68 *p=tmp2;
69 }else{ 69 }else{
70 *p=scopy(tmp); 70 *p=scopy(tmp);
71 } 71 }
72 *tmplen=1; 72 *tmplen=1;
73 tmp[0]=c; 73 tmp[0]=c;
74 return *p!=NULL; 74 return *p!=NULL;
75 }else{ 75 }else{
76 tmp[(*tmplen)++]=c; 76 tmp[(*tmplen)++]=c;
77 return TRUE; 77 return TRUE;
78 } 78 }
79 } 79 }
80 80
81 81
82 static bool _string_finish(char **p, char *tmp, int tmplen) 82 static bool _string_finish(char **p, char *tmp, int tmplen)
83 { 83 {
84 char *tmp2; 84 char *tmp2;
85 85
86 if(tmplen==0){ 86 if(tmplen==0){
87 if(*p==NULL) 87 if(*p==NULL)
88 *p=scopy(""); 88 *p=scopy("");
89 }else{ 89 }else{
90 tmp[tmplen]='\0'; 90 tmp[tmplen]='\0';
91 if(*p!=NULL){ 91 if(*p!=NULL){
92 tmp2=scat(*p, tmp); 92 tmp2=scat(*p, tmp);
93 free(*p); 93 free(*p);
94 *p=tmp2; 94 *p=tmp2;
95 }else{ 95 }else{
96 *p=scopy(tmp); 96 *p=scopy(tmp);
97 } 97 }
98 } 98 }
99 return *p!=NULL; 99 return *p!=NULL;
100 } 100 }
101 101
102 102
103 /* */ 103 /* */
104 104
107 #define GETCH() _getch(tokz) 107 #define GETCH() _getch(tokz)
108 #define UNGETCH(C) _ungetch(tokz, C) 108 #define UNGETCH(C) _ungetch(tokz, C)
109 109
110 static int _getch(Tokenizer *tokz) 110 static int _getch(Tokenizer *tokz)
111 { 111 {
112 int c; 112 int c;
113 113
114 if(tokz->ungetc!=-1){ 114 if(tokz->ungetc!=-1){
115 c=tokz->ungetc; 115 c=tokz->ungetc;
116 tokz->ungetc=-1; 116 tokz->ungetc=-1;
117 }else if (tokz->flags&TOKZ_READ_FROM_BUFFER) { 117 }else if (tokz->flags&TOKZ_READ_FROM_BUFFER) {
118 assert(tokz->buffer.data!=NULL); 118 assert(tokz->buffer.data!=NULL);
119 if (tokz->buffer.pos==tokz->buffer.len) 119 if (tokz->buffer.pos==tokz->buffer.len)
120 c=EOF; 120 c=EOF;
121 else 121 else
122 c=tokz->buffer.data[tokz->buffer.pos++]; 122 c=tokz->buffer.data[tokz->buffer.pos++];
123 }else{ 123 }else{
124 c=getc(tokz->file); 124 c=getc(tokz->file);
125 } 125 }
126 126
127 return c; 127 return c;
128 } 128 }
129 129
130 130
131 static void _ungetch(Tokenizer *tokz, int c) 131 static void _ungetch(Tokenizer *tokz, int c)
132 { 132 {
133 tokz->ungetc=c; 133 tokz->ungetc=c;
134 } 134 }
135 135
136 136
137 /* */ 137 /* */
138 138
139 139
140 static int scan_line_comment(Token *tok, Tokenizer *tokz) 140 static int scan_line_comment(Token *tok, Tokenizer *tokz)
141 { 141 {
142 STRING_DECL_P(s, "#"); 142 STRING_DECL_P(s, "#");
143 int c; 143 int c;
144 144
145 c=GETCH(); 145 c=GETCH();
146 146
147 while(c!='\n' && c!=EOF){ 147 while(c!='\n' && c!=EOF){
148 STRING_APPEND(s, c); 148 STRING_APPEND(s, c);
149 c=GETCH(); 149 c=GETCH();
150 } 150 }
151 151
152 UNGETCH(c); 152 UNGETCH(c);
153 153
154 STRING_FINISH(s); 154 STRING_FINISH(s);
155 155
156 TOK_SET_COMMENT(tok, s); 156 TOK_SET_COMMENT(tok, s);
157 157
158 return 0; 158 return 0;
159 } 159 }
160 160
161 161
162 static int skip_line_comment(Tokenizer *tokz) 162 static int skip_line_comment(Tokenizer *tokz)
163 { 163 {
164 int c; 164 int c;
165 165
166 do{ 166 do{
167 c=GETCH(); 167 c=GETCH();
168 }while(c!='\n' && c!=EOF); 168 }while(c!='\n' && c!=EOF);
169 169
170 UNGETCH(c); 170 UNGETCH(c);
171 171
172 return 0; 172 return 0;
173 } 173 }
174 174
175 175
176 /* */ 176 /* */
177 177
178 178
179 static int scan_c_comment(Token *tok, Tokenizer *tokz) 179 static int scan_c_comment(Token *tok, Tokenizer *tokz)
180 { 180 {
181 STRING_DECL_P(s, "/*"); 181 STRING_DECL_P(s, "/*");
182 int c; 182 int c;
183 int st=0; 183 int st=0;
184 184
185 while(1){ 185 while(1){
186 c=GETCH(); 186 c=GETCH();
187 187
188 if(c==EOF){ 188 if(c==EOF){
189 STRING_FREE(s); 189 STRING_FREE(s);
190 return E_TOKZ_UNEXPECTED_EOF; 190 return E_TOKZ_UNEXPECTED_EOF;
191 } 191 }
192 192
193 STRING_APPEND(s, c); 193 STRING_APPEND(s, c);
194 194
195 if(c=='\n'){ 195 if(c=='\n'){
196 INC_LINE(); 196 INC_LINE();
197 }else if(st==0 && c=='*'){ 197 }else if(st==0 && c=='*'){
198 st=1; 198 st=1;
199 }else if(st==1){ 199 }else if(st==1){
200 if(c=='/') 200 if(c=='/')
201 break; 201 break;
202 st=0; 202 st=0;
203 } 203 }
204 } 204 }
205 205
206 STRING_FINISH(s); 206 STRING_FINISH(s);
207 207
208 TOK_SET_COMMENT(tok, s); 208 TOK_SET_COMMENT(tok, s);
209 209
210 return 0; 210 return 0;
211 } 211 }
212 212
213 213
214 static int skip_c_comment(Tokenizer *tokz) 214 static int skip_c_comment(Tokenizer *tokz)
215 { 215 {
216 int c; 216 int c;
217 int st=0; 217 int st=0;
218 218
219 while(1){ 219 while(1){
220 c=GETCH(); 220 c=GETCH();
221 221
222 if(c==EOF) 222 if(c==EOF)
223 return E_TOKZ_UNEXPECTED_EOF; 223 return E_TOKZ_UNEXPECTED_EOF;
224 224
225 if(c=='\n') 225 if(c=='\n')
226 INC_LINE(); 226 INC_LINE();
227 else if(st==0 && c=='*') 227 else if(st==0 && c=='*')
228 st=1; 228 st=1;
229 else if(st==1){ 229 else if(st==1){
230 if(c=='/') 230 if(c=='/')
231 break; 231 break;
232 st=0; 232 st=0;
233 } 233 }
234 } 234 }
235 235
236 return 0; 236 return 0;
237 } 237 }
238 238
239 239
240 /* */ 240 /* */
241 241
242 242
243 static int scan_char_escape(Tokenizer *tokz) 243 static int scan_char_escape(Tokenizer *tokz)
244 { 244 {
245 static char* special_chars="nrtbae"; 245 static char* special_chars="nrtbae";
246 static char* specials="\n\r\t\b\a\033"; 246 static char* specials="\n\r\t\b\a\033";
247 int base, max; 247 int base, max;
248 int i ,c; 248 int i ,c;
249 249
250 c=GETCH(); 250 c=GETCH();
251 251
252 for(i=0;special_chars[i];i++){ 252 for(i=0;special_chars[i];i++){
253 if(special_chars[i]==c) 253 if(special_chars[i]==c)
254 return specials[c]; 254 return specials[c];
255 } 255 }
256 256
257 if(c=='x' || c=='X'){ 257 if(c=='x' || c=='X'){
258 base=16;max=2;i=0; 258 base=16;max=2;i=0;
259 }else if(c=='d' || c=='D'){ 259 }else if(c=='d' || c=='D'){
260 base=10;max=3;i=0; 260 base=10;max=3;i=0;
261 }else if(c=='8' || c=='9'){ 261 }else if(c=='8' || c=='9'){
262 base=10;max=2;i=c-'0'; 262 base=10;max=2;i=c-'0';
263 }else if('0'<=c && c<='7'){ 263 }else if('0'<=c && c<='7'){
264 base=8;max=2;i=c-'0'; 264 base=8;max=2;i=c-'0';
265 }else if(c=='\n'){ 265 }else if(c=='\n'){
266 UNGETCH(c); 266 UNGETCH(c);
267 return -2; 267 return -2;
268 }else{ 268 }else{
269 return c; 269 return c;
270 } 270 }
271 271
272 272
273 while(--max>=0){ 273 while(--max>=0){
274 c=GETCH(); 274 c=GETCH();
275 275
276 if(c==EOF) 276 if(c==EOF)
277 return EOF; 277 return EOF;
278 278
279 if(c=='\n'){ 279 if(c=='\n'){
280 UNGETCH(c); 280 UNGETCH(c);
281 return -2; 281 return -2;
282 } 282 }
283 283
284 if(base==16){ 284 if(base==16){
285 if(!isxdigit(c)) 285 if(!isxdigit(c))
286 break; 286 break;
287 287
288 i<<=4; 288 i<<=4;
289 289
290 if(isdigit(c)) 290 if(isdigit(c))
291 i+=c-'0'; 291 i+=c-'0';
292 else if(i>='a') 292 else if(i>='a')
293 i+=0xa+c-'a'; 293 i+=0xa+c-'a';
294 else 294 else
295 i+=0xa+c-'a'; 295 i+=0xa+c-'a';
296 296
297 }else if(base==10){ 297 }else if(base==10){
298 if(!isdigit(c)) 298 if(!isdigit(c))
299 break; 299 break;
300 i*=10; 300 i*=10;
301 i+=c-'0'; 301 i+=c-'0';
302 }else{ 302 }else{
303 if(c<'0' || c>'7') 303 if(c<'0' || c>'7')
304 break; 304 break;
305 i<<=3; 305 i<<=3;
306 i+=c-'0'; 306 i+=c-'0';
307 } 307 }
308 } 308 }
309 309
310 if(max>=0) 310 if(max>=0)
311 UNGETCH(c); 311 UNGETCH(c);
312 312
313 return i; 313 return i;
314 } 314 }
315 315
316 316
317 /* */ 317 /* */
318 318
319 319
320 static int scan_string(Token *tok, Tokenizer *tokz, bool escapes) 320 static int scan_string(Token *tok, Tokenizer *tokz, bool escapes)
321 { 321 {
322 STRING_DECL(s); 322 STRING_DECL(s);
323 int c; 323 int c;
324 324
325 while(1){ 325 while(1){
326 c=GETCH(); 326 c=GETCH();
327 327
328 if(c=='"') 328 if(c=='"')
329 break; 329 break;
330 330
331 if(c=='\n'){ 331 if(c=='\n'){
332 UNGETCH(c); 332 UNGETCH(c);
333 STRING_FREE(s); 333 STRING_FREE(s);
334 return E_TOKZ_UNEXPECTED_EOL; 334 return E_TOKZ_UNEXPECTED_EOL;
335 } 335 }
336 336
337 if(c=='\\' && escapes){ 337 if(c=='\\' && escapes){
338 c=scan_char_escape(tokz); 338 c=scan_char_escape(tokz);
339 if(c==-2){ 339 if(c==-2){
340 STRING_FREE(s); 340 STRING_FREE(s);
341 return E_TOKZ_UNEXPECTED_EOL; 341 return E_TOKZ_UNEXPECTED_EOL;
342 } 342 }
343 } 343 }
344 344
345 if(c==EOF){ 345 if(c==EOF){
346 STRING_FREE(s); 346 STRING_FREE(s);
347 return E_TOKZ_UNEXPECTED_EOF; 347 return E_TOKZ_UNEXPECTED_EOF;
348 } 348 }
349 349
350 STRING_APPEND(s, c); 350 STRING_APPEND(s, c);
351 } 351 }
352 352
353 STRING_FINISH(s); 353 STRING_FINISH(s);
354 354
355 TOK_SET_STRING(tok, s); 355 TOK_SET_STRING(tok, s);
356 356
357 return 0; 357 return 0;
358 } 358 }
359 359
360 360
361 /* */ 361 /* */
362 362
363 363
364 static int scan_char(Token *tok, Tokenizer *tokz) 364 static int scan_char(Token *tok, Tokenizer *tokz)
365 { 365 {
366 int c, c2; 366 int c, c2;
367 367
368 c=GETCH(); 368 c=GETCH();
369 369
370 if(c==EOF) 370 if(c==EOF)
371 return E_TOKZ_UNEXPECTED_EOF; 371 return E_TOKZ_UNEXPECTED_EOF;
372 372
373 if(c=='\n') 373 if(c=='\n')
374 return E_TOKZ_UNEXPECTED_EOL; 374 return E_TOKZ_UNEXPECTED_EOL;
375 375
376 if(c=='\\'){ 376 if(c=='\\'){
377 c=scan_char_escape(tokz); 377 c=scan_char_escape(tokz);
378 378
379 if(c==EOF) 379 if(c==EOF)
380 return E_TOKZ_UNEXPECTED_EOF; 380 return E_TOKZ_UNEXPECTED_EOF;
381 381
382 if(c==-2) 382 if(c==-2)
383 return E_TOKZ_UNEXPECTED_EOL; 383 return E_TOKZ_UNEXPECTED_EOL;
384 } 384 }
385 385
386 c2=GETCH(); 386 c2=GETCH();
387 387
388 if(c2!='\'') 388 if(c2!='\'')
389 return E_TOKZ_MULTICHAR; 389 return E_TOKZ_MULTICHAR;
390 390
391 TOK_SET_CHAR(tok, c); 391 TOK_SET_CHAR(tok, c);
392 392
393 return 0; 393 return 0;
394 } 394 }
395 395
396 396
397 /* */ 397 /* */
398 398
/* True when X may begin an identifier: a letter, '_' or '$'. The argument is
 * parenthesized so that compound expressions bind as intended; it is
 * evaluated more than once, so it must not have side effects.
 */
#define START_IDENT(X) (isalpha(X) || (X)=='_' || (X)=='$')
401 401
402 402
403 static int scan_identifier(Token *tok, Tokenizer *tokz, int c) 403 static int scan_identifier(Token *tok, Tokenizer *tokz, int c)
404 { 404 {
405 STRING_DECL(s); 405 STRING_DECL(s);
406 406
407 do{ 407 do{
408 STRING_APPEND(s, c); 408 STRING_APPEND(s, c);
409 c=GETCH(); 409 c=GETCH();
410 }while(isalnum(c) || c=='_' || c=='$'); 410 }while(isalnum(c) || c=='_' || c=='$');
411 411
412 UNGETCH(c); 412 UNGETCH(c);
413 413
414 STRING_FINISH(s); 414 STRING_FINISH(s);
415 415
416 TOK_SET_IDENT(tok, s); 416 TOK_SET_IDENT(tok, s);
417 417
418 return 0; 418 return 0;
419 } 419 }
420 420
421 #define NP_SIMPLE_IMPL 421 #define NP_SIMPLE_IMPL
422 #include "np/numparser2.h" 422 #include "np/numparser2.h"
423 #include "np/np-conv.h" 423 #include "np/np-conv.h"
424 424
425 425
426 static int scan_number(Token *tok, Tokenizer *tokz, int c) 426 static int scan_number(Token *tok, Tokenizer *tokz, int c)
427 { 427 {
428 NPNum num=NUM_INIT; 428 NPNum num=NUM_INIT;
429 int e; 429 int e;
430 430
431 if((e=parse_number(&num, tokz, c))) 431 if((e=parse_number(&num, tokz, c)))
432 return e; 432 return e;
433 433
434 if(num.type==NPNUM_INT){ 434 if(num.type==NPNUM_INT){
435 long l; 435 long l;
436 if((e=num_to_long(&l, &num, TRUE))) 436 if((e=num_to_long(&l, &num, TRUE)))
437 return e; 437 return e;
438 438
439 TOK_SET_LONG(tok, l); 439 TOK_SET_LONG(tok, l);
440 }else if(num.type==NPNUM_FLOAT){ 440 }else if(num.type==NPNUM_FLOAT){
441 double d; 441 double d;
442 if((e=num_to_double(&d, &num))) 442 if((e=num_to_double(&d, &num)))
443 return e; 443 return e;
444 444
445 TOK_SET_DOUBLE(tok, d); 445 TOK_SET_DOUBLE(tok, d);
446 }else{ 446 }else{
447 return E_TOKZ_NUMFMT; 447 return E_TOKZ_NUMFMT;
448 } 448 }
449 449
450 return 0; 450 return 0;
451 } 451 }
452 452
453 453
454 /* */ 454 /* */
455 455
456 456
457 static uchar op_map[]={ 457 static uchar op_map[]={
458 0x00, /* ________ 0-7 */ 458 0x00, /* ________ 0-7 */
459 0x00, /* ________ 8-15 */ 459 0x00, /* ________ 8-15 */
460 0x00, /* ________ 16-23 */ 460 0x00, /* ________ 16-23 */
461 0x00, /* ________ 24-31 */ 461 0x00, /* ________ 24-31 */
462 0x62, /* _!___%&_ 32-39 */ 462 0x62, /* _!___%&_ 32-39 */
463 0xff, /* ()*+,-./ 40-47 */ 463 0xff, /* ()*+,-./ 40-47 */
464 0x00, /* ________ 48-55 */ 464 0x00, /* ________ 48-55 */
465 0xfc, /* __:;<=>? 56-63 */ 465 0xfc, /* __:;<=>? 56-63 */
466 0x01, /* @_______ 64-71 */ 466 0x01, /* @_______ 64-71 */
467 0x00, /* ________ 72-79 */ 467 0x00, /* ________ 72-79 */
468 0x00, /* ________ 80-87 */ 468 0x00, /* ________ 80-87 */
469 0x78, /* ___[_]^_ 88-95 */ 469 0x78, /* ___[_]^_ 88-95 */
470 0x00, /* ________ 96-103 */ 470 0x00, /* ________ 96-103 */
471 0x00, /* ________ 104-111 */ 471 0x00, /* ________ 104-111 */
472 0x00, /* ________ 112-119 */ 472 0x00, /* ________ 112-119 */
473 0x38 /* ___{|}__ 120-127 */ 473 0x38 /* ___{|}__ 120-127 */
474 }; 474 };
475 475
476 476
477 static bool map_isset(uchar *map, uint ch) 477 static bool map_isset(uchar *map, uint ch)
478 { 478 {
479 if(ch>127) 479 if(ch>127)
480 return FALSE; 480 return FALSE;
481 481
482 return map[ch>>3]&(1<<(ch&7)); 482 return map[ch>>3]&(1<<(ch&7));
483 } 483 }
484 484
485 485
486 static bool is_opch(uint ch) 486 static bool is_opch(uint ch)
487 { 487 {
488 return map_isset(op_map, ch); 488 return map_isset(op_map, ch);
489 } 489 }
490 490
491 491
492 static int scan_op(Token *tok, Tokenizer *tokz, int c) 492 static int scan_op(Token *tok, Tokenizer *tokz, int c)
493 { 493 {
494 int c2; 494 int c2;
495 int op=-1; 495 int op=-1;
496 496
497 /* Quickly check it is an operator character */ 497 /* Quickly check it is an operator character */
498 if(!is_opch(c)) 498 if(!is_opch(c))
499 return E_TOKZ_INVALID_CHAR; 499 return E_TOKZ_INVALID_CHAR;
500 500
501 switch(c){ 501 switch(c){
502 case '+': 502 case '+':
503 case '-': 503 case '-':
504 case '*': 504 case '*':
505 /* case '/': Checked elsewhere */ 505 /* case '/': Checked elsewhere */
506 case '%': 506 case '%':
507 case '^': 507 case '^':
508 case '!': 508 case '!':
509 case '=': 509 case '=':
510 case '<': 510 case '<':
511 case '>': 511 case '>':
512 c2=GETCH(); 512 c2=GETCH();
513 if(c2=='='){ 513 if(c2=='='){
514 op=c|(c2<<8); 514 op=c|(c2<<8);
515 }else if(c2==c && (c2!='%' && c2!='!' && c2!='*')){ 515 }else if(c2==c && (c2!='%' && c2!='!' && c2!='*')){
516 if(c=='<' || c=='>'){ 516 if(c=='<' || c=='>'){
517 int c3=GETCH(); 517 int c3=GETCH();
518 if(c3=='='){ 518 if(c3=='='){
519 op=c|(c2<<8)|(c3<<16); 519 op=c|(c2<<8)|(c3<<16);
520 }else{ 520 }else{
521 UNGETCH(c3); 521 UNGETCH(c3);
522 op=c|(c2<<8); 522 op=c|(c2<<8);
523 } 523 }
524 }else{ 524 }else{
525 op=c|(c2<<8); 525 op=c|(c2<<8);
526 } 526 }
527 }else{ 527 }else{
528 UNGETCH(c2); 528 UNGETCH(c2);
529 op=c; 529 op=c;
530 } 530 }
531 break; 531 break;
532 532
533 /* It is already known that it is a operator so these are not needed 533 /* It is already known that it is a operator so these are not needed
534 case ':': 534 case ':':
535 case '~': 535 case '~':
536 case '?': 536 case '?':
537 case '.': 537 case '.':
538 case ';'; 538 case ';';
539 case '{': 539 case '{':
540 case '}': 540 case '}':
541 case '@': 541 case '@':
542 case '|': 542 case '|':
543 case '&': 543 case '&':
544 */ 544 */
545 default: 545 default:
546 op=c; 546 op=c;
547 } 547 }
548 548
549 TOK_SET_OP(tok, op); 549 TOK_SET_OP(tok, op);
550 550
551 return 0; 551 return 0;
552 } 552 }
553 553
554 554
555 /* */ 555 /* */
556 556
557 557
558 void tokz_warn(const Tokenizer *tokz, int line, const char *fmt, ...) 558 void tokz_warn(const Tokenizer *tokz, int line, const char *fmt, ...)
559 { 559 {
560 va_list args; 560 va_list args;
561 561
562 va_start(args, fmt); 562 va_start(args, fmt);
563 563
564 if(tokz!=NULL) 564 if(tokz!=NULL)
565 warn_obj_line_v(tokz->name, line, fmt, args); 565 warn_obj_line_v(tokz->name, line, fmt, args);
566 else 566 else
567 warn(fmt, args); 567 warn(fmt, args);
568 568
569 va_end(args); 569 va_end(args);
570 } 570 }
571 571
572 572
573 void tokz_warn_error(const Tokenizer *tokz, int line, int e) 573 void tokz_warn_error(const Tokenizer *tokz, int line, int e)
574 { 574 {
575 if(e==E_TOKZ_UNEXPECTED_EOF) 575 if(e==E_TOKZ_UNEXPECTED_EOF)
576 line=0; 576 line=0;
577 577
578 if(e<0) 578 if(e<0)
579 tokz_warn(tokz, line, "%s", strerror(-e)); 579 tokz_warn(tokz, line, "%s", strerror(-e));
580 else 580 else
581 tokz_warn(tokz, line, "%s", TR(errors[e])); 581 tokz_warn(tokz, line, "%s", TR(errors[e]));
582 } 582 }
583 583
584 584
585 bool tokz_get_token(Tokenizer *tokz, Token *tok) 585 bool tokz_get_token(Tokenizer *tokz, Token *tok)
586 { 586 {
587 int c, c2, e; 587 int c, c2, e;
588 588
589 if (!(tokz->flags&TOKZ_READ_FROM_BUFFER)) 589 if (!(tokz->flags&TOKZ_READ_FROM_BUFFER))
590 assert(tokz->file!=NULL); 590 assert(tokz->file!=NULL);
591 591
592 tok_free(tok); 592 tok_free(tok);
593 593
594 if(!TOK_IS_INVALID(&(tokz->ungettok))){ 594 if(!TOK_IS_INVALID(&(tokz->ungettok))){
595 *tok=tokz->ungettok; 595 *tok=tokz->ungettok;
596 tokz->ungettok.type=TOK_INVALID; 596 tokz->ungettok.type=TOK_INVALID;
597 return TRUE; 597 return TRUE;
598 } 598 }
599 599
600 while(1){ 600 while(1){
601 601
602 e=0; 602 e=0;
603 603
604 do{ 604 do{
605 c=GETCH(); 605 c=GETCH();
606 }while(c!='\n' && c!=EOF && isspace(c)); 606 }while(c!='\n' && c!=EOF && isspace(c));
607 607
608 tok->line=tokz->line; 608 tok->line=tokz->line;
609 609
610 switch(c){ 610 switch(c){
611 case EOF: 611 case EOF:
612 TOK_SET_OP(tok, OP_EOF); 612 TOK_SET_OP(tok, OP_EOF);
613 return TRUE; 613 return TRUE;
614 614
615 case '\n': 615 case '\n':
616 INC_LINE(); 616 INC_LINE();
617 617
618 if(tokz->flags&TOKZ_IGNORE_NEXTLINE) 618 if(tokz->flags&TOKZ_IGNORE_NEXTLINE)
619 continue; 619 continue;
620 620
621 TOK_SET_OP(tok, OP_NEXTLINE); 621 TOK_SET_OP(tok, OP_NEXTLINE);
622 622
623 return TRUE; 623 return TRUE;
624 624
625 case '\\': 625 case '\\':
626 do{ 626 do{
627 c=GETCH(); 627 c=GETCH();
628 if(c==EOF){ 628 if(c==EOF){
629 TOK_SET_OP(tok, OP_EOF); 629 TOK_SET_OP(tok, OP_EOF);
630 return FALSE; 630 return FALSE;
631 } 631 }
632 if(!isspace(c) && e==0){ 632 if(!isspace(c) && e==0){
633 e=E_TOKZ_EOL_EXPECTED; 633 e=E_TOKZ_EOL_EXPECTED;
634 tokz_warn_error(tokz, tokz->line, e); 634 tokz_warn_error(tokz, tokz->line, e);
635 if(!(tokz->flags&TOKZ_ERROR_TOLERANT)) 635 if(!(tokz->flags&TOKZ_ERROR_TOLERANT))
636 return FALSE; 636 return FALSE;
637 } 637 }
638 }while(c!='\n'); 638 }while(c!='\n');
639 639
640 INC_LINE(); 640 INC_LINE();
641 continue; 641 continue;
642 642
643 case '#': 643 case '#':
644 if(tokz->flags&TOKZ_READ_COMMENTS){ 644 if(tokz->flags&TOKZ_READ_COMMENTS){
645 e=scan_line_comment(tok, tokz); 645 e=scan_line_comment(tok, tokz);
646 break; 646 break;
647 }else if((e=skip_line_comment(tokz))){ 647 }else if((e=skip_line_comment(tokz))){
648 break; 648 break;
649 } 649 }
650 650
651 continue; 651 continue;
652 652
653 case '/': 653 case '/':
654 c2=GETCH(); 654 c2=GETCH();
655 655
656 if(c2=='='){ 656 if(c2=='='){
657 TOK_SET_OP(tok, OP_AS_DIV); 657 TOK_SET_OP(tok, OP_AS_DIV);
658 return TRUE; 658 return TRUE;
659 } 659 }
660 660
661 if(c2!='*'){ 661 if(c2!='*'){
662 UNGETCH(c2); 662 UNGETCH(c2);
663 TOK_SET_OP(tok, OP_DIV); 663 TOK_SET_OP(tok, OP_DIV);
664 return TRUE; 664 return TRUE;
665 } 665 }
666 666
667 if(tokz->flags&TOKZ_READ_COMMENTS){ 667 if(tokz->flags&TOKZ_READ_COMMENTS){
668 e=scan_c_comment(tok, tokz); 668 e=scan_c_comment(tok, tokz);
669 break; 669 break;
670 }else if((e=skip_c_comment(tokz))){ 670 }else if((e=skip_c_comment(tokz))){
671 break; 671 break;
672 } 672 }
673 673
674 continue; 674 continue;
675 675
676 case '\"': 676 case '\"':
677 e=scan_string(tok, tokz, TRUE); 677 e=scan_string(tok, tokz, TRUE);
678 break; 678 break;
679 679
680 case '\'': 680 case '\'':
681 e=scan_char(tok, tokz); 681 e=scan_char(tok, tokz);
682 break; 682 break;
683 683
684 default: 684 default:
685 if(('0'<=c && c<='9') || c=='-' || c=='+'){ 685 if(('0'<=c && c<='9') || c=='-' || c=='+'){
686 e=scan_number(tok, tokz, c); 686 e=scan_number(tok, tokz, c);
687 break; 687 break;
688 } 688 }
689 689
690 if(START_IDENT(c)) 690 if(START_IDENT(c))
691 e=scan_identifier(tok, tokz, c); 691 e=scan_identifier(tok, tokz, c);
692 else 692 else
693 e=scan_op(tok, tokz, c); 693 e=scan_op(tok, tokz, c);
694 } 694 }
695 695
696 if(!e) 696 if(!e)
697 return TRUE; 697 return TRUE;
698 698
699 tokz_warn_error(tokz, tokz->line, e); 699 tokz_warn_error(tokz, tokz->line, e);
700 return FALSE; 700 return FALSE;
701 } 701 }
702 } 702 }
703 703
704 704
705 void tokz_unget_token(Tokenizer *tokz, Token *tok) 705 void tokz_unget_token(Tokenizer *tokz, Token *tok)
706 { 706 {
707 tok_free(&(tokz->ungettok)); 707 tok_free(&(tokz->ungettok));
708 tokz->ungettok=*tok; 708 tokz->ungettok=*tok;
709 tok->type=TOK_INVALID; 709 tok->type=TOK_INVALID;
710 } 710 }
711 711
712 712
713 /* 713 /*
714 * File open 714 * File open
715 */ 715 */
716 716
717 static bool do_tokz_pushf(Tokenizer *tokz) 717 static bool do_tokz_pushf(Tokenizer *tokz)
718 { 718 {
719 Tokenizer_FInfo *finfo; 719 Tokenizer_FInfo *finfo;
720 720
721 finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, 721 finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo,
722 tokz->filestack_n, tokz->filestack_n+1); 722 tokz->filestack_n, tokz->filestack_n+1);
723 723
724 if(finfo==NULL) 724 if(finfo==NULL)
725 return FALSE; 725 return FALSE;
726 726
727 tokz->filestack=finfo; 727 tokz->filestack=finfo;
728 finfo=&(finfo[tokz->filestack_n++]); 728 finfo=&(finfo[tokz->filestack_n++]);
729 729
730 finfo->file=tokz->file; 730 finfo->file=tokz->file;
731 finfo->name=tokz->name; 731 finfo->name=tokz->name;
732 finfo->line=tokz->line; 732 finfo->line=tokz->line;
733 finfo->ungetc=tokz->ungetc; 733 finfo->ungetc=tokz->ungetc;
734 finfo->ungettok=tokz->ungettok; 734 finfo->ungettok=tokz->ungettok;
735 735
736 return TRUE; 736 return TRUE;
737 } 737 }
738 738
739 739
740 bool tokz_pushf_file(Tokenizer *tokz, FILE *file, const char *fname) 740 bool tokz_pushf_file(Tokenizer *tokz, FILE *file, const char *fname)
741 { 741 {
742 char *fname_copy=NULL; 742 char *fname_copy=NULL;
743 743
744 if(file==NULL) 744 if(file==NULL)
745 return FALSE; 745 return FALSE;
746 746
747 if(fname!=NULL){ 747 if(fname!=NULL){
748 fname_copy=scopy(fname); 748 fname_copy=scopy(fname);
749 if(fname_copy==NULL){ 749 if(fname_copy==NULL){
750 warn_err(); 750 warn_err();
751 return FALSE; 751 return FALSE;
752 } 752 }
753 } 753 }
754 754
755 if(tokz->file!=NULL){ 755 if(tokz->file!=NULL){
756 if(!do_tokz_pushf(tokz)){ 756 if(!do_tokz_pushf(tokz)){
757 warn_err(); 757 warn_err();
758 if(fname_copy!=NULL) 758 if(fname_copy!=NULL)
759 free(fname_copy); 759 free(fname_copy);
760 return FALSE; 760 return FALSE;
761 } 761 }
762 } 762 }
763 763
764 tokz->file=file; 764 tokz->file=file;
765 tokz->name=fname_copy; 765 tokz->name=fname_copy;
766 tokz->line=1; 766 tokz->line=1;
767 tokz->ungetc=-1; 767 tokz->ungetc=-1;
768 tokz->ungettok.type=TOK_INVALID; 768 tokz->ungettok.type=TOK_INVALID;
769 769
770 return TRUE; 770 return TRUE;
771 } 771 }
772 772
773 773
774 bool tokz_pushf(Tokenizer *tokz, const char *fname) 774 bool tokz_pushf(Tokenizer *tokz, const char *fname)
775 { 775 {
776 FILE *file; 776 FILE *file;
777 777
778 file=fopen(fname, "r"); 778 file=fopen(fname, "r");
779 779
780 if(file==NULL){ 780 if(file==NULL){
781 warn_err_obj(fname); 781 warn_err_obj(fname);
782 return FALSE; 782 return FALSE;
783 } 783 }
784 784
785 if(!tokz_pushf_file(tokz, file, fname)){ 785 if(!tokz_pushf_file(tokz, file, fname)){
786 fclose(file); 786 fclose(file);
787 return FALSE; 787 return FALSE;
788 } 788 }
789 789
790 return TRUE; 790 return TRUE;
791 } 791 }
792 792
793 793
794 794
795 static Tokenizer *tokz_create() 795 static Tokenizer *tokz_create()
796 { 796 {
797 Tokenizer*tokz; 797 Tokenizer*tokz;
798 798
799 tokz=ALLOC(Tokenizer); 799 tokz=ALLOC(Tokenizer);
800 800
801 if(tokz==NULL){ 801 if(tokz==NULL){
802 warn_err(); 802 warn_err();
803 return NULL; 803 return NULL;
804 } 804 }
805 805
806 tokz->file=NULL; 806 tokz->file=NULL;
807 tokz->name=NULL; 807 tokz->name=NULL;
808 tokz->line=1; 808 tokz->line=1;
809 tokz->ungetc=-1; 809 tokz->ungetc=-1;
810 tokz->ungettok.type=TOK_INVALID; 810 tokz->ungettok.type=TOK_INVALID;
811 tokz->flags=0; 811 tokz->flags=0;
812 tokz->optstack=NULL; 812 tokz->optstack=NULL;
813 tokz->nest_lvl=0; 813 tokz->nest_lvl=0;
814 tokz->filestack_n=0; 814 tokz->filestack_n=0;
815 tokz->filestack=NULL; 815 tokz->filestack=NULL;
816 tokz->buffer.data=0; 816 tokz->buffer.data=0;
817 tokz->buffer.len=0; 817 tokz->buffer.len=0;
818 tokz->buffer.pos=0; 818 tokz->buffer.pos=0;
819 819
820 return tokz; 820 return tokz;
821 } 821 }
822 822
823 823
824 Tokenizer *tokz_open(const char *fname) 824 Tokenizer *tokz_open(const char *fname)
825 { 825 {
826 Tokenizer *tokz; 826 Tokenizer *tokz;
827 827
828 tokz=tokz_create(); 828 tokz=tokz_create();
829 829
830 if(!tokz_pushf(tokz, fname)){ 830 if(!tokz_pushf(tokz, fname)){
831 free(tokz); 831 free(tokz);
832 return NULL; 832 return NULL;
833 } 833 }
834 834
835 return tokz; 835 return tokz;
836 } 836 }
837 837
838 838
839 Tokenizer *tokz_open_file(FILE *file, const char *fname) 839 Tokenizer *tokz_open_file(FILE *file, const char *fname)
840 { 840 {
841 Tokenizer *tokz; 841 Tokenizer *tokz;
842 842
843 tokz=tokz_create(); 843 tokz=tokz_create();
844 844
845 if(!tokz_pushf_file(tokz, file, fname)){ 845 if(!tokz_pushf_file(tokz, file, fname)){
846 free(tokz); 846 free(tokz);
847 return NULL; 847 return NULL;
848 } 848 }
849 849
850 return tokz; 850 return tokz;
851 } 851 }
852 852
853 Tokenizer *tokz_prepare_buffer(char *buffer, int len) 853 Tokenizer *tokz_prepare_buffer(char *buffer, int len)
854 { 854 {
855 Tokenizer *tokz; 855 Tokenizer *tokz;
856 char old=0; 856 char old=0;
857 857
858 tokz=tokz_create(); 858 tokz=tokz_create();
859 if(len>0){ 859 if(len>0){
860 old=buffer[len-1]; 860 old=buffer[len-1];
861 buffer[len-1]='\0'; 861 buffer[len-1]='\0';
862 } 862 }
863 863
864 tokz->flags|=TOKZ_READ_FROM_BUFFER; 864 tokz->flags|=TOKZ_READ_FROM_BUFFER;
865 tokz->buffer.data=scopy(buffer); 865 tokz->buffer.data=scopy(buffer);
866 tokz->buffer.len=(len>0 ? (uint)len : strlen(tokz->buffer.data)); 866 tokz->buffer.len=(len>0 ? (uint)len : strlen(tokz->buffer.data));
867 tokz->buffer.pos=0; 867 tokz->buffer.pos=0;
868 868
869 if(old>0) 869 if(old>0)
870 buffer[len-1]=old; 870 buffer[len-1]=old;
871 871
872 return tokz; 872 return tokz;
873 } 873 }
874 874
875 /* 875 /*
876 * File close 876 * File close
877 */ 877 */
878 878
879 static bool do_tokz_popf(Tokenizer *tokz, bool shrink) 879 static bool do_tokz_popf(Tokenizer *tokz, bool shrink)
880 { 880 {
881 Tokenizer_FInfo *finfo; 881 Tokenizer_FInfo *finfo;
882 882
883 if(tokz->filestack_n<=0) 883 if(tokz->filestack_n<=0)
884 return FALSE; 884 return FALSE;
885 885
886 if(tokz->file!=NULL) 886 if(tokz->file!=NULL)
887 fclose(tokz->file); 887 fclose(tokz->file);
888 if(tokz->name!=NULL) 888 if(tokz->name!=NULL)
889 free(tokz->name); 889 free(tokz->name);
890 890
891 finfo=&(tokz->filestack[--tokz->filestack_n]); 891 finfo=&(tokz->filestack[--tokz->filestack_n]);
892 892
893 tokz->file=finfo->file; 893 tokz->file=finfo->file;
894 tokz->name=finfo->name; 894 tokz->name=finfo->name;
895 tokz->line=finfo->line; 895 tokz->line=finfo->line;
896 tokz->ungetc=finfo->ungetc; 896 tokz->ungetc=finfo->ungetc;
897 tokz->ungettok=finfo->ungettok; 897 tokz->ungettok=finfo->ungettok;
898 898
899 if(tokz->filestack_n==0){ 899 if(tokz->filestack_n==0){
900 free(tokz->filestack); 900 free(tokz->filestack);
901 tokz->filestack=NULL; 901 tokz->filestack=NULL;
902 }else if(shrink){ 902 }else if(shrink){
903 finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo, 903 finfo=REALLOC_N(tokz->filestack, Tokenizer_FInfo,
904 tokz->filestack_n+1, tokz->filestack_n); 904 tokz->filestack_n+1, tokz->filestack_n);
905 if(finfo==NULL) 905 if(finfo==NULL)
906 warn_err(); 906 warn_err();
907 else 907 else
908 tokz->filestack=finfo; 908 tokz->filestack=finfo;
909 } 909 }
910 910
911 return TRUE; 911 return TRUE;
912 } 912 }
913 913
914 914
915 bool tokz_popf(Tokenizer *tokz) 915 bool tokz_popf(Tokenizer *tokz)
916 { 916 {
917 return do_tokz_popf(tokz, TRUE); 917 return do_tokz_popf(tokz, TRUE);
918 } 918 }
919 919
920 920
921 void tokz_close(Tokenizer *tokz) 921 void tokz_close(Tokenizer *tokz)
922 { 922 {
923 while(tokz->filestack_n>0) 923 while(tokz->filestack_n>0)
924 do_tokz_popf(tokz, FALSE); 924 do_tokz_popf(tokz, FALSE);
925 925
926 if(tokz->file!=NULL) 926 if(tokz->file!=NULL)
927 fclose(tokz->file); 927 fclose(tokz->file);
928 if(tokz->name!=NULL) 928 if(tokz->name!=NULL)
929 free(tokz->name); 929 free(tokz->name);
930 tok_free(&(tokz->ungettok)); 930 tok_free(&(tokz->ungettok));
931 931
932 free(tokz); 932 free(tokz);
933 } 933 }
934 934
935 935
936 936
937 /* */ 937 /* */
938 938
939 939
940 void tok_free(Token *tok) 940 void tok_free(Token *tok)
941 { 941 {
942 if(TOK_IS_STRING(tok) || TOK_IS_IDENT(tok) || TOK_IS_COMMENT(tok)){ 942 if(TOK_IS_STRING(tok) || TOK_IS_IDENT(tok) || TOK_IS_COMMENT(tok)){
943 if(TOK_STRING_VAL(tok)!=NULL) 943 if(TOK_STRING_VAL(tok)!=NULL)
944 free(TOK_STRING_VAL(tok)); 944 free(TOK_STRING_VAL(tok));
945 } 945 }
946 946
947 tok->type=TOK_INVALID; 947 tok->type=TOK_INVALID;
948 } 948 }
949 949
950 950
951 void tok_init(Token *tok) 951 void tok_init(Token *tok)
952 { 952 {
953 static Token dummy=TOK_INIT; 953 static Token dummy=TOK_INIT;
954 954
955 memcpy(tok, &dummy, sizeof(*tok)); 955 memcpy(tok, &dummy, sizeof(*tok));
956 } 956 }
957 957

mercurial