mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			240 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			240 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
 | 
						|
 | 
						|
/*
 * Token codes.
 *
 * The codes start at 257, above the range of an unsigned char, so they can
 * never collide with single-character tokens, which scan() returns as the
 * character's own value.  scan() in this file produces only ID, ICON, FCON,
 * SCON, the multi-character operator codes and EOI; the remaining codes are
 * defined here but not returned by it.
 */
#define ADDEQ       257
#define ANDAND      258
#define ANDEQ       259
#define ARRAY       260
#define ASM         261
#define AUTO        262
#define BREAK       263
#define CASE        264
#define CHAR        265
#define CONST       266
#define CONTINUE    267
#define DECR        268
#define DEFAULT     269
#define DEREF       270
#define DIVEQ       271
#define DO          272
#define DOUBLE      273
#define ELLIPSIS    274
#define ELSE        275
#define ENUM        276
#define EQL         277
#define EXTERN      278
#define FCON        279
#define FLOAT       280
#define FOR         281
#define FUNCTION    282
#define GEQ         283
#define GOTO        284
#define ICON        285
#define ID          286
#define IF          287
#define INCR        288
#define INT         289
#define LEQ         290
#define LONG        291
#define LSHIFT      292
#define LSHIFTEQ    293
#define MODEQ       294
#define MULEQ       295
#define NEQ         296
#define OREQ        297
#define OROR        298
#define POINTER     299
#define REGISTER    300
#define RETURN      301
#define RSHIFT      302
#define RSHIFTEQ    303
#define SCON        304
#define SHORT       305
#define SIGNED      306
#define SIZEOF      307
#define STATIC      308
#define STRUCT      309
#define SUBEQ       310
#define SWITCH      311
#define TYPEDEF     312
#define UNION       313
#define UNSIGNED    314
#define VOID        315
#define VOLATILE    316
#define WHILE       317
#define XOREQ       318
#define EOI         319
 | 
						|
 | 
						|
/* Short names for the unsigned types used by the scanner. */
typedef unsigned char uchar;	/* one input byte */
typedef unsigned int  uint;	/* counts and line numbers */
 | 
						|
 | 
						|
/*
 * Refill granularity: fill() asks read() for BSIZE bytes at a time and keeps
 * at least BSIZE bytes of spare room after the buffered data.
 */
#define BSIZE 8192

/*
 * Hooks expected by the matcher that re2c generates inside scan().  They are
 * only meaningful where a `Scanner *s` and a `uchar *cursor` are in scope.
 * YYFILL ignores the requested byte count n and always does one full refill.
 */
#define YYCTYPE   uchar
#define YYCURSOR  cursor
#define YYLIMIT   s->lim
#define YYMARKER  s->ptr
#define YYFILL(n) { cursor = fill(s, cursor); }

/* Leave scan(): remember where the next call resumes, then return code i. */
#define RET(i) { s->cur = cursor; return i; }
 | 
						|
 | 
						|
typedef struct Scanner {
 | 
						|
    int			fd;
 | 
						|
    uchar		*bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
 | 
						|
    uint		line;
 | 
						|
} Scanner;
 | 
						|
 | 
						|
/*
 * Refill the scanner's buffer from s->fd and return the relocated cursor.
 *
 * Bytes in front of the current token (s->tok) are discarded by sliding the
 * remaining data down to the start of the buffer.  If fewer than BSIZE bytes
 * of spare room remain after the data, the data is moved into a larger
 * malloc'd buffer.  Up to BSIZE bytes are then read in after s->lim.  Every
 * buffer pointer stored in *s is adjusted to follow the data; the caller's
 * cursor is not stored in *s, so it is adjusted the same way and returned.
 *
 * When the input ends (or read() fails), a '\n' sentinel is written after
 * the last byte read and s->eof is set just past it; once s->eof is set,
 * further calls return the cursor unchanged.  If the buffer cannot be grown
 * the process exits with EXIT_FAILURE, because this function has no way to
 * report an error to its caller.
 */
uchar *fill(Scanner *s, uchar *cursor){
    uint cnt;

    if(s->eof)
        return cursor;

    /* Drop everything that precedes the current token. */
    cnt = s->tok - s->bot;
    if(cnt){
        /*
         * Source and destination overlap whenever the data being kept is
         * longer than the gap being closed, so memcpy would be undefined.
         */
        memmove(s->bot, s->tok, s->lim - s->tok);
        s->tok = s->bot;
        s->ptr -= cnt;
        cursor -= cnt;
        s->pos -= cnt;
        s->lim -= cnt;
    }

    /* Guarantee room for one full BSIZE read after the buffered data. */
    if((s->top - s->lim) < BSIZE){
        uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
        if(!buf){
            perror("fill: malloc");
            exit(EXIT_FAILURE);
        }
        /* s->tok == s->bot at this point, so this copies all buffered data. */
        if(s->lim != s->tok)
            memcpy(buf, s->tok, s->lim - s->tok);
        s->tok = buf;
        s->ptr = &buf[s->ptr - s->bot];
        cursor = &buf[cursor - s->bot];
        s->pos = &buf[s->pos - s->bot];
        s->lim = &buf[s->lim - s->bot];
        s->top = &s->lim[BSIZE];
        free(s->bot);
        s->bot = buf;
    }

    /*
     * read() may legitimately return fewer bytes than requested (pipes,
     * terminals) without the input being finished, so keep reading until
     * the block is full.  A return of 0 (end of input) or -1 (error) ends
     * the input; the error value is never used as a byte count.
     */
    cnt = 0;
    while(cnt < BSIZE){
        ssize_t got = read(s->fd, s->lim + cnt, BSIZE - cnt);
        if(got <= 0)
            break;
        cnt += (uint) got;
    }
    if(cnt != BSIZE){
        /* cnt < BSIZE here, so the sentinel still fits below s->top. */
        s->eof = &s->lim[cnt];
        *(s->eof)++ = '\n';
    }
    s->lim += cnt;

    return cursor;
}
 | 
						|
 | 
						|
/*
 * Return the code of the next token in the scanner's input.
 *
 * Scanning resumes at s->cur.  Identifiers yield ID, integer and character
 * constants ICON, floating constants FCON, string literals SCON, and
 * multi-character operators their own codes; single-character punctuation
 * is returned as the character's own value.  Blanks, newlines and
 * slash-star comments are skipped.  Each newline increments s->line and
 * records the position after it in s->pos.  A newline whose end coincides
 * with s->eof is the end-of-input sentinel and yields EOI.  Any other
 * character is reported on stdout and skipped.
 *
 * On return s->tok points at the start of the token and s->cur just past it.
 */
int scan(Scanner *s){
	uchar *cursor = s->cur;

std:
	/* Each match attempt begins here; mark where the token starts. */
	s->tok = cursor;

/*!re2c
any	= [\000-\377];
O	= [0-7];
D	= [0-9];
L	= [a-zA-Z_];
H	= [a-fA-F0-9];
E	= [Ee] [+-]? D+;
FS	= [fFlL];
IS	= [uUlL]*;
ESC	= [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
*/

/*!re2c
	"/*"			{ goto comment; }

	L (L|D)*		{ RET(ID); }

	("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
	(['] (ESC|any\[\n\\'])* ['])
				{ RET(ICON); }

	(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
				{ RET(FCON); }

	(["] (ESC|any\[\n\\"])* ["])
				{ RET(SCON); }

	"..."			{ RET(ELLIPSIS); }
	">>="			{ RET(RSHIFTEQ); }
	"<<="			{ RET(LSHIFTEQ); }
	"+="			{ RET(ADDEQ); }
	"-="			{ RET(SUBEQ); }
	"*="			{ RET(MULEQ); }
	"/="			{ RET(DIVEQ); }
	"%="			{ RET(MODEQ); }
	"&="			{ RET(ANDEQ); }
	"^="			{ RET(XOREQ); }
	"|="			{ RET(OREQ); }
	">>"			{ RET(RSHIFT); }
	"<<"			{ RET(LSHIFT); }
	"++"			{ RET(INCR); }
	"--"			{ RET(DECR); }
	"->"			{ RET(DEREF); }
	"&&"			{ RET(ANDAND); }
	"||"			{ RET(OROR); }
	"<="			{ RET(LEQ); }
	">="			{ RET(GEQ); }
	"=="			{ RET(EQL); }
	"!="			{ RET(NEQ); }
	";"			{ RET(';'); }
	"{"			{ RET('{'); }
	"}"			{ RET('}'); }
	","			{ RET(','); }
	":"			{ RET(':'); }
	"="			{ RET('='); }
	"("			{ RET('('); }
	")"			{ RET(')'); }
	"["			{ RET('['); }
	"]"			{ RET(']'); }
	"."			{ RET('.'); }
	"&"			{ RET('&'); }
	"!"			{ RET('!'); }
	"~"			{ RET('~'); }
	"-"			{ RET('-'); }
	"+"			{ RET('+'); }
	"*"			{ RET('*'); }
	"/"			{ RET('/'); }
	"%"			{ RET('%'); }
	"<"			{ RET('<'); }
	">"			{ RET('>'); }
	"^"			{ RET('^'); }
	"|"			{ RET('|'); }
	"?"			{ RET('?'); }

	[ \t\v\f]+		{ goto std; }

	"\n"
	    {
		if(cursor == s->eof){
		    RET(EOI);
		}
		s->pos = cursor;
		s->line++;
		goto std;
	    }

	any
	    {
		printf("unexpected character: %c\n", *s->tok);
		goto std;
	    }
*/

comment:
/*!re2c
	"*/"			{ goto std; }

	"\n"
	    {
		if(cursor == s->eof){
		    RET(EOI);
		}
		s->pos = cursor;
		s->tok = cursor;
		s->line++;
		goto comment;
	    }

	any			{ goto comment; }
*/
}
 | 
						|
 | 
						|
main(){
 | 
						|
    Scanner in;
 | 
						|
    int t;
 | 
						|
    memset((char*) &in, 0, sizeof(in));
 | 
						|
    in.fd = 0;
 | 
						|
    while((t = scan(&in)) != EOI){
 | 
						|
/*
 | 
						|
	printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
 | 
						|
	printf("%d\n", t);
 | 
						|
*/
 | 
						|
    }
 | 
						|
    close(in.fd);
 | 
						|
}
 |