mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	JFlex is a lexical analyzer generator for Java, see also http://jflex.de or https://github.com/jflex-de/jflex
		
			
				
	
	
		
			305 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			305 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 | 
						|
 * Copyright (C) 1998-2015  Gerwin Klein <lsf@jflex.de>                    *
 | 
						|
 * All rights reserved.                                                    *
 | 
						|
 *                                                                         *
 | 
						|
 * License: BSD                                                            *
 | 
						|
 *                                                                         *
 | 
						|
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 | 
						|
 | 
						|
/* Java 1.2 language lexer specification */
 | 
						|
 | 
						|
/* Use together with unicode.flex for Unicode preprocessing  */
 | 
						|
/* and java12.cup for a Java 1.2 parser                      */
 | 
						|
 | 
						|
/* Note that this lexer specification is not tuned for speed.
 | 
						|
   It is in fact quite slow on integer and floating point literals, 
 | 
						|
   because the input is read twice and the methods used to parse
 | 
						|
   the numbers are not very fast. 
 | 
						|
   For a production quality application (e.g. a Java compiler) 
 | 
						|
   this could be optimized */
 | 
						|
 | 
						|
 | 
						|
import java_cup.runtime.*;
 | 
						|
 | 
						|
%%
 | 
						|
 | 
						|
%public
 | 
						|
%class Scanner
 | 
						|
%implements sym
 | 
						|
 | 
						|
%unicode
 | 
						|
 | 
						|
%line
 | 
						|
%column
 | 
						|
 | 
						|
%cup
 | 
						|
%cupdebug
 | 
						|
 | 
						|
%{
 | 
						|
  /* Collects the contents of the string literal being scanned: it is cleared
     when the opening quote is matched, appended to by the STRING state rules,
     and converted to the token value when the closing quote is matched. */
  StringBuilder string = new StringBuilder();
 | 
						|
  
 | 
						|
  /**
   * Builds a token of the given type that carries no value.
   * Its line and column are <code>yyline+1</code> and <code>yycolumn+1</code>,
   * i.e. 1-based given that JFlex counts both from 0.
   */
  private Symbol symbol(int type) {
    final int line = yyline + 1;
    final int column = yycolumn + 1;
    return new JavaSymbol(type, line, column);
  }

  /**
   * Builds a token of the given type that carries <code>value</code>.
   * Its line and column are <code>yyline+1</code> and <code>yycolumn+1</code>,
   * i.e. 1-based given that JFlex counts both from 0.
   */
  private Symbol symbol(int type, Object value) {
    final int line = yyline + 1;
    final int column = yycolumn + 1;
    return new JavaSymbol(type, line, column, value);
  }
 | 
						|
 | 
						|
  /**
   * Converts the characters read through <code>yycharat</code> at positions
   * <code>start</code> (inclusive) up to <code>end</code> (exclusive) into a
   * long, interpreting each one as a digit in the given <code>radix</code>.
   *
   * Nothing is validated here: the caller has to make sure that the range
   * contains a correct representation of a long value for that radix.
   */
  private long parseLong(int start, int end, int radix) {
    long value = 0;

    int pos = start;
    while (pos < end) {
      // move the digits gathered so far one place up, then add the next digit
      value = value * radix + Character.digit(yycharat(pos), radix);
      pos++;
    }

    return value;
  }
 | 
						|
%}
 | 
						|
 | 
						|
/* main character classes */
LineTerminator = \r|\n|\r\n
InputCharacter = [^\r\n]

WhiteSpace     = {LineTerminator} | [ \t\f]

/* comments: the three concrete forms, then their union */
TraditionalComment   = "/*" [^*] ~"*/" | "/*" "*"+ "/"
EndOfLineComment     = "//" {InputCharacter}* {LineTerminator}?
DocumentationComment = "/*" "*"+ [^/*] ~"*/"

Comment = {TraditionalComment} | {EndOfLineComment} | {DocumentationComment}

/* identifiers */
Identifier = [:jletter:][:jletterdigit:]*

/* digit classes used by the integer literals */
HexDigit          = [0-9a-fA-F]
OctDigit          = [0-7]

/* integer literals */
DecIntegerLiteral = 0 | [1-9][0-9]*
DecLongLiteral    = {DecIntegerLiteral} [lL]

HexIntegerLiteral = 0 [xX] 0* {HexDigit} {1,8}
HexLongLiteral    = 0 [xX] 0* {HexDigit} {1,16} [lL]

OctIntegerLiteral = 0+ [1-3]? {OctDigit} {1,15}
OctLongLiteral    = 0+ 1? {OctDigit} {1,21} [lL]

/* floating point literals: mantissa forms and exponent, then the literals */
FLit1    = [0-9]+ \. [0-9]*
FLit2    = \. [0-9]+
FLit3    = [0-9]+
Exponent = [eE] [+-]? [0-9]+

FloatLiteral  = ({FLit1}|{FLit2}|{FLit3}) {Exponent}? [fF]
DoubleLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}?

/* characters allowed unescaped inside string and character literals */
StringCharacter = [^\r\n\"\\]
SingleCharacter = [^\r\n\'\\]
 | 
						|
 | 
						|
%state STRING, CHARLITERAL
 | 
						|
 | 
						|
%%
 | 
						|
 | 
						|
<YYINITIAL> {
 | 
						|
 | 
						|
  /* keywords, in alphabetical order (each pattern is a distinct literal, so
     the order among them does not influence matching) */
  "abstract"                     { return symbol(ABSTRACT); }
  "boolean"                      { return symbol(BOOLEAN); }
  "break"                        { return symbol(BREAK); }
  "byte"                         { return symbol(BYTE); }
  "case"                         { return symbol(CASE); }
  "catch"                        { return symbol(CATCH); }
  "char"                         { return symbol(CHAR); }
  "class"                        { return symbol(CLASS); }
  "const"                        { return symbol(CONST); }
  "continue"                     { return symbol(CONTINUE); }
  "default"                      { return symbol(DEFAULT); }
  "do"                           { return symbol(DO); }
  "double"                       { return symbol(DOUBLE); }
  "else"                         { return symbol(ELSE); }
  "extends"                      { return symbol(EXTENDS); }
  "final"                        { return symbol(FINAL); }
  "finally"                      { return symbol(FINALLY); }
  "float"                        { return symbol(FLOAT); }
  "for"                          { return symbol(FOR); }
  "goto"                         { return symbol(GOTO); }
  "if"                           { return symbol(IF); }
  "implements"                   { return symbol(IMPLEMENTS); }
  "import"                       { return symbol(IMPORT); }
  "instanceof"                   { return symbol(INSTANCEOF); }
  "int"                          { return symbol(INT); }
  "interface"                    { return symbol(INTERFACE); }
  "long"                         { return symbol(LONG); }
  "native"                       { return symbol(NATIVE); }
  "new"                          { return symbol(NEW); }
  "package"                      { return symbol(PACKAGE); }
  "private"                      { return symbol(PRIVATE); }
  "protected"                    { return symbol(PROTECTED); }
  "public"                       { return symbol(PUBLIC); }
  "return"                       { return symbol(RETURN); }
  "short"                        { return symbol(SHORT); }
  "static"                       { return symbol(STATIC); }
  "strictfp"                     { return symbol(STRICTFP); }
  "super"                        { return symbol(SUPER); }
  "switch"                       { return symbol(SWITCH); }
  "synchronized"                 { return symbol(SYNCHRONIZED); }
  "this"                         { return symbol(THIS); }
  "throw"                        { return symbol(THROW); }
  "throws"                       { return symbol(THROWS); }
  "transient"                    { return symbol(TRANSIENT); }
  "try"                          { return symbol(TRY); }
  "void"                         { return symbol(VOID); }
  "volatile"                     { return symbol(VOLATILE); }
  "while"                        { return symbol(WHILE); }

  /* boolean literals: the token value is the corresponding Boolean */
  "true"                         { return symbol(BOOLEAN_LITERAL, Boolean.TRUE); }
  "false"                        { return symbol(BOOLEAN_LITERAL, Boolean.FALSE); }

  /* null literal: a token without a value */
  "null"                         { return symbol(NULL_LITERAL); }
 | 
						|
  
 | 
						|
  
 | 
						|
  /* separators */
  "("                            { return symbol(LPAREN); }
  ")"                            { return symbol(RPAREN); }
  "{"                            { return symbol(LBRACE); }
  "}"                            { return symbol(RBRACE); }
  "["                            { return symbol(LBRACK); }
  "]"                            { return symbol(RBRACK); }
  ";"                            { return symbol(SEMICOLON); }
  ","                            { return symbol(COMMA); }
  "."                            { return symbol(DOT); }

  /* operators, grouped by family; the scanner picks the longest match, so
     e.g. ">>>=" wins over ">>>" regardless of where each rule is listed */

  /* assignment */
  "="                            { return symbol(EQ); }
  "+="                           { return symbol(PLUSEQ); }
  "-="                           { return symbol(MINUSEQ); }
  "*="                           { return symbol(MULTEQ); }
  "/="                           { return symbol(DIVEQ); }
  "%="                           { return symbol(MODEQ); }
  "&="                           { return symbol(ANDEQ); }
  "|="                           { return symbol(OREQ); }
  "^="                           { return symbol(XOREQ); }
  "<<="                          { return symbol(LSHIFTEQ); }
  ">>="                          { return symbol(RSHIFTEQ); }
  ">>>="                         { return symbol(URSHIFTEQ); }

  /* comparison */
  "=="                           { return symbol(EQEQ); }
  "!="                           { return symbol(NOTEQ); }
  "<"                            { return symbol(LT); }
  "<="                           { return symbol(LTEQ); }
  ">"                            { return symbol(GT); }
  ">="                           { return symbol(GTEQ); }

  /* logical */
  "!"                            { return symbol(NOT); }
  "&&"                           { return symbol(ANDAND); }
  "||"                           { return symbol(OROR); }

  /* arithmetic */
  "+"                            { return symbol(PLUS); }
  "-"                            { return symbol(MINUS); }
  "*"                            { return symbol(MULT); }
  "/"                            { return symbol(DIV); }
  "%"                            { return symbol(MOD); }
  "++"                           { return symbol(PLUSPLUS); }
  "--"                           { return symbol(MINUSMINUS); }

  /* bitwise and shift */
  "~"                            { return symbol(COMP); }
  "&"                            { return symbol(AND); }
  "|"                            { return symbol(OR); }
  "^"                            { return symbol(XOR); }
  "<<"                           { return symbol(LSHIFT); }
  ">>"                           { return symbol(RSHIFT); }
  ">>>"                          { return symbol(URSHIFT); }

  /* conditional */
  "?"                            { return symbol(QUESTION); }
  ":"                            { return symbol(COLON); }
 | 
						|
  
 | 
						|
  /* opening quote of a string literal: empty the buffer, then let the STRING
     state collect the body; no token is returned yet */
  \"                             { string.setLength(0); yybegin(STRING); }

  /* opening quote of a character literal: the remainder, including the
     closing quote, is matched in the CHARLITERAL state */
  \'                             { yybegin(CHARLITERAL); }
 | 
						|
 | 
						|
  /* numeric literals */

  /* Values are boxed through the valueOf factories; the boxed-type
     constructors (new Integer(..), new Long(..), ...) are deprecated. */

  /* This is matched together with the minus, because the number is too big to
     be represented by a positive integer. */
  "-2147483648"                  { return symbol(INTEGER_LITERAL, Integer.valueOf(Integer.MIN_VALUE)); }

  /* decimal: parsed from the matched text; the long form drops its [lL] suffix first */
  {DecIntegerLiteral}            { return symbol(INTEGER_LITERAL, Integer.valueOf(yytext())); }
  {DecLongLiteral}               { return symbol(INTEGER_LITERAL, Long.valueOf(yytext().substring(0,yylength()-1))); }

  /* hexadecimal: digits start at index 2, behind the 0x prefix; the long form
     stops one character early to skip the [lL] suffix */
  {HexIntegerLiteral}            { return symbol(INTEGER_LITERAL, Integer.valueOf((int) parseLong(2, yylength(), 16))); }
  {HexLongLiteral}               { return symbol(INTEGER_LITERAL, Long.valueOf(parseLong(2, yylength()-1, 16))); }

  /* octal: the whole match is parsed, leading zeros included */
  {OctIntegerLiteral}            { return symbol(INTEGER_LITERAL, Integer.valueOf((int) parseLong(0, yylength(), 8))); }
  {OctLongLiteral}               { return symbol(INTEGER_LITERAL, Long.valueOf(parseLong(0, yylength()-1, 8))); }

  /* floating point: the [fF] and [dD] forms drop their one-letter suffix before parsing */
  {FloatLiteral}                 { return symbol(FLOATING_POINT_LITERAL, Float.valueOf(yytext().substring(0,yylength()-1))); }
  {DoubleLiteral}                { return symbol(FLOATING_POINT_LITERAL, Double.valueOf(yytext())); }
  {DoubleLiteral}[dD]            { return symbol(FLOATING_POINT_LITERAL, Double.valueOf(yytext().substring(0,yylength()-1))); }
 | 
						|
  
 | 
						|
  /* comments and whitespace produce no token; the | action makes the first
     rule share the action of the rule that follows it */
  {Comment}                      |
  {WhiteSpace}                   { /* ignore */ }

  /* identifiers: the matched text becomes the token value */
  {Identifier}                   { return symbol(IDENTIFIER, yytext()); }
 | 
						|
}
 | 
						|
 | 
						|
<STRING> {
  /* closing quote: return to YYINITIAL and emit the collected text */
  \"                             { yybegin(YYINITIAL);
                                   return symbol(STRING_LITERAL, string.toString()); }

  /* a run of ordinary characters is copied into the buffer unchanged */
  {StringCharacter}+             { string.append(yytext()); }

  /* escape sequences: append the character each one stands for */
  "\\b"                          { string.append('\b'); }
  "\\t"                          { string.append('\t'); }
  "\\n"                          { string.append('\n'); }
  "\\f"                          { string.append('\f'); }
  "\\r"                          { string.append('\r'); }
  "\\\""                         { string.append('"'); }
  "\\'"                          { string.append('\''); }
  "\\\\"                         { string.append('\\'); }

  /* octal escape: everything behind the backslash is the character code in base 8 */
  \\[0-3]?{OctDigit}?{OctDigit}  { String octalDigits = yytext().substring(1);
                                   string.append((char) Integer.parseInt(octalDigits, 8)); }

  /* error cases: any other backslash sequence, or a line end inside the literal */
  \\.                            { throw new RuntimeException("Illegal escape sequence \""+yytext()+"\""); }
  {LineTerminator}               { throw new RuntimeException("Unterminated string at end of line"); }
}
 | 
						|
 | 
						|
<CHARLITERAL> {
  /* Every rule below matches the literal's content together with the closing
     quote, switches back to YYINITIAL and returns the character as the value. */

  /* a single ordinary character */
  {SingleCharacter}\'            { yybegin(YYINITIAL);
                                   return symbol(CHARACTER_LITERAL, yytext().charAt(0)); }

  /* escape sequences */
  "\\b"\'                        { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\b'); }
  "\\t"\'                        { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\t'); }
  "\\n"\'                        { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\n'); }
  "\\f"\'                        { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\f'); }
  "\\r"\'                        { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\r'); }
  "\\\""\'                       { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '"'); }
  "\\'"\'                        { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\''); }
  "\\\\"\'                       { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\\'); }

  /* octal escape: the digits sit between the backslash and the closing quote */
  \\[0-3]?{OctDigit}?{OctDigit}\' { yybegin(YYINITIAL);
                                   String octalDigits = yytext().substring(1, yylength() - 1);
                                   char decoded = (char) Integer.parseInt(octalDigits, 8);
                                   return symbol(CHARACTER_LITERAL, decoded); }

  /* error cases: any other backslash sequence, or a line end inside the literal */
  \\.                            { throw new RuntimeException("Illegal escape sequence \""+yytext()+"\""); }
  {LineTerminator}               { throw new RuntimeException("Unterminated character literal at end of line"); }
}
 | 
						|
 | 
						|
/* error fallback: reached in any state when no other rule matches the next
   character. The position is reported as yyline+1 / yycolumn+1 so that it
   uses the same 1-based coordinates as the symbols built by symbol(). */
[^]                              { throw new RuntimeException("Illegal character \""+yytext()+
                                                              "\" at line "+(yyline+1)+", column "+(yycolumn+1)); }
 | 
						|
/* end of input: emit the EOF token */
<<EOF>>                          { return symbol(EOF); }