mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	JFlex is a lexical analyzer generator for Java, see also http://jflex.de or https://github.com/jflex-de/jflex
		
			
				
	
	
		
			743 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			743 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 | 
						|
 * JFlex 1.7.0-SNAPSHOT                                                    *
 | 
						|
 * Copyright (C) 1998-2015  Gerwin Klein <lsf@jflex.de>                    *
 | 
						|
 * All rights reserved.                                                    *
 | 
						|
 *                                                                         *
 | 
						|
 * License: BSD                                                            *
 | 
						|
 *                                                                         *
 | 
						|
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 | 
						|
 | 
						|
package jflex;
 | 
						|
 | 
						|
import java_cup.runtime.Symbol;
 | 
						|
import java.io.*;
 | 
						|
import java.util.Stack;
 | 
						|
import java.util.ArrayList;
 | 
						|
import java.util.List;
 | 
						|
import java.util.Map;
 | 
						|
import java.util.HashMap;
 | 
						|
import jflex.unicode.UnicodeProperties;
 | 
						|
 | 
						|
%%
 | 
						|
 | 
						|
// Options for the generated scanner class and the lexical states used by the rules below.
%final
%public
%class LexScan
%implements sym, java_cup.runtime.Scanner
%function next_token

%type Symbol
%unicode

%column
%line

%eofclose

%state COMMENT, STATELIST, MACROS, REGEXPSTART
%state REGEXP, JAVA_CODE, STATES, STRING_CONTENT
%state CHARCLASS, COPY, REPEATEXP, EATWSPNL
%state CTOR_ARG, REGEXP_CODEPOINT_SEQUENCE
%state STRING_CODEPOINT_SEQUENCE, CHARCLASS_CODEPOINT

%inputstreamctor false

%cupdebug
 | 
						|
 | 
						|
%{
 | 
						|
  int balance = 0;
 | 
						|
  int commentbalance = 0;
 | 
						|
  int action_line = 0;
 | 
						|
  int bufferSize = 16384;
 | 
						|
 | 
						|
  File file;
 | 
						|
  Stack<File> files = new Stack<File>();
 | 
						|
 | 
						|
  StringBuilder userCode   = new StringBuilder();
 | 
						|
 | 
						|
  String classCode;
 | 
						|
  String initCode;
 | 
						|
  String initThrow;
 | 
						|
  String eofCode;
 | 
						|
  String eofThrow;
 | 
						|
  String lexThrow;
 | 
						|
  String eofVal;
 | 
						|
  String scanErrorException;
 | 
						|
  String cupSymbol = "sym";
 | 
						|
 | 
						|
  StringBuilder actionText = new StringBuilder();
 | 
						|
  StringBuilder string     = new StringBuilder();
 | 
						|
 | 
						|
  private UnicodeProperties unicodeProperties;
 | 
						|
 | 
						|
  boolean charCount;
 | 
						|
  boolean lineCount;
 | 
						|
  boolean columnCount;
 | 
						|
  boolean cupCompatible;
 | 
						|
  boolean cup2Compatible;
 | 
						|
  boolean cupDebug;
 | 
						|
  boolean isInteger;
 | 
						|
  boolean isIntWrap;
 | 
						|
  boolean isYYEOF;
 | 
						|
  boolean notUnix;
 | 
						|
  boolean isPublic;
 | 
						|
  boolean isFinal;
 | 
						|
  boolean isAbstract;
 | 
						|
  boolean bolUsed;
 | 
						|
  boolean standalone;
 | 
						|
  boolean debugOption;
 | 
						|
  boolean caseless;
 | 
						|
  boolean inclusive_states;
 | 
						|
  boolean eofclose;
 | 
						|
  boolean isASCII;
 | 
						|
  // TODO: In the version of JFlex after 1.6, the InputStream ctor 
 | 
						|
  // TODO: will never be emitted, and this option will cease to exist.
 | 
						|
  boolean emitInputStreamCtor = Options.emitInputStreamCtor;
 | 
						|
 | 
						|
  String isImplementing;
 | 
						|
  String isExtending;
 | 
						|
  String className = "Yylex";
 | 
						|
  String functionName;
 | 
						|
  String tokenType;
 | 
						|
  String visibility = "public";
 | 
						|
    
 | 
						|
  List<String> ctorArgs = new ArrayList<String>();
 | 
						|
  List<String> ctorTypes = new ArrayList<String>();
 | 
						|
    
 | 
						|
  LexicalStates states = new LexicalStates();
 | 
						|
 | 
						|
  List<Action> actions = new ArrayList<Action>();
 | 
						|
 | 
						|
  private int nextState;
 | 
						|
 | 
						|
  boolean macroDefinition;
 | 
						|
 | 
						|
  Timer t = new Timer();
 | 
						|
 | 
						|
  // CharClasses.init() is delayed until UnicodeProperties.init() has been called,
 | 
						|
  // since the max char code won't be known until then.
 | 
						|
  private CharClasses charClasses = new CharClasses();
 | 
						|
  
 | 
						|
  public CharClasses getCharClasses() {
 | 
						|
    return charClasses;
 | 
						|
  }
 | 
						|
 | 
						|
  public int currentLine() {
 | 
						|
    return yyline;
 | 
						|
  }
 | 
						|
 | 
						|
  public void setFile(File file) {
 | 
						|
    this.file = file;
 | 
						|
  }
 | 
						|
 | 
						|
  private Symbol symbol(int type, Object value) {
 | 
						|
    return new Symbol(type, yyline, yycolumn, value);
 | 
						|
  }
 | 
						|
 | 
						|
  private Symbol symbol(int type) {
 | 
						|
    return new Symbol(type, yyline, yycolumn);
 | 
						|
  }
 | 
						|
 | 
						|
  // updates line and column count to the beginning of the first
 | 
						|
  // non whitespace character in yytext, but leaves yyline+yycolumn
 | 
						|
  // untouched
 | 
						|
  private Symbol symbol_countUpdate(int type, Object value) {
 | 
						|
     int lc = yyline;
 | 
						|
     int cc = yycolumn;
 | 
						|
     String text = yytext();
 | 
						|
 | 
						|
     for (int i=0; i < text.length(); i++) {
 | 
						|
      char c = text.charAt(i);
 | 
						|
 | 
						|
      if (c != '\n' && c != '\r' && c != ' ' && c != '\t' )
 | 
						|
        return new Symbol(type, lc, cc, value);
 | 
						|
 | 
						|
      if (c == '\n') {
 | 
						|
        lc++;
 | 
						|
        cc = 0;
 | 
						|
      }
 | 
						|
      else
 | 
						|
        cc++;
 | 
						|
    }
 | 
						|
 | 
						|
    return new Symbol(type, yyline, yycolumn, value);
 | 
						|
  }
 | 
						|
 | 
						|
  private String makeMacroIdent() {
 | 
						|
    String matched = yytext().trim();
 | 
						|
    return matched.substring(1, matched.length()-1).trim();
 | 
						|
  }
 | 
						|
 | 
						|
  public static String conc(Object a, Object b) {
 | 
						|
    if (a == null && b == null) return null;
 | 
						|
    if (a == null) return b.toString();
 | 
						|
    if (b == null) return a.toString();
 | 
						|
 | 
						|
    return a.toString()+b.toString();
 | 
						|
  }
 | 
						|
 | 
						|
  public static String concExc(Object a, Object b) {
 | 
						|
    if (a == null && b == null) return null;
 | 
						|
    if (a == null) return b.toString();
 | 
						|
    if (b == null) return a.toString();
 | 
						|
 | 
						|
    return a.toString()+", "+b.toString();
 | 
						|
  }
 | 
						|
  
 | 
						|
  public UnicodeProperties getUnicodeProperties() {
 | 
						|
    return unicodeProperties;
 | 
						|
  }
 | 
						|
  
 | 
						|
  private void populateDefaultVersionUnicodeProperties() {
 | 
						|
    try {
 | 
						|
      unicodeProperties = new UnicodeProperties();
 | 
						|
    } catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
 | 
						|
      throw new ScannerException
 | 
						|
        (file, ErrorMessages.UNSUPPORTED_UNICODE_VERSION, yyline);
 | 
						|
    }
 | 
						|
    charClasses.init
 | 
						|
      (Options.jlex ? 127 : unicodeProperties.getMaximumCodePoint(), this);
 | 
						|
  }
 | 
						|
  
 | 
						|
  private void includeFile(String filePath) {
 | 
						|
    File f = new File(file.getParentFile(), filePath);
 | 
						|
    if ( !f.canRead() )
 | 
						|
      throw new ScannerException(file,ErrorMessages.NOT_READABLE, yyline);
 | 
						|
    // check for cycle
 | 
						|
    if (files.search(f) > 0)
 | 
						|
      throw new ScannerException(file,ErrorMessages.FILE_CYCLE, yyline);
 | 
						|
    try {
 | 
						|
      yypushStream( new FileReader(f) );
 | 
						|
      files.push(file);
 | 
						|
      file = f;
 | 
						|
      Out.println("Including \""+file+"\"");
 | 
						|
    }
 | 
						|
    catch (FileNotFoundException e) {
 | 
						|
      throw new ScannerException(file,ErrorMessages.NOT_READABLE, yyline);
 | 
						|
    }
 | 
						|
  }
 | 
						|
%}
 | 
						|
 | 
						|
%init{
  // register the initial state; the second argument is the "inclusive" flag
  states.insert("YYINITIAL", true);
%init}
 | 
						|
 | 
						|
 | 
						|
// digits, numbers and numeric escape sequences
Digit      = [0-9]
HexDigit   = [0-9a-fA-F]
OctDigit   = [0-7]

Number     = {Digit}+
HexNumber  = \\ x {HexDigit} {2}
OctNumber  = \\ [0-3]? {OctDigit} {1, 2}

// Unicode4 can encode chars only in the BMP with the 16 bits provided by its
// 4 hex digits.
Unicode4  = \\ u {HexDigit} {4}

// Unicode6 can encode all Unicode chars, both in the BMP and in the
// supplementary planes -- only 21 bits are required as of Unicode 5.0,
// but its six hex digits provide 24 bits.
Unicode6  = \\ U {HexDigit} {6}
 | 
						|
 | 
						|
// whitespace and line terminators
// see http://www.unicode.org/unicode/reports/tr18/
WSP        = [ \t\b]
WSPNL      = [\u2028\u2029\u000A\u000B\u000C\u000D\u0085\t\b\ ]
NWSPNL     = [^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\t\b\ ]
NL         = [\u2028\u2029\u000A\u000B\u000C\u000D\u0085] | \u000D\u000A
NNL        = [^\u2028\u2029\u000A\u000B\u000C\u000D\u0085]
 | 
						|
 | 
						|
// identifiers, qualified names and type expressions used by the directives
Ident      = {IdentStart} {IdentPart}*
QualIdent  = {Ident} ( {WSP}* "." {WSP}* {Ident} )*
QUIL       = {QualIdent} ( {WSP}* "," {WSP}* {QualIdent} )*
Array      = "[" {WSP}* "]"
ParamPart  = {IdentStart}|{IdentPart}|"<"|">"|","|{WSP}|"&"|"?"|"."
GenParam   = "<" {ParamPart}+ ">"
ClassT     = {Ident} ({WSP}* {GenParam})?
QClassT    = {QualIdent} ({WSP}* {GenParam})?
ArrType    = ({GenParam} {WSP}*)? {QClassT} ({WSP}* {Array})*

IdentStart = [:jletter:]
IdentPart  = [:jletterdigit:]
 | 
						|
 | 
						|
// comment bodies in the specification itself
JFlexCommentChar = [^*/]|"/"+[^*/]|"*"+[^*/]
JFlexComment = {JFlexCommentChar}+

/* Java comments */
JavaComment = {TraditionalComment}|{EndOfLineComment}
TraditionalComment = "/*"{CommentContent}\*+"/"
EndOfLineComment = "//".*{NL}

CommentContent = ([^*]|\*+[^*/])*

StringCharacter = [^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\"\\]

CharLiteral = \'([^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\'\\]|{EscapeSequence})\'
StringLiteral = \"({StringCharacter}|{EscapeSequence})*\"

EscapeSequence = \\[^\u2028\u2029\u000A\u000B\u000C\u000D\u0085]|\\+u{HexDigit}{4}|\\[0-3]?{OctDigit}{1,2}

/* \\(b|t|n|f|r|\"|\'|\\|[0-3]?{OctDigit}{1,2}|u{HexDigit}{4}) */

JavaRest = [^\{\}\"\'/]|"/"[^*/]
JavaCode = ({JavaRest}|{StringLiteral}|{CharLiteral}|{JavaComment})+

DottedVersion =  [1-9][0-9]*(\.[0-9]+){0,2}
 | 
						|
 | 
						|
%%
 | 
						|
 | 
						|
// user code section: everything up to the first %% is collected verbatim
<YYINITIAL> {
  "%%".*{NL}?              {
                             t.start();
                             yybegin(MACROS);
                             macroDefinition = true;
                             return symbol(USERCODE,userCode);
                           }
  .*{NL} | .+              { userCode.append(yytext()); }
  <<EOF>>                  { return symbol(EOF); }
}
 | 
						|
 | 
						|
// code sections (%{ ... %}, %init{ ... %init}, ...): the opening line resets the
// buffer, every following line is copied until the matching closing line is found
<MACROS>   ("%{"|"%init{"|"%initthrow{"|"%eof{"|"%eofthrow{"|"%yylexthrow{"|"%eofval{").*{NL}
                                     { string.setLength(0); yybegin(COPY); }
<COPY> {
  "%}".*{NL}                    { classCode = conc(classCode,string);  yybegin(MACROS);  }
  "%init}".*{NL}                { initCode = conc(initCode,string);    yybegin(MACROS);  }
  "%initthrow}".*{NL}           { initThrow = concExc(initThrow,string);  yybegin(MACROS); }
  "%eof}".*{NL}                 { eofCode = conc(eofCode,string); yybegin(MACROS); }
  "%eofthrow}".*{NL}            { eofThrow = concExc(eofThrow,string); yybegin(MACROS); }
  "%yylexthrow}".*{NL}          { lexThrow = concExc(lexThrow,string); yybegin(MACROS); }
  "%eofval}".*{NL}              { eofVal = string.toString(); yybegin(MACROS); }

  .*{NL}                        { string.append(yytext()); }

  <<EOF>>                       { throw new ScannerException(file,ErrorMessages.EOF_IN_MACROS); }
}
 | 
						|
 | 
						|
 | 
						|
// state declarations: %s/%state/%states declare inclusive states,
// %x/%xstate/%xstates declare exclusive ones
<MACROS> ^"%s" ("tate" "s"?)? {WSP}+   { inclusive_states = true; yybegin(STATELIST); }
<MACROS> ^"%x" ("state" "s"?)? {WSP}+  { inclusive_states = false; yybegin(STATELIST); }
<STATELIST> {
  {Ident}                             { states.insert(yytext(),inclusive_states); }
  ([\ \t]*","[\ \t]*)|([\ \t]+)       { }
  {NL}                                { yybegin(MACROS);  }
  <<EOF>>                       { throw new ScannerException(file,ErrorMessages.EOF_IN_MACROS); }
}
 | 
						|
 | 
						|
// directives and macro definitions between the first and the second %%
<MACROS> {
  "%char"                     { charCount = true;  }
  "%line"                     { lineCount = true;  }
  "%column"                   { columnCount = true; }
  "%byaccj"                   { isInteger = true;
                                if (eofVal == null)
                                  eofVal = "return 0;";
                                eofclose = true;
                              }
  "%cup2"                     { cup2Compatible = true;
                                isImplementing = concExc(isImplementing, "Scanner");
                                lineCount = true;
                                columnCount = true;
                                if (functionName == null)
                                  functionName = "readNextTerminal";
                                if (tokenType == null)
                                  tokenType = "ScannerToken<? extends Object>";
                                if (eofVal == null)
                                  eofVal = "return token(SpecialTerminals.EndOfInputStream);";
                                if (!Options.jlex) eofclose = true;
                                return symbol(UNICODE); // %unicode
                              }
  "%cup"                      { cupCompatible = true;
                                isImplementing = concExc(isImplementing, "java_cup.runtime.Scanner");
                                if (functionName == null)
                                  functionName = "next_token";
                                if (tokenType == null)
                                  tokenType = "java_cup.runtime.Symbol";
                                if (eofVal == null)
                                  eofVal = "return new java_cup.runtime.Symbol("+cupSymbol+".EOF);";
                                if (!Options.jlex) eofclose = true;
                              }
  "%cupsym"{WSP}+{QualIdent} {WSP}*  { cupSymbol = yytext().substring(8).trim();
                                if (cupCompatible) Out.warning(ErrorMessages.CUPSYM_AFTER_CUP, yyline); }
  "%cupsym"{WSP}+{NNL}*       { throw new ScannerException(file,ErrorMessages.QUIL_CUPSYM, yyline); }
  "%cupdebug"                 { cupDebug = true; }
  "%eofclose"({WSP}+"true")?  { eofclose = true; }
  "%eofclose"({WSP}+"false")  { eofclose = false; }
  "%class"{WSP}+{ClassT} {WSP}*     { className = yytext().substring(7).trim();  }
  "%ctorarg"{WSP}+{ArrType}{WSP}+   { yybegin(CTOR_ARG); ctorTypes.add(yytext().substring(8).trim()); }
  "%function"{WSP}+{Ident} {WSP}*   { functionName = yytext().substring(10).trim(); }
  "%type"{WSP}+{ArrType} {WSP}*     { tokenType = yytext().substring(6).trim(); }
  "%integer"|"%int"           { isInteger = true;  }
  "%intwrap"                  { isIntWrap = true;  }
  "%yyeof"                    { isYYEOF = true;  }
  "%notunix"                  { notUnix = true;  }
  "%7bit"                     { isASCII = true; return symbol(ASCII); }
  "%full"|"%8bit"             { return symbol(FULL); }
  "%16bit"                    { populateDefaultVersionUnicodeProperties();
                                return symbol(UNICODE);
                              }
  "%unicode"({WSP}+{DottedVersion})? { String v = yytext().substring(8).trim();
                                       if (v.length() == 0) {
                                         populateDefaultVersionUnicodeProperties();
                                       } else {
                                         try {
                                           unicodeProperties = new UnicodeProperties(v);
                                         } catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
                                           throw new ScannerException
                                             (file, ErrorMessages.UNSUPPORTED_UNICODE_VERSION, yyline);
                                         }
                                         charClasses.init
                                           (Options.jlex ? 127 : unicodeProperties.getMaximumCodePoint(), this);
                                       }
                                       return symbol(UNICODE);
                                     }

  "%caseless"|"%ignorecase"   { caseless = true; }
  "%implements"{WSP}+.*       { isImplementing = concExc(isImplementing, yytext().substring(12).trim());  }
  "%extends"{WSP}+{QClassT}{WSP}* { isExtending = yytext().substring(9).trim(); }
  "%public"                   { isPublic = true; }
  "%apiprivate"               { visibility = "private"; Skeleton.makePrivate(); }
  "%final"                    { isFinal = true; }
  "%abstract"                 { isAbstract = true; }
  "%debug"                    { debugOption = true; }
  "%standalone"               { standalone = true; isInteger = true; }
  "%pack"                     { /* no-op - this is the only generation method */ }
  "%include" {WSP}+ .*        { includeFile(yytext().substring(9).trim()); }
  "%buffer" {WSP}+ {Number} {WSP}*   { bufferSize = Integer.parseInt(yytext().substring(8).trim()); }
  "%buffer" {WSP}+ {NNL}*     { throw new ScannerException(file,ErrorMessages.NO_BUFFER_SIZE, yyline); }
  "%initthrow" {WSP}+ {QUIL} {WSP}* { initThrow = concExc(initThrow,yytext().substring(11).trim()); }
  "%initthrow" {WSP}+ {NNL}*  { throw new ScannerException(file,ErrorMessages.QUIL_INITTHROW, yyline); }
  "%eofthrow"  {WSP}+ {QUIL} {WSP}*  { eofThrow = concExc(eofThrow,yytext().substring(10).trim()); }
  "%eofthrow"  {WSP}+ {NNL}*  { throw new ScannerException(file,ErrorMessages.QUIL_EOFTHROW, yyline); }
  "%yylexthrow"{WSP}+ {QUIL} {WSP}*  { lexThrow = concExc(lexThrow,yytext().substring(12).trim()); }
  "%throws"    {WSP}+ {QUIL} {WSP}*  { lexThrow = concExc(lexThrow,yytext().substring(8).trim()); }
  "%yylexthrow"{WSP}+ {NNL}*  { throw new ScannerException(file,ErrorMessages.QUIL_YYLEXTHROW, yyline); }
  "%throws"    {WSP}+ {NNL}*  { throw new ScannerException(file,ErrorMessages.QUIL_THROW, yyline); }
  "%scanerror" {WSP}+ {QualIdent} {WSP}* { scanErrorException = yytext().substring(11).trim(); }
  "%scanerror" {WSP}+ {NNL}*  { throw new ScannerException(file,ErrorMessages.QUIL_SCANERROR, yyline); }
// TODO: In the version of JFlex after 1.6, the %inputstreamctor directive will become a no-op: the InputStream ctor will never be emitted.
  "%inputstreamctor"({WSP}+"true")? { emitInputStreamCtor = true; }
  "%inputstreamctor"{WSP}+"false"   { emitInputStreamCtor = false; }

  {Ident}                     { return symbol(IDENT, yytext()); }
  "="{WSP}*                   { if (null == unicodeProperties && ! isASCII) {
                                  populateDefaultVersionUnicodeProperties();
                                }
                                yybegin(REGEXP);
                                return symbol(EQUALS);
                              }

  "/*"                        { nextState = MACROS; yybegin(COMMENT); }

  {EndOfLineComment}          { }

  ^"%%" {NNL}*                { if (null == unicodeProperties && ! isASCII) {
                                  populateDefaultVersionUnicodeProperties();
                                }
                                macroDefinition = false;
                                yybegin(REGEXPSTART);
                                return symbol(DELIMITER);
                              }
  "%"{Ident}                  { throw new ScannerException(file,ErrorMessages.UNKNOWN_OPTION, yyline, yycolumn); }
  "%"                         { throw new ScannerException(file,ErrorMessages.UNKNOWN_OPTION, yyline, yycolumn); }
  ^{WSP}+"%"                  { Out.warning(ErrorMessages.NOT_AT_BOL, yyline); yypushback(1); }

  {WSP}+                      { }
  {NL}+                       { }
  <<EOF>>                     { // end of an included file: resume the including file
                                if ( yymoreStreams() ) {
                                  file = files.pop();
                                  yypopStream();
                                }
                                else
                                  throw new ScannerException(file,ErrorMessages.EOF_IN_MACROS);
                              }
}
 | 
						|
 | 
						|
// argument name following the type of a %ctorarg directive
<CTOR_ARG> {
  {Ident} {WSP}*   { yybegin(MACROS); ctorArgs.add(yytext().trim()); }
  [^]              { throw new ScannerException(file,ErrorMessages.CTOR_ARG,yyline,yycolumn); }
}
 | 
						|
 | 
						|
// start of a rule in the rules section
<REGEXPSTART> {
  ^ {WSP}* "%include" {WSP}+ .*  { includeFile(yytext().trim().substring(9).trim()); }
  {WSP}* "/*"                    { nextState = REGEXPSTART; yybegin(COMMENT); }
  {WSP}* "<"                     { yybegin(STATES); return symbol_countUpdate(LESSTHAN, null); }
  {WSP}* "}"                     { return symbol_countUpdate(RBRACE, null); }
  {WSP}* "//" {NNL}*             { }
  {WSP}* "<<EOF>>" {WSPNL}* "{"  { actionText.setLength(0); yybegin(JAVA_CODE);
                                   Symbol s = symbol_countUpdate(EOFRULE, null);
                                   action_line = s.left+1;
                                   return s;
                                 }
  ^ {WSP}* {NWSPNL}              { yypushback(yylength()); yybegin(REGEXP); }
  {WSP} | {NL}                   { }
}
 | 
						|
 | 
						|
// state list in front of a rule or state group: <A, B, ...>
<STATES> {
  {Ident}                     { return symbol(IDENT, yytext()); }
  ","                         { return symbol(COMMA); }
  {WSPNL}+                    { }

  // "{" will be caught in REGEXP
  ">"{WSPNL}*                 { yybegin(REGEXP); return symbol(MORETHAN); }

  <<EOF>>                     { throw new ScannerException(file,ErrorMessages.EOF_IN_STATES); }
}
 | 
						|
 | 
						|
 | 
						|
// regular expressions (macro right-hand sides and rule patterns)
<REGEXP> {
  "<<EOF>>" {WSPNL}+ "{"  { actionText.setLength(0); yybegin(JAVA_CODE); action_line = yyline+1; return symbol(EOFRULE); }
  "<<EOF>>"               { throw new ScannerException(file,ErrorMessages.EOF_WO_ACTION); }

  {WSPNL}*"|"{WSP}*$      { if (macroDefinition) {
                              yybegin(EATWSPNL);
                              return symbol(BAR);
                            }
                            else {
                              yybegin(REGEXPSTART);
                              return symbol(NOACTION);
                            }
                          }

  // stategroup
  "{"            { yybegin(REGEXPSTART); return symbol(LBRACE); }

  {WSPNL}*"|"    { return symbol(BAR); }

  {WSPNL}*\"     { string.setLength(0); nextState = REGEXP; yybegin(STRING_CONTENT); }
  {WSPNL}*"\\u{" { string.setLength(0); yybegin(REGEXP_CODEPOINT_SEQUENCE); }
  {WSPNL}*"!"    { return symbol(BANG); }
  {WSPNL}*"~"    { return symbol(TILDE); }
  {WSPNL}*"("    { return symbol(OPENBRACKET); }
  {WSPNL}*")"    { return symbol(CLOSEBRACKET); }
  {WSPNL}*"*"    { return symbol(STAR); }
  {WSPNL}*"+"    { return symbol(PLUS); }
  {WSPNL}*"?"    { return symbol(QUESTION); }
  {WSPNL}*"$"    { return symbol(DOLLAR); }
  {WSPNL}*"^"    { bolUsed = true; return symbol(HAT); }
  {WSPNL}*"."    { return symbol(POINT); }
  {WSPNL}*"\\R"  { return symbol(NEWLINE); }
  {WSPNL}*"["    { yybegin(CHARCLASS); return symbol(OPENCLASS); }
  {WSPNL}*"/"    { return symbol(LOOKAHEAD); }

  {WSPNL}* "{" {WSP}* {Ident} {WSP}* "}" { return symbol_countUpdate(MACROUSE, makeMacroIdent()); }
  {WSPNL}* "{" {WSP}* {Number}   { yybegin(REPEATEXP);
                                   // the count is the text after the opening brace
                                   return symbol(REPEAT,
                                                 Integer.valueOf(yytext().trim().substring(1).trim()));
                                 }

  {WSPNL}+ "{"    { actionText.setLength(0); yybegin(JAVA_CODE); action_line = yyline+1; return symbol(REGEXPEND); }
  {NL}            { if (macroDefinition) { yybegin(MACROS); } return symbol(REGEXPEND); }

  {WSPNL}*"/*"    { nextState = REGEXP; yybegin(COMMENT); }

  {WSPNL}*"//"{NNL}*  { }

  {WSP}+          { }

  // predefined classes and Unicode properties, valid in REGEXP and CHARCLASS
  <CHARCLASS> {
    {WSPNL}*"[:jletter:]"      { return symbol(JLETTERCLASS); }
    {WSPNL}*"[:jletterdigit:]" { return symbol(JLETTERDIGITCLASS); }
    {WSPNL}*"[:letter:]"       { return symbol(LETTERCLASS); }
    {WSPNL}*"[:uppercase:]"    { return symbol(UPPERCLASS); }
    {WSPNL}*"[:lowercase:]"    { return symbol(LOWERCLASS); }
    {WSPNL}*"[:digit:]"        { return symbol(DIGITCLASS); }
    {WSPNL}*"\\d"              { return symbol(DIGITCLASS); }
    {WSPNL}*"\\D"              { return symbol(DIGITCLASSNOT); }
    {WSPNL}*"\\s"              { return symbol(WHITESPACECLASS); }
    {WSPNL}*"\\S"              { return symbol(WHITESPACECLASSNOT); }
    {WSPNL}*"\\w"              { return symbol(WORDCLASS); }
    {WSPNL}*"\\W"              { return symbol(WORDCLASSNOT); }
    {WSPNL}*"\\p{"[^}]*"}"     { String trimmedText = yytext().trim();
                                 String propertyValue = trimmedText.substring(3,trimmedText.length()-1);
                                 IntCharSet set = unicodeProperties.getIntCharSet(propertyValue);
                                 if (null == set) {
                                   throw new ScannerException(file,ErrorMessages.INVALID_UNICODE_PROPERTY, yyline, yycolumn + 3);
                                 }
                                 return symbol(UNIPROPCCLASS, set);
                               }
    {WSPNL}*"\\P{"[^}]*"}"     { String trimmedText = yytext().trim();
                                 String propertyValue = trimmedText.substring(3,trimmedText.length()-1);
                                 IntCharSet set = unicodeProperties.getIntCharSet(propertyValue);
                                 if (null == set) {
                                   throw new ScannerException(file,ErrorMessages.INVALID_UNICODE_PROPERTY, yyline, yycolumn + 3);
                                 }
                                 return symbol(UNIPROPCCLASSNOT, set);
                               }
  }

  . { return symbol(CHAR, yytext().codePointAt(0)); }
}
 | 
						|
 | 
						|
// entered after a '|' at the end of a line inside a macro definition:
// skip the following whitespace/newlines and continue the expression
<EATWSPNL> {WSPNL}+  { yybegin(REGEXP); }
 | 
						|
 | 
						|
 | 
						|
// remainder of a repeat expression {n} or {n,m} after the first number
<REPEATEXP> {
  "}"          { yybegin(REGEXP); return symbol(RBRACE); }
  "," {WSP}* {Number}  { return symbol(REPEAT, Integer.valueOf(yytext().substring(1).trim())); }
  {WSP}+       { }

  <<EOF>>                 { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
}
 | 
						|
 | 
						|
// contents of a character class [...]; balance tracks nested '['
<CHARCLASS> {
  "{"{Ident}"}" { return symbol(MACROUSE, yytext().substring(1,yylength()-1)); }
  "["     { balance++; return symbol(OPENCLASS); }
  "]"     { if (balance > 0) balance--; else yybegin(REGEXP); return symbol(CLOSECLASS); }
  "^"     { return symbol(HAT); }
  "-"     { return symbol(DASH); }
  "--"    { return symbol(DIFFERENCE); }
  "&&"    { return symbol(INTERSECTION); }
  "||"    { /* union is the default operation - '||' can be ignored */ }
  "~~"    { return symbol(SYMMETRICDIFFERENCE); }
  "\\u{"  { yybegin(CHARCLASS_CODEPOINT); }

  // this is a hack to keep JLex compatibility with char class
  // expressions like [+-]
  "-]"    { yypushback(1); yycolumn--; return symbol(CHAR, (int)'-'); }

  \"      { string.setLength(0); nextState = CHARCLASS; yybegin(STRING_CONTENT); }

  .       { return symbol(CHAR, yytext().codePointAt(0)); }

  \n      { throw new ScannerException(file,ErrorMessages.EOL_IN_CHARCLASS,yyline,yycolumn); }

  <<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
}
 | 
						|
 | 
						|
<STRING_CONTENT> {
  // Closing quote: emit the collected text as STRING and continue in the
  // state that was stored in nextState.
  \"       { String content = string.toString();
             yybegin(nextState);
             return symbol(STRING, content);
           }

  // Escaped quote.
  \\\"     { string.append('\"'); }

  // Plain run of characters: no quote, no backslash, no line terminator.
  [^\"\\\u2028\u2029\u000A\u000B\u000C\u000D\u0085]+ { string.append(yytext()); }

  // A line terminator before the closing quote is an error.
  {NL}     { throw new ScannerException(file,ErrorMessages.UNTERMINATED_STR, yyline, yycolumn); }

  // Numeric escapes. The prefix is cut off (two characters for the hex and
  // unicode forms, one for the octal form) and the rest is parsed as digits.
  {HexNumber} { char hexChar = (char) Integer.parseInt(yytext().substring(2), 16);
                string.append(hexChar);
              }
  {OctNumber} { char octChar = (char) Integer.parseInt(yytext().substring(1), 8);
                string.append(octChar);
              }
  {Unicode4}  { char unicodeChar = (char) Integer.parseInt(yytext().substring(2), 16);
                string.append(unicodeChar);
              }
  {Unicode6}  { int codePoint = Integer.parseInt(yytext().substring(2), 16);
                if (codePoint > unicodeProperties.getMaximumCodePoint()) {
                  throw new ScannerException(file,ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn+2);
                }
                string.append(Character.toChars(codePoint));
              }

  // Start of a braced code point sequence inside the string.
  "\\u{"      { yybegin(STRING_CODEPOINT_SEQUENCE); }

  // Single letter escapes.
  \\b { string.append('\b'); }
  \\n { string.append('\n'); }
  \\t { string.append('\t'); }
  \\f { string.append('\f'); }
  \\r { string.append('\r'); }

  // Any other escaped character stands for itself; exactly one code point
  // after the backslash is copied.
  \\. { String escaped = yytext();
        int end = escaped.offsetByCodePoints(1, 1);
        string.append(escaped.substring(1, end));
      }

  // The input ended before the closing quote.
  <<EOF>>     { throw new ScannerException(file,ErrorMessages.EOF_IN_STRING); }
}
 | 
						|
 | 
						|
 | 
						|
<REGEXP, CHARCLASS> {
  // Escapes shared by both states; each one yields a CHAR token whose
  // value is the denoted code point.

  // Numeric escapes: cut off the prefix (two characters for the hex and
  // unicode forms, one for the octal form) and parse the remaining digits.
  {HexNumber} { int hexValue = Integer.parseInt(yytext().substring(2), 16);
                return symbol(CHAR, hexValue);
              }
  {OctNumber} { int octValue = Integer.parseInt(yytext().substring(1), 8);
                return symbol(CHAR, octValue);
              }
  {Unicode4}  { int unicodeValue = Integer.parseInt(yytext().substring(2), 16);
                return symbol(CHAR, unicodeValue);
              }
  {Unicode6}  { int codePoint = Integer.parseInt(yytext().substring(2), 16);
                if (codePoint > unicodeProperties.getMaximumCodePoint()) {
                  throw new ScannerException(file,ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn+2);
                }
                return symbol(CHAR, codePoint);
              }

  // Single letter escapes.
  \\b { return symbol(CHAR, (int)'\b'); }
  \\n { return symbol(CHAR, (int)'\n'); }
  \\t { return symbol(CHAR, (int)'\t'); }
  \\f { return symbol(CHAR, (int)'\f'); }
  \\r { return symbol(CHAR, (int)'\r'); }

  // Any other escaped character stands for itself.
  \\. { int escapedCodePoint = yytext().codePointAt(1);
        return symbol(CHAR, escapedCodePoint);
      }
}
 | 
						|
 | 
						|
 | 
						|
<JAVA_CODE> {
  // An opening brace inside the action text is copied and counted in balance.
  "{"        { actionText.append('{'); balance++; }

  // A closing brace either belongs to a counted opening brace, or, at
  // balance zero, finishes the action: the text collected so far becomes
  // an Action, is recorded in actions and is returned as ACTION token.
  "}"        { if (balance <= 0) {
                 yybegin(REGEXPSTART);
                 Action a = new Action(actionText.toString(), action_line);
                 actions.add(a);
                 return symbol(ACTION, a);
               }
               balance--;
               actionText.append('}');
             }

  // Everything else matched by the JavaCode macro is copied unchanged.
  {JavaCode}     { actionText.append(yytext()); }

  // The input ended inside an action.
  <<EOF>>     { throw new ScannerException(file,ErrorMessages.EOF_IN_ACTION, action_line-1); }
}
 | 
						|
 | 
						|
<COMMENT> {
  // Comments nest: every further opener raises commentbalance.
  "/"+ "*"  { commentbalance++; }

  // A terminator closes one nesting level; the one seen at level zero
  // ends the comment and continues in the state stored in nextState.
  "*"+ "/"  { if (commentbalance <= 0) {
                yybegin(nextState);
              } else {
                commentbalance--;
              }
            }

  // Comment text itself produces nothing.
  {JFlexComment} { }

  // The input ended inside a comment.
  <<EOF>>     { throw new ScannerException(file,ErrorMessages.EOF_IN_COMMENT); }
}
 | 
						|
 | 
						|
<REGEXP_CODEPOINT_SEQUENCE> {
  // The closing brace ends the sequence: everything collected in string
  // is returned as one STRING token and scanning continues in REGEXP.
  "}"             { String codePoints = string.toString();
                    yybegin(REGEXP);
                    return symbol(STRING, codePoints);
                  }

  // One to six hex digits denote one code point, which is appended to
  // string unless it lies above the maximum of the selected Unicode version.
  {HexDigit}{1,6} { int codePoint = Integer.parseInt(yytext(), 16);
                    if (codePoint > unicodeProperties.getMaximumCodePoint()) {
                      throw new ScannerException(file,ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn);
                    }
                    string.append(Character.toChars(codePoint));
                  }

  // Whitespace, line terminators included, is skipped between code points.
  {WSPNL}+        { }

  // The input ended before the closing brace.
  <<EOF>>         { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
}
 | 
						|
 | 
						|
<STRING_CODEPOINT_SEQUENCE> {
  // Specialized form of the code point sequence for use inside strings:
  // line terminators are rejected and no symbol is returned here, the
  // code points are only appended to string.

  // The closing brace hands control back to STRING_CONTENT.
  "}"             { yybegin(STRING_CONTENT); }

  // One to six hex digits denote one code point.
  {HexDigit}{1,6} { int codePoint = Integer.parseInt(yytext(), 16);
                    if (codePoint > unicodeProperties.getMaximumCodePoint()) {
                      throw new ScannerException(file, ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn);
                    }
                    string.append(Character.toChars(codePoint));
                  }

  // A line terminator here means the enclosing string was not closed.
  {NL}            { throw new ScannerException(file,ErrorMessages.UNTERMINATED_STR, yyline, yycolumn); }

  // Blanks between code points are skipped.
  {WSP}+          { }

  // The input ended inside the string.
  <<EOF>>         { throw new ScannerException(file,ErrorMessages.EOF_IN_STRING); }
}
 | 
						|
 | 
						|
<CHARCLASS_CODEPOINT> {
  // Specialized form for character classes: exactly one code point,
  // directly followed by the closing brace, with no whitespace allowed.
  {HexDigit}{1,6} "}" { String hexDigits = yytext().substring(0, yylength() - 1);
                        int codePoint = Integer.parseInt(hexDigits, 16);
                        if (codePoint > unicodeProperties.getMaximumCodePoint()) {
                          throw new ScannerException(file, ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn);
                        }
                        yybegin(CHARCLASS);
                        return symbol(CHAR, codePoint);
                      }

  // The input ended before the escape was complete.
  <<EOF>>             { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
}
 | 
						|
 | 
						|
// Fallback rules without a state list: input that reaches them is
// reported as an error together with the current line and column.
.  {
     throw new ScannerException(file,ErrorMessages.UNEXPECTED_CHAR, yyline, yycolumn);
   }

\R {
     throw new ScannerException(file,ErrorMessages.UNEXPECTED_NL, yyline, yycolumn);
   }
 | 
						|
 | 
						|
// End of the current input. If no further stream is pending, EOF is
// returned. Otherwise the previous entry of files becomes the current
// file again, the stream is popped, and no token is returned here.
<<EOF>>  { if ( !yymoreStreams() ) {
             return symbol(EOF);
           }
           file = (File) files.pop();
           yypopStream();
         }
 |