add language recognition for JFlex grammars

JFlex is a lexical analyzer generator for Java, see also
http://jflex.de or https://github.com/jflex-de/jflex
This commit is contained in:
Gerwin Klein
2015-04-10 19:17:58 +10:00
parent 98a23c6a53
commit 9c4c6d908a
3 changed files with 1055 additions and 0 deletions

View File

@@ -1434,6 +1434,14 @@ J:
tm_scope: source.j
ace_mode: text
JFlex:
type: programming
color: "#EBCA30"
extensions:
- .flex
- .jflex
ace_mode: text
JSON:
type: data
tm_scope: source.json

742
samples/JFlex/LexScan.flex Normal file
View File

@@ -0,0 +1,742 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* JFlex 1.7.0-SNAPSHOT *
* Copyright (C) 1998-2015 Gerwin Klein <lsf@jflex.de> *
* All rights reserved. *
* *
* License: BSD *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
package jflex;
import java_cup.runtime.Symbol;
import java.io.*;
import java.util.Stack;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import jflex.unicode.UnicodeProperties;
%%
%final
%public
%class LexScan
%implements sym, java_cup.runtime.Scanner
%function next_token
%type Symbol
%unicode
%column
%line
%eofclose
%state COMMENT, STATELIST, MACROS, REGEXPSTART
%state REGEXP, JAVA_CODE, STATES, STRING_CONTENT
%state CHARCLASS, COPY, REPEATEXP, EATWSPNL
%state CTOR_ARG, REGEXP_CODEPOINT_SEQUENCE
%state STRING_CODEPOINT_SEQUENCE, CHARCLASS_CODEPOINT
%inputstreamctor false
%cupdebug
%{
int balance = 0;
int commentbalance = 0;
int action_line = 0;
int bufferSize = 16384;
File file;
Stack<File> files = new Stack<File>();
StringBuilder userCode = new StringBuilder();
String classCode;
String initCode;
String initThrow;
String eofCode;
String eofThrow;
String lexThrow;
String eofVal;
String scanErrorException;
String cupSymbol = "sym";
StringBuilder actionText = new StringBuilder();
StringBuilder string = new StringBuilder();
private UnicodeProperties unicodeProperties;
boolean charCount;
boolean lineCount;
boolean columnCount;
boolean cupCompatible;
boolean cup2Compatible;
boolean cupDebug;
boolean isInteger;
boolean isIntWrap;
boolean isYYEOF;
boolean notUnix;
boolean isPublic;
boolean isFinal;
boolean isAbstract;
boolean bolUsed;
boolean standalone;
boolean debugOption;
boolean caseless;
boolean inclusive_states;
boolean eofclose;
boolean isASCII;
// TODO: In the version of JFlex after 1.6, the InputStream ctor
// TODO: will never be emitted, and this option will cease to exist.
boolean emitInputStreamCtor = Options.emitInputStreamCtor;
String isImplementing;
String isExtending;
String className = "Yylex";
String functionName;
String tokenType;
String visibility = "public";
List<String> ctorArgs = new ArrayList<String>();
List<String> ctorTypes = new ArrayList<String>();
LexicalStates states = new LexicalStates();
List<Action> actions = new ArrayList<Action>();
private int nextState;
boolean macroDefinition;
Timer t = new Timer();
// CharClasses.init() is delayed until UnicodeProperties.init() has been called,
// since the max char code won't be known until then.
private CharClasses charClasses = new CharClasses();
public CharClasses getCharClasses() {
return charClasses;
}
public int currentLine() {
return yyline;
}
public void setFile(File file) {
this.file = file;
}
private Symbol symbol(int type, Object value) {
return new Symbol(type, yyline, yycolumn, value);
}
private Symbol symbol(int type) {
return new Symbol(type, yyline, yycolumn);
}
// updates line and column count to the beginning of the first
// non whitespace character in yytext, but leaves yyline+yycolumn
// untouched
private Symbol symbol_countUpdate(int type, Object value) {
int lc = yyline;
int cc = yycolumn;
String text = yytext();
for (int i=0; i < text.length(); i++) {
char c = text.charAt(i);
if (c != '\n' && c != '\r' && c != ' ' && c != '\t' )
return new Symbol(type, lc, cc, value);
if (c == '\n') {
lc++;
cc = 0;
}
else
cc++;
}
return new Symbol(type, yyline, yycolumn, value);
}
private String makeMacroIdent() {
String matched = yytext().trim();
return matched.substring(1, matched.length()-1).trim();
}
public static String conc(Object a, Object b) {
if (a == null && b == null) return null;
if (a == null) return b.toString();
if (b == null) return a.toString();
return a.toString()+b.toString();
}
public static String concExc(Object a, Object b) {
if (a == null && b == null) return null;
if (a == null) return b.toString();
if (b == null) return a.toString();
return a.toString()+", "+b.toString();
}
public UnicodeProperties getUnicodeProperties() {
return unicodeProperties;
}
private void populateDefaultVersionUnicodeProperties() {
try {
unicodeProperties = new UnicodeProperties();
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
throw new ScannerException
(file, ErrorMessages.UNSUPPORTED_UNICODE_VERSION, yyline);
}
charClasses.init
(Options.jlex ? 127 : unicodeProperties.getMaximumCodePoint(), this);
}
private void includeFile(String filePath) {
File f = new File(file.getParentFile(), filePath);
if ( !f.canRead() )
throw new ScannerException(file,ErrorMessages.NOT_READABLE, yyline);
// check for cycle
if (files.search(f) > 0)
throw new ScannerException(file,ErrorMessages.FILE_CYCLE, yyline);
try {
yypushStream( new FileReader(f) );
files.push(file);
file = f;
Out.println("Including \""+file+"\"");
}
catch (FileNotFoundException e) {
throw new ScannerException(file,ErrorMessages.NOT_READABLE, yyline);
}
}
%}
%init{
states.insert("YYINITIAL", true);
%init}
Digit = [0-9]
HexDigit = [0-9a-fA-F]
OctDigit = [0-7]
Number = {Digit}+
HexNumber = \\ x {HexDigit} {2}
OctNumber = \\ [0-3]? {OctDigit} {1, 2}
// Unicode4 can encode chars only in the BMP with the 16 bits provided by its
// 4 hex digits.
Unicode4 = \\ u {HexDigit} {4}
// Unicode6 can encode all Unicode chars, both in the BMP and in the
// supplementary planes -- only 21 bits are required as of Unicode 5.0,
// but its six hex digits provide 24 bits.
Unicode6 = \\ U {HexDigit} {6}
// see http://www.unicode.org/unicode/reports/tr18/
WSP = [ \t\b]
WSPNL = [\u2028\u2029\u000A\u000B\u000C\u000D\u0085\t\b\ ]
NWSPNL = [^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\t\b\ ]
NL = [\u2028\u2029\u000A\u000B\u000C\u000D\u0085] | \u000D\u000A
NNL = [^\u2028\u2029\u000A\u000B\u000C\u000D\u0085]
Ident = {IdentStart} {IdentPart}*
QualIdent = {Ident} ( {WSP}* "." {WSP}* {Ident} )*
QUIL = {QualIdent} ( {WSP}* "," {WSP}* {QualIdent} )*
Array = "[" {WSP}* "]"
ParamPart = {IdentStart}|{IdentPart}|"<"|">"|","|{WSP}|"&"|"?"|"."
GenParam = "<" {ParamPart}+ ">"
ClassT = {Ident} ({WSP}* {GenParam})?
QClassT = {QualIdent} ({WSP}* {GenParam})?
ArrType = ({GenParam} {WSP}*)? {QClassT} ({WSP}* {Array})*
IdentStart = [:jletter:]
IdentPart = [:jletterdigit:]
JFlexCommentChar = [^*/]|"/"+[^*/]|"*"+[^*/]
JFlexComment = {JFlexCommentChar}+
/* Java comments */
JavaComment = {TraditionalComment}|{EndOfLineComment}
TraditionalComment = "/*"{CommentContent}\*+"/"
EndOfLineComment = "//".*{NL}
CommentContent = ([^*]|\*+[^*/])*
StringCharacter = [^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\"\\]
CharLiteral = \'([^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\'\\]|{EscapeSequence})\'
StringLiteral = \"({StringCharacter}|{EscapeSequence})*\"
EscapeSequence = \\[^\u2028\u2029\u000A\u000B\u000C\u000D\u0085]|\\+u{HexDigit}{4}|\\[0-3]?{OctDigit}{1,2}
/* \\(b|t|n|f|r|\"|\'|\\|[0-3]?{OctDigit}{1,2}|u{HexDigit}{4}) */
JavaRest = [^\{\}\"\'/]|"/"[^*/]
JavaCode = ({JavaRest}|{StringLiteral}|{CharLiteral}|{JavaComment})+
DottedVersion = [1-9][0-9]*(\.[0-9]+){0,2}
%%
<YYINITIAL> {
"%%".*{NL}? {
t.start();
yybegin(MACROS);
macroDefinition = true;
return symbol(USERCODE,userCode);
}
.*{NL} | .+ { userCode.append(yytext()); }
<<EOF>> { return symbol(EOF); }
}
<MACROS> ("%{"|"%init{"|"%initthrow{"|"%eof{"|"%eofthrow{"|"%yylexthrow{"|"%eofval{").*{NL}
{ string.setLength(0); yybegin(COPY); }
<COPY> {
"%}".*{NL} { classCode = conc(classCode,string); yybegin(MACROS); }
"%init}".*{NL} { initCode = conc(initCode,string); yybegin(MACROS); }
"%initthrow}".*{NL} { initThrow = concExc(initThrow,string); yybegin(MACROS); }
"%eof}".*{NL} { eofCode = conc(eofCode,string); yybegin(MACROS); }
"%eofthrow}".*{NL} { eofThrow = concExc(eofThrow,string); yybegin(MACROS); }
"%yylexthrow}".*{NL} { lexThrow = concExc(lexThrow,string); yybegin(MACROS); }
"%eofval}".*{NL} { eofVal = string.toString(); yybegin(MACROS); }
.*{NL} { string.append(yytext()); }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_MACROS); }
}
<MACROS> ^"%s" ("tate" "s"?)? {WSP}+ { inclusive_states = true; yybegin(STATELIST); }
<MACROS> ^"%x" ("state" "s"?)? {WSP}+ { inclusive_states = false; yybegin(STATELIST); }
<STATELIST> {
{Ident} { states.insert(yytext(),inclusive_states); }
([\ \t]*","[\ \t]*)|([\ \t]+) { }
{NL} { yybegin(MACROS); }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_MACROS); }
}
<MACROS> {
"%char" { charCount = true; }
"%line" { lineCount = true; }
"%column" { columnCount = true; }
"%byaccj" { isInteger = true;
if (eofVal == null)
eofVal = "return 0;";
eofclose = true;
}
"%cup2" { cup2Compatible = true;
isImplementing = concExc(isImplementing, "Scanner");
lineCount = true;
columnCount = true;
if (functionName == null)
functionName = "readNextTerminal";
if (tokenType == null)
tokenType = "ScannerToken<? extends Object>";
if (eofVal == null)
eofVal = "return token(SpecialTerminals.EndOfInputStream);";
if (!Options.jlex) eofclose = true;
return symbol(UNICODE); // %unicode
}
"%cup" { cupCompatible = true;
isImplementing = concExc(isImplementing, "java_cup.runtime.Scanner");
if (functionName == null)
functionName = "next_token";
if (tokenType == null)
tokenType = "java_cup.runtime.Symbol";
if (eofVal == null)
eofVal = "return new java_cup.runtime.Symbol("+cupSymbol+".EOF);";
if (!Options.jlex) eofclose = true;
}
"%cupsym"{WSP}+{QualIdent} {WSP}* { cupSymbol = yytext().substring(8).trim();
if (cupCompatible) Out.warning(ErrorMessages.CUPSYM_AFTER_CUP, yyline); }
"%cupsym"{WSP}+{NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_CUPSYM, yyline); }
"%cupdebug" { cupDebug = true; }
"%eofclose"({WSP}+"true")? { eofclose = true; }
"%eofclose"({WSP}+"false") { eofclose = false; }
"%class"{WSP}+{ClassT} {WSP}* { className = yytext().substring(7).trim(); }
"%ctorarg"{WSP}+{ArrType}{WSP}+ { yybegin(CTOR_ARG); ctorTypes.add(yytext().substring(8).trim()); }
"%function"{WSP}+{Ident} {WSP}* { functionName = yytext().substring(10).trim(); }
"%type"{WSP}+{ArrType} {WSP}* { tokenType = yytext().substring(6).trim(); }
"%integer"|"%int" { isInteger = true; }
"%intwrap" { isIntWrap = true; }
"%yyeof" { isYYEOF = true; }
"%notunix" { notUnix = true; }
"%7bit" { isASCII = true; return symbol(ASCII); }
"%full"|"%8bit" { return symbol(FULL); }
"%16bit" { populateDefaultVersionUnicodeProperties();
return symbol(UNICODE);
}
"%unicode"({WSP}+{DottedVersion})? { String v = yytext().substring(8).trim();
if (v.length() == 0) {
populateDefaultVersionUnicodeProperties();
} else {
try {
unicodeProperties = new UnicodeProperties(v);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
throw new ScannerException
(file, ErrorMessages.UNSUPPORTED_UNICODE_VERSION, yyline);
}
charClasses.init
(Options.jlex ? 127 : unicodeProperties.getMaximumCodePoint(), this);
}
return symbol(UNICODE);
}
"%caseless"|"%ignorecase" { caseless = true; }
"%implements"{WSP}+.* { isImplementing = concExc(isImplementing, yytext().substring(12).trim()); }
"%extends"{WSP}+{QClassT}{WSP}* { isExtending = yytext().substring(9).trim(); }
"%public" { isPublic = true; }
"%apiprivate" { visibility = "private"; Skeleton.makePrivate(); }
"%final" { isFinal = true; }
"%abstract" { isAbstract = true; }
"%debug" { debugOption = true; }
"%standalone" { standalone = true; isInteger = true; }
"%pack" { /* no-op - this is the only generation method */ }
"%include" {WSP}+ .* { includeFile(yytext().substring(9).trim()); }
"%buffer" {WSP}+ {Number} {WSP}* { bufferSize = Integer.parseInt(yytext().substring(8).trim()); }
"%buffer" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.NO_BUFFER_SIZE, yyline); }
"%initthrow" {WSP}+ {QUIL} {WSP}* { initThrow = concExc(initThrow,yytext().substring(11).trim()); }
"%initthrow" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_INITTHROW, yyline); }
"%eofthrow" {WSP}+ {QUIL} {WSP}* { eofThrow = concExc(eofThrow,yytext().substring(10).trim()); }
"%eofthrow" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_EOFTHROW, yyline); }
"%yylexthrow"{WSP}+ {QUIL} {WSP}* { lexThrow = concExc(lexThrow,yytext().substring(12).trim()); }
"%throws" {WSP}+ {QUIL} {WSP}* { lexThrow = concExc(lexThrow,yytext().substring(8).trim()); }
"%yylexthrow"{WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_YYLEXTHROW, yyline); }
"%throws" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_THROW, yyline); }
"%scanerror" {WSP}+ {QualIdent} {WSP}* { scanErrorException = yytext().substring(11).trim(); }
"%scanerror" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_SCANERROR, yyline); }
// TODO: In the version of JFlex after 1.6, the %inputstreamctor directive will become a no-op: the InputStream ctor will never be emitted.
"%inputstreamctor"({WSP}+"true")? { emitInputStreamCtor = true; }
"%inputstreamctor"{WSP}+"false" { emitInputStreamCtor = false; }
{Ident} { return symbol(IDENT, yytext()); }
"="{WSP}* { if (null == unicodeProperties && ! isASCII) {
populateDefaultVersionUnicodeProperties();
}
yybegin(REGEXP);
return symbol(EQUALS);
}
"/*" { nextState = MACROS; yybegin(COMMENT); }
{EndOfLineComment} { }
^"%%" {NNL}* { if (null == unicodeProperties && ! isASCII) {
populateDefaultVersionUnicodeProperties();
}
macroDefinition = false;
yybegin(REGEXPSTART);
return symbol(DELIMITER);
}
"%"{Ident} { throw new ScannerException(file,ErrorMessages.UNKNOWN_OPTION, yyline, yycolumn); }
"%" { throw new ScannerException(file,ErrorMessages.UNKNOWN_OPTION, yyline, yycolumn); }
^{WSP}+"%" { Out.warning(ErrorMessages.NOT_AT_BOL, yyline); yypushback(1); }
{WSP}+ { }
{NL}+ { }
<<EOF>> { if ( yymoreStreams() ) {
file = (File) files.pop();
yypopStream();
}
else
throw new ScannerException(file,ErrorMessages.EOF_IN_MACROS);
}
}
<CTOR_ARG> {
{Ident} {WSP}* { yybegin(MACROS); ctorArgs.add(yytext().trim()); }
[^] { throw new ScannerException(file,ErrorMessages.CTOR_ARG,yyline,yycolumn); }
}
<REGEXPSTART> {
^ {WSP}* "%include" {WSP}+ .* { includeFile(yytext().trim().substring(9).trim()); }
{WSP}* "/*" { nextState = REGEXPSTART; yybegin(COMMENT); }
{WSP}* "<" { yybegin(STATES); return symbol_countUpdate(LESSTHAN, null); }
{WSP}* "}" { return symbol_countUpdate(RBRACE, null); }
{WSP}* "//" {NNL}* { }
{WSP}* "<<EOF>>" {WSPNL}* "{" { actionText.setLength(0); yybegin(JAVA_CODE);
Symbol s = symbol_countUpdate(EOFRULE, null);
action_line = s.left+1;
return s;
}
^ {WSP}* {NWSPNL} { yypushback(yylength()); yybegin(REGEXP); }
{WSP} | {NL} { }
}
<STATES> {
{Ident} { return symbol(IDENT, yytext()); }
"," { return symbol(COMMA); }
{WSPNL}+ { }
// "{" will be caught in REGEXP
">"{WSPNL}* { yybegin(REGEXP); return symbol(MORETHAN); }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_STATES); }
}
<REGEXP> {
"<<EOF>>" {WSPNL}+ "{" { actionText.setLength(0); yybegin(JAVA_CODE); action_line = yyline+1; return symbol(EOFRULE); }
"<<EOF>>" { throw new ScannerException(file,ErrorMessages.EOF_WO_ACTION); }
{WSPNL}*"|"{WSP}*$ { if (macroDefinition) {
yybegin(EATWSPNL);
return symbol(BAR);
}
else {
yybegin(REGEXPSTART);
return symbol(NOACTION);
}
}
// stategroup
"{" { yybegin(REGEXPSTART); return symbol(LBRACE); }
{WSPNL}*"|" { return symbol(BAR); }
{WSPNL}*\" { string.setLength(0); nextState = REGEXP; yybegin(STRING_CONTENT); }
{WSPNL}*"\\u{" { string.setLength(0); yybegin(REGEXP_CODEPOINT_SEQUENCE); }
{WSPNL}*"!" { return symbol(BANG); }
{WSPNL}*"~" { return symbol(TILDE); }
{WSPNL}*"(" { return symbol(OPENBRACKET); }
{WSPNL}*")" { return symbol(CLOSEBRACKET); }
{WSPNL}*"*" { return symbol(STAR); }
{WSPNL}*"+" { return symbol(PLUS); }
{WSPNL}*"?" { return symbol(QUESTION); }
{WSPNL}*"$" { return symbol(DOLLAR); }
{WSPNL}*"^" { bolUsed = true; return symbol(HAT); }
{WSPNL}*"." { return symbol(POINT); }
{WSPNL}*"\\R" { return symbol(NEWLINE); }
{WSPNL}*"[" { yybegin(CHARCLASS); return symbol(OPENCLASS); }
{WSPNL}*"/" { return symbol(LOOKAHEAD); }
{WSPNL}* "{" {WSP}* {Ident} {WSP}* "}" { return symbol_countUpdate(MACROUSE, makeMacroIdent()); }
{WSPNL}* "{" {WSP}* {Number} { yybegin(REPEATEXP);
return symbol(REPEAT,
new Integer(yytext().trim().substring(1).trim()));
}
{WSPNL}+ "{" { actionText.setLength(0); yybegin(JAVA_CODE); action_line = yyline+1; return symbol(REGEXPEND); }
{NL} { if (macroDefinition) { yybegin(MACROS); } return symbol(REGEXPEND); }
{WSPNL}*"/*" { nextState = REGEXP; yybegin(COMMENT); }
{WSPNL}*"//"{NNL}* { }
{WSP}+ { }
<CHARCLASS> {
{WSPNL}*"[:jletter:]" { return symbol(JLETTERCLASS); }
{WSPNL}*"[:jletterdigit:]" { return symbol(JLETTERDIGITCLASS); }
{WSPNL}*"[:letter:]" { return symbol(LETTERCLASS); }
{WSPNL}*"[:uppercase:]" { return symbol(UPPERCLASS); }
{WSPNL}*"[:lowercase:]" { return symbol(LOWERCLASS); }
{WSPNL}*"[:digit:]" { return symbol(DIGITCLASS); }
{WSPNL}*"\\d" { return symbol(DIGITCLASS); }
{WSPNL}*"\\D" { return symbol(DIGITCLASSNOT); }
{WSPNL}*"\\s" { return symbol(WHITESPACECLASS); }
{WSPNL}*"\\S" { return symbol(WHITESPACECLASSNOT); }
{WSPNL}*"\\w" { return symbol(WORDCLASS); }
{WSPNL}*"\\W" { return symbol(WORDCLASSNOT); }
{WSPNL}*"\\p{"[^}]*"}" { String trimmedText = yytext().trim();
String propertyValue = trimmedText.substring(3,trimmedText.length()-1);
IntCharSet set = unicodeProperties.getIntCharSet(propertyValue);
if (null == set) {
throw new ScannerException(file,ErrorMessages.INVALID_UNICODE_PROPERTY, yyline, yycolumn + 3);
}
return symbol(UNIPROPCCLASS, set);
}
{WSPNL}*"\\P{"[^}]*"}" { String trimmedText = yytext().trim();
String propertyValue = trimmedText.substring(3,trimmedText.length()-1);
IntCharSet set = unicodeProperties.getIntCharSet(propertyValue);
if (null == set) {
throw new ScannerException(file,ErrorMessages.INVALID_UNICODE_PROPERTY, yyline, yycolumn + 3);
}
return symbol(UNIPROPCCLASSNOT, set);
}
}
. { return symbol(CHAR, yytext().codePointAt(0)); }
}
<EATWSPNL> {WSPNL}+ { yybegin(REGEXP); }
<REPEATEXP> {
"}" { yybegin(REGEXP); return symbol(RBRACE); }
"," {WSP}* {Number} { return symbol(REPEAT, new Integer(yytext().substring(1).trim())); }
{WSP}+ { }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
}
<CHARCLASS> {
"{"{Ident}"}" { return symbol(MACROUSE, yytext().substring(1,yylength()-1)); }
"[" { balance++; return symbol(OPENCLASS); }
"]" { if (balance > 0) balance--; else yybegin(REGEXP); return symbol(CLOSECLASS); }
"^" { return symbol(HAT); }
"-" { return symbol(DASH); }
"--" { return symbol(DIFFERENCE); }
"&&" { return symbol(INTERSECTION); }
"||" { /* union is the default operation - '||' can be ignored */ }
"~~" { return symbol(SYMMETRICDIFFERENCE); }
"\\u{" { yybegin(CHARCLASS_CODEPOINT); }
// this is a hack to keep JLex compatibilty with char class
// expressions like [+-]
"-]" { yypushback(1); yycolumn--; return symbol(CHAR, (int)'-'); }
\" { string.setLength(0); nextState = CHARCLASS; yybegin(STRING_CONTENT); }
. { return symbol(CHAR, yytext().codePointAt(0)); }
\n { throw new ScannerException(file,ErrorMessages.EOL_IN_CHARCLASS,yyline,yycolumn); }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
}
<STRING_CONTENT> {
\" { yybegin(nextState); return symbol(STRING, string.toString()); }
\\\" { string.append('\"'); }
[^\"\\\u2028\u2029\u000A\u000B\u000C\u000D\u0085]+ { string.append(yytext()); }
{NL} { throw new ScannerException(file,ErrorMessages.UNTERMINATED_STR, yyline, yycolumn); }
{HexNumber} { string.append( (char) Integer.parseInt(yytext().substring(2,yylength()), 16)); }
{OctNumber} { string.append( (char) Integer.parseInt(yytext().substring(1,yylength()), 8)); }
{Unicode4} { string.append( (char) Integer.parseInt(yytext().substring(2,yylength()), 16)); }
{Unicode6} { int codePoint = Integer.parseInt(yytext().substring(2,yylength()), 16);
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
string.append(Character.toChars(codePoint));
} else {
throw new ScannerException(file,ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn+2);
}
}
"\\u{" { yybegin(STRING_CODEPOINT_SEQUENCE); }
\\b { string.append('\b'); }
\\n { string.append('\n'); }
\\t { string.append('\t'); }
\\f { string.append('\f'); }
\\r { string.append('\r'); }
\\. { string.append(yytext().substring(1, yytext().offsetByCodePoints(1, 1))); }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_STRING); }
}
<REGEXP, CHARCLASS> {
{HexNumber} { return symbol(CHAR, Integer.parseInt(yytext().substring(2,yylength()), 16)); }
{OctNumber} { return symbol(CHAR, Integer.parseInt(yytext().substring(1,yylength()), 8)); }
{Unicode4} { return symbol(CHAR, Integer.parseInt(yytext().substring(2,yylength()), 16)); }
{Unicode6} { int codePoint = Integer.parseInt(yytext().substring(2,yylength()), 16);
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
return symbol(CHAR, codePoint);
} else {
throw new ScannerException(file,ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn+2);
}
}
\\b { return symbol(CHAR, (int)'\b'); }
\\n { return symbol(CHAR, (int)'\n'); }
\\t { return symbol(CHAR, (int)'\t'); }
\\f { return symbol(CHAR, (int)'\f'); }
\\r { return symbol(CHAR, (int)'\r'); }
\\. { return symbol(CHAR, yytext().codePointAt(1)); }
}
<JAVA_CODE> {
"{" { balance++; actionText.append('{'); }
"}" { if (balance > 0) {
balance--;
actionText.append('}');
}
else {
yybegin(REGEXPSTART);
Action a = new Action(actionText.toString(), action_line);
actions.add(a);
return symbol(ACTION, a);
}
}
{JavaCode} { actionText.append(yytext()); }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_ACTION, action_line-1); }
}
<COMMENT> {
"/"+ "*" { commentbalance++; }
"*"+ "/" { if (commentbalance > 0)
commentbalance--;
else
yybegin(nextState);
}
{JFlexComment} { /* ignore */ }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_COMMENT); }
}
<REGEXP_CODEPOINT_SEQUENCE> {
"}" { yybegin(REGEXP); return symbol(STRING, string.toString()); }
{HexDigit}{1,6} { int codePoint = Integer.parseInt(yytext(), 16);
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
string.append(Character.toChars(codePoint));
} else {
throw new ScannerException(file,ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn);
}
}
{WSPNL}+ { }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
}
<STRING_CODEPOINT_SEQUENCE> { // Specialized form: newlines disallowed, and doesn't return a symbol
"}" { yybegin(STRING_CONTENT); }
{HexDigit}{1,6} { int codePoint = Integer.parseInt(yytext(), 16);
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
string.append(Character.toChars(codePoint));
} else {
throw new ScannerException(file, ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn);
}
}
{NL} { throw new ScannerException(file,ErrorMessages.UNTERMINATED_STR, yyline, yycolumn); }
{WSP}+ { }
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_STRING); }
}
<CHARCLASS_CODEPOINT> { // Specialized form: only one codepoint allowed, no whitespace allowed
{HexDigit}{1,6} "}" { int codePoint = Integer.parseInt(yytext().substring(0, yylength() - 1), 16);
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
yybegin(CHARCLASS);
return symbol(CHAR, codePoint);
} else {
throw new ScannerException(file, ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn);
}
}
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
}
. { throw new ScannerException(file,ErrorMessages.UNEXPECTED_CHAR, yyline, yycolumn); }
\R { throw new ScannerException(file,ErrorMessages.UNEXPECTED_NL, yyline, yycolumn); }
<<EOF>> { if ( yymoreStreams() ) {
file = (File) files.pop();
yypopStream();
}
else {
return symbol(EOF);
}
}

305
samples/JFlex/java.jflex Normal file
View File

@@ -0,0 +1,305 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Copyright (C) 1998-2015 Gerwin Klein <lsf@jflex.de> *
* All rights reserved. *
* *
* License: BSD *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* Java 1.2 language lexer specification */
/* Use together with unicode.flex for Unicode preprocesssing */
/* and java12.cup for a Java 1.2 parser */
/* Note that this lexer specification is not tuned for speed.
It is in fact quite slow on integer and floating point literals,
because the input is read twice and the methods used to parse
the numbers are not very fast.
For a production quality application (e.g. a Java compiler)
this could be optimized */
import java_cup.runtime.*;
%%
%public
%class Scanner
%implements sym
%unicode
%line
%column
%cup
%cupdebug
%{
StringBuilder string = new StringBuilder();
private Symbol symbol(int type) {
return new JavaSymbol(type, yyline+1, yycolumn+1);
}
private Symbol symbol(int type, Object value) {
return new JavaSymbol(type, yyline+1, yycolumn+1, value);
}
/**
* assumes correct representation of a long value for
* specified radix in scanner buffer from <code>start</code>
* to <code>end</code>
*/
private long parseLong(int start, int end, int radix) {
long result = 0;
long digit;
for (int i = start; i < end; i++) {
digit = Character.digit(yycharat(i),radix);
result*= radix;
result+= digit;
}
return result;
}
%}
/* main character classes */
LineTerminator = \r|\n|\r\n
InputCharacter = [^\r\n]
WhiteSpace = {LineTerminator} | [ \t\f]
/* comments */
Comment = {TraditionalComment} | {EndOfLineComment} |
{DocumentationComment}
TraditionalComment = "/*" [^*] ~"*/" | "/*" "*"+ "/"
EndOfLineComment = "//" {InputCharacter}* {LineTerminator}?
DocumentationComment = "/*" "*"+ [^/*] ~"*/"
/* identifiers */
Identifier = [:jletter:][:jletterdigit:]*
/* integer literals */
DecIntegerLiteral = 0 | [1-9][0-9]*
DecLongLiteral = {DecIntegerLiteral} [lL]
HexIntegerLiteral = 0 [xX] 0* {HexDigit} {1,8}
HexLongLiteral = 0 [xX] 0* {HexDigit} {1,16} [lL]
HexDigit = [0-9a-fA-F]
OctIntegerLiteral = 0+ [1-3]? {OctDigit} {1,15}
OctLongLiteral = 0+ 1? {OctDigit} {1,21} [lL]
OctDigit = [0-7]
/* floating point literals */
FloatLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}? [fF]
DoubleLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}?
FLit1 = [0-9]+ \. [0-9]*
FLit2 = \. [0-9]+
FLit3 = [0-9]+
Exponent = [eE] [+-]? [0-9]+
/* string and character literals */
StringCharacter = [^\r\n\"\\]
SingleCharacter = [^\r\n\'\\]
%state STRING, CHARLITERAL
%%
<YYINITIAL> {
/* keywords */
"abstract" { return symbol(ABSTRACT); }
"boolean" { return symbol(BOOLEAN); }
"break" { return symbol(BREAK); }
"byte" { return symbol(BYTE); }
"case" { return symbol(CASE); }
"catch" { return symbol(CATCH); }
"char" { return symbol(CHAR); }
"class" { return symbol(CLASS); }
"const" { return symbol(CONST); }
"continue" { return symbol(CONTINUE); }
"do" { return symbol(DO); }
"double" { return symbol(DOUBLE); }
"else" { return symbol(ELSE); }
"extends" { return symbol(EXTENDS); }
"final" { return symbol(FINAL); }
"finally" { return symbol(FINALLY); }
"float" { return symbol(FLOAT); }
"for" { return symbol(FOR); }
"default" { return symbol(DEFAULT); }
"implements" { return symbol(IMPLEMENTS); }
"import" { return symbol(IMPORT); }
"instanceof" { return symbol(INSTANCEOF); }
"int" { return symbol(INT); }
"interface" { return symbol(INTERFACE); }
"long" { return symbol(LONG); }
"native" { return symbol(NATIVE); }
"new" { return symbol(NEW); }
"goto" { return symbol(GOTO); }
"if" { return symbol(IF); }
"public" { return symbol(PUBLIC); }
"short" { return symbol(SHORT); }
"super" { return symbol(SUPER); }
"switch" { return symbol(SWITCH); }
"synchronized" { return symbol(SYNCHRONIZED); }
"package" { return symbol(PACKAGE); }
"private" { return symbol(PRIVATE); }
"protected" { return symbol(PROTECTED); }
"transient" { return symbol(TRANSIENT); }
"return" { return symbol(RETURN); }
"void" { return symbol(VOID); }
"static" { return symbol(STATIC); }
"while" { return symbol(WHILE); }
"this" { return symbol(THIS); }
"throw" { return symbol(THROW); }
"throws" { return symbol(THROWS); }
"try" { return symbol(TRY); }
"volatile" { return symbol(VOLATILE); }
"strictfp" { return symbol(STRICTFP); }
/* boolean literals */
"true" { return symbol(BOOLEAN_LITERAL, true); }
"false" { return symbol(BOOLEAN_LITERAL, false); }
/* null literal */
"null" { return symbol(NULL_LITERAL); }
/* separators */
"(" { return symbol(LPAREN); }
")" { return symbol(RPAREN); }
"{" { return symbol(LBRACE); }
"}" { return symbol(RBRACE); }
"[" { return symbol(LBRACK); }
"]" { return symbol(RBRACK); }
";" { return symbol(SEMICOLON); }
"," { return symbol(COMMA); }
"." { return symbol(DOT); }
/* operators */
"=" { return symbol(EQ); }
">" { return symbol(GT); }
"<" { return symbol(LT); }
"!" { return symbol(NOT); }
"~" { return symbol(COMP); }
"?" { return symbol(QUESTION); }
":" { return symbol(COLON); }
"==" { return symbol(EQEQ); }
"<=" { return symbol(LTEQ); }
">=" { return symbol(GTEQ); }
"!=" { return symbol(NOTEQ); }
"&&" { return symbol(ANDAND); }
"||" { return symbol(OROR); }
"++" { return symbol(PLUSPLUS); }
"--" { return symbol(MINUSMINUS); }
"+" { return symbol(PLUS); }
"-" { return symbol(MINUS); }
"*" { return symbol(MULT); }
"/" { return symbol(DIV); }
"&" { return symbol(AND); }
"|" { return symbol(OR); }
"^" { return symbol(XOR); }
"%" { return symbol(MOD); }
"<<" { return symbol(LSHIFT); }
">>" { return symbol(RSHIFT); }
">>>" { return symbol(URSHIFT); }
"+=" { return symbol(PLUSEQ); }
"-=" { return symbol(MINUSEQ); }
"*=" { return symbol(MULTEQ); }
"/=" { return symbol(DIVEQ); }
"&=" { return symbol(ANDEQ); }
"|=" { return symbol(OREQ); }
"^=" { return symbol(XOREQ); }
"%=" { return symbol(MODEQ); }
"<<=" { return symbol(LSHIFTEQ); }
">>=" { return symbol(RSHIFTEQ); }
">>>=" { return symbol(URSHIFTEQ); }
/* string literal */
\" { yybegin(STRING); string.setLength(0); }
/* character literal */
\' { yybegin(CHARLITERAL); }
/* numeric literals */
/* This is matched together with the minus, because the number is too big to
be represented by a positive integer. */
"-2147483648" { return symbol(INTEGER_LITERAL, new Integer(Integer.MIN_VALUE)); }
{DecIntegerLiteral} { return symbol(INTEGER_LITERAL, new Integer(yytext())); }
{DecLongLiteral} { return symbol(INTEGER_LITERAL, new Long(yytext().substring(0,yylength()-1))); }
{HexIntegerLiteral} { return symbol(INTEGER_LITERAL, new Integer((int) parseLong(2, yylength(), 16))); }
{HexLongLiteral} { return symbol(INTEGER_LITERAL, new Long(parseLong(2, yylength()-1, 16))); }
{OctIntegerLiteral} { return symbol(INTEGER_LITERAL, new Integer((int) parseLong(0, yylength(), 8))); }
{OctLongLiteral} { return symbol(INTEGER_LITERAL, new Long(parseLong(0, yylength()-1, 8))); }
{FloatLiteral} { return symbol(FLOATING_POINT_LITERAL, new Float(yytext().substring(0,yylength()-1))); }
{DoubleLiteral} { return symbol(FLOATING_POINT_LITERAL, new Double(yytext())); }
{DoubleLiteral}[dD] { return symbol(FLOATING_POINT_LITERAL, new Double(yytext().substring(0,yylength()-1))); }
/* comments */
{Comment} { /* ignore */ }
/* whitespace */
{WhiteSpace} { /* ignore */ }
/* identifiers */
{Identifier} { return symbol(IDENTIFIER, yytext()); }
}
<STRING> {
\" { yybegin(YYINITIAL); return symbol(STRING_LITERAL, string.toString()); }
{StringCharacter}+ { string.append( yytext() ); }
/* escape sequences */
"\\b" { string.append( '\b' ); }
"\\t" { string.append( '\t' ); }
"\\n" { string.append( '\n' ); }
"\\f" { string.append( '\f' ); }
"\\r" { string.append( '\r' ); }
"\\\"" { string.append( '\"' ); }
"\\'" { string.append( '\'' ); }
"\\\\" { string.append( '\\' ); }
\\[0-3]?{OctDigit}?{OctDigit} { char val = (char) Integer.parseInt(yytext().substring(1),8);
string.append( val ); }
/* error cases */
\\. { throw new RuntimeException("Illegal escape sequence \""+yytext()+"\""); }
{LineTerminator} { throw new RuntimeException("Unterminated string at end of line"); }
}
<CHARLITERAL> {
{SingleCharacter}\' { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, yytext().charAt(0)); }
/* escape sequences */
"\\b"\' { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\b');}
"\\t"\' { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\t');}
"\\n"\' { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\n');}
"\\f"\' { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\f');}
"\\r"\' { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\r');}
"\\\""\' { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\"');}
"\\'"\' { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\'');}
"\\\\"\' { yybegin(YYINITIAL); return symbol(CHARACTER_LITERAL, '\\'); }
\\[0-3]?{OctDigit}?{OctDigit}\' { yybegin(YYINITIAL);
int val = Integer.parseInt(yytext().substring(1,yylength()-1),8);
return symbol(CHARACTER_LITERAL, (char)val); }
/* error cases */
\\. { throw new RuntimeException("Illegal escape sequence \""+yytext()+"\""); }
{LineTerminator} { throw new RuntimeException("Unterminated character literal at end of line"); }
}
/* error fallback */
[^] { throw new RuntimeException("Illegal character \""+yytext()+
"\" at line "+yyline+", column "+yycolumn); }
<<EOF>> { return symbol(EOF); }