mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-28 17:20:22 +00:00
add language recognition for JFlex grammars
JFlex is a lexical analyzer generator for Java, see also http://jflex.de or https://github.com/jflex-de/jflex
This commit is contained in:
@@ -1434,6 +1434,14 @@ J:
|
||||
tm_scope: source.j
|
||||
ace_mode: text
|
||||
|
||||
JFlex:
|
||||
type: programming
|
||||
color: "#EBCA30"
|
||||
extensions:
|
||||
- .flex
|
||||
- .jflex
|
||||
ace_mode: text
|
||||
|
||||
JSON:
|
||||
type: data
|
||||
tm_scope: source.json
|
||||
|
||||
742
samples/JFlex/LexScan.flex
Normal file
742
samples/JFlex/LexScan.flex
Normal file
@@ -0,0 +1,742 @@
|
||||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
* JFlex 1.7.0-SNAPSHOT *
|
||||
* Copyright (C) 1998-2015 Gerwin Klein <lsf@jflex.de> *
|
||||
* All rights reserved. *
|
||||
* *
|
||||
* License: BSD *
|
||||
* *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
package jflex;
|
||||
|
||||
import java_cup.runtime.Symbol;
|
||||
import java.io.*;
|
||||
import java.util.Stack;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import jflex.unicode.UnicodeProperties;
|
||||
|
||||
%%
|
||||
|
||||
%final
|
||||
%public
|
||||
%class LexScan
|
||||
%implements sym, java_cup.runtime.Scanner
|
||||
%function next_token
|
||||
|
||||
%type Symbol
|
||||
%unicode
|
||||
|
||||
%column
|
||||
%line
|
||||
|
||||
%eofclose
|
||||
|
||||
%state COMMENT, STATELIST, MACROS, REGEXPSTART
|
||||
%state REGEXP, JAVA_CODE, STATES, STRING_CONTENT
|
||||
%state CHARCLASS, COPY, REPEATEXP, EATWSPNL
|
||||
%state CTOR_ARG, REGEXP_CODEPOINT_SEQUENCE
|
||||
%state STRING_CODEPOINT_SEQUENCE, CHARCLASS_CODEPOINT
|
||||
|
||||
%inputstreamctor false
|
||||
|
||||
%cupdebug
|
||||
|
||||
%{
|
||||
int balance = 0;
|
||||
int commentbalance = 0;
|
||||
int action_line = 0;
|
||||
int bufferSize = 16384;
|
||||
|
||||
File file;
|
||||
Stack<File> files = new Stack<File>();
|
||||
|
||||
StringBuilder userCode = new StringBuilder();
|
||||
|
||||
String classCode;
|
||||
String initCode;
|
||||
String initThrow;
|
||||
String eofCode;
|
||||
String eofThrow;
|
||||
String lexThrow;
|
||||
String eofVal;
|
||||
String scanErrorException;
|
||||
String cupSymbol = "sym";
|
||||
|
||||
StringBuilder actionText = new StringBuilder();
|
||||
StringBuilder string = new StringBuilder();
|
||||
|
||||
private UnicodeProperties unicodeProperties;
|
||||
|
||||
boolean charCount;
|
||||
boolean lineCount;
|
||||
boolean columnCount;
|
||||
boolean cupCompatible;
|
||||
boolean cup2Compatible;
|
||||
boolean cupDebug;
|
||||
boolean isInteger;
|
||||
boolean isIntWrap;
|
||||
boolean isYYEOF;
|
||||
boolean notUnix;
|
||||
boolean isPublic;
|
||||
boolean isFinal;
|
||||
boolean isAbstract;
|
||||
boolean bolUsed;
|
||||
boolean standalone;
|
||||
boolean debugOption;
|
||||
boolean caseless;
|
||||
boolean inclusive_states;
|
||||
boolean eofclose;
|
||||
boolean isASCII;
|
||||
// TODO: In the version of JFlex after 1.6, the InputStream ctor
|
||||
// TODO: will never be emitted, and this option will cease to exist.
|
||||
boolean emitInputStreamCtor = Options.emitInputStreamCtor;
|
||||
|
||||
String isImplementing;
|
||||
String isExtending;
|
||||
String className = "Yylex";
|
||||
String functionName;
|
||||
String tokenType;
|
||||
String visibility = "public";
|
||||
|
||||
List<String> ctorArgs = new ArrayList<String>();
|
||||
List<String> ctorTypes = new ArrayList<String>();
|
||||
|
||||
LexicalStates states = new LexicalStates();
|
||||
|
||||
List<Action> actions = new ArrayList<Action>();
|
||||
|
||||
private int nextState;
|
||||
|
||||
boolean macroDefinition;
|
||||
|
||||
Timer t = new Timer();
|
||||
|
||||
// CharClasses.init() is delayed until UnicodeProperties.init() has been called,
|
||||
// since the max char code won't be known until then.
|
||||
private CharClasses charClasses = new CharClasses();
|
||||
|
||||
public CharClasses getCharClasses() {
|
||||
return charClasses;
|
||||
}
|
||||
|
||||
public int currentLine() {
|
||||
return yyline;
|
||||
}
|
||||
|
||||
public void setFile(File file) {
|
||||
this.file = file;
|
||||
}
|
||||
|
||||
private Symbol symbol(int type, Object value) {
|
||||
return new Symbol(type, yyline, yycolumn, value);
|
||||
}
|
||||
|
||||
private Symbol symbol(int type) {
|
||||
return new Symbol(type, yyline, yycolumn);
|
||||
}
|
||||
|
||||
// updates line and column count to the beginning of the first
|
||||
// non whitespace character in yytext, but leaves yyline+yycolumn
|
||||
// untouched
|
||||
private Symbol symbol_countUpdate(int type, Object value) {
|
||||
int lc = yyline;
|
||||
int cc = yycolumn;
|
||||
String text = yytext();
|
||||
|
||||
for (int i=0; i < text.length(); i++) {
|
||||
char c = text.charAt(i);
|
||||
|
||||
if (c != '\n' && c != '\r' && c != ' ' && c != '\t' )
|
||||
return new Symbol(type, lc, cc, value);
|
||||
|
||||
if (c == '\n') {
|
||||
lc++;
|
||||
cc = 0;
|
||||
}
|
||||
else
|
||||
cc++;
|
||||
}
|
||||
|
||||
return new Symbol(type, yyline, yycolumn, value);
|
||||
}
|
||||
|
||||
private String makeMacroIdent() {
|
||||
String matched = yytext().trim();
|
||||
return matched.substring(1, matched.length()-1).trim();
|
||||
}
|
||||
|
||||
public static String conc(Object a, Object b) {
|
||||
if (a == null && b == null) return null;
|
||||
if (a == null) return b.toString();
|
||||
if (b == null) return a.toString();
|
||||
|
||||
return a.toString()+b.toString();
|
||||
}
|
||||
|
||||
public static String concExc(Object a, Object b) {
|
||||
if (a == null && b == null) return null;
|
||||
if (a == null) return b.toString();
|
||||
if (b == null) return a.toString();
|
||||
|
||||
return a.toString()+", "+b.toString();
|
||||
}
|
||||
|
||||
public UnicodeProperties getUnicodeProperties() {
|
||||
return unicodeProperties;
|
||||
}
|
||||
|
||||
private void populateDefaultVersionUnicodeProperties() {
|
||||
try {
|
||||
unicodeProperties = new UnicodeProperties();
|
||||
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
|
||||
throw new ScannerException
|
||||
(file, ErrorMessages.UNSUPPORTED_UNICODE_VERSION, yyline);
|
||||
}
|
||||
charClasses.init
|
||||
(Options.jlex ? 127 : unicodeProperties.getMaximumCodePoint(), this);
|
||||
}
|
||||
|
||||
private void includeFile(String filePath) {
|
||||
File f = new File(file.getParentFile(), filePath);
|
||||
if ( !f.canRead() )
|
||||
throw new ScannerException(file,ErrorMessages.NOT_READABLE, yyline);
|
||||
// check for cycle
|
||||
if (files.search(f) > 0)
|
||||
throw new ScannerException(file,ErrorMessages.FILE_CYCLE, yyline);
|
||||
try {
|
||||
yypushStream( new FileReader(f) );
|
||||
files.push(file);
|
||||
file = f;
|
||||
Out.println("Including \""+file+"\"");
|
||||
}
|
||||
catch (FileNotFoundException e) {
|
||||
throw new ScannerException(file,ErrorMessages.NOT_READABLE, yyline);
|
||||
}
|
||||
}
|
||||
%}
|
||||
|
||||
%init{
|
||||
states.insert("YYINITIAL", true);
|
||||
%init}
|
||||
|
||||
|
||||
Digit = [0-9]
|
||||
HexDigit = [0-9a-fA-F]
|
||||
OctDigit = [0-7]
|
||||
|
||||
Number = {Digit}+
|
||||
HexNumber = \\ x {HexDigit} {2}
|
||||
OctNumber = \\ [0-3]? {OctDigit} {1, 2}
|
||||
|
||||
// Unicode4 can encode chars only in the BMP with the 16 bits provided by its
|
||||
// 4 hex digits.
|
||||
Unicode4 = \\ u {HexDigit} {4}
|
||||
|
||||
// Unicode6 can encode all Unicode chars, both in the BMP and in the
|
||||
// supplementary planes -- only 21 bits are required as of Unicode 5.0,
|
||||
// but its six hex digits provide 24 bits.
|
||||
Unicode6 = \\ U {HexDigit} {6}
|
||||
|
||||
// see http://www.unicode.org/unicode/reports/tr18/
|
||||
WSP = [ \t\b]
|
||||
WSPNL = [\u2028\u2029\u000A\u000B\u000C\u000D\u0085\t\b\ ]
|
||||
NWSPNL = [^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\t\b\ ]
|
||||
NL = [\u2028\u2029\u000A\u000B\u000C\u000D\u0085] | \u000D\u000A
|
||||
NNL = [^\u2028\u2029\u000A\u000B\u000C\u000D\u0085]
|
||||
|
||||
Ident = {IdentStart} {IdentPart}*
|
||||
QualIdent = {Ident} ( {WSP}* "." {WSP}* {Ident} )*
|
||||
QUIL = {QualIdent} ( {WSP}* "," {WSP}* {QualIdent} )*
|
||||
Array = "[" {WSP}* "]"
|
||||
ParamPart = {IdentStart}|{IdentPart}|"<"|">"|","|{WSP}|"&"|"?"|"."
|
||||
GenParam = "<" {ParamPart}+ ">"
|
||||
ClassT = {Ident} ({WSP}* {GenParam})?
|
||||
QClassT = {QualIdent} ({WSP}* {GenParam})?
|
||||
ArrType = ({GenParam} {WSP}*)? {QClassT} ({WSP}* {Array})*
|
||||
|
||||
IdentStart = [:jletter:]
|
||||
IdentPart = [:jletterdigit:]
|
||||
|
||||
JFlexCommentChar = [^*/]|"/"+[^*/]|"*"+[^*/]
|
||||
JFlexComment = {JFlexCommentChar}+
|
||||
|
||||
/* Java comments */
|
||||
JavaComment = {TraditionalComment}|{EndOfLineComment}
|
||||
TraditionalComment = "/*"{CommentContent}\*+"/"
|
||||
EndOfLineComment = "//".*{NL}
|
||||
|
||||
CommentContent = ([^*]|\*+[^*/])*
|
||||
|
||||
StringCharacter = [^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\"\\]
|
||||
|
||||
CharLiteral = \'([^\u2028\u2029\u000A\u000B\u000C\u000D\u0085\'\\]|{EscapeSequence})\'
|
||||
StringLiteral = \"({StringCharacter}|{EscapeSequence})*\"
|
||||
|
||||
EscapeSequence = \\[^\u2028\u2029\u000A\u000B\u000C\u000D\u0085]|\\+u{HexDigit}{4}|\\[0-3]?{OctDigit}{1,2}
|
||||
|
||||
/* \\(b|t|n|f|r|\"|\'|\\|[0-3]?{OctDigit}{1,2}|u{HexDigit}{4}) */
|
||||
|
||||
JavaRest = [^\{\}\"\'/]|"/"[^*/]
|
||||
JavaCode = ({JavaRest}|{StringLiteral}|{CharLiteral}|{JavaComment})+
|
||||
|
||||
DottedVersion = [1-9][0-9]*(\.[0-9]+){0,2}
|
||||
|
||||
%%
|
||||
|
||||
<YYINITIAL> {
|
||||
"%%".*{NL}? {
|
||||
t.start();
|
||||
yybegin(MACROS);
|
||||
macroDefinition = true;
|
||||
return symbol(USERCODE,userCode);
|
||||
}
|
||||
.*{NL} | .+ { userCode.append(yytext()); }
|
||||
<<EOF>> { return symbol(EOF); }
|
||||
}
|
||||
|
||||
<MACROS> ("%{"|"%init{"|"%initthrow{"|"%eof{"|"%eofthrow{"|"%yylexthrow{"|"%eofval{").*{NL}
|
||||
{ string.setLength(0); yybegin(COPY); }
|
||||
<COPY> {
|
||||
"%}".*{NL} { classCode = conc(classCode,string); yybegin(MACROS); }
|
||||
"%init}".*{NL} { initCode = conc(initCode,string); yybegin(MACROS); }
|
||||
"%initthrow}".*{NL} { initThrow = concExc(initThrow,string); yybegin(MACROS); }
|
||||
"%eof}".*{NL} { eofCode = conc(eofCode,string); yybegin(MACROS); }
|
||||
"%eofthrow}".*{NL} { eofThrow = concExc(eofThrow,string); yybegin(MACROS); }
|
||||
"%yylexthrow}".*{NL} { lexThrow = concExc(lexThrow,string); yybegin(MACROS); }
|
||||
"%eofval}".*{NL} { eofVal = string.toString(); yybegin(MACROS); }
|
||||
|
||||
.*{NL} { string.append(yytext()); }
|
||||
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_MACROS); }
|
||||
}
|
||||
|
||||
|
||||
<MACROS> ^"%s" ("tate" "s"?)? {WSP}+ { inclusive_states = true; yybegin(STATELIST); }
|
||||
<MACROS> ^"%x" ("state" "s"?)? {WSP}+ { inclusive_states = false; yybegin(STATELIST); }
|
||||
<STATELIST> {
|
||||
{Ident} { states.insert(yytext(),inclusive_states); }
|
||||
([\ \t]*","[\ \t]*)|([\ \t]+) { }
|
||||
{NL} { yybegin(MACROS); }
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_MACROS); }
|
||||
}
|
||||
|
||||
<MACROS> {
|
||||
"%char" { charCount = true; }
|
||||
"%line" { lineCount = true; }
|
||||
"%column" { columnCount = true; }
|
||||
"%byaccj" { isInteger = true;
|
||||
if (eofVal == null)
|
||||
eofVal = "return 0;";
|
||||
eofclose = true;
|
||||
}
|
||||
"%cup2" { cup2Compatible = true;
|
||||
isImplementing = concExc(isImplementing, "Scanner");
|
||||
lineCount = true;
|
||||
columnCount = true;
|
||||
if (functionName == null)
|
||||
functionName = "readNextTerminal";
|
||||
if (tokenType == null)
|
||||
tokenType = "ScannerToken<? extends Object>";
|
||||
if (eofVal == null)
|
||||
eofVal = "return token(SpecialTerminals.EndOfInputStream);";
|
||||
if (!Options.jlex) eofclose = true;
|
||||
return symbol(UNICODE); // %unicode
|
||||
}
|
||||
"%cup" { cupCompatible = true;
|
||||
isImplementing = concExc(isImplementing, "java_cup.runtime.Scanner");
|
||||
if (functionName == null)
|
||||
functionName = "next_token";
|
||||
if (tokenType == null)
|
||||
tokenType = "java_cup.runtime.Symbol";
|
||||
if (eofVal == null)
|
||||
eofVal = "return new java_cup.runtime.Symbol("+cupSymbol+".EOF);";
|
||||
if (!Options.jlex) eofclose = true;
|
||||
}
|
||||
"%cupsym"{WSP}+{QualIdent} {WSP}* { cupSymbol = yytext().substring(8).trim();
|
||||
if (cupCompatible) Out.warning(ErrorMessages.CUPSYM_AFTER_CUP, yyline); }
|
||||
"%cupsym"{WSP}+{NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_CUPSYM, yyline); }
|
||||
"%cupdebug" { cupDebug = true; }
|
||||
"%eofclose"({WSP}+"true")? { eofclose = true; }
|
||||
"%eofclose"({WSP}+"false") { eofclose = false; }
|
||||
"%class"{WSP}+{ClassT} {WSP}* { className = yytext().substring(7).trim(); }
|
||||
"%ctorarg"{WSP}+{ArrType}{WSP}+ { yybegin(CTOR_ARG); ctorTypes.add(yytext().substring(8).trim()); }
|
||||
"%function"{WSP}+{Ident} {WSP}* { functionName = yytext().substring(10).trim(); }
|
||||
"%type"{WSP}+{ArrType} {WSP}* { tokenType = yytext().substring(6).trim(); }
|
||||
"%integer"|"%int" { isInteger = true; }
|
||||
"%intwrap" { isIntWrap = true; }
|
||||
"%yyeof" { isYYEOF = true; }
|
||||
"%notunix" { notUnix = true; }
|
||||
"%7bit" { isASCII = true; return symbol(ASCII); }
|
||||
"%full"|"%8bit" { return symbol(FULL); }
|
||||
"%16bit" { populateDefaultVersionUnicodeProperties();
|
||||
return symbol(UNICODE);
|
||||
}
|
||||
"%unicode"({WSP}+{DottedVersion})? { String v = yytext().substring(8).trim();
|
||||
if (v.length() == 0) {
|
||||
populateDefaultVersionUnicodeProperties();
|
||||
} else {
|
||||
try {
|
||||
unicodeProperties = new UnicodeProperties(v);
|
||||
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
|
||||
throw new ScannerException
|
||||
(file, ErrorMessages.UNSUPPORTED_UNICODE_VERSION, yyline);
|
||||
}
|
||||
charClasses.init
|
||||
(Options.jlex ? 127 : unicodeProperties.getMaximumCodePoint(), this);
|
||||
}
|
||||
return symbol(UNICODE);
|
||||
}
|
||||
|
||||
"%caseless"|"%ignorecase" { caseless = true; }
|
||||
"%implements"{WSP}+.* { isImplementing = concExc(isImplementing, yytext().substring(12).trim()); }
|
||||
"%extends"{WSP}+{QClassT}{WSP}* { isExtending = yytext().substring(9).trim(); }
|
||||
"%public" { isPublic = true; }
|
||||
"%apiprivate" { visibility = "private"; Skeleton.makePrivate(); }
|
||||
"%final" { isFinal = true; }
|
||||
"%abstract" { isAbstract = true; }
|
||||
"%debug" { debugOption = true; }
|
||||
"%standalone" { standalone = true; isInteger = true; }
|
||||
"%pack" { /* no-op - this is the only generation method */ }
|
||||
"%include" {WSP}+ .* { includeFile(yytext().substring(9).trim()); }
|
||||
"%buffer" {WSP}+ {Number} {WSP}* { bufferSize = Integer.parseInt(yytext().substring(8).trim()); }
|
||||
"%buffer" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.NO_BUFFER_SIZE, yyline); }
|
||||
"%initthrow" {WSP}+ {QUIL} {WSP}* { initThrow = concExc(initThrow,yytext().substring(11).trim()); }
|
||||
"%initthrow" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_INITTHROW, yyline); }
|
||||
"%eofthrow" {WSP}+ {QUIL} {WSP}* { eofThrow = concExc(eofThrow,yytext().substring(10).trim()); }
|
||||
"%eofthrow" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_EOFTHROW, yyline); }
|
||||
"%yylexthrow"{WSP}+ {QUIL} {WSP}* { lexThrow = concExc(lexThrow,yytext().substring(12).trim()); }
|
||||
"%throws" {WSP}+ {QUIL} {WSP}* { lexThrow = concExc(lexThrow,yytext().substring(8).trim()); }
|
||||
"%yylexthrow"{WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_YYLEXTHROW, yyline); }
|
||||
"%throws" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_THROW, yyline); }
|
||||
"%scanerror" {WSP}+ {QualIdent} {WSP}* { scanErrorException = yytext().substring(11).trim(); }
|
||||
"%scanerror" {WSP}+ {NNL}* { throw new ScannerException(file,ErrorMessages.QUIL_SCANERROR, yyline); }
|
||||
// TODO: In the version of JFlex after 1.6, the %inputstreamctor directive will become a no-op: the InputStream ctor will never be emitted.
|
||||
"%inputstreamctor"({WSP}+"true")? { emitInputStreamCtor = true; }
|
||||
"%inputstreamctor"{WSP}+"false" { emitInputStreamCtor = false; }
|
||||
|
||||
{Ident} { return symbol(IDENT, yytext()); }
|
||||
"="{WSP}* { if (null == unicodeProperties && ! isASCII) {
|
||||
populateDefaultVersionUnicodeProperties();
|
||||
}
|
||||
yybegin(REGEXP);
|
||||
return symbol(EQUALS);
|
||||
}
|
||||
|
||||
"/*" { nextState = MACROS; yybegin(COMMENT); }
|
||||
|
||||
{EndOfLineComment} { }
|
||||
|
||||
^"%%" {NNL}* { if (null == unicodeProperties && ! isASCII) {
|
||||
populateDefaultVersionUnicodeProperties();
|
||||
}
|
||||
macroDefinition = false;
|
||||
yybegin(REGEXPSTART);
|
||||
return symbol(DELIMITER);
|
||||
}
|
||||
"%"{Ident} { throw new ScannerException(file,ErrorMessages.UNKNOWN_OPTION, yyline, yycolumn); }
|
||||
"%" { throw new ScannerException(file,ErrorMessages.UNKNOWN_OPTION, yyline, yycolumn); }
|
||||
^{WSP}+"%" { Out.warning(ErrorMessages.NOT_AT_BOL, yyline); yypushback(1); }
|
||||
|
||||
{WSP}+ { }
|
||||
{NL}+ { }
|
||||
<<EOF>> { if ( yymoreStreams() ) {
|
||||
file = (File) files.pop();
|
||||
yypopStream();
|
||||
}
|
||||
else
|
||||
throw new ScannerException(file,ErrorMessages.EOF_IN_MACROS);
|
||||
}
|
||||
}
|
||||
|
||||
<CTOR_ARG> {
|
||||
{Ident} {WSP}* { yybegin(MACROS); ctorArgs.add(yytext().trim()); }
|
||||
[^] { throw new ScannerException(file,ErrorMessages.CTOR_ARG,yyline,yycolumn); }
|
||||
}
|
||||
|
||||
<REGEXPSTART> {
|
||||
^ {WSP}* "%include" {WSP}+ .* { includeFile(yytext().trim().substring(9).trim()); }
|
||||
{WSP}* "/*" { nextState = REGEXPSTART; yybegin(COMMENT); }
|
||||
{WSP}* "<" { yybegin(STATES); return symbol_countUpdate(LESSTHAN, null); }
|
||||
{WSP}* "}" { return symbol_countUpdate(RBRACE, null); }
|
||||
{WSP}* "//" {NNL}* { }
|
||||
{WSP}* "<<EOF>>" {WSPNL}* "{" { actionText.setLength(0); yybegin(JAVA_CODE);
|
||||
Symbol s = symbol_countUpdate(EOFRULE, null);
|
||||
action_line = s.left+1;
|
||||
return s;
|
||||
}
|
||||
^ {WSP}* {NWSPNL} { yypushback(yylength()); yybegin(REGEXP); }
|
||||
{WSP} | {NL} { }
|
||||
}
|
||||
|
||||
<STATES> {
|
||||
{Ident} { return symbol(IDENT, yytext()); }
|
||||
"," { return symbol(COMMA); }
|
||||
{WSPNL}+ { }
|
||||
|
||||
// "{" will be caught in REGEXP
|
||||
">"{WSPNL}* { yybegin(REGEXP); return symbol(MORETHAN); }
|
||||
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_STATES); }
|
||||
}
|
||||
|
||||
|
||||
<REGEXP> {
|
||||
"<<EOF>>" {WSPNL}+ "{" { actionText.setLength(0); yybegin(JAVA_CODE); action_line = yyline+1; return symbol(EOFRULE); }
|
||||
"<<EOF>>" { throw new ScannerException(file,ErrorMessages.EOF_WO_ACTION); }
|
||||
|
||||
{WSPNL}*"|"{WSP}*$ { if (macroDefinition) {
|
||||
yybegin(EATWSPNL);
|
||||
return symbol(BAR);
|
||||
}
|
||||
else {
|
||||
yybegin(REGEXPSTART);
|
||||
return symbol(NOACTION);
|
||||
}
|
||||
}
|
||||
|
||||
// stategroup
|
||||
"{" { yybegin(REGEXPSTART); return symbol(LBRACE); }
|
||||
|
||||
{WSPNL}*"|" { return symbol(BAR); }
|
||||
|
||||
{WSPNL}*\" { string.setLength(0); nextState = REGEXP; yybegin(STRING_CONTENT); }
|
||||
{WSPNL}*"\\u{" { string.setLength(0); yybegin(REGEXP_CODEPOINT_SEQUENCE); }
|
||||
{WSPNL}*"!" { return symbol(BANG); }
|
||||
{WSPNL}*"~" { return symbol(TILDE); }
|
||||
{WSPNL}*"(" { return symbol(OPENBRACKET); }
|
||||
{WSPNL}*")" { return symbol(CLOSEBRACKET); }
|
||||
{WSPNL}*"*" { return symbol(STAR); }
|
||||
{WSPNL}*"+" { return symbol(PLUS); }
|
||||
{WSPNL}*"?" { return symbol(QUESTION); }
|
||||
{WSPNL}*"$" { return symbol(DOLLAR); }
|
||||
{WSPNL}*"^" { bolUsed = true; return symbol(HAT); }
|
||||
{WSPNL}*"." { return symbol(POINT); }
|
||||
{WSPNL}*"\\R" { return symbol(NEWLINE); }
|
||||
{WSPNL}*"[" { yybegin(CHARCLASS); return symbol(OPENCLASS); }
|
||||
{WSPNL}*"/" { return symbol(LOOKAHEAD); }
|
||||
|
||||
{WSPNL}* "{" {WSP}* {Ident} {WSP}* "}" { return symbol_countUpdate(MACROUSE, makeMacroIdent()); }
|
||||
{WSPNL}* "{" {WSP}* {Number}   { // Start of a repetition count: drop the opening brace and the
                                 // blanks around the number, then hand the count to the parser.
                                 // Integer.valueOf replaces the deprecated Integer(String)
                                 // constructor; parsing and the resulting value are the same.
                                 yybegin(REPEATEXP);
                                 return symbol(REPEAT,
                                               Integer.valueOf(yytext().trim().substring(1).trim()));
                               }
|
||||
|
||||
{WSPNL}+ "{" { actionText.setLength(0); yybegin(JAVA_CODE); action_line = yyline+1; return symbol(REGEXPEND); }
|
||||
{NL} { if (macroDefinition) { yybegin(MACROS); } return symbol(REGEXPEND); }
|
||||
|
||||
{WSPNL}*"/*" { nextState = REGEXP; yybegin(COMMENT); }
|
||||
|
||||
{WSPNL}*"//"{NNL}* { }
|
||||
|
||||
{WSP}+ { }
|
||||
|
||||
<CHARCLASS> {
|
||||
{WSPNL}*"[:jletter:]" { return symbol(JLETTERCLASS); }
|
||||
{WSPNL}*"[:jletterdigit:]" { return symbol(JLETTERDIGITCLASS); }
|
||||
{WSPNL}*"[:letter:]" { return symbol(LETTERCLASS); }
|
||||
{WSPNL}*"[:uppercase:]" { return symbol(UPPERCLASS); }
|
||||
{WSPNL}*"[:lowercase:]" { return symbol(LOWERCLASS); }
|
||||
{WSPNL}*"[:digit:]" { return symbol(DIGITCLASS); }
|
||||
{WSPNL}*"\\d" { return symbol(DIGITCLASS); }
|
||||
{WSPNL}*"\\D" { return symbol(DIGITCLASSNOT); }
|
||||
{WSPNL}*"\\s" { return symbol(WHITESPACECLASS); }
|
||||
{WSPNL}*"\\S" { return symbol(WHITESPACECLASSNOT); }
|
||||
{WSPNL}*"\\w" { return symbol(WORDCLASS); }
|
||||
{WSPNL}*"\\W" { return symbol(WORDCLASSNOT); }
|
||||
{WSPNL}*"\\p{"[^}]*"}" { String trimmedText = yytext().trim();
|
||||
String propertyValue = trimmedText.substring(3,trimmedText.length()-1);
|
||||
IntCharSet set = unicodeProperties.getIntCharSet(propertyValue);
|
||||
if (null == set) {
|
||||
throw new ScannerException(file,ErrorMessages.INVALID_UNICODE_PROPERTY, yyline, yycolumn + 3);
|
||||
}
|
||||
return symbol(UNIPROPCCLASS, set);
|
||||
}
|
||||
{WSPNL}*"\\P{"[^}]*"}" { String trimmedText = yytext().trim();
|
||||
String propertyValue = trimmedText.substring(3,trimmedText.length()-1);
|
||||
IntCharSet set = unicodeProperties.getIntCharSet(propertyValue);
|
||||
if (null == set) {
|
||||
throw new ScannerException(file,ErrorMessages.INVALID_UNICODE_PROPERTY, yyline, yycolumn + 3);
|
||||
}
|
||||
return symbol(UNIPROPCCLASSNOT, set);
|
||||
}
|
||||
}
|
||||
|
||||
. { return symbol(CHAR, yytext().codePointAt(0)); }
|
||||
}
|
||||
|
||||
<EATWSPNL> {WSPNL}+ { yybegin(REGEXP); }
|
||||
|
||||
|
||||
// Inside a repetition count, after the first number has been read.
<REPEATEXP> {
  "}"                 { yybegin(REGEXP); return symbol(RBRACE); }
  "," {WSP}* {Number} { /* second number: drop the leading comma and parse the rest;
                           Integer.valueOf replaces the deprecated Integer(String) constructor */
                        return symbol(REPEAT, Integer.valueOf(yytext().substring(1).trim()));
                      }
  {WSP}+              { }

  <<EOF>>             { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
}
|
||||
|
||||
<CHARCLASS> {
|
||||
"{"{Ident}"}" { return symbol(MACROUSE, yytext().substring(1,yylength()-1)); }
|
||||
"[" { balance++; return symbol(OPENCLASS); }
|
||||
"]" { if (balance > 0) balance--; else yybegin(REGEXP); return symbol(CLOSECLASS); }
|
||||
"^" { return symbol(HAT); }
|
||||
"-" { return symbol(DASH); }
|
||||
"--" { return symbol(DIFFERENCE); }
|
||||
"&&" { return symbol(INTERSECTION); }
|
||||
"||" { /* union is the default operation - '||' can be ignored */ }
|
||||
"~~" { return symbol(SYMMETRICDIFFERENCE); }
|
||||
"\\u{" { yybegin(CHARCLASS_CODEPOINT); }
|
||||
|
||||
// this is a hack to keep JLex compatibility with char class
|
||||
// expressions like [+-]
|
||||
"-]" { yypushback(1); yycolumn--; return symbol(CHAR, (int)'-'); }
|
||||
|
||||
\" { string.setLength(0); nextState = CHARCLASS; yybegin(STRING_CONTENT); }
|
||||
|
||||
. { return symbol(CHAR, yytext().codePointAt(0)); }
|
||||
|
||||
\n { throw new ScannerException(file,ErrorMessages.EOL_IN_CHARCLASS,yyline,yycolumn); }
|
||||
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
|
||||
}
|
||||
|
||||
<STRING_CONTENT> {
|
||||
\" { yybegin(nextState); return symbol(STRING, string.toString()); }
|
||||
\\\" { string.append('\"'); }
|
||||
[^\"\\\u2028\u2029\u000A\u000B\u000C\u000D\u0085]+ { string.append(yytext()); }
|
||||
|
||||
{NL} { throw new ScannerException(file,ErrorMessages.UNTERMINATED_STR, yyline, yycolumn); }
|
||||
|
||||
{HexNumber} { string.append( (char) Integer.parseInt(yytext().substring(2,yylength()), 16)); }
|
||||
{OctNumber} { string.append( (char) Integer.parseInt(yytext().substring(1,yylength()), 8)); }
|
||||
{Unicode4} { string.append( (char) Integer.parseInt(yytext().substring(2,yylength()), 16)); }
|
||||
{Unicode6} { int codePoint = Integer.parseInt(yytext().substring(2,yylength()), 16);
|
||||
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
|
||||
string.append(Character.toChars(codePoint));
|
||||
} else {
|
||||
throw new ScannerException(file,ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn+2);
|
||||
}
|
||||
}
|
||||
|
||||
"\\u{" { yybegin(STRING_CODEPOINT_SEQUENCE); }
|
||||
|
||||
\\b { string.append('\b'); }
|
||||
\\n { string.append('\n'); }
|
||||
\\t { string.append('\t'); }
|
||||
\\f { string.append('\f'); }
|
||||
\\r { string.append('\r'); }
|
||||
|
||||
\\. { string.append(yytext().substring(1, yytext().offsetByCodePoints(1, 1))); }
|
||||
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_STRING); }
|
||||
}
|
||||
|
||||
|
||||
<REGEXP, CHARCLASS> {
|
||||
{HexNumber} { return symbol(CHAR, Integer.parseInt(yytext().substring(2,yylength()), 16)); }
|
||||
{OctNumber} { return symbol(CHAR, Integer.parseInt(yytext().substring(1,yylength()), 8)); }
|
||||
{Unicode4} { return symbol(CHAR, Integer.parseInt(yytext().substring(2,yylength()), 16)); }
|
||||
{Unicode6} { int codePoint = Integer.parseInt(yytext().substring(2,yylength()), 16);
|
||||
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
|
||||
return symbol(CHAR, codePoint);
|
||||
} else {
|
||||
throw new ScannerException(file,ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn+2);
|
||||
}
|
||||
}
|
||||
|
||||
\\b { return symbol(CHAR, (int)'\b'); }
|
||||
\\n { return symbol(CHAR, (int)'\n'); }
|
||||
\\t { return symbol(CHAR, (int)'\t'); }
|
||||
\\f { return symbol(CHAR, (int)'\f'); }
|
||||
\\r { return symbol(CHAR, (int)'\r'); }
|
||||
|
||||
\\. { return symbol(CHAR, yytext().codePointAt(1)); }
|
||||
}
|
||||
|
||||
|
||||
<JAVA_CODE> {
|
||||
"{" { balance++; actionText.append('{'); }
|
||||
"}" { if (balance > 0) {
|
||||
balance--;
|
||||
actionText.append('}');
|
||||
}
|
||||
else {
|
||||
yybegin(REGEXPSTART);
|
||||
Action a = new Action(actionText.toString(), action_line);
|
||||
actions.add(a);
|
||||
return symbol(ACTION, a);
|
||||
}
|
||||
}
|
||||
|
||||
{JavaCode} { actionText.append(yytext()); }
|
||||
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_ACTION, action_line-1); }
|
||||
}
|
||||
|
||||
<COMMENT> {
|
||||
|
||||
"/"+ "*" { commentbalance++; }
|
||||
"*"+ "/" { if (commentbalance > 0)
|
||||
commentbalance--;
|
||||
else
|
||||
yybegin(nextState);
|
||||
}
|
||||
|
||||
{JFlexComment} { /* ignore */ }
|
||||
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_COMMENT); }
|
||||
}
|
||||
|
||||
<REGEXP_CODEPOINT_SEQUENCE> {
|
||||
"}" { yybegin(REGEXP); return symbol(STRING, string.toString()); }
|
||||
{HexDigit}{1,6} { int codePoint = Integer.parseInt(yytext(), 16);
|
||||
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
|
||||
string.append(Character.toChars(codePoint));
|
||||
} else {
|
||||
throw new ScannerException(file,ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn);
|
||||
}
|
||||
}
|
||||
{WSPNL}+ { }
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
|
||||
}
|
||||
|
||||
<STRING_CODEPOINT_SEQUENCE> { // Specialized form: newlines disallowed, and doesn't return a symbol
|
||||
"}" { yybegin(STRING_CONTENT); }
|
||||
{HexDigit}{1,6} { int codePoint = Integer.parseInt(yytext(), 16);
|
||||
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
|
||||
string.append(Character.toChars(codePoint));
|
||||
} else {
|
||||
throw new ScannerException(file, ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn);
|
||||
}
|
||||
}
|
||||
{NL} { throw new ScannerException(file,ErrorMessages.UNTERMINATED_STR, yyline, yycolumn); }
|
||||
{WSP}+ { }
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_STRING); }
|
||||
}
|
||||
|
||||
<CHARCLASS_CODEPOINT> { // Specialized form: only one codepoint allowed, no whitespace allowed
|
||||
{HexDigit}{1,6} "}" { int codePoint = Integer.parseInt(yytext().substring(0, yylength() - 1), 16);
|
||||
if (codePoint <= unicodeProperties.getMaximumCodePoint()) {
|
||||
yybegin(CHARCLASS);
|
||||
return symbol(CHAR, codePoint);
|
||||
} else {
|
||||
throw new ScannerException(file, ErrorMessages.CODEPOINT_OUT_OF_RANGE, yyline, yycolumn);
|
||||
}
|
||||
}
|
||||
<<EOF>> { throw new ScannerException(file,ErrorMessages.EOF_IN_REGEXP); }
|
||||
}
|
||||
|
||||
. { throw new ScannerException(file,ErrorMessages.UNEXPECTED_CHAR, yyline, yycolumn); }
|
||||
\R { throw new ScannerException(file,ErrorMessages.UNEXPECTED_NL, yyline, yycolumn); }
|
||||
|
||||
<<EOF>> { if ( yymoreStreams() ) {
|
||||
file = (File) files.pop();
|
||||
yypopStream();
|
||||
}
|
||||
else {
|
||||
return symbol(EOF);
|
||||
}
|
||||
}
|
||||
305
samples/JFlex/java.jflex
Normal file
305
samples/JFlex/java.jflex
Normal file
@@ -0,0 +1,305 @@
|
||||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
* Copyright (C) 1998-2015 Gerwin Klein <lsf@jflex.de> *
|
||||
* All rights reserved. *
|
||||
* *
|
||||
* License: BSD *
|
||||
* *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/* Java 1.2 language lexer specification */
|
||||
|
||||
/* Use together with unicode.flex for Unicode preprocessing */
|
||||
/* and java12.cup for a Java 1.2 parser */
|
||||
|
||||
/* Note that this lexer specification is not tuned for speed.
|
||||
It is in fact quite slow on integer and floating point literals,
|
||||
because the input is read twice and the methods used to parse
|
||||
the numbers are not very fast.
|
||||
For a production quality application (e.g. a Java compiler)
|
||||
this could be optimized */
|
||||
|
||||
|
||||
import java_cup.runtime.*;
|
||||
|
||||
%%
|
||||
|
||||
%public
|
||||
%class Scanner
|
||||
%implements sym
|
||||
|
||||
%unicode
|
||||
|
||||
%line
|
||||
%column
|
||||
|
||||
%cup
|
||||
%cupdebug
|
||||
|
||||
%{
|
||||
StringBuilder string = new StringBuilder();
|
||||
|
||||
private Symbol symbol(int type) {
|
||||
return new JavaSymbol(type, yyline+1, yycolumn+1);
|
||||
}
|
||||
|
||||
private Symbol symbol(int type, Object value) {
|
||||
return new JavaSymbol(type, yyline+1, yycolumn+1, value);
|
||||
}
|
||||
|
||||
/**
 * Reads the characters of the current match from index <code>start</code>
 * (inclusive) to <code>end</code> (exclusive) as digits of the given
 * <code>radix</code> and accumulates them into a long.
 *
 * Nothing is validated: the range is assumed to hold a correct
 * representation of a long value in that radix.
 */
private long parseLong(int start, int end, int radix) {
  long value = 0;
  int pos = start;

  while (pos < end) {
    // shift what has been read so far by one digit position, then add the next digit
    value = value * radix + Character.digit(yycharat(pos), radix);
    pos++;
  }

  return value;
}
|
||||
%}
|
||||
|
||||
/* basic character classes: line breaks, everything but line breaks, blanks */
LineTerminator = \r\n | \r | \n
InputCharacter = [^\r\n]

WhiteSpace     = [ \t\f] | {LineTerminator}
|
||||
|
||||
/* comments: the three flavours are defined first, Comment is their union */
TraditionalComment   = "/*" [^*] ~"*/" | "/*" "*"+ "/"
EndOfLineComment     = "//" {InputCharacter}* {LineTerminator}?
DocumentationComment = "/*" "*"+ [^/*] ~"*/"

Comment              = {TraditionalComment} | {EndOfLineComment} | {DocumentationComment}
|
||||
|
||||
/* identifiers: one [:jletter:] followed by any number of [:jletterdigit:]
   (JFlex's predefined classes for Java identifier start / part characters) */
|
||||
Identifier = [:jletter:][:jletterdigit:]*
|
||||
|
||||
/* integer literals: digit classes first, then one int/long pair per radix;
   the long forms end in l or L */
HexDigit          = [0-9a-fA-F]
OctDigit          = [0-7]

DecIntegerLiteral = 0 | [1-9][0-9]*
DecLongLiteral    = {DecIntegerLiteral} [lL]

HexIntegerLiteral = 0 [xX] 0* {HexDigit} {1,8}
HexLongLiteral    = 0 [xX] 0* {HexDigit} {1,16} [lL]

OctIntegerLiteral = 0+ [1-3]? {OctDigit} {1,15}
OctLongLiteral    = 0+ 1? {OctDigit} {1,21} [lL]
|
||||
|
||||
/* floating point literals: three mantissa shapes and an optional exponent;
   the float form additionally ends in f or F */
FLit1         = [0-9]+ \. [0-9]*
FLit2         = \. [0-9]+
FLit3         = [0-9]+
Exponent      = [eE] [+-]? [0-9]+

FloatLiteral  = ({FLit1}|{FLit2}|{FLit3}) {Exponent}? [fF]
DoubleLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}?
|
||||
|
||||
/* characters that may appear unescaped inside a string / character literal:
   anything but a line break, the closing quote and the backslash */
StringCharacter = [^\r\n\"\\]
SingleCharacter = [^\r\n\'\\]

/* lexical states used while scanning the inside of a string / character literal */
%state STRING
%state CHARLITERAL
|
||||
|
||||
%%
|
||||
|
||||
<YYINITIAL> {
|
||||
|
||||
/* keywords, in alphabetical order; each one maps to the token constant of the same name */
"abstract"       { return symbol(ABSTRACT); }
"boolean"        { return symbol(BOOLEAN); }
"break"          { return symbol(BREAK); }
"byte"           { return symbol(BYTE); }
"case"           { return symbol(CASE); }
"catch"          { return symbol(CATCH); }
"char"           { return symbol(CHAR); }
"class"          { return symbol(CLASS); }
"const"          { return symbol(CONST); }
"continue"       { return symbol(CONTINUE); }
"default"        { return symbol(DEFAULT); }
"do"             { return symbol(DO); }
"double"         { return symbol(DOUBLE); }
"else"           { return symbol(ELSE); }
"extends"        { return symbol(EXTENDS); }
"final"          { return symbol(FINAL); }
"finally"        { return symbol(FINALLY); }
"float"          { return symbol(FLOAT); }
"for"            { return symbol(FOR); }
"goto"           { return symbol(GOTO); }
"if"             { return symbol(IF); }
"implements"     { return symbol(IMPLEMENTS); }
"import"         { return symbol(IMPORT); }
"instanceof"     { return symbol(INSTANCEOF); }
"int"            { return symbol(INT); }
"interface"      { return symbol(INTERFACE); }
"long"           { return symbol(LONG); }
"native"         { return symbol(NATIVE); }
"new"            { return symbol(NEW); }
"package"        { return symbol(PACKAGE); }
"private"        { return symbol(PRIVATE); }
"protected"      { return symbol(PROTECTED); }
"public"         { return symbol(PUBLIC); }
"return"         { return symbol(RETURN); }
"short"          { return symbol(SHORT); }
"static"         { return symbol(STATIC); }
"strictfp"       { return symbol(STRICTFP); }
"super"          { return symbol(SUPER); }
"switch"         { return symbol(SWITCH); }
"synchronized"   { return symbol(SYNCHRONIZED); }
"this"           { return symbol(THIS); }
"throw"          { return symbol(THROW); }
"throws"         { return symbol(THROWS); }
"transient"      { return symbol(TRANSIENT); }
"try"            { return symbol(TRY); }
"void"           { return symbol(VOID); }
"volatile"       { return symbol(VOLATILE); }
"while"          { return symbol(WHILE); }
|
||||
|
||||
/* boolean literals carry their value as a Boolean */
"true"           { return symbol(BOOLEAN_LITERAL, Boolean.TRUE); }
"false"          { return symbol(BOOLEAN_LITERAL, Boolean.FALSE); }

/* the null literal carries no value */
"null"           { return symbol(NULL_LITERAL); }
|
||||
|
||||
|
||||
/* separators: the three bracket pairs, then the punctuation */
"("    { return symbol(LPAREN); }
")"    { return symbol(RPAREN); }

"{"    { return symbol(LBRACE); }
"}"    { return symbol(RBRACE); }

"["    { return symbol(LBRACK); }
"]"    { return symbol(RBRACK); }

";"    { return symbol(SEMICOLON); }
","    { return symbol(COMMA); }
"."    { return symbol(DOT); }
|
||||
|
||||
/* operators, grouped by kind; the scanner picks the longest match, so the
   order of these distinct literals does not matter */

/* assignment and conditional */
"="      { return symbol(EQ); }
"?"      { return symbol(QUESTION); }
":"      { return symbol(COLON); }

/* comparison */
"=="     { return symbol(EQEQ); }
"!="     { return symbol(NOTEQ); }
"<"      { return symbol(LT); }
">"      { return symbol(GT); }
"<="     { return symbol(LTEQ); }
">="     { return symbol(GTEQ); }

/* logical */
"!"      { return symbol(NOT); }
"&&"     { return symbol(ANDAND); }
"||"     { return symbol(OROR); }

/* increment and decrement */
"++"     { return symbol(PLUSPLUS); }
"--"     { return symbol(MINUSMINUS); }

/* arithmetic */
"+"      { return symbol(PLUS); }
"-"      { return symbol(MINUS); }
"*"      { return symbol(MULT); }
"/"      { return symbol(DIV); }
"%"      { return symbol(MOD); }

/* bitwise */
"~"      { return symbol(COMP); }
"&"      { return symbol(AND); }
"|"      { return symbol(OR); }
"^"      { return symbol(XOR); }

/* shift */
"<<"     { return symbol(LSHIFT); }
">>"     { return symbol(RSHIFT); }
">>>"    { return symbol(URSHIFT); }

/* compound assignment */
"+="     { return symbol(PLUSEQ); }
"-="     { return symbol(MINUSEQ); }
"*="     { return symbol(MULTEQ); }
"/="     { return symbol(DIVEQ); }
"%="     { return symbol(MODEQ); }
"&="     { return symbol(ANDEQ); }
"|="     { return symbol(OREQ); }
"^="     { return symbol(XOREQ); }
"<<="    { return symbol(LSHIFTEQ); }
">>="    { return symbol(RSHIFTEQ); }
">>>="   { return symbol(URSHIFTEQ); }
|
||||
|
||||
/* opening double quote: empty the string buffer and continue in the STRING state */
\"    { string.setLength(0);
        yybegin(STRING); }

/* opening single quote: continue in the CHARLITERAL state */
\'    { yybegin(CHARLITERAL); }
|
||||
|
||||
/* numeric literals

   Values are boxed with the valueOf factories; the boxed-type constructors
   (new Integer(..), new Long(..), new Float(..), new Double(..)) are
   deprecated. The parsing itself is unchanged. */

/* This is matched together with the minus, because the number is too big to
   be represented by a positive integer. */
"-2147483648"          { return symbol(INTEGER_LITERAL, Integer.valueOf(Integer.MIN_VALUE)); }

/* decimal; the long form drops its trailing l or L before parsing */
{DecIntegerLiteral}    { return symbol(INTEGER_LITERAL, Integer.valueOf(yytext())); }
{DecLongLiteral}       { return symbol(INTEGER_LITERAL, Long.valueOf(yytext().substring(0,yylength()-1))); }

/* hexadecimal; parsing starts at index 2, after the 0x prefix */
{HexIntegerLiteral}    { return symbol(INTEGER_LITERAL, Integer.valueOf((int) parseLong(2, yylength(), 16))); }
{HexLongLiteral}       { return symbol(INTEGER_LITERAL, Long.valueOf(parseLong(2, yylength()-1, 16))); }

/* octal; parsing starts at index 0, at the leading zero(s) */
{OctIntegerLiteral}    { return symbol(INTEGER_LITERAL, Integer.valueOf((int) parseLong(0, yylength(), 8))); }
{OctLongLiteral}       { return symbol(INTEGER_LITERAL, Long.valueOf(parseLong(0, yylength()-1, 8))); }

/* floating point; a trailing f, F, d or D is dropped before parsing */
{FloatLiteral}         { return symbol(FLOATING_POINT_LITERAL, Float.valueOf(yytext().substring(0,yylength()-1))); }
{DoubleLiteral}        { return symbol(FLOATING_POINT_LITERAL, Double.valueOf(yytext())); }
{DoubleLiteral}[dD]    { return symbol(FLOATING_POINT_LITERAL, Double.valueOf(yytext().substring(0,yylength()-1))); }
|
||||
|
||||
/* comments and whitespace produce no token */
{Comment} | {WhiteSpace}    { /* ignore */ }

/* identifiers carry their text as the token value; this rule comes after the
   keyword and literal rules of this state, so those win on equally long matches */
{Identifier}                { return symbol(IDENTIFIER, yytext()); }
|
||||
}
|
||||
|
||||
<STRING> {
  /* closing quote: leave the string state and return the accumulated text */
  \"                             { yybegin(YYINITIAL);
                                   return symbol(STRING_LITERAL, string.toString()); }

  /* a run of ordinary characters is copied into the buffer as is */
  {StringCharacter}+             { string.append(yytext()); }

  /* escape sequences: each one appends the character it stands for */
  "\\b"                          { string.append('\b'); }
  "\\t"                          { string.append('\t'); }
  "\\n"                          { string.append('\n'); }
  "\\f"                          { string.append('\f'); }
  "\\r"                          { string.append('\r'); }
  "\\\""                         { string.append('\"'); }
  "\\'"                          { string.append('\''); }
  "\\\\"                         { string.append('\\'); }

  /* octal escape: the digits after the backslash are the character code */
  \\[0-3]?{OctDigit}?{OctDigit}  { string.append((char) Integer.parseInt(yytext().substring(1), 8)); }

  /* error cases: any other escape, or a line break inside the literal */
  \\.                            { throw new RuntimeException("Illegal escape sequence \""+yytext()+"\""); }
  {LineTerminator}               { throw new RuntimeException("Unterminated string at end of line"); }
}
|
||||
|
||||
<CHARLITERAL> {
  /* every rule matches the character together with the closing quote,
     switches back to YYINITIAL and returns the character as the token value */

  /* an ordinary character */
  {SingleCharacter}\'               { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL, yytext().charAt(0)); }

  /* escape sequences */
  "\\b"\'                           { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL, '\b'); }
  "\\t"\'                           { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL, '\t'); }
  "\\n"\'                           { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL, '\n'); }
  "\\f"\'                           { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL, '\f'); }
  "\\r"\'                           { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL, '\r'); }
  "\\\""\'                          { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL, '\"'); }
  "\\'"\'                           { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL, '\''); }
  "\\\\"\'                          { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL, '\\'); }

  /* octal escape: the digits between the backslash and the closing quote are the character code */
  \\[0-3]?{OctDigit}?{OctDigit}\'   { yybegin(YYINITIAL);
                                      return symbol(CHARACTER_LITERAL,
                                                    (char) Integer.parseInt(yytext().substring(1, yylength()-1), 8)); }

  /* error cases: any other escape, or a line break inside the literal */
  \\.                               { throw new RuntimeException("Illegal escape sequence \""+yytext()+"\""); }
  {LineTerminator}                  { throw new RuntimeException("Unterminated character literal at end of line"); }
}
|
||||
|
||||
/* error fallback: any character that no other rule accepts.
   yyline and yycolumn count from 0, so 1 is added to report the same
   1-based position that symbol() puts into the tokens. */
[^]      { throw new RuntimeException("Illegal character \""+yytext()+
                                      "\" at line "+(yyline+1)+", column "+(yycolumn+1)); }

/* end of input */
<<EOF>>  { return symbol(EOF); }
|
||||
Reference in New Issue
Block a user