Files
linguist/samples/Jison/lex.jison
2017-02-22 00:24:50 -08:00

146 lines
2.8 KiB
Plaintext

// `%nonassoc` tells the parser compiler (JISON) that these tokens cannot occur more than once,
// i.e. input like '//a' (tokens '/', '/' and 'a') is not a legal input while '/a' (tokens '/' and 'a')
// *is* legal input for this grammar.
%nonassoc '/' '/!'
// Likewise for `%left`: this informs the LALR(1) grammar compiler (JISON) that these tokens
// *can* occur repeatedly, e.g. 'a?*' and even 'a**' are considered legal inputs given this
// grammar!
//
// Token `RANGE_REGEX` may seem the odd one out here but really isn't: given the `regex_base`
// choice/rule `regex_base range_regex`, which is recursive, this grammar tells JISON that
// any input matching a sequence like `regex_base range_regex range_regex` *is* legal.
// If you do not want that to be legal, you MUST adjust the grammar rule set you match your
// actual intent.
%left '*' '+' '?' RANGE_REGEX
%%
lex
: definitions include '%%' rules '%%' EOF
{{ $$ = {macros: $1, rules: $4};
if ($2) $$.actionInclude = $2;
return $$; }}
| definitions include '%%' rules EOF
{{ $$ = {macros: $1, rules: $4};
if ($2) $$.actionInclude = $2;
return $$; }}
;
include
: action
|
;
definitions
: definitions definition
{ $$ = $1; $$.concat($2); }
| definition
{ $$ = [$1]; }
;
definition
: name regex
{ $$ = [$1, $2]; }
;
name
: NAME
{ $$ = yytext; }
;
rules
: rules rule
{ $$ = $1; $$.push($2); }
| rule
{ $$ = [$1]; }
;
rule
: regex action
{ $$ = [$1, $2]; }
;
action
: ACTION
{ $$ = yytext; }
;
regex
: start_caret regex_list end_dollar
{ $$ = $1+$2+$3; }
;
start_caret
: '^'
{ $$ = '^'; }
|
{ $$ = ''; }
;
end_dollar
: '$'
{ $$ = '$'; }
|
{ $$ = ''; }
;
regex_list
: regex_list '|' regex_chain
{ $$ = $1+'|'+$3; }
| regex_chain
;
regex_chain
: regex_chain regex_base
{ $$ = $1+$2;}
| regex_base
{ $$ = $1;}
;
regex_base
: '(' regex_list ')'
{ $$ = '('+$2+')'; }
| regex_base '+'
{ $$ = $1+'+'; }
| regex_base '*'
{ $$ = $1+'*'; }
| regex_base '?'
{ $$ = $1+'?'; }
| '/' regex_base
{ $$ = '(?=' + $regex_base + ')'; }
| '/!' regex_base
{ $$ = '(?!' + $regex_base + ')'; }
| name_expansion
| regex_base range_regex
{ $$ = $1+$2; }
| any_group_regex
| '.'
{ $$ = '.'; }
| string
;
name_expansion
: '{' name '}'
{{ $$ = '{'+$2+'}'; }}
;
any_group_regex
: ANY_GROUP_REGEX
{ $$ = yytext; }
;
range_regex
: RANGE_REGEX
{ $$ = yytext; }
;
string
: STRING_LIT
{ $$ = yy.prepareString(yytext.substr(1, yyleng-2)); }
;