Add Jison (#3488)

2026-05-12 16:36:08 +00:00 · 2017-02-22 03:24:50 -05:00
parent 9ca6a5841e
commit 733ef63193
10 changed files with 751 additions and 0 deletions
--- a/samples/Jison/lex.jison
+++ b/samples/Jison/lex.jison
@@ -0,0 +1,145 @@
+
+// `%nonassoc` tells the parser compiler (JISON) that these tokens cannot occur more than once,
+// i.e. input like '//a' (tokens '/', '/' and 'a') is not a legal input while '/a' (tokens '/' and 'a')
+// *is* legal input for this grammar.
+ 
+%nonassoc '/' '/!'
+
+// Likewise for `%left`: this informs the LALR(1) grammar compiler (JISON) that these tokens
+// *can* occur repeatedly, e.g. 'a?*' and even 'a**' are considered legal inputs given this
+// grammar!
+//
+// Token `RANGE_REGEX` may seem the odd one out here but really isn't: given the `regex_base`
+// choice/rule `regex_base range_regex`, which is recursive, this grammar tells JISON that 
+// any input matching a sequence like `regex_base range_regex range_regex` *is* legal.
+// If you do not want that to be legal, you MUST adjust the grammar rule set you match your
+// actual intent.
+
+%left '*' '+' '?' RANGE_REGEX
+
+
+%% 
+
+lex 
+    : definitions include '%%' rules '%%' EOF
+        {{ $$ = {macros: $1, rules: $4};
+          if ($2) $$.actionInclude = $2;
+          return $$; }}
+    | definitions include '%%' rules EOF
+        {{ $$ = {macros: $1, rules: $4};
+          if ($2) $$.actionInclude = $2;
+          return $$; }}
+    ;
+
+include
+    : action
+    |
+    ;
+
+definitions
+    : definitions definition
+        { $$ = $1; $$.concat($2); }
+    | definition
+        { $$ = [$1]; }
+    ;
+
+definition
+    : name regex
+        { $$ = [$1, $2]; }
+    ;
+
+name
+    : NAME
+        { $$ = yytext; }
+    ;
+
+rules
+    : rules rule
+        { $$ = $1; $$.push($2); }
+    | rule
+        { $$ = [$1]; }
+    ;
+
+rule
+    : regex action
+        { $$ = [$1, $2]; }
+    ;
+
+action
+    : ACTION 
+        { $$ = yytext; }
+    ;
+
+regex
+    : start_caret regex_list end_dollar
+        { $$ = $1+$2+$3; }
+    ;
+
+start_caret
+    : '^'
+        { $$ = '^'; }
+    |
+        { $$ = ''; }
+    ;
+
+end_dollar
+    : '$'
+        { $$ = '$'; }
+    |
+        { $$ = ''; }
+    ;
+
+regex_list
+    : regex_list '|' regex_chain
+        { $$ = $1+'|'+$3; }
+    | regex_chain
+    ;
+
+regex_chain
+    : regex_chain regex_base
+        { $$ = $1+$2;}
+    | regex_base
+        { $$ = $1;}
+    ;
+
+regex_base
+    : '(' regex_list ')'
+        { $$ = '('+$2+')'; }
+    | regex_base '+'
+        { $$ = $1+'+'; }
+    | regex_base '*'
+        { $$ = $1+'*'; }
+    | regex_base '?'
+        { $$ = $1+'?'; }
+    | '/' regex_base
+        { $$ = '(?=' + $regex_base + ')'; }
+    | '/!' regex_base
+        { $$ = '(?!' + $regex_base + ')'; }
+    | name_expansion
+    | regex_base range_regex
+        { $$ = $1+$2; }
+    | any_group_regex
+    | '.'
+        { $$ = '.'; }
+    | string
+    ;
+
+name_expansion
+    : '{' name '}'
+        {{ $$ = '{'+$2+'}'; }}
+    ;
+
+any_group_regex
+    : ANY_GROUP_REGEX
+        { $$ = yytext; }
+    ;
+
+range_regex
+    : RANGE_REGEX
+        { $$ = yytext; }
+    ;
+
+string
+    : STRING_LIT
+        { $$ = yy.prepareString(yytext.substr(1, yyleng-2)); }
+    ;