From 733ef6319378ab8c76dc72681bf2f435083dbcae Mon Sep 17 00:00:00 2001 From: Nate Whetsell Date: Wed, 22 Feb 2017 03:24:50 -0500 Subject: [PATCH] Add Jison (#3488) --- .gitmodules | 3 + grammars.yml | 4 + lib/linguist/languages.yml | 16 + samples/Jison Lex/classy.jisonlex | 39 ++ samples/Jison Lex/lex_grammar.jisonlex | 29 ++ samples/Jison/ansic.jison | 418 +++++++++++++++++++++ samples/Jison/classy.jison | 84 +++++ samples/Jison/lex.jison | 145 +++++++ vendor/grammars/language-jison | 1 + vendor/licenses/grammar/language-jison.txt | 12 + 10 files changed, 751 insertions(+) create mode 100644 samples/Jison Lex/classy.jisonlex create mode 100644 samples/Jison Lex/lex_grammar.jisonlex create mode 100644 samples/Jison/ansic.jison create mode 100644 samples/Jison/classy.jison create mode 100644 samples/Jison/lex.jison create mode 160000 vendor/grammars/language-jison create mode 100644 vendor/licenses/grammar/language-jison.txt diff --git a/.gitmodules b/.gitmodules index 4ab05cd4..2e8110c7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -830,3 +830,6 @@ [submodule "vendor/grammars/atom-language-p4"] path = vendor/grammars/atom-language-p4 url = https://github.com/TakeshiTseng/atom-language-p4 +[submodule "vendor/grammars/language-jison"] + path = vendor/grammars/language-jison + url = https://github.com/cdibbs/language-jison diff --git a/grammars.yml b/grammars.yml index caf574a3..256597c2 100755 --- a/grammars.yml +++ b/grammars.yml @@ -403,6 +403,10 @@ vendor/grammars/language-javascript: - source.js - source.js.regexp - source.js.regexp.replacement +vendor/grammars/language-jison: +- source.jison +- source.jisonlex +- source.jisonlex-injection vendor/grammars/language-jsoniq: - source.jq - source.xq diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index f182d18d..15211f6f 100755 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -2023,6 +2023,22 @@ JavaScript: interpreters: - node language_id: 183 +Jison: + type: programming + 
group: Yacc + extensions: + - ".jison" + tm_scope: source.jison + ace_mode: text + language_id: 284531423 +Jison Lex: + type: programming + group: Lex + extensions: + - ".jisonlex" + tm_scope: source.jisonlex + ace_mode: text + language_id: 406395330 Julia: type: programming extensions: diff --git a/samples/Jison Lex/classy.jisonlex b/samples/Jison Lex/classy.jisonlex new file mode 100644 index 00000000..e0e7edf6 --- /dev/null +++ b/samples/Jison Lex/classy.jisonlex @@ -0,0 +1,39 @@ +digit [0-9] +id [a-zA-Z][a-zA-Z0-9]* + +%% +"//".* /* ignore comment */ +"main" return 'MAIN'; +"class" return 'CLASS'; +"extends" return 'EXTENDS'; +"nat" return 'NATTYPE'; +"if" return 'IF'; +"else" return 'ELSE'; +"for" return 'FOR'; +"printNat" return 'PRINTNAT'; +"readNat" return 'READNAT'; +"this" return 'THIS'; +"new" return 'NEW'; +"var" return 'VAR'; +"null" return 'NUL'; +{digit}+ return 'NATLITERAL'; +{id} return 'ID'; +"==" return 'EQUALITY'; +"=" return 'ASSIGN'; +"+" return 'PLUS'; +"-" return 'MINUS'; +"*" return 'TIMES'; +">" return 'GREATER'; +"||" return 'OR'; +"!" return 'NOT'; +"." return 'DOT'; +"{" return 'LBRACE'; +"}" return 'RBRACE'; +"(" return 'LPAREN'; +")" return 'RPAREN'; +";" return 'SEMICOLON'; +\s+ /* skip whitespace */ +. throw 'Illegal character'; /* catch-all: any character no rule above matched */ +<<EOF>> return 'ENDOFFILE'; /* end of input */ + + diff --git a/samples/Jison Lex/lex_grammar.jisonlex b/samples/Jison Lex/lex_grammar.jisonlex new file mode 100644 index 00000000..77178888 --- /dev/null +++ b/samples/Jison Lex/lex_grammar.jisonlex @@ -0,0 +1,29 @@ + +%% +\n+ {yy.freshLine = true;} +\s+ {yy.freshLine = false;} +"y{"[^}]*"}" {yytext = yytext.substr(2, yyleng - 3); return 'ACTION';} +[a-zA-Z_][a-zA-Z0-9_-]* {return 'NAME';} +'"'([^"]|'\"')*'"' {return 'STRING_LIT';} +"'"([^']|"\'")*"'" {return 'STRING_LIT';} +"|" {return '|';} +"["("\]"|[^\]])*"]" {return 'ANY_GROUP_REGEX';} +"(" {return '(';} +")" {return ')';} +"+" {return '+';} +"*" {return '*';} +"?" 
{return '?';} +"^" {return '^';} +"/" {return '/';} +"\\"[a-zA-Z0] {return 'ESCAPE_CHAR';} +"$" {return '$';} +"<<EOF>>" {/* a literal <<EOF>> in the spec is tokenized like '$' */ return '$';} +"." {return '.';} +"%%" {return '%%';} +"{"\d+(","\s?\d+|",")?"}" {return 'RANGE_REGEX';} +/"{" %{if (yy.freshLine) { this.input('{'); return '{'; } else { this.unput('y'); }%} +"}" %{return '}';%} +"%{"(.|\n)*?"}%" {yytext = yytext.substr(2, yyleng - 4); return 'ACTION';} +. {/* ignore bad characters */} +<<EOF>> {/* end of input */ return 'EOF';} + diff --git a/samples/Jison/ansic.jison b/samples/Jison/ansic.jison new file mode 100644 index 00000000..ef0ac557 --- /dev/null +++ b/samples/Jison/ansic.jison @@ -0,0 +1,418 @@ +%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF +%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP +%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN +%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN +%token XOR_ASSIGN OR_ASSIGN TYPE_NAME + +%token TYPEDEF EXTERN STATIC AUTO REGISTER +%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID +%token STRUCT UNION ENUM ELLIPSIS + +%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN + +%nonassoc IF_WITHOUT_ELSE +%nonassoc ELSE + +%start translation_unit +%% + +primary_expression + : IDENTIFIER + | CONSTANT + | STRING_LITERAL + | '(' expression ')' + ; + +postfix_expression + : primary_expression + | postfix_expression '[' expression ']' + | postfix_expression '(' ')' + | postfix_expression '(' argument_expression_list ')' + | postfix_expression '.' 
IDENTIFIER + | postfix_expression PTR_OP IDENTIFIER + | postfix_expression INC_OP + | postfix_expression DEC_OP + ; + +argument_expression_list + : assignment_expression + | argument_expression_list ',' assignment_expression + ; + +unary_expression + : postfix_expression + | INC_OP unary_expression + | DEC_OP unary_expression + | unary_operator cast_expression + | SIZEOF unary_expression + | SIZEOF '(' type_name ')' + ; + +unary_operator + : '&' + | '*' + | '+' + | '-' + | '~' + | '!' + ; + +cast_expression + : unary_expression + | '(' type_name ')' cast_expression + ; + +multiplicative_expression + : cast_expression + | multiplicative_expression '*' cast_expression + | multiplicative_expression '/' cast_expression + | multiplicative_expression '%' cast_expression + ; + +additive_expression + : multiplicative_expression + | additive_expression '+' multiplicative_expression + | additive_expression '-' multiplicative_expression + ; + +shift_expression + : additive_expression + | shift_expression LEFT_OP additive_expression + | shift_expression RIGHT_OP additive_expression + ; + +relational_expression + : shift_expression + | relational_expression '<' shift_expression + | relational_expression '>' shift_expression + | relational_expression LE_OP shift_expression + | relational_expression GE_OP shift_expression + ; + +equality_expression + : relational_expression + | equality_expression EQ_OP relational_expression + | equality_expression NE_OP relational_expression + ; + +and_expression + : equality_expression + | and_expression '&' equality_expression + ; + +exclusive_or_expression + : and_expression + | exclusive_or_expression '^' and_expression + ; + +inclusive_or_expression + : exclusive_or_expression + | inclusive_or_expression '|' exclusive_or_expression + ; + +logical_and_expression + : inclusive_or_expression + | logical_and_expression AND_OP inclusive_or_expression + ; + +logical_or_expression + : logical_and_expression + | logical_or_expression OR_OP 
logical_and_expression + ; + +conditional_expression + : logical_or_expression + | logical_or_expression '?' expression ':' conditional_expression + ; + +assignment_expression + : conditional_expression + | unary_expression assignment_operator assignment_expression + ; + +assignment_operator + : '=' + | MUL_ASSIGN + | DIV_ASSIGN + | MOD_ASSIGN + | ADD_ASSIGN + | SUB_ASSIGN + | LEFT_ASSIGN + | RIGHT_ASSIGN + | AND_ASSIGN + | XOR_ASSIGN + | OR_ASSIGN + ; + +expression + : assignment_expression + | expression ',' assignment_expression + ; + +constant_expression + : conditional_expression + ; + +declaration + : declaration_specifiers ';' + | declaration_specifiers init_declarator_list ';' + ; + +declaration_specifiers + : storage_class_specifier + | storage_class_specifier declaration_specifiers + | type_specifier + | type_specifier declaration_specifiers + | type_qualifier + | type_qualifier declaration_specifiers + ; + +init_declarator_list + : init_declarator + | init_declarator_list ',' init_declarator + ; + +init_declarator + : declarator + | declarator '=' initializer + ; + +storage_class_specifier + : TYPEDEF + | EXTERN + | STATIC + | AUTO + | REGISTER + ; + +type_specifier + : VOID + | CHAR + | SHORT + | INT + | LONG + | FLOAT + | DOUBLE + | SIGNED + | UNSIGNED + | struct_or_union_specifier + | enum_specifier + | TYPE_NAME + ; + +struct_or_union_specifier + : struct_or_union IDENTIFIER '{' struct_declaration_list '}' + | struct_or_union '{' struct_declaration_list '}' + | struct_or_union IDENTIFIER + ; + +struct_or_union + : STRUCT + | UNION + ; + +struct_declaration_list + : struct_declaration + | struct_declaration_list struct_declaration + ; + +struct_declaration + : specifier_qualifier_list struct_declarator_list ';' + ; + +specifier_qualifier_list + : type_specifier specifier_qualifier_list + | type_specifier + | type_qualifier specifier_qualifier_list + | type_qualifier + ; + +struct_declarator_list + : struct_declarator + | struct_declarator_list ',' 
struct_declarator + ; + +struct_declarator + : declarator + | ':' constant_expression + | declarator ':' constant_expression + ; + +enum_specifier + : ENUM '{' enumerator_list '}' + | ENUM IDENTIFIER '{' enumerator_list '}' + | ENUM IDENTIFIER + ; + +enumerator_list + : enumerator + | enumerator_list ',' enumerator + ; + +enumerator + : IDENTIFIER + | IDENTIFIER '=' constant_expression + ; + +type_qualifier + : CONST + | VOLATILE + ; + +declarator + : pointer direct_declarator + | direct_declarator + ; + +direct_declarator + : IDENTIFIER + | '(' declarator ')' + | direct_declarator '[' constant_expression ']' + | direct_declarator '[' ']' + | direct_declarator '(' parameter_type_list ')' + | direct_declarator '(' identifier_list ')' + | direct_declarator '(' ')' + ; + +pointer + : '*' + | '*' type_qualifier_list + | '*' pointer + | '*' type_qualifier_list pointer + ; + +type_qualifier_list + : type_qualifier + | type_qualifier_list type_qualifier + ; + + +parameter_type_list + : parameter_list + | parameter_list ',' ELLIPSIS + ; + +parameter_list + : parameter_declaration + | parameter_list ',' parameter_declaration + ; + +parameter_declaration + : declaration_specifiers declarator + | declaration_specifiers abstract_declarator + | declaration_specifiers + ; + +identifier_list + : IDENTIFIER + | identifier_list ',' IDENTIFIER + ; + +type_name + : specifier_qualifier_list + | specifier_qualifier_list abstract_declarator + ; + +abstract_declarator + : pointer + | direct_abstract_declarator + | pointer direct_abstract_declarator + ; + +direct_abstract_declarator + : '(' abstract_declarator ')' + | '[' ']' + | '[' constant_expression ']' + | direct_abstract_declarator '[' ']' + | direct_abstract_declarator '[' constant_expression ']' + | '(' ')' + | '(' parameter_type_list ')' + | direct_abstract_declarator '(' ')' + | direct_abstract_declarator '(' parameter_type_list ')' + ; + +initializer + : assignment_expression + | '{' initializer_list '}' + | '{' 
initializer_list ',' '}' + ; + +initializer_list + : initializer + | initializer_list ',' initializer + ; + +statement + : labeled_statement + | compound_statement + | expression_statement + | selection_statement + | iteration_statement + | jump_statement + ; + +labeled_statement + : IDENTIFIER ':' statement + | CASE constant_expression ':' statement + | DEFAULT ':' statement + ; + +compound_statement + : '{' '}' + | '{' statement_list '}' + | '{' declaration_list '}' + | '{' declaration_list statement_list '}' + ; + +declaration_list + : declaration + | declaration_list declaration + ; + +statement_list + : statement + | statement_list statement + ; + +expression_statement + : ';' + | expression ';' + ; + +selection_statement + : IF '(' expression ')' statement %prec IF_WITHOUT_ELSE + | IF '(' expression ')' statement ELSE statement + | SWITCH '(' expression ')' statement + ; + +iteration_statement + : WHILE '(' expression ')' statement + | DO statement WHILE '(' expression ')' ';' + | FOR '(' expression_statement expression_statement ')' statement + | FOR '(' expression_statement expression_statement expression ')' statement + ; + +jump_statement + : GOTO IDENTIFIER ';' + | CONTINUE ';' + | BREAK ';' + | RETURN ';' + | RETURN expression ';' + ; + +translation_unit + : external_declaration + | translation_unit external_declaration + ; + +external_declaration + : function_definition + | declaration + ; + +function_definition + : declaration_specifiers declarator declaration_list compound_statement + | declaration_specifiers declarator compound_statement + | declarator declaration_list compound_statement + | declarator compound_statement + ; diff --git a/samples/Jison/classy.jison b/samples/Jison/classy.jison new file mode 100644 index 00000000..5623014a --- /dev/null +++ b/samples/Jison/classy.jison @@ -0,0 +1,84 @@ + +/* description: ClassyLang grammar. Very classy. 
*/ +/* + To build parser: + + $ ./bin/jison examples/classy.jison examples/classy.jisonlex + +*/ + + +/* author: Zach Carter */ + +%right ASSIGN +%left OR +%nonassoc EQUALITY GREATER +%left PLUS MINUS +%left TIMES +%right NOT +%left DOT + +%% + +pgm + : cdl MAIN LBRACE vdl el RBRACE ENDOFFILE + ; + +cdl + : c cdl + | + ; + +c + : CLASS id EXTENDS id LBRACE vdl mdl RBRACE + ; + +vdl + : VAR t id SEMICOLON vdl + | + ; + +mdl + : t id LPAREN t id RPAREN LBRACE vdl el RBRACE mdl + | + ; + +t + : NATTYPE + | id + ; + +id + : ID + ; + +el + : e SEMICOLON el + | e SEMICOLON + ; + +e + : NATLITERAL + | NUL + | id + | NEW id + | THIS + | IF LPAREN e RPAREN LBRACE el RBRACE ELSE LBRACE el RBRACE + | FOR LPAREN e SEMICOLON e SEMICOLON e RPAREN LBRACE el RBRACE + | READNAT LPAREN RPAREN + | PRINTNAT LPAREN e RPAREN + | e PLUS e + | e MINUS e + | e TIMES e + | e EQUALITY e + | e GREATER e + | NOT e + | e OR e + | e DOT id + | id ASSIGN e + | e DOT id ASSIGN e + | id LPAREN e RPAREN + | e DOT id LPAREN e RPAREN + | LPAREN e RPAREN + ; + diff --git a/samples/Jison/lex.jison b/samples/Jison/lex.jison new file mode 100644 index 00000000..050bbdd4 --- /dev/null +++ b/samples/Jison/lex.jison @@ -0,0 +1,145 @@ + +// `%nonassoc` tells the parser compiler (JISON) that these tokens cannot occur more than once, +// i.e. input like '//a' (tokens '/', '/' and 'a') is not a legal input while '/a' (tokens '/' and 'a') +// *is* legal input for this grammar. + +%nonassoc '/' '/!' + +// Likewise for `%left`: this informs the LALR(1) grammar compiler (JISON) that these tokens +// *can* occur repeatedly, e.g. 'a?*' and even 'a**' are considered legal inputs given this +// grammar! +// +// Token `RANGE_REGEX` may seem the odd one out here but really isn't: given the `regex_base` +// choice/rule `regex_base range_regex`, which is recursive, this grammar tells JISON that +// any input matching a sequence like `regex_base range_regex range_regex` *is* legal. 
+// If you do not want that to be legal, you MUST adjust the grammar rule set to match your +// actual intent. + +%left '*' '+' '?' RANGE_REGEX + + +%% + +lex + : definitions include '%%' rules '%%' EOF + {{ $$ = {macros: $1, rules: $4}; + if ($2) $$.actionInclude = $2; + return $$; }} + | definitions include '%%' rules EOF + {{ $$ = {macros: $1, rules: $4}; + if ($2) $$.actionInclude = $2; + return $$; }} + ; + +include + : action + | + ; + +definitions + : definitions definition + { $$ = $1; $$.push($2); /* append the [name, regex] pair in place, as `rules` does */ } + | definition + { $$ = [$1]; } + ; + +definition + : name regex + { $$ = [$1, $2]; } + ; + +name + : NAME + { $$ = yytext; } + ; + +rules + : rules rule + { $$ = $1; $$.push($2); } + | rule + { $$ = [$1]; } + ; + +rule + : regex action + { $$ = [$1, $2]; } + ; + +action + : ACTION + { $$ = yytext; } + ; + +regex + : start_caret regex_list end_dollar + { $$ = $1+$2+$3; } + ; + +start_caret + : '^' + { $$ = '^'; } + | + { $$ = ''; } + ; + +end_dollar + : '$' + { $$ = '$'; } + | + { $$ = ''; } + ; + +regex_list + : regex_list '|' regex_chain + { $$ = $1+'|'+$3; } + | regex_chain + ; + +regex_chain + : regex_chain regex_base + { $$ = $1+$2;} + | regex_base + { $$ = $1;} + ; + +regex_base + : '(' regex_list ')' + { $$ = '('+$2+')'; } + | regex_base '+' + { $$ = $1+'+'; } + | regex_base '*' + { $$ = $1+'*'; } + | regex_base '?' + { $$ = $1+'?'; } + | '/' regex_base + { $$ = '(?=' + $regex_base + ')'; } + | '/!' regex_base + { $$ = '(?!' + $regex_base + ')'; } + | name_expansion + | regex_base range_regex + { $$ = $1+$2; } + | any_group_regex + | '.' 
+ { $$ = '.'; } + | string + ; + +name_expansion + : '{' name '}' + {{ $$ = '{'+$2+'}'; }} + ; + +any_group_regex + : ANY_GROUP_REGEX + { $$ = yytext; } + ; + +range_regex + : RANGE_REGEX + { $$ = yytext; } + ; + +string + : STRING_LIT + { $$ = yy.prepareString(yytext.substr(1, yyleng-2)); } + ; diff --git a/vendor/grammars/language-jison b/vendor/grammars/language-jison new file mode 160000 index 00000000..1cf15347 --- /dev/null +++ b/vendor/grammars/language-jison @@ -0,0 +1 @@ +Subproject commit 1cf15347b96f832c8335b3bf2e1fa73a47913de9 diff --git a/vendor/licenses/grammar/language-jison.txt b/vendor/licenses/grammar/language-jison.txt new file mode 100644 index 00000000..93b13a59 --- /dev/null +++ b/vendor/licenses/grammar/language-jison.txt @@ -0,0 +1,12 @@ +--- +type: grammar +name: language-jison +license: mit +--- +Copyright © 2014–2017 Chris Dibbern + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.