%% Copyright (c) 2008-2013 Robert Virding %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at %% %% http://www.apache.org/licenses/LICENSE-2.0 %% %% Unless required by applicable law or agreed to in writing, software %% distributed under the License is distributed on an "AS IS" BASIS, %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. %% File : lfe_scan.xrl %% Author : Robert Virding %% Purpose : Token definitions for Lisp Flavoured Erlang. Definitions. B = [01] O = [0-7] D = [0-9] H = [0-9a-fA-F] B36 = [0-9a-zA-Z] U = [A-Z] L = [a-z] A = ({U}|{L}) DEL = [][()}{";\000-\s] SYM = [^][()}{";\000-\s\177-\237] SSYM = [^][()}{"|;#`',\000-\s\177-\237] WS = ([\000-\s]|;[^\n]*) Rules. %% Bracketed Comments using #| foo |# #{D}*\|[^\|]*\|+([^#\|][^\|]*\|+)*# : block_comment(string:substr(TokenChars, 3)). %% Separators ' : {token,{'\'',TokenLine}}. ` : {token,{'`',TokenLine}}. , : {token,{',',TokenLine}}. ,@ : {token,{',@',TokenLine}}. \. : {token,{'.',TokenLine}}. [][()}{] : {token,{list_to_atom(TokenChars),TokenLine}}. #{D}*[bB]\( : {token,{'#B(',TokenLine}}. #{D}*[mM]\( : {token,{'#M(',TokenLine}}. #{D}*\( : {token,{'#(',TokenLine}}. #{D}*\. : {token,{'#.',TokenLine}}. #{D}*` : {token,{'#`',TokenLine}}. #{D}*; : {token,{'#;',TokenLine}}. #{D}*, : {token,{'#,',TokenLine}}. #{D}*,@ : {token,{'#,@',TokenLine}}. %% Characters #{D}*\\(x{H}+|.) : char_token(skip_past(TokenChars, $\\, $\\), TokenLine). %% Based numbers #{D}*\*{SYM}+ : base_token(skip_past(TokenChars, $*, $*), 2, TokenLine). #{D}*[bB]{SYM}+ : base_token(skip_past(TokenChars, $b, $B), 2, TokenLine). #{D}*[oO]{SYM}+ : base_token(skip_past(TokenChars, $o, $O), 8, TokenLine). #{D}*[dD]{SYM}+ : base_token(skip_past(TokenChars, $d, $D), 10, TokenLine). #{D}*[xX]{SYM}+ : base_token(skip_past(TokenChars, $x, $X), 16, TokenLine). #{D}*[rR]{SYM}+ : %% Scan over digit chars to get base. {Base,[_|Ds]} = base1(tl(TokenChars), 10, 0), base_token(Ds, Base, TokenLine). %% String "(\\x{H}+;|\\.|[^"\\])*" : %% Strip quotes. S = string:substr(TokenChars, 2, TokenLen - 2), {token,{string,TokenLine,chars(S)}}. %% Binary string #"(\\x{H}+;|\\.|[^"\\])*" : %% Strip quotes. S = string:substr(TokenChars, 3, TokenLen - 3), Bin = unicode:characters_to_binary(chars(S), utf8, utf8), {token,{binary,TokenLine,Bin}}. %% Symbols \|(\\x{H}+;|\\.|[^|\\])*\| : %% Strip quotes. S = string:substr(TokenChars, 2, TokenLen - 2), symbol_token(chars(S), TokenLine). %% Funs #'{SSYM}{SYM}*/{D}+ : %% Strip sharpsign single-quote. FunStr = string:substr(TokenChars,3), {token,{'#\'',TokenLine,FunStr}}. %% Atoms [+-]?{D}+ : case catch {ok,list_to_integer(TokenChars)} of {ok,I} -> {token,{number,TokenLine,I}}; _ -> {error,"illegal integer"} end. [+-]?{D}+\.{D}+([eE][+-]?{D}+)? : case catch {ok,list_to_float(TokenChars)} of {ok,F} -> {token,{number,TokenLine,F}}; _ -> {error,"illegal float"} end. {SSYM}{SYM}* : symbol_token(TokenChars, TokenLine). {WS}+ : skip_token. Erlang code. %% Copyright (c) 2008-2013 Robert Virding %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at %% %% http://www.apache.org/licenses/LICENSE-2.0 %% %% Unless required by applicable law or agreed to in writing, software %% distributed under the License is distributed on an "AS IS" BASIS, %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. %% File : lfe_scan.erl %% Author : Robert Virding %% Purpose : Token definitions for Lisp Flavoured Erlang. -export([start_symbol_char/1,symbol_char/1]). -import(string, [substr/2,substr/3]). %% start_symbol_char(Char) -> true | false. %% symbol_char(Char) -> true | false. %% Define start symbol chars and symbol chars. start_symbol_char($#) -> false; start_symbol_char($`) -> false; start_symbol_char($') -> false; %' start_symbol_char($,) -> false; start_symbol_char($|) -> false; %Symbol quote character start_symbol_char(C) -> symbol_char(C). symbol_char($() -> false; symbol_char($)) -> false; symbol_char($[) -> false; symbol_char($]) -> false; symbol_char(${) -> false; symbol_char($}) -> false; symbol_char($") -> false; symbol_char($;) -> false; symbol_char(C) -> ((C > $\s) and (C =< $~)) orelse (C > $\240). %% symbol_token(Chars, Line) -> {token,{symbol,Line,Symbol}} | {error,E}. %% Build a symbol from list of legal characters, else error. symbol_token(Cs, L) -> case catch {ok,list_to_atom(Cs)} of {ok,S} -> {token,{symbol,L,S}}; _ -> {error,"illegal symbol"} end. %% base_token(Chars, Base, Line) -> Integer. %% Convert a string of Base characters into a number. We only allow %% base betqeen 2 and 36, and an optional sign character first. base_token(_, B, _) when B < 2; B > 36 -> {error,"illegal number base"}; base_token([$+|Cs], B, L) -> base_token(Cs, B, +1, L); base_token([$-|Cs], B, L) -> base_token(Cs, B, -1, L); base_token(Cs, B, L) -> base_token(Cs, B, +1, L). base_token(Cs, B, S, L) -> case base1(Cs, B, 0) of {N,[]} -> {token,{number,L,S*N}}; {_,_} -> {error,"illegal based number"} end. base1([C|Cs], Base, SoFar) when C >= $0, C =< $9, C < Base + $0 -> Next = SoFar * Base + (C - $0), base1(Cs, Base, Next); base1([C|Cs], Base, SoFar) when C >= $a, C =< $z, C < Base + $a - 10 -> Next = SoFar * Base + (C - $a + 10), base1(Cs, Base, Next); base1([C|Cs], Base, SoFar) when C >= $A, C =< $Z, C < Base + $A - 10 -> Next = SoFar * Base + (C - $A + 10), base1(Cs, Base, Next); base1([C|Cs], _Base, SoFar) -> {SoFar,[C|Cs]}; base1([], _Base, N) -> {N,[]}. -define(IS_UNICODE(C), ((C >= 0) and (C =< 16#10FFFF))). %% char_token(InputChars, Line) -> {token,{number,L,N}} | {error,E}. %% Convert an input string into the corresponding character. For a %% sequence of hex characters we check resultant is code is in the %% unicode range. char_token([$x,C|Cs], L) -> case base1([C|Cs], 16, 0) of {N,[]} when ?IS_UNICODE(N) -> {token,{number,L,N}}; _ -> {error,"illegal character"} end; char_token([C], L) -> {token,{number,L,C}}. %% chars(InputChars) -> Chars. %% Convert an input string into the corresponding string characters. %% We know that the input string is correct. chars([$\\,$x,C|Cs0]) -> case hex_char(C) of true -> case base1([C|Cs0], 16, 0) of {N,[$;|Cs1]} -> [N|chars(Cs1)]; _Other -> [escape_char($x)|chars([C|Cs0])] end; false -> [escape_char($x)|chars([C|Cs0])] end; chars([$\\,C|Cs]) -> [escape_char(C)|chars(Cs)]; chars([C|Cs]) -> [C|chars(Cs)]; chars([]) -> []. hex_char(C) when C >= $0, C =< $9 -> true; hex_char(C) when C >= $a, C =< $f -> true; hex_char(C) when C >= $A, C =< $F -> true; hex_char(_) -> false. escape_char($b) -> $\b; %\b = BS escape_char($t) -> $\t; %\t = TAB escape_char($n) -> $\n; %\n = LF escape_char($v) -> $\v; %\v = VT escape_char($f) -> $\f; %\f = FF escape_char($r) -> $\r; %\r = CR escape_char($e) -> $\e; %\e = ESC escape_char($s) -> $\s; %\s = SPC escape_char($d) -> $\d; %\d = DEL escape_char(C) -> C. %% Block Comment: %% Provide a sensible error when people attempt to include nested %% comments because currently the parser cannot process them without %% a rebuild. But simply exploding on a '#|' is not going to be that %% helpful. block_comment(TokenChars) -> %% Check we're not opening another comment block. case string:str(TokenChars, "#|") of 0 -> skip_token; %% No nesting found _ -> {error, "illegal nested block comment"} end. %% skip_until(String, Char1, Char2) -> String. %% skip_past(String, Char1, Char2) -> String. %% skip_until([C|_]=Cs, C1, C2) when C =:= C1 ; C =:= C2 -> Cs; %% skip_until([_|Cs], C1, C2) -> skip_until(Cs, C1, C2); %% skip_until([], _, _) -> []. skip_past([C|Cs], C1, C2) when C =:= C1 ; C =:= C2 -> Cs; skip_past([_|Cs], C1, C2) -> skip_past(Cs, C1, C2); skip_past([], _, _) -> [].