mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 09:40:21 +00:00
Lexer crash fix (#3900)
* input may return 0 for EOF Stops overruns into fread from nothing. * remove two trailing contexts * fix up sgml tokens
This commit is contained in:
7
Rakefile
7
Rakefile
@@ -47,15 +47,10 @@ task :samples => :compile do
|
||||
File.write 'lib/linguist/samples.json', json
|
||||
end
|
||||
|
||||
FLEX_MIN_VER = [2, 5, 39]
|
||||
task :flex do
|
||||
if `flex -V` !~ /^flex (\d+)\.(\d+)\.(\d+)/
|
||||
if `flex -V` !~ /^flex \d+\.\d+\.\d+/
|
||||
fail "flex not detected"
|
||||
end
|
||||
maj, min, rev = $1.to_i, $2.to_i, $3.to_i
|
||||
if maj < FLEX_MIN_VER[0] || (maj == FLEX_MIN_VER[0] && (min < FLEX_MIN_VER[1] || (min == FLEX_MIN_VER[1] && rev < FLEX_MIN_VER[2])))
|
||||
fail "building linguist's lexer requires at least flex #{FLEX_MIN_VER.join(".")}"
|
||||
end
|
||||
system "cd ext/linguist && flex tokenizer.l"
|
||||
end
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -11,7 +11,7 @@
|
||||
#define FLEX_SCANNER
|
||||
#define YY_FLEX_MAJOR_VERSION 2
|
||||
#define YY_FLEX_MINOR_VERSION 5
|
||||
#define YY_FLEX_SUBMINOR_VERSION 39
|
||||
#define YY_FLEX_SUBMINOR_VERSION 35
|
||||
#if YY_FLEX_SUBMINOR_VERSION > 0
|
||||
#define FLEX_BETA
|
||||
#endif
|
||||
@@ -49,6 +49,7 @@ typedef int16_t flex_int16_t;
|
||||
typedef uint16_t flex_uint16_t;
|
||||
typedef int32_t flex_int32_t;
|
||||
typedef uint32_t flex_uint32_t;
|
||||
typedef uint64_t flex_uint64_t;
|
||||
#else
|
||||
typedef signed char flex_int8_t;
|
||||
typedef short int flex_int16_t;
|
||||
@@ -56,6 +57,7 @@ typedef int flex_int32_t;
|
||||
typedef unsigned char flex_uint8_t;
|
||||
typedef unsigned short int flex_uint16_t;
|
||||
typedef unsigned int flex_uint32_t;
|
||||
#endif /* ! C99 */
|
||||
|
||||
/* Limits of integral types. */
|
||||
#ifndef INT8_MIN
|
||||
@@ -86,8 +88,6 @@ typedef unsigned int flex_uint32_t;
|
||||
#define UINT32_MAX (4294967295U)
|
||||
#endif
|
||||
|
||||
#endif /* ! C99 */
|
||||
|
||||
#endif /* ! FLEXINT_H */
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -130,15 +130,7 @@ typedef void* yyscan_t;
|
||||
|
||||
/* Size of default input buffer. */
|
||||
#ifndef YY_BUF_SIZE
|
||||
#ifdef __ia64__
|
||||
/* On IA-64, the buffer size is 16k, not 8k.
|
||||
* Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
|
||||
* Ditto for the __ia64__ case accordingly.
|
||||
*/
|
||||
#define YY_BUF_SIZE 32768
|
||||
#else
|
||||
#define YY_BUF_SIZE 16384
|
||||
#endif /* __ia64__ */
|
||||
#endif
|
||||
|
||||
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
|
||||
@@ -277,10 +269,6 @@ int linguist_yyget_lineno (yyscan_t yyscanner );
|
||||
|
||||
void linguist_yyset_lineno (int line_number ,yyscan_t yyscanner );
|
||||
|
||||
int linguist_yyget_column (yyscan_t yyscanner );
|
||||
|
||||
void linguist_yyset_column (int column_no ,yyscan_t yyscanner );
|
||||
|
||||
/* Macros after this point can all be overridden by user definitions in
|
||||
* section 1.
|
||||
*/
|
||||
@@ -307,12 +295,7 @@ static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
|
||||
|
||||
/* Amount of stuff to slurp up with each read. */
|
||||
#ifndef YY_READ_BUF_SIZE
|
||||
#ifdef __ia64__
|
||||
/* On IA-64, the buffer size is 16k, not 8k */
|
||||
#define YY_READ_BUF_SIZE 16384
|
||||
#else
|
||||
#define YY_READ_BUF_SIZE 8192
|
||||
#endif /* __ia64__ */
|
||||
#endif
|
||||
|
||||
/* Number of entries by which start-condition stack grows. */
|
||||
@@ -345,9 +328,9 @@ extern int linguist_yylex (yyscan_t yyscanner);
|
||||
#undef YY_DECL
|
||||
#endif
|
||||
|
||||
#line 117 "tokenizer.l"
|
||||
#line 118 "tokenizer.l"
|
||||
|
||||
|
||||
#line 352 "lex.linguist_yy.h"
|
||||
#line 335 "lex.linguist_yy.h"
|
||||
#undef linguist_yyIN_HEADER
|
||||
#endif /* linguist_yyHEADER_H */
|
||||
|
||||
@@ -9,25 +9,25 @@
|
||||
|
||||
#define eat_until_eol() do { \
|
||||
int c; \
|
||||
while ((c = input(yyscanner)) != '\n' && c != EOF); \
|
||||
if (c == EOF) \
|
||||
yyterminate(); \
|
||||
while ((c = input(yyscanner)) != '\n' && c != EOF && c); \
|
||||
if (c == EOF || !c) \
|
||||
return 0; \
|
||||
} while (0)
|
||||
|
||||
#define eat_until_unescaped(q) do { \
|
||||
int c; \
|
||||
while ((c = input(yyscanner)) != EOF) { \
|
||||
while ((c = input(yyscanner)) != EOF && c) { \
|
||||
if (c == '\n') \
|
||||
break; \
|
||||
if (c == '\\') { \
|
||||
c = input(yyscanner); \
|
||||
if (c == EOF) \
|
||||
yyterminate(); \
|
||||
if (c == EOF || !c) \
|
||||
return 0; \
|
||||
} else if (c == q) \
|
||||
break; \
|
||||
} \
|
||||
if (c == EOF) \
|
||||
yyterminate(); \
|
||||
if (c == EOF || !c) \
|
||||
return 0; \
|
||||
} while (0)
|
||||
|
||||
%}
|
||||
@@ -84,7 +84,7 @@
|
||||
\" { eat_until_unescaped('"'); }
|
||||
' { eat_until_unescaped('\''); }
|
||||
(0x[0-9a-fA-F]([0-9a-fA-F]|\.)*|[0-9]([0-9]|\.)*)([uU][lL]{0,2}|([eE][-+][0-9]*)?[fFlL]*) { /* nothing */ }
|
||||
\<[^ \t\n\r<>]+/>|" "[^<>\n]{0,2048}> {
|
||||
\<[[:alnum:]_!./?-]+ {
|
||||
if (strcmp(yytext, "<!--") == 0) {
|
||||
BEGIN(xml_comment);
|
||||
} else {
|
||||
@@ -93,8 +93,8 @@
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
<sgml>[[:alnum:]_]+=/\" { feed_token(strdup(yytext), REGULAR_TOKEN); input(yyscanner); eat_until_unescaped('"'); return 1; }
|
||||
<sgml>[[:alnum:]_]+=/' { feed_token(strdup(yytext), REGULAR_TOKEN); input(yyscanner); eat_until_unescaped('\''); return 1; }
|
||||
<sgml>[[:alnum:]_]+=\" { feed_token(strndup(yytext, strlen(yytext) - 1), REGULAR_TOKEN); eat_until_unescaped('"'); return 1; }
|
||||
<sgml>[[:alnum:]_]+=' { feed_token(strndup(yytext, strlen(yytext) - 1), REGULAR_TOKEN); eat_until_unescaped('\''); return 1; }
|
||||
<sgml>[[:alnum:]_]+=[[:alnum:]_]* { feed_token(strdup(yytext), REGULAR_TOKEN); *(strchr(yyextra->token, '=') + 1) = 0; return 1; }
|
||||
<sgml>[[:alnum:]_]+ { feed_token(strdup(yytext), REGULAR_TOKEN); return 1; }
|
||||
<sgml>\> { BEGIN(INITIAL); }
|
||||
|
||||
Reference in New Issue
Block a user