mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 09:40:21 +00:00
Limit tokens to 64 characters or less (#3922)
This commit is contained in:
@@ -2,6 +2,9 @@
|
|||||||
#include "linguist.h"
|
#include "linguist.h"
|
||||||
#include "lex.linguist_yy.h"
|
#include "lex.linguist_yy.h"
|
||||||
|
|
||||||
|
// Anything longer is unlikely to be useful.
|
||||||
|
#define MAX_TOKEN_LEN 64
|
||||||
|
|
||||||
int linguist_yywrap(yyscan_t yyscanner) {
|
int linguist_yywrap(yyscan_t yyscanner) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -32,19 +35,27 @@ static VALUE rb_tokenizer_extract_tokens(VALUE self, VALUE rb_data) {
|
|||||||
case NO_ACTION:
|
case NO_ACTION:
|
||||||
break;
|
break;
|
||||||
case REGULAR_TOKEN:
|
case REGULAR_TOKEN:
|
||||||
rb_ary_push(ary, rb_str_new2(extra.token));
|
len = strlen(extra.token);
|
||||||
|
if (len <= MAX_TOKEN_LEN)
|
||||||
|
rb_ary_push(ary, rb_str_new(extra.token, len));
|
||||||
free(extra.token);
|
free(extra.token);
|
||||||
break;
|
break;
|
||||||
case SHEBANG_TOKEN:
|
case SHEBANG_TOKEN:
|
||||||
s = rb_str_new2("SHEBANG#!");
|
len = strlen(extra.token);
|
||||||
rb_str_cat2(s, extra.token);
|
if (len <= MAX_TOKEN_LEN) {
|
||||||
rb_ary_push(ary, s);
|
s = rb_str_new2("SHEBANG#!");
|
||||||
|
rb_str_cat(s, extra.token, len);
|
||||||
|
rb_ary_push(ary, s);
|
||||||
|
}
|
||||||
free(extra.token);
|
free(extra.token);
|
||||||
break;
|
break;
|
||||||
case SGML_TOKEN:
|
case SGML_TOKEN:
|
||||||
s = rb_str_new2(extra.token);
|
len = strlen(extra.token);
|
||||||
rb_str_cat2(s, ">");
|
if (len <= MAX_TOKEN_LEN) {
|
||||||
rb_ary_push(ary, s);
|
s = rb_str_new(extra.token, len);
|
||||||
|
rb_str_cat2(s, ">");
|
||||||
|
rb_ary_push(ary, s);
|
||||||
|
}
|
||||||
free(extra.token);
|
free(extra.token);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user