Limit tokens to 64 characters or fewer (#3922)

This commit is contained in:
Ashe Connor
2017-12-01 13:41:59 +11:00
committed by GitHub
parent e335d48625
commit 9a8ab45b6f

View File

@@ -2,6 +2,9 @@
#include "linguist.h"
#include "lex.linguist_yy.h"
// Anything longer is unlikely to be useful.
#define MAX_TOKEN_LEN 64
/*
 * Flex wrap callback for the linguist scanner.
 *
 * Returning 1 tells the generated lexer that there is no further input
 * buffer to switch to once EOF is reached, so scanning simply stops.
 */
int linguist_yywrap(yyscan_t yyscanner) {
  (void) yyscanner;  /* scanner state is not consulted here */
  return 1;
}
@@ -32,19 +35,27 @@ static VALUE rb_tokenizer_extract_tokens(VALUE self, VALUE rb_data) {
case NO_ACTION:
break;
case REGULAR_TOKEN:
rb_ary_push(ary, rb_str_new2(extra.token));
len = strlen(extra.token);
if (len <= MAX_TOKEN_LEN)
rb_ary_push(ary, rb_str_new(extra.token, len));
free(extra.token);
break;
case SHEBANG_TOKEN:
len = strlen(extra.token);
if (len <= MAX_TOKEN_LEN) {
s = rb_str_new2("SHEBANG#!");
rb_str_cat2(s, extra.token);
rb_str_cat(s, extra.token, len);
rb_ary_push(ary, s);
}
free(extra.token);
break;
case SGML_TOKEN:
s = rb_str_new2(extra.token);
len = strlen(extra.token);
if (len <= MAX_TOKEN_LEN) {
s = rb_str_new(extra.token, len);
rb_str_cat2(s, ">");
rb_ary_push(ary, s);
}
free(extra.token);
break;
}