Limit tokens to 64 characters or less (#3922)

Ashe Connor authored 2017-12-01 13:41:59 +11:00, committed by GitHub
parent e335d48625
commit 9a8ab45b6f


@@ -2,6 +2,9 @@
 #include "linguist.h"
 #include "lex.linguist_yy.h"
 
+// Anything longer is unlikely to be useful.
+#define MAX_TOKEN_LEN 64
+
 int linguist_yywrap(yyscan_t yyscanner) {
 	return 1;
 }
@@ -32,19 +35,27 @@ static VALUE rb_tokenizer_extract_tokens(VALUE self, VALUE rb_data) {
 		case NO_ACTION:
 			break;
 		case REGULAR_TOKEN:
-			rb_ary_push(ary, rb_str_new2(extra.token));
+			len = strlen(extra.token);
+			if (len <= MAX_TOKEN_LEN)
+				rb_ary_push(ary, rb_str_new(extra.token, len));
 			free(extra.token);
 			break;
 		case SHEBANG_TOKEN:
-			s = rb_str_new2("SHEBANG#!");
-			rb_str_cat2(s, extra.token);
-			rb_ary_push(ary, s);
+			len = strlen(extra.token);
+			if (len <= MAX_TOKEN_LEN) {
+				s = rb_str_new2("SHEBANG#!");
+				rb_str_cat(s, extra.token, len);
+				rb_ary_push(ary, s);
+			}
 			free(extra.token);
 			break;
 		case SGML_TOKEN:
-			s = rb_str_new2(extra.token);
-			rb_str_cat2(s, ">");
-			rb_ary_push(ary, s);
+			len = strlen(extra.token);
+			if (len <= MAX_TOKEN_LEN) {
+				s = rb_str_new(extra.token, len);
+				rb_str_cat2(s, ">");
+				rb_ary_push(ary, s);
+			}
 			free(extra.token);
 			break;
 		}
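The change keeps the switch's control flow intact: each token is still freed on every path, but it is only pushed onto the result array when it is at most MAX_TOKEN_LEN (64) bytes long. Because the length is now computed up front, the code also moves from the NUL-terminated rb_str_new2 / rb_str_cat2 helpers to rb_str_new / rb_str_cat, which take an explicit length and avoid a second strlen. Below is a minimal sketch of the same length filter outside the Ruby C API; the keep_token helper and the sample tokens are illustrative and not part of the commit.

#include <stdio.h>
#include <string.h>

/* Same threshold as the commit: anything longer is unlikely to be useful. */
#define MAX_TOKEN_LEN 64

/* Returns 1 if the token is short enough to keep, 0 if it should be dropped. */
static int keep_token(const char *token) {
	return strlen(token) <= MAX_TOKEN_LEN;
}

int main(void) {
	const char *tokens[] = {
		"printf",          /* ordinary identifier: kept */
		"SHEBANG#!ruby",   /* shebang-style token: kept */
		/* 70 characters, e.g. a chunk of minified or generated text: dropped */
		"xxxxxxxxxx" "xxxxxxxxxx" "xxxxxxxxxx" "xxxxxxxxxx"
		"xxxxxxxxxx" "xxxxxxxxxx" "xxxxxxxxxx",
	};

	for (size_t i = 0; i < sizeof(tokens) / sizeof(tokens[0]); i++)
		printf("%s  %.20s\n", keep_token(tokens[i]) ? "keep" : "drop", tokens[i]);

	return 0;
}

In the real extension the check is applied to extra.token before the Ruby string is built, so over-long tokens never reach the classifier's sample data at all.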