From b183fcca05b9b9cf7834d100601532e89810e861 Mon Sep 17 00:00:00 2001
From: Joshua Peek
Date: Mon, 27 Aug 2012 11:30:38 -0500
Subject: [PATCH] Only read up to 100KB

---
 lib/linguist/tokenizer.rb | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/linguist/tokenizer.rb b/lib/linguist/tokenizer.rb
index ef3d54b7..fcd88efc 100644
--- a/lib/linguist/tokenizer.rb
+++ b/lib/linguist/tokenizer.rb
@@ -16,6 +16,9 @@ module Linguist
       new.extract_tokens(data)
     end
 
+    # Read up to 100KB
+    BYTE_LIMIT = 100_000
+
     # Start state on token, ignore anything till the next newline
     SINGLE_LINE_COMMENTS = [
       '//', # C
@@ -55,6 +58,8 @@ module Linguist
 
       tokens = []
       until s.eos?
+        break if s.pos >= BYTE_LIMIT
+
         if token = s.scan(/^#!.+$/)
           if name = extract_shebang(token)
             tokens << "SHEBANG#!#{name}"
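
A minimal standalone sketch of the guard this patch adds, assuming StringScanner and a toy word-only grammar in place of Linguist's real token rules. Only the BYTE_LIMIT value and the early break come from the patch; the regex and the getch fallback are illustrative stand-ins.

    require "strscan"

    # Stop tokenizing once the scanner's byte position passes a fixed limit,
    # mirroring the guard in the patch above. The word-only grammar below is a
    # stand-in; the real Linguist::Tokenizer matches shebangs, comments,
    # strings, and so on.
    BYTE_LIMIT = 100_000

    def extract_tokens(data)
      s = StringScanner.new(data)
      tokens = []
      until s.eos?
        break if s.pos >= BYTE_LIMIT # give up after ~100KB instead of scanning everything

        if token = s.scan(/\w+/)
          tokens << token
        else
          s.getch # skip any byte the toy grammar does not recognize
        end
      end
      tokens
    end

    # Only roughly the first 100KB contributes tokens; the rest is ignored.
    p extract_tokens("foo " * 50_000).size  # => 25000 (input is 200KB)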