From b183fcca05b9b9cf7834d100601532e89810e861 Mon Sep 17 00:00:00 2001
From: Joshua Peek
Date: Mon, 27 Aug 2012 11:30:38 -0500
Subject: [PATCH] Only read up to 100KB

---
 lib/linguist/tokenizer.rb | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/linguist/tokenizer.rb b/lib/linguist/tokenizer.rb
index ef3d54b7..fcd88efc 100644
--- a/lib/linguist/tokenizer.rb
+++ b/lib/linguist/tokenizer.rb
@@ -16,6 +16,9 @@ module Linguist
       new.extract_tokens(data)
     end
 
+    # Read up to 100KB
+    BYTE_LIMIT = 100_000
+
     # Start state on token, ignore anything till the next newline
     SINGLE_LINE_COMMENTS = [
       '//', # C
@@ -55,6 +58,8 @@ module Linguist
 
       tokens = []
       until s.eos?
+        break if s.pos >= BYTE_LIMIT
+
         if token = s.scan(/^#!.+$/)
           if name = extract_shebang(token)
             tokens << "SHEBANG#!#{name}"
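
A minimal standalone sketch of the guard this patch adds, assuming StringScanner and a toy word-only grammar in place of Linguist's real token rules. Only the BYTE_LIMIT value and the early break come from the patch; the regex and the getch fallback are illustrative stand-ins.

    require "strscan"

    # Stop tokenizing once the scanner's byte position passes a fixed limit,
    # mirroring the guard in the patch above. The word-only grammar below is a
    # stand-in; the real Linguist::Tokenizer matches shebangs, comments,
    # strings, and so on.
    BYTE_LIMIT = 100_000

    def extract_tokens(data)
      s = StringScanner.new(data)
      tokens = []
      until s.eos?
        break if s.pos >= BYTE_LIMIT # give up after ~100KB instead of scanning everything

        if token = s.scan(/\w+/)
          tokens << token
        else
          s.getch # skip any byte the toy grammar does not recognize
        end
      end
      tokens
    end

    # Only roughly the first 100KB contributes tokens; the rest is ignored.
    p extract_tokens("foo " * 50_000).size  # => 25000 (input is 200KB)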