From e415a1351bf3e7eff0a33574400fa89c0941d9bf Mon Sep 17 00:00:00 2001 From: "Scott J. Goldman" Date: Fri, 31 Aug 2012 22:47:19 -0700 Subject: [PATCH] When testing if a blob is indexable, check size first Otherwise, charlock_holmes will allocate another large binary buffer for testing the encoding, which is a problem if the binary blob is many hundreds of MB large. It'll just fail and crash ruby. --- github-linguist.gemspec | 1 + lib/linguist/blob_helper.rb | 6 +++--- test/test_blob.rb | 7 +++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/github-linguist.gemspec b/github-linguist.gemspec index 246824ac..3b1e3701 100644 --- a/github-linguist.gemspec +++ b/github-linguist.gemspec @@ -12,6 +12,7 @@ Gem::Specification.new do |s| s.add_dependency 'escape_utils', '~> 0.2.3' s.add_dependency 'mime-types', '~> 1.19' s.add_dependency 'pygments.rb', '>= 0.2.13' + s.add_development_dependency 'mocha' s.add_development_dependency 'json' s.add_development_dependency 'rake' s.add_development_dependency 'yajl-ruby' diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index b2d72f11..fbbaff9c 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -250,7 +250,9 @@ module Linguist # # Return true or false def indexable? - if binary? + if size > 100 * 1024 + false + elsif binary? false elsif extname == '.txt' true @@ -260,8 +262,6 @@ module Linguist false elsif generated? false - elsif size > 100 * 1024 - false else true end diff --git a/test/test_blob.rb b/test/test_blob.rb index 0832b8fd..17e9ef8c 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -2,6 +2,7 @@ require 'linguist/file_blob' require 'linguist/samples' require 'test/unit' +require 'mocha' require 'mime/types' require 'pygments' @@ -261,6 +262,12 @@ class TestBlob < Test::Unit::TestCase assert !blob("Text/dump.sql").indexable? assert !blob("Binary/github.po").indexable? assert !blob("Binary/linguist.gem").indexable? + + # large binary blobs should fail on size check first, not call + # into charlock_holmes and alloc big buffers for testing encoding + b = blob("Binary/octocat.ai") + b.expects(:binary?).never + assert !b.indexable? end def test_language