When testing if a blob is indexable, check size first

Otherwise, charlock_holmes will allocate another large binary
buffer for testing the encoding, which is a problem if the binary
blob is many hundreds of MB in size. The allocation just fails and
crashes Ruby.
This commit is contained in:
Scott J. Goldman
2012-08-31 22:47:19 -07:00
parent 6ec907a915
commit e415a1351b
3 changed files with 11 additions and 3 deletions

View File

@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
s.add_dependency 'escape_utils', '~> 0.2.3'
s.add_dependency 'mime-types', '~> 1.19'
s.add_dependency 'pygments.rb', '>= 0.2.13'
s.add_development_dependency 'mocha'
s.add_development_dependency 'json'
s.add_development_dependency 'rake'
s.add_development_dependency 'yajl-ruby'

View File

@@ -250,7 +250,9 @@ module Linguist
#
# Return true or false
def indexable?
if binary?
if size > 100 * 1024
false
elsif binary?
false
elsif extname == '.txt'
true
@@ -260,8 +262,6 @@ module Linguist
false
elsif generated?
false
elsif size > 100 * 1024
false
else
true
end

View File

@@ -2,6 +2,7 @@ require 'linguist/file_blob'
require 'linguist/samples'
require 'test/unit'
require 'mocha'
require 'mime/types'
require 'pygments'
@@ -261,6 +262,12 @@ class TestBlob < Test::Unit::TestCase
assert !blob("Text/dump.sql").indexable?
assert !blob("Binary/github.po").indexable?
assert !blob("Binary/linguist.gem").indexable?
# large binary blobs should fail on size check first, not call
# into charlock_holmes and alloc big buffers for testing encoding
b = blob("Binary/octocat.ai")
b.expects(:binary?).never
assert !b.indexable?
end
def test_language