mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	When testing if a blob is indexable, check size first
Otherwise, charlock_holmes will allocate another large binary buffer to test the encoding, which is a problem when the binary blob is many hundreds of MB in size. It will simply fail and crash Ruby.
This commit is contained in:
		@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
 | 
			
		||||
  s.add_dependency 'escape_utils',    '~> 0.2.3'
 | 
			
		||||
  s.add_dependency 'mime-types',      '~> 1.19'
 | 
			
		||||
  s.add_dependency 'pygments.rb',     '>= 0.2.13'
 | 
			
		||||
  s.add_development_dependency 'mocha'
 | 
			
		||||
  s.add_development_dependency 'json'
 | 
			
		||||
  s.add_development_dependency 'rake'
 | 
			
		||||
  s.add_development_dependency 'yajl-ruby'
 | 
			
		||||
 
 | 
			
		||||
@@ -250,7 +250,9 @@ module Linguist
 | 
			
		||||
    #
 | 
			
		||||
    # Return true or false
 | 
			
		||||
    def indexable?
 | 
			
		||||
      if binary?
 | 
			
		||||
      if size > 100 * 1024
 | 
			
		||||
        false
 | 
			
		||||
      elsif binary?
 | 
			
		||||
        false
 | 
			
		||||
      elsif extname == '.txt'
 | 
			
		||||
        true
 | 
			
		||||
@@ -260,8 +262,6 @@ module Linguist
 | 
			
		||||
        false
 | 
			
		||||
      elsif generated?
 | 
			
		||||
        false
 | 
			
		||||
      elsif size > 100 * 1024
 | 
			
		||||
        false
 | 
			
		||||
      else
 | 
			
		||||
        true
 | 
			
		||||
      end
 | 
			
		||||
 
 | 
			
		||||
@@ -2,6 +2,7 @@ require 'linguist/file_blob'
 | 
			
		||||
require 'linguist/samples'
 | 
			
		||||
 | 
			
		||||
require 'test/unit'
 | 
			
		||||
require 'mocha'
 | 
			
		||||
require 'mime/types'
 | 
			
		||||
require 'pygments'
 | 
			
		||||
 | 
			
		||||
@@ -261,6 +262,12 @@ class TestBlob < Test::Unit::TestCase
 | 
			
		||||
    assert !blob("Text/dump.sql").indexable?
 | 
			
		||||
    assert !blob("Binary/github.po").indexable?
 | 
			
		||||
    assert !blob("Binary/linguist.gem").indexable?
 | 
			
		||||
 | 
			
		||||
    # large binary blobs should fail on size check first, not call 
 | 
			
		||||
    # into charlock_holmes and alloc big buffers for testing encoding
 | 
			
		||||
    b = blob("Binary/octocat.ai")
 | 
			
		||||
    b.expects(:binary?).never
 | 
			
		||||
    assert !b.indexable?
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def test_language
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user