mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	When testing if a blob is indexable, check size first
Otherwise, charlock_holmes will allocate another large buffer to test the blob's encoding, which is a problem when the binary blob is many hundreds of MB in size: the allocation fails and crashes Ruby.
This commit is contained in:
		| @@ -12,6 +12,7 @@ Gem::Specification.new do |s| | ||||
|   s.add_dependency 'escape_utils',    '~> 0.2.3' | ||||
|   s.add_dependency 'mime-types',      '~> 1.19' | ||||
|   s.add_dependency 'pygments.rb',     '>= 0.2.13' | ||||
|   s.add_development_dependency 'mocha' | ||||
|   s.add_development_dependency 'json' | ||||
|   s.add_development_dependency 'rake' | ||||
|   s.add_development_dependency 'yajl-ruby' | ||||
|   | ||||
| @@ -250,7 +250,9 @@ module Linguist | ||||
|     # | ||||
|     # Return true or false | ||||
|     def indexable? | ||||
|       if binary? | ||||
|       if size > 100 * 1024 | ||||
|         false | ||||
|       elsif binary? | ||||
|         false | ||||
|       elsif extname == '.txt' | ||||
|         true | ||||
| @@ -260,8 +262,6 @@ module Linguist | ||||
|         false | ||||
|       elsif generated? | ||||
|         false | ||||
|       elsif size > 100 * 1024 | ||||
|         false | ||||
|       else | ||||
|         true | ||||
|       end | ||||
|   | ||||
| @@ -2,6 +2,7 @@ require 'linguist/file_blob' | ||||
| require 'linguist/samples' | ||||
|  | ||||
| require 'test/unit' | ||||
| require 'mocha' | ||||
| require 'mime/types' | ||||
| require 'pygments' | ||||
|  | ||||
| @@ -261,6 +262,12 @@ class TestBlob < Test::Unit::TestCase | ||||
|     assert !blob("Text/dump.sql").indexable? | ||||
|     assert !blob("Binary/github.po").indexable? | ||||
|     assert !blob("Binary/linguist.gem").indexable? | ||||
|  | ||||
|     # large binary blobs should fail on size check first, not call  | ||||
|     # into charlock_holmes and alloc big buffers for testing encoding | ||||
|     b = blob("Binary/octocat.ai") | ||||
|     b.expects(:binary?).never | ||||
|     assert !b.indexable? | ||||
|   end | ||||
|  | ||||
|   def test_language | ||||
|   | ||||
		Reference in New Issue
	
	Block a user