mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Merge pull request #1245 from alindeman/binarylike_data
Handle case where newline chars don't transcode to detected encoding
This commit is contained in:
		| @@ -256,10 +256,16 @@ module Linguist | |||||||
|           # without changing the encoding of `data`, and |           # without changing the encoding of `data`, and | ||||||
|           # also--importantly--without having to duplicate many (potentially |           # also--importantly--without having to duplicate many (potentially | ||||||
|           # large) strings. |           # large) strings. | ||||||
|           encoded_newlines = ["\r\n", "\r", "\n"]. |           begin | ||||||
|             map { |nl| nl.encode(encoding).force_encoding(data.encoding) } |             encoded_newlines = ["\r\n", "\r", "\n"]. | ||||||
|  |               map { |nl| nl.encode(encoding, "ASCII-8BIT").force_encoding(data.encoding) } | ||||||
|  |  | ||||||
|           data.split(Regexp.union(encoded_newlines), -1) |             data.split(Regexp.union(encoded_newlines), -1) | ||||||
|  |           rescue Encoding::ConverterNotFoundError | ||||||
|  |             # The data is not splittable in the detected encoding.  Assume it's | ||||||
|  |             # one big line. | ||||||
|  |             [data] | ||||||
|  |           end | ||||||
|         else |         else | ||||||
|           [] |           [] | ||||||
|         end |         end | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								samples/Text/iso8859-8-i.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								samples/Text/iso8859-8-i.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | %<25><><EFBFBD> | ||||||
| @@ -97,6 +97,7 @@ class TestBlob < Test::Unit::TestCase | |||||||
|   def test_sloc |   def test_sloc | ||||||
|     assert_equal 2, blob("Ruby/foo.rb").sloc |     assert_equal 2, blob("Ruby/foo.rb").sloc | ||||||
|     assert_equal 3, blob("Text/utf16le-windows.txt").sloc |     assert_equal 3, blob("Text/utf16le-windows.txt").sloc | ||||||
|  |     assert_equal 1, blob("Text/iso8859-8-i.txt").sloc | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def test_encoding |   def test_encoding | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user