Merge pull request #1245 from alindeman/binarylike_data

Handle case where newline chars don't transcode to detected encoding
This commit is contained in:
Andy Lindeman
2014-06-03 12:55:33 -04:00
3 changed files with 11 additions and 3 deletions

View File

@@ -256,10 +256,16 @@ module Linguist
# without changing the encoding of `data`, and
# also--importantly--without having to duplicate many (potentially
# large) strings.
encoded_newlines = ["\r\n", "\r", "\n"].
map { |nl| nl.encode(encoding).force_encoding(data.encoding) }
begin
encoded_newlines = ["\r\n", "\r", "\n"].
map { |nl| nl.encode(encoding, "ASCII-8BIT").force_encoding(data.encoding) }
data.split(Regexp.union(encoded_newlines), -1)
data.split(Regexp.union(encoded_newlines), -1)
rescue Encoding::ConverterNotFoundError
# The data is not splittable in the detected encoding. Assume it's
# one big line.
[data]
end
else
[]
end

View File

@@ -0,0 +1 @@
%<25><><EFBFBD>

View File

@@ -97,6 +97,7 @@ class TestBlob < Test::Unit::TestCase
# Verifies the `sloc` value reported for several fixture blobs.
def test_sloc
assert_equal 2, blob("Ruby/foo.rb").sloc
assert_equal 3, blob("Text/utf16le-windows.txt").sloc
# NOTE(review): presumably this fixture holds binary-like data whose newline
# characters cannot be transcoded to the detected encoding, so the whole
# content is expected to count as a single line -- confirm against the fixture.
assert_equal 1, blob("Text/iso8859-8-i.txt").sloc
end
def test_encoding