mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge pull request #1245 from alindeman/binarylike_data
Handle case where newline chars don't transcode to detected encoding
This commit is contained in:
@@ -256,10 +256,16 @@ module Linguist
|
|||||||
# without changing the encoding of `data`, and
|
# without changing the encoding of `data`, and
|
||||||
# also--importantly--without having to duplicate many (potentially
|
# also--importantly--without having to duplicate many (potentially
|
||||||
# large) strings.
|
# large) strings.
|
||||||
|
begin
|
||||||
encoded_newlines = ["\r\n", "\r", "\n"].
|
encoded_newlines = ["\r\n", "\r", "\n"].
|
||||||
map { |nl| nl.encode(encoding).force_encoding(data.encoding) }
|
map { |nl| nl.encode(encoding, "ASCII-8BIT").force_encoding(data.encoding) }
|
||||||
|
|
||||||
data.split(Regexp.union(encoded_newlines), -1)
|
data.split(Regexp.union(encoded_newlines), -1)
|
||||||
|
rescue Encoding::ConverterNotFoundError
|
||||||
|
# The data is not splittable in the detected encoding. Assume it's
|
||||||
|
# one big line.
|
||||||
|
[data]
|
||||||
|
end
|
||||||
else
|
else
|
||||||
[]
|
[]
|
||||||
end
|
end
|
||||||
|
|||||||
1
samples/Text/iso8859-8-i.txt
Normal file
1
samples/Text/iso8859-8-i.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
%<25><><EFBFBD>
|
||||||
@@ -97,6 +97,7 @@ class TestBlob < Test::Unit::TestCase
|
|||||||
def test_sloc
|
def test_sloc
|
||||||
assert_equal 2, blob("Ruby/foo.rb").sloc
|
assert_equal 2, blob("Ruby/foo.rb").sloc
|
||||||
assert_equal 3, blob("Text/utf16le-windows.txt").sloc
|
assert_equal 3, blob("Text/utf16le-windows.txt").sloc
|
||||||
|
assert_equal 1, blob("Text/iso8859-8-i.txt").sloc
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_encoding
|
def test_encoding
|
||||||
|
|||||||
Reference in New Issue
Block a user