mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge pull request #1245 from alindeman/binarylike_data
Handle case where newline chars don't transcode to detected encoding
This commit is contained in:
@@ -256,10 +256,16 @@ module Linguist
|
||||
# without changing the encoding of `data`, and
|
||||
# also--importantly--without having to duplicate many (potentially
|
||||
# large) strings.
|
||||
encoded_newlines = ["\r\n", "\r", "\n"].
|
||||
map { |nl| nl.encode(encoding).force_encoding(data.encoding) }
|
||||
begin
|
||||
encoded_newlines = ["\r\n", "\r", "\n"].
|
||||
map { |nl| nl.encode(encoding, "ASCII-8BIT").force_encoding(data.encoding) }
|
||||
|
||||
data.split(Regexp.union(encoded_newlines), -1)
|
||||
data.split(Regexp.union(encoded_newlines), -1)
|
||||
rescue Encoding::ConverterNotFoundError
|
||||
# The data is not splittable in the detected encoding. Assume it's
|
||||
# one big line.
|
||||
[data]
|
||||
end
|
||||
else
|
||||
[]
|
||||
end
|
||||
|
||||
1
samples/Text/iso8859-8-i.txt
Normal file
1
samples/Text/iso8859-8-i.txt
Normal file
@@ -0,0 +1 @@
|
||||
%<25><><EFBFBD>
|
||||
@@ -97,6 +97,7 @@ class TestBlob < Test::Unit::TestCase
|
||||
def test_sloc
|
||||
assert_equal 2, blob("Ruby/foo.rb").sloc
|
||||
assert_equal 3, blob("Text/utf16le-windows.txt").sloc
|
||||
assert_equal 1, blob("Text/iso8859-8-i.txt").sloc
|
||||
end
|
||||
|
||||
def test_encoding
|
||||
|
||||
Reference in New Issue
Block a user