diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 76eab7ea..67f6eef3 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -256,10 +256,16 @@ module Linguist # without changing the encoding of `data`, and # also--importantly--without having to duplicate many (potentially # large) strings. - encoded_newlines = ["\r\n", "\r", "\n"]. - map { |nl| nl.encode(encoding).force_encoding(data.encoding) } + begin + encoded_newlines = ["\r\n", "\r", "\n"]. + map { |nl| nl.encode(encoding, "ASCII-8BIT").force_encoding(data.encoding) } - data.split(Regexp.union(encoded_newlines), -1) + data.split(Regexp.union(encoded_newlines), -1) + rescue Encoding::ConverterNotFoundError + # The data is not splittable in the detected encoding. Assume it's + # one big line. + [data] + end else [] end diff --git a/samples/Text/iso8859-8-i.txt b/samples/Text/iso8859-8-i.txt new file mode 100644 index 00000000..ed2bf6c4 --- /dev/null +++ b/samples/Text/iso8859-8-i.txt @@ -0,0 +1 @@ +%¿áé \ No newline at end of file diff --git a/test/test_blob.rb b/test/test_blob.rb index da13b96e..2109e9b4 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -97,6 +97,7 @@ class TestBlob < Test::Unit::TestCase def test_sloc assert_equal 2, blob("Ruby/foo.rb").sloc assert_equal 3, blob("Text/utf16le-windows.txt").sloc + assert_equal 1, blob("Text/iso8859-8-i.txt").sloc end def test_encoding