Counts the number of lines correctly for files with certain multibyte encodings

This commit is contained in:
Andy Lindeman
2014-05-21 11:44:18 -04:00
parent 93d7aa3d07
commit 85efbde3f7
4 changed files with 6 additions and 2 deletions

View File

@@ -241,7 +241,8 @@ module Linguist
def lines
@lines ||=
if viewable? && data
data.split(/\r\n|\r|\n/, -1)
newlines = Regexp.new("\r\n|\r|\n".encode(encoding))
data.force_encoding(encoding).split(newlines, -1)
else
[]
end
@@ -262,7 +263,7 @@ module Linguist
#
# Returns Integer
def sloc
# Counts the entries of `lines` that contain at least one non-whitespace
# character (i.e. that match \S).
# NOTE(review): this span comes from a diff view whose +/- markers were
# stripped. The next statement appears to be the removed (pre-change) line:
# it matches with a plain /\S/ literal.
lines.grep(/\S/).size
# NOTE(review): this appears to be the added (post-change) line. The '\S'
# pattern source is transcoded to the blob's `encoding` before the Regexp is
# built, presumably so it is compatible with lines held in that encoding
# (e.g. UTF-16LE) - confirm against the full file.
lines.grep(Regexp.new('\S'.encode(encoding))).size
end
# Public: Is the blob a generated file?

Binary file not shown.

BIN
samples/Text/utf16le.txt Normal file

Binary file not shown.

View File

@@ -77,12 +77,15 @@ class TestBlob < Test::Unit::TestCase
# Verifies Blob#sloc against sample fixtures.
def test_sloc
# Ruby sample fixture: expected to report 2 lines for sloc.
assert_equal 2, blob("Ruby/foo.rb").sloc
# Fixture that test_encoding expects to be detected as UTF-16LE; its sloc is
# expected to be 3.
# NOTE(review): judging by the file name it presumably uses Windows (CRLF)
# line endings - confirm against the fixture.
assert_equal 3, blob("Text/utf16le-windows.txt").sloc
end
# Verifies the encoding name reported by Blob#encoding for sample fixtures.
def test_encoding
# Single-byte Latin encodings.
assert_equal "ISO-8859-2", blob("Text/README").encoding
assert_equal "ISO-8859-1", blob("Text/dump.sql").encoding
assert_equal "UTF-8", blob("Text/foo.txt").encoding
# Multibyte UTF-16LE fixtures (the second is also used by test_sloc).
assert_equal "UTF-16LE", blob("Text/utf16le.txt").encoding
assert_equal "UTF-16LE", blob("Text/utf16le-windows.txt").encoding
# The object-file fixture is expected to report no encoding at all.
assert_nil blob("Binary/dog.o").encoding
end