Counts the number of lines correctly for files with certain multibyte encodings

This commit is contained in:
Andy Lindeman
2014-05-21 11:44:18 -04:00
parent 93d7aa3d07
commit 85efbde3f7
4 changed files with 6 additions and 2 deletions

View File

@@ -241,7 +241,8 @@ module Linguist
  def lines
    @lines ||=
      if viewable? && data
-       data.split(/\r\n|\r|\n/, -1)
+       newlines = Regexp.new("\r\n|\r|\n".encode(encoding))
+       data.force_encoding(encoding).split(newlines, -1)
      else
        []
      end
@@ -262,7 +263,7 @@ module Linguist
  #
  # Returns Integer
  def sloc
-   lines.grep(/\S/).size
+   lines.grep(Regexp.new('\S'.encode(encoding))).size
  end
  # Public: Is the blob a generated file?

Binary file not shown.

BIN
samples/Text/utf16le.txt Normal file

Binary file not shown.

View File

@@ -77,12 +77,15 @@ class TestBlob < Test::Unit::TestCase
  def test_sloc
    assert_equal 2, blob("Ruby/foo.rb").sloc
+   assert_equal 3, blob("Text/utf16le-windows.txt").sloc
  end
  def test_encoding
    assert_equal "ISO-8859-2", blob("Text/README").encoding
    assert_equal "ISO-8859-1", blob("Text/dump.sql").encoding
    assert_equal "UTF-8", blob("Text/foo.txt").encoding
+   assert_equal "UTF-16LE", blob("Text/utf16le.txt").encoding
+   assert_equal "UTF-16LE", blob("Text/utf16le-windows.txt").encoding
    assert_nil blob("Binary/dog.o").encoding
  end