diff --git a/github-linguist.gemspec b/github-linguist.gemspec index b38abb6f..dbfd58a9 100644 --- a/github-linguist.gemspec +++ b/github-linguist.gemspec @@ -13,7 +13,7 @@ Gem::Specification.new do |s| s.files = Dir['lib/**/*'] s.executables << 'linguist' - s.add_dependency 'charlock_holmes', '~> 0.7.1' + s.add_dependency 'charlock_holmes', '~> 0.7.2' s.add_dependency 'escape_utils', '~> 1.0.1' s.add_dependency 'mime-types', '~> 1.19' s.add_dependency 'pygments.rb', '~> 0.5.4' diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 67f6eef3..15ab2d9f 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -112,6 +112,12 @@ module Linguist end end + def ruby_encoding + if hash = detect_encoding + hash[:ruby_encoding] + end + end + # Try to guess the encoding # # Returns: a Hash, with :encoding, :confidence, :type @@ -258,7 +264,7 @@ module Linguist # large) strings. begin encoded_newlines = ["\r\n", "\r", "\n"]. - map { |nl| nl.encode(encoding, "ASCII-8BIT").force_encoding(data.encoding) } + map { |nl| nl.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding) } data.split(Regexp.union(encoded_newlines), -1) rescue Encoding::ConverterNotFoundError diff --git a/samples/Text/ISO-2022-KR.txt b/samples/Text/ISO-2022-KR.txt new file mode 100644 index 00000000..721d7051 --- /dev/null +++ b/samples/Text/ISO-2022-KR.txt @@ -0,0 +1,43 @@ +$)C# +# Out-AnsiGraph.psm1 +# Author: xcud +# History: +# v0.1 September 21, 2009 initial version +# +# PS Example> ps | select -first 5 | sort -property VM | +# Out-AnsiGraph ProcessName, VM +# AEADISRV  14508032 +# audiodg  50757632 +# conhost  73740288 +# AppleMobileDeviceService  92061696 +# btdna  126443520 +# +function Out-AnsiGraph($Parameter1=$null) { + BEGIN { + $q = new-object Collections.queue + $max = 0; $namewidth = 0; + } + + PROCESS { + if($_) { + $name = $_.($Parameter1[0]); + $val = $_.($Parameter1[1]) + if($max -lt $val) { $max = $val} + if($namewidth -lt $name.length) { + $namewidth = $name.length } + $q.enqueue(@($name, $val)) + } + } + + END { + $q | %{ + $graph = ""; 0..($_[1]/$max*20) | + %{ $graph += "" } + $name = "{0,$namewidth}" -f $_[0] + "$name $graph " + $_[1] + } + + } +} + +Export-ModuleMember Out-AnsiGraph \ No newline at end of file diff --git a/test/test_blob.rb b/test/test_blob.rb index f900e57f..d877cbae 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -102,10 +102,17 @@ class TestBlob < Test::Unit::TestCase def test_encoding assert_equal "ISO-8859-2", blob("Text/README").encoding + assert_equal "ISO-8859-2", blob("Text/README").ruby_encoding assert_equal "ISO-8859-1", blob("Text/dump.sql").encoding + assert_equal "ISO-8859-1", blob("Text/dump.sql").ruby_encoding assert_equal "UTF-8", blob("Text/foo.txt").encoding + assert_equal "UTF-8", blob("Text/foo.txt").ruby_encoding assert_equal "UTF-16LE", blob("Text/utf16le.txt").encoding + assert_equal "UTF-16LE", blob("Text/utf16le.txt").ruby_encoding assert_equal "UTF-16LE", blob("Text/utf16le-windows.txt").encoding + assert_equal "UTF-16LE", blob("Text/utf16le-windows.txt").ruby_encoding + assert_equal "ISO-2022-KR", blob("Text/ISO-2022-KR.txt").encoding + assert_equal "binary", blob("Text/ISO-2022-KR.txt").ruby_encoding assert_nil blob("Binary/dog.o").encoding end