mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge pull request #1253 from github/newer-charlock
Use the :ruby_encoding value from charlock 0.7.2
This commit is contained in:
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
|
||||
s.files = Dir['lib/**/*']
|
||||
s.executables << 'linguist'
|
||||
|
||||
- s.add_dependency 'charlock_holmes', '~> 0.7.1'
|
||||
+ s.add_dependency 'charlock_holmes', '~> 0.7.2'
|
||||
s.add_dependency 'escape_utils', '~> 1.0.1'
|
||||
s.add_dependency 'mime-types', '~> 1.19'
|
||||
s.add_dependency 'pygments.rb', '~> 0.5.4'
|
||||
|
||||
@@ -112,6 +112,12 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
# Look up the :ruby_encoding entry of the hash returned by detect_encoding.
#
# Returns the value stored under :ruby_encoding, or nil when
# detect_encoding returns nil/false (no hash to read from).
def ruby_encoding
|
||||
if hash = detect_encoding
|
||||
hash[:ruby_encoding]
|
||||
end
|
||||
end
|
||||
|
||||
# Try to guess the encoding
|
||||
#
|
||||
# Returns: a Hash, with :encoding, :confidence, :type
|
||||
@@ -258,7 +264,7 @@ module Linguist
|
||||
# large) strings.
|
||||
begin
|
||||
encoded_newlines = ["\r\n", "\r", "\n"].
|
||||
- map { |nl| nl.encode(encoding, "ASCII-8BIT").force_encoding(data.encoding) }
|
||||
+ map { |nl| nl.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding) }
|
||||
|
||||
data.split(Regexp.union(encoded_newlines), -1)
|
||||
rescue Encoding::ConverterNotFoundError
|
||||
|
||||
43
samples/Text/ISO-2022-KR.txt
Normal file
43
samples/Text/ISO-2022-KR.txt
Normal file
@@ -0,0 +1,43 @@
|
||||
$)C#
|
||||
# Out-AnsiGraph.psm1
|
||||
# Author: xcud
|
||||
# History:
|
||||
# v0.1 September 21, 2009 initial version
|
||||
#
|
||||
# PS Example> ps | select -first 5 | sort -property VM |
|
||||
# Out-AnsiGraph ProcessName, VM
|
||||
# AEADISRV 14508032
|
||||
# audiodg 50757632
|
||||
# conhost 73740288
|
||||
# AppleMobileDeviceService 92061696
|
||||
# btdna 126443520
|
||||
#
|
||||
function Out-AnsiGraph($Parameter1=$null) {
|
||||
BEGIN {
|
||||
$q = new-object Collections.queue
|
||||
$max = 0; $namewidth = 0;
|
||||
}
|
||||
|
||||
PROCESS {
|
||||
if($_) {
|
||||
$name = $_.($Parameter1[0]);
|
||||
$val = $_.($Parameter1[1])
|
||||
if($max -lt $val) { $max = $val}
|
||||
if($namewidth -lt $name.length) {
|
||||
$namewidth = $name.length }
|
||||
$q.enqueue(@($name, $val))
|
||||
}
|
||||
}
|
||||
|
||||
END {
|
||||
$q | %{
|
||||
$graph = ""; 0..($_[1]/$max*20) |
|
||||
%{ $graph += "" }
|
||||
$name = "{0,$namewidth}" -f $_[0]
|
||||
"$name $graph " + $_[1]
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Export-ModuleMember Out-AnsiGraph
|
||||
@@ -102,10 +102,17 @@ class TestBlob < Test::Unit::TestCase
|
||||
|
||||
# Checks both the detected encoding name (encoding) and ruby_encoding for
# each text fixture. The two agree for every fixture except ISO-2022-KR,
# where ruby_encoding is expected to be "binary". The binary fixture is
# expected to have a nil encoding.
def test_encoding
|
||||
assert_equal "ISO-8859-2", blob("Text/README").encoding
|
||||
assert_equal "ISO-8859-2", blob("Text/README").ruby_encoding
|
||||
assert_equal "ISO-8859-1", blob("Text/dump.sql").encoding
|
||||
assert_equal "ISO-8859-1", blob("Text/dump.sql").ruby_encoding
|
||||
assert_equal "UTF-8", blob("Text/foo.txt").encoding
|
||||
assert_equal "UTF-8", blob("Text/foo.txt").ruby_encoding
|
||||
assert_equal "UTF-16LE", blob("Text/utf16le.txt").encoding
|
||||
assert_equal "UTF-16LE", blob("Text/utf16le.txt").ruby_encoding
|
||||
assert_equal "UTF-16LE", blob("Text/utf16le-windows.txt").encoding
|
||||
assert_equal "UTF-16LE", blob("Text/utf16le-windows.txt").ruby_encoding
|
||||
assert_equal "ISO-2022-KR", blob("Text/ISO-2022-KR.txt").encoding
|
||||
assert_equal "binary", blob("Text/ISO-2022-KR.txt").ruby_encoding
|
||||
assert_nil blob("Binary/dog.o").encoding
|
||||
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user