mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Use charlock to detect binary blobs
This commit is contained in:
@@ -2,6 +2,7 @@ require 'linguist/language'
|
|||||||
require 'linguist/mime'
|
require 'linguist/mime'
|
||||||
require 'linguist/pathname'
|
require 'linguist/pathname'
|
||||||
|
|
||||||
|
require 'charlock_holmes'
|
||||||
require 'escape_utils'
|
require 'escape_utils'
|
||||||
require 'pygments'
|
require 'pygments'
|
||||||
require 'yaml'
|
require 'yaml'
|
||||||
@@ -52,7 +53,7 @@ module Linguist
|
|||||||
#
|
#
|
||||||
# Returns a content type String.
|
# Returns a content type String.
|
||||||
def content_type
|
def content_type
|
||||||
pathname.content_type
|
@content_type ||= binary? ? mime_type : "text/plain; charset=#{encoding.downcase}"
|
||||||
end
|
end
|
||||||
|
|
||||||
# Public: Get the Content-Disposition header value
|
# Public: Get the Content-Disposition header value
|
||||||
@@ -71,11 +72,30 @@ module Linguist
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def encoding
|
||||||
|
if hash = detect_encoding
|
||||||
|
hash[:encoding]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Try to guess the encoding
|
||||||
|
#
|
||||||
|
# Returns: a Hash, with :encoding, :confidence, :type
|
||||||
|
# this will return nil if an error occurred during detection or
|
||||||
|
# no valid encoding could be found
|
||||||
|
def detect_encoding
|
||||||
|
@detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data) if data
|
||||||
|
end
|
||||||
|
|
||||||
# Public: Is the blob binary?
|
# Public: Is the blob binary?
|
||||||
#
|
#
|
||||||
# Return true or false
|
# Return true or false
|
||||||
def binary?
|
def binary?
|
||||||
pathname.binary?
|
if mime_type = Mime.lookup_mime_type_for(pathname.extname)
|
||||||
|
mime_type.binary?
|
||||||
|
else
|
||||||
|
detect_encoding.nil? || detect_encoding[:type] == :binary
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Public: Is the blob text?
|
# Public: Is the blob text?
|
||||||
@@ -529,6 +549,8 @@ module Linguist
|
|||||||
# Returns html String
|
# Returns html String
|
||||||
def colorize(options = {})
|
def colorize(options = {})
|
||||||
return if !text? || large?
|
return if !text? || large?
|
||||||
|
options[:options] ||= {}
|
||||||
|
options[:options][:encoding] ||= encoding
|
||||||
lexer.highlight(data, options)
|
lexer.highlight(data, options)
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -540,7 +562,7 @@ module Linguist
|
|||||||
# Returns html String
|
# Returns html String
|
||||||
def colorize_without_wrapper(options = {})
|
def colorize_without_wrapper(options = {})
|
||||||
return if !text? || large?
|
return if !text? || large?
|
||||||
if text = lexer.highlight(data, options)
|
if text = colorize(options)
|
||||||
text[%r{<div class="highlight"><pre>(.*?)</pre>\s*</div>}m, 1]
|
text[%r{<div class="highlight"><pre>(.*?)</pre>\s*</div>}m, 1]
|
||||||
else
|
else
|
||||||
''
|
''
|
||||||
|
|||||||
@@ -38,14 +38,11 @@ class TestBlob < Test::Unit::TestCase
|
|||||||
assert_equal "application/octet-stream", blob("dog.o").content_type
|
assert_equal "application/octet-stream", blob("dog.o").content_type
|
||||||
assert_equal "application/pdf", blob("foo.pdf").content_type
|
assert_equal "application/pdf", blob("foo.pdf").content_type
|
||||||
assert_equal "image/png", blob("foo.png").content_type
|
assert_equal "image/png", blob("foo.png").content_type
|
||||||
assert_equal "text/plain; charset=utf8", blob("README").content_type
|
assert_equal "text/plain; charset=iso-8859-2", blob("README").content_type
|
||||||
assert_equal "text/plain; charset=utf8", blob("foo.html").content_type
|
assert_equal "text/plain; charset=iso-8859-1", blob("script.pl").content_type
|
||||||
assert_equal "text/plain; charset=utf8", blob("foo.pl").content_type
|
assert_equal "text/plain; charset=iso-8859-1", blob("script.py").content_type
|
||||||
assert_equal "text/plain; charset=utf8", blob("foo.py").content_type
|
assert_equal "text/plain; charset=iso-8859-1", blob("script.rb").content_type
|
||||||
assert_equal "text/plain; charset=utf8", blob("foo.rb").content_type
|
assert_equal "text/plain; charset=iso-8859-1", blob("script.sh").content_type
|
||||||
assert_equal "text/plain; charset=utf8", blob("foo.sh").content_type
|
|
||||||
assert_equal "text/plain; charset=utf8", blob("foo.xhtml").content_type
|
|
||||||
assert_equal "text/plain; charset=utf8", blob("foo.xml").content_type
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_disposition
|
def test_disposition
|
||||||
@@ -79,6 +76,13 @@ class TestBlob < Test::Unit::TestCase
|
|||||||
assert_equal 2, blob("foo.rb").sloc
|
assert_equal 2, blob("foo.rb").sloc
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_encoding
|
||||||
|
assert_equal "ISO-8859-2", blob("README").encoding
|
||||||
|
assert_equal "ISO-8859-1", blob("dump.sql").encoding
|
||||||
|
assert_equal "UTF-8", blob("file.txt").encoding
|
||||||
|
assert_nil blob("dog.o").encoding
|
||||||
|
end
|
||||||
|
|
||||||
def test_binary
|
def test_binary
|
||||||
assert blob("git.deb").binary?
|
assert blob("git.deb").binary?
|
||||||
assert blob("git.exe").binary?
|
assert blob("git.exe").binary?
|
||||||
@@ -86,6 +90,7 @@ class TestBlob < Test::Unit::TestCase
|
|||||||
assert blob("linguist.gem").binary?
|
assert blob("linguist.gem").binary?
|
||||||
assert blob("octocat.ai").binary?
|
assert blob("octocat.ai").binary?
|
||||||
assert blob("octocat.png").binary?
|
assert blob("octocat.png").binary?
|
||||||
|
assert blob("zip").binary?
|
||||||
assert !blob("README").binary?
|
assert !blob("README").binary?
|
||||||
assert !blob("file.txt").binary?
|
assert !blob("file.txt").binary?
|
||||||
assert !blob("foo.rb").binary?
|
assert !blob("foo.rb").binary?
|
||||||
@@ -330,7 +335,6 @@ class TestBlob < Test::Unit::TestCase
|
|||||||
assert_equal Language['Parrot Assembly'], blob("hello.pasm").language
|
assert_equal Language['Parrot Assembly'], blob("hello.pasm").language
|
||||||
|
|
||||||
# http://gosu-lang.org
|
# http://gosu-lang.org
|
||||||
assert_equal Language['Gosu'], blob("Hello.gs").language
|
|
||||||
assert_equal Language['Gosu'], blob("Hello.gsx").language
|
assert_equal Language['Gosu'], blob("Hello.gsx").language
|
||||||
assert_equal Language['Gosu'], blob("hello.gsp").language
|
assert_equal Language['Gosu'], blob("hello.gsp").language
|
||||||
assert_equal Language['Gosu'], blob("Hello.gst").language
|
assert_equal Language['Gosu'], blob("Hello.gst").language
|
||||||
|
|||||||
Reference in New Issue
Block a user