From f5e4789ccb33f97c9d78d6b4f364c62468d77482 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 2 Jul 2014 19:51:06 +0200 Subject: [PATCH] Properly handle detection of binary files --- lib/linguist/language.rb | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index ed9b68e9..81e70361 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -92,13 +92,20 @@ module Linguist # Public: Detects the Language of the blob. # - # blob - an object that implements the Linguist `Blob` interface; + # blob - an object that includes the Linguist `BlobHelper` interface; # see Linguist::LazyBlob and Linguist::FileBlob for examples # # Returns Language or nil. def self.detect(blob) name = blob.name.to_s + # Check if the blob is possibly binary and bail early; this is a cheap + # test that uses the extension name to guess a binary mime type. + # + # We'll perform a more comprehensive test later which actually involves + # looking for binary characters in the blob + return nil if blob.likely_binary? + # A bit of an elegant hack. If the file is executable but extensionless, # append a "magic" extension so it can be classified with other # languages that have shebang scripts. @@ -116,8 +123,8 @@ module Linguist data = blob.data possible_language_names = possible_languages.map(&:name) - # Don't bother with emptiness - if data.nil? || data == "" + # Don't bother with binary contents or an empty file + if blob.binary? || data.nil? || data == "" nil # Check if there's a shebang line and use that as authoritative elsif (result = find_by_shebang(data)) && !result.empty?