This commit is contained in:
Brandon Keepers
2014-11-03 08:54:11 -05:00
parent 815337299a
commit 74fa4b9b75
4 changed files with 8 additions and 3 deletions

View File

@@ -115,11 +115,14 @@ module Linguist
# Check if the blob is possibly binary and bail early. # Check if the blob is possibly binary and bail early.
return nil if blob.likely_binary? || blob.binary? return nil if blob.likely_binary? || blob.binary?
# Call each strategy until 0 or 1 candidates are returned
STRATEGIES.reduce([]) do |languages, strategy| STRATEGIES.reduce([]) do |languages, strategy|
if candidates = strategy.call(blob, languages) if candidates = strategy.call(blob, languages)
if candidates.size > 1 if candidates.size > 1
# More than one candidate was found, return them for the next strategy
candidates candidates
else else
# 1 or 0 candidates, stop trying strategies
break candidates break candidates
end end
else else

View File

@@ -1,8 +1,9 @@
module Linguist module Linguist
module Strategy module Strategy
# Detect language using the bayesian classifier
class Classifier class Classifier
def self.call(blob, languages) def self.call(blob, languages)
Linguist::Classifier.classify(Samples.cache, blob.data, possible_language_names).map do |name| Linguist::Classifier.classify(Samples.cache, blob.data, laguages.map(&:name)).map do |name|
# Return the actual Language object based on the string language name (i.e., first element of `#classify`) # Return the actual Language object based on the string language name (i.e., first element of `#classify`)
Language[name] Language[name]
end end

View File

@@ -1,8 +1,9 @@
module Linguist module Linguist
module Strategy module Strategy
# Stops detection if the blob contents are empty
class EmptyBlob class EmptyBlob
def self.call(blob, langauges) def self.call(blob, langauges)
# Don't bother with binary contents or an empty file # Return empty array to stop detection
[] if blob.data.nil? || blob.data == "" [] if blob.data.nil? || blob.data == ""
end end
end end

View File

@@ -1,5 +1,6 @@
module Linguist module Linguist
module Strategy module Strategy
# Detects language based on filename and/or extension
class Filename class Filename
def self.call(blob, _) def self.call(blob, _)
name = blob.name.to_s name = blob.name.to_s
@@ -12,7 +13,6 @@ module Linguist
name += ".script!" name += ".script!"
end end
# First try to find languages that match based on filename.
Language.find_by_filename(name) Language.find_by_filename(name)
end end
end end