diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb
index 17b7ddad..e53037a5 100644
--- a/lib/linguist/language.rb
+++ b/lib/linguist/language.rb
@@ -115,11 +115,14 @@ module Linguist
       # Check if the blob is possibly binary and bail early.
       return nil if blob.likely_binary? || blob.binary?
 
+      # Call each strategy until 0 or 1 candidates are returned
       STRATEGIES.reduce([]) do |languages, strategy|
         if candidates = strategy.call(blob, languages)
           if candidates.size > 1
+            # More than one candidate was found, return them for the next strategy
             candidates
           else
+            # 1 or 0 candidates, stop trying strategies
             break candidates
           end
         else
diff --git a/lib/linguist/strategy/classifier.rb b/lib/linguist/strategy/classifier.rb
index e362d9fb..0bdbae69 100644
--- a/lib/linguist/strategy/classifier.rb
+++ b/lib/linguist/strategy/classifier.rb
@@ -1,8 +1,9 @@
 module Linguist
   module Strategy
+    # Detect language using the bayesian classifier
     class Classifier
       def self.call(blob, languages)
-        Linguist::Classifier.classify(Samples.cache, blob.data, possible_language_names).map do |name|
+        Linguist::Classifier.classify(Samples.cache, blob.data, languages.map(&:name)).map do |name|
           # Return the actual Language object based of the string language name (i.e., first element of `#classify`)
           Language[name]
         end
diff --git a/lib/linguist/strategy/empty_blob.rb b/lib/linguist/strategy/empty_blob.rb
index cc30477d..a43b6c9d 100644
--- a/lib/linguist/strategy/empty_blob.rb
+++ b/lib/linguist/strategy/empty_blob.rb
@@ -1,8 +1,9 @@
 module Linguist
   module Strategy
+    # Stops detection if the blob contents are empty
     class EmptyBlob
       def self.call(blob, langauges)
-        # Don't bother with binary contents or an empty file
+        # Return empty array to stop detection
         [] if blob.data.nil? || blob.data == ""
       end
     end
diff --git a/lib/linguist/strategy/filename.rb b/lib/linguist/strategy/filename.rb
index 3a3dcca9..163d96e9 100644
--- a/lib/linguist/strategy/filename.rb
+++ b/lib/linguist/strategy/filename.rb
@@ -1,5 +1,6 @@
 module Linguist
   module Strategy
+    # Detects language based on filename and/or extension
     class Filename
       def self.call(blob, _)
         name = blob.name.to_s
@@ -12,7 +13,6 @@ module Linguist
           name += ".script!"
         end
 
-        # First try to find languages that match based on filename.
         Language.find_by_filename(name)
       end
     end