Try strategies until one language is returned

This commit is contained in:
Brandon Keepers
2014-11-27 11:12:47 -05:00
parent a4081498f8
commit c1a9737313
3 changed files with 13 additions and 15 deletions

View File

@@ -4,7 +4,7 @@ module Linguist
ACTIVE = true ACTIVE = true
def self.call(blob, languages) def self.call(blob, languages)
find_by_heuristics(blob.data, langauges.map(&:name)) find_by_heuristics(blob.data, languages.map(&:name))
end end
# Public: Given an array of String language names, # Public: Given an array of String language names,

View File

@@ -10,6 +10,9 @@ require 'linguist/heuristics'
require 'linguist/samples' require 'linguist/samples'
require 'linguist/file_blob' require 'linguist/file_blob'
require 'linguist/blob_helper' require 'linguist/blob_helper'
require 'linguist/strategy/filename'
require 'linguist/strategy/shebang'
require 'linguist/strategy/classifier'
module Linguist module Linguist
# Language names that are recognizable by GitHub. Defined languages # Language names that are recognizable by GitHub. Defined languages
@@ -91,10 +94,6 @@ module Linguist
language language
end end
require 'linguist/strategy/filename'
require 'linguist/strategy/shebang'
require 'linguist/strategy/classifier'
STRATEGIES = [ STRATEGIES = [
Linguist::Strategy::Filename, Linguist::Strategy::Filename,
Linguist::Strategy::Shebang, Linguist::Strategy::Shebang,
@@ -112,17 +111,16 @@ module Linguist
# Bail early if the blob is binary or empty. # Bail early if the blob is binary or empty.
return nil if blob.likely_binary? || blob.binary? || blob.empty? return nil if blob.likely_binary? || blob.binary? || blob.empty?
# Call each strategy until 0 or 1 candidates are returned # Call each strategy until one candidate is returned
STRATEGIES.reduce([]) do |languages, strategy| STRATEGIES.reduce([]) do |languages, strategy|
if candidates = strategy.call(blob, languages) candidates = strategy.call(blob, languages)
if candidates.size > 1 if candidates.size == 1
# More than one candidate was found, return them for the next strategy return candidates.first
candidates elsif candidates.size > 1
else # More than one candidate was found, pass them to the next strategy
# 1 or 0 candidates, stop trying strategies candidates
break candidates
end
else else
# Strategy couldn't find any candidates, so pass on the original list
languages languages
end end
end.first end.first

View File

@@ -3,7 +3,7 @@ module Linguist
# Detect language using the bayesian classifier # Detect language using the bayesian classifier
class Classifier class Classifier
def self.call(blob, languages) def self.call(blob, languages)
Linguist::Classifier.classify(Samples.cache, blob.data, laguages.map(&:name)).map do |name| Linguist::Classifier.classify(Samples.cache, blob.data, languages.map(&:name)).map do |name, _|
# Return the actual Language object based on the string language name (i.e., first element of `#classify`) # Return the actual Language object based on the string language name (i.e., first element of `#classify`)
Language[name] Language[name]
end end