Refactoring of Language.detect

This commit is contained in:
Paul Chaignon
2014-06-03 09:52:24 +02:00
parent 8a546d2a7a
commit bd380f44cc

View File

@@ -110,12 +110,13 @@ module Linguist
# First try to find languages that match based on filename.
possible_languages = find_by_filename(name)
if possible_languages.length == 1
possible_languages.first
# If there is more than one possible language with that extension (or no
# extension at all, in the case of extensionless scripts), we need to continue
# our detection work
if possible_languages.length > 1
else
data = data.call() if data.respond_to?(:call)
possible_language_names = possible_languages.map(&:name)
# Don't bother with emptiness
if data.nil? || data == ""
@@ -123,24 +124,19 @@ module Linguist
# Check if there's a shebang line and use that as authoritative
elsif (result = find_by_shebang(data)) && !result.empty?
result.first
# No shebang. Still more work to do. Try to find it with our heuristics.
elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
determined.first
# Lastly, fall back to the probabilistic classifier.
elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names ).first
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
Language[classified[0]]
end
elsif possible_languages.length == 0
data = data.call() if data.respond_to?(:call)
# More than one language with that extension. We need to make a choice.
elsif possible_languages.length > 1
possible_language_names = possible_languages.map(&:name)
# Check if there's a shebang line and use that as authoritative
if (result = find_by_shebang(data)) && !result.empty?
result.first
# Try to find it with our heuristics.
if (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
determined.first
# Lastly, fall back to the probabilistic classifier.
elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names ).first
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
Language[classified[0]]
end
end
else
# Simplest and most common case, we can just return the one match based on extension
possible_languages.first
end
end