mirror of
https://github.com/KevinMidboe/linguist.git
synced 2026-02-10 10:19:40 +00:00
Refactoring of Language.detect
This commit is contained in:
@@ -110,12 +110,13 @@ module Linguist
|
|||||||
# First try to find languages that match based on filename.
|
# First try to find languages that match based on filename.
|
||||||
possible_languages = find_by_filename(name)
|
possible_languages = find_by_filename(name)
|
||||||
|
|
||||||
|
if possible_languages.length == 1
|
||||||
|
possible_languages.first
|
||||||
# If there is more than one possible language with that extension (or no
|
# If there is more than one possible language with that extension (or no
|
||||||
# extension at all, in the case of extensionless scripts), we need to continue
|
# extension at all, in the case of extensionless scripts), we need to continue
|
||||||
# our detection work
|
# our detection work
|
||||||
if possible_languages.length > 1
|
else
|
||||||
data = data.call() if data.respond_to?(:call)
|
data = data.call() if data.respond_to?(:call)
|
||||||
possible_language_names = possible_languages.map(&:name)
|
|
||||||
|
|
||||||
# Don't bother with emptiness
|
# Don't bother with emptiness
|
||||||
if data.nil? || data == ""
|
if data.nil? || data == ""
|
||||||
@@ -123,24 +124,19 @@ module Linguist
|
|||||||
# Check if there's a shebang line and use that as authoritative
|
# Check if there's a shebang line and use that as authoritative
|
||||||
elsif (result = find_by_shebang(data)) && !result.empty?
|
elsif (result = find_by_shebang(data)) && !result.empty?
|
||||||
result.first
|
result.first
|
||||||
# No shebang. Still more work to do. Try to find it with our heuristics.
|
# More than one language with that extension. We need to make a choice.
|
||||||
elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
|
elsif possible_languages.length > 1
|
||||||
determined.first
|
possible_language_names = possible_languages.map(&:name)
|
||||||
# Lastly, fall back to the probabilistic classifier.
|
|
||||||
elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names ).first
|
|
||||||
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
|
||||||
Language[classified[0]]
|
|
||||||
end
|
|
||||||
elsif possible_languages.length == 0
|
|
||||||
data = data.call() if data.respond_to?(:call)
|
|
||||||
|
|
||||||
# Check if there's a shebang line and use that as authoritative
|
# Try to find it with our heuristics.
|
||||||
if (result = find_by_shebang(data)) && !result.empty?
|
if (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
|
||||||
result.first
|
determined.first
|
||||||
|
# Lastly, fall back to the probabilistic classifier.
|
||||||
|
elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names ).first
|
||||||
|
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
||||||
|
Language[classified[0]]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
else
|
|
||||||
# Simplest and most common case, we can just return the one match based on extension
|
|
||||||
possible_languages.first
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user