mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-12-08 20:38:47 +00:00
Use heuristics earlier to inform the rest of the classification process
This commit is contained in:
@@ -125,6 +125,11 @@ module Linguist
|
|||||||
if possible_languages.length > 1
|
if possible_languages.length > 1
|
||||||
data = blob.data
|
data = blob.data
|
||||||
possible_language_names = possible_languages.map(&:name)
|
possible_language_names = possible_languages.map(&:name)
|
||||||
|
heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
|
||||||
|
|
||||||
|
if heuristic_languages.size > 1
|
||||||
|
possible_language_names = heuristic_languages.map(&:name)
|
||||||
|
end
|
||||||
|
|
||||||
# Don't bother with binary contents or an empty file
|
# Don't bother with binary contents or an empty file
|
||||||
if data.nil? || data == ""
|
if data.nil? || data == ""
|
||||||
@@ -133,8 +138,8 @@ module Linguist
|
|||||||
elsif (result = find_by_shebang(data)) && !result.empty?
|
elsif (result = find_by_shebang(data)) && !result.empty?
|
||||||
result.first
|
result.first
|
||||||
# No shebang. Still more work to do. Try to find it with our heuristics.
|
# No shebang. Still more work to do. Try to find it with our heuristics.
|
||||||
elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
|
elsif heuristic_languages.size == 1
|
||||||
determined.first
|
heuristic_languages.first
|
||||||
# Lastly, fall back to the probabilistic classifier.
|
# Lastly, fall back to the probabilistic classifier.
|
||||||
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
|
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
|
||||||
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
||||||
|
|||||||
Reference in New Issue
Block a user