mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Log regexp and classifier guess mismatches
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
require 'linguist/classifier'
|
||||||
require 'linguist/language'
|
require 'linguist/language'
|
||||||
require 'linguist/mime'
|
require 'linguist/mime'
|
||||||
require 'linguist/pathname'
|
require 'linguist/pathname'
|
||||||
@@ -453,11 +454,30 @@ module Linguist
|
|||||||
# Returns a Language or nil.
|
# Returns a Language or nil.
|
||||||
def disambiguate_extension_language
  # Nothing to disambiguate unless the extension is flagged as ambiguous.
  return unless Language.ambiguous?(extname)

  # Every language that lists this extension; handed to the classifier below.
  candidates = Language.all.select { |lang| lang.extensions.include?(extname) }

  # Dispatch to an extension-specific guesser (e.g. ".cls" -> guess_cls_language)
  # when one is defined; otherwise there is no extension-based guess.
  guesser = "guess_#{extname.sub(/^\./, '')}_language"
  language = respond_to?(guesser) ? send(guesser) : nil

  if candidates.any?
    # Cross-check the guess against the classifier's top result and report
    # any disagreement between the two.
    guessed_language, score = Classifier.instance.classify(data, candidates).first
    report_classifier_incorrect_guess(language, guessed_language, score) if guessed_language != language
  end

  language
end
|
||||||
|
|
||||||
|
# Internal: Error raised by report_classifier_incorrect_guess when the
# classifier's guess differs from the expected language.
class LanguageClassifierError < StandardError
end
|
||||||
|
|
||||||
|
# Internal: Report a disagreement between the expected language and the
# classifier's guess by raising.
#
# expected - The value the caller expected.
# actual   - The value the classifier produced instead.
# score    - The score the classifier gave to `actual`.
#
# The message also embeds `name` and `data`, which are resolved on the
# receiver (presumably the blob's file name and contents - confirm against
# the including class).
#
# Raises LanguageClassifierError unconditionally.
def report_classifier_incorrect_guess(expected, actual, score)
  message = "Expected #{expected}, but was #{actual} scoring #{score}.\n#{name}\n#{data}"
  raise LanguageClassifierError, message
end
|
||||||
|
|
||||||
# Internal: Guess language of .cls files
|
# Internal: Guess language of .cls files
|
||||||
#
|
#
|
||||||
# Returns a Language.
|
# Returns a Language.
|
||||||
|
|||||||
Reference in New Issue
Block a user