add instrumentation to detection and classification

This commit is contained in:
Charlie Somerville
2015-02-25 12:34:07 +11:00
parent 04252c28f2
commit fd7633518f
3 changed files with 32 additions and 16 deletions

View File

@@ -6,3 +6,15 @@ require 'linguist/repository'
require 'linguist/samples' require 'linguist/samples'
require 'linguist/shebang' require 'linguist/shebang'
require 'linguist/version' require 'linguist/version'
class << Linguist
  # Public: Get or set the object that receives instrumentation calls.
  #
  # The only thing required of it here is that it responds to
  # `instrument(*args, &block)`. It is nil until something assigns it.
  attr_accessor :instrumenter

  # Internal: Run a block of work, routing it through the configured
  # instrumenter when one is set.
  #
  # args - Arguments forwarded unchanged to `instrumenter.instrument`.
  # bk   - The block of work to run (optional).
  #
  # Returns the result of `instrumenter.instrument` when an instrumenter is
  # set. Otherwise returns the block's value, or nil if no block was given.
  def instrument(*args, &bk)
    if instrumenter
      instrumenter.instrument(*args, &bk)
    elsif block_given?
      # Guarded so that a block-less call without an instrumenter is a no-op
      # instead of raising LocalJumpError from a bare `yield`.
      yield
    end
  end
end

View File

@@ -16,9 +16,11 @@ module Linguist
# #
# Returns an Array of Language objects, most probable first. # Returns an Array of Language objects, most probable first.
def self.call(blob, possible_languages) def self.call(blob, possible_languages)
language_names = possible_languages.map(&:name) Linguist.instrument("linguist.bayesian_classification") do
classify(Samples.cache, blob.data, language_names).map do |name, _| language_names = possible_languages.map(&:name)
Language[name] # Return the actual Language objects classify(Samples.cache, blob.data, language_names).map do |name, _|
Language[name] # Return the actual Language objects
end
end end
end end

View File

@@ -105,19 +105,21 @@ module Linguist
# Bail early if the blob is binary or empty. # Bail early if the blob is binary or empty.
return nil if blob.likely_binary? || blob.binary? || blob.empty? return nil if blob.likely_binary? || blob.binary? || blob.empty?
# Call each strategy until one candidate is returned. Linguist.instrument("linguist.detection") do
STRATEGIES.reduce([]) do |languages, strategy| # Call each strategy until one candidate is returned.
candidates = strategy.call(blob, languages) STRATEGIES.reduce([]) do |languages, strategy|
if candidates.size == 1 candidates = strategy.call(blob, languages)
return candidates.first if candidates.size == 1
elsif candidates.size > 1 return candidates.first
# More than one candidate was found, pass them to the next strategy. elsif candidates.size > 1
candidates # More than one candidate was found, pass them to the next strategy.
else candidates
# No candidates were found, pass on languages from the previous strategy. else
languages # No candidates were found, pass on languages from the previous strategy.
end languages
end.first end
end.first
end
end end
# Public: Get all Languages # Public: Get all Languages