From fd7633518f1aff15eee5338fd6715fa2717341fe Mon Sep 17 00:00:00 2001 From: Charlie Somerville Date: Wed, 25 Feb 2015 12:34:07 +1100 Subject: [PATCH] add instrumentation to detection and classification --- lib/linguist.rb | 12 ++++++++++++ lib/linguist/classifier.rb | 8 +++++--- lib/linguist/language.rb | 28 +++++++++++++++------------- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/lib/linguist.rb b/lib/linguist.rb index ff9fc3a2..4419ff5b 100644 --- a/lib/linguist.rb +++ b/lib/linguist.rb @@ -6,3 +6,15 @@ require 'linguist/repository' require 'linguist/samples' require 'linguist/shebang' require 'linguist/version' + +class << Linguist + attr_accessor :instrumenter + + def instrument(*args, &bk) + if instrumenter + instrumenter.instrument(*args, &bk) + else + yield + end + end +end diff --git a/lib/linguist/classifier.rb b/lib/linguist/classifier.rb index 89a0df2f..208467e4 100644 --- a/lib/linguist/classifier.rb +++ b/lib/linguist/classifier.rb @@ -16,9 +16,11 @@ module Linguist # # Returns an Array of Language objects, most probable first. def self.call(blob, possible_languages) - language_names = possible_languages.map(&:name) - classify(Samples.cache, blob.data, language_names).map do |name, _| - Language[name] # Return the actual Language objects + Linguist.instrument("linguist.bayesian_classification") do + language_names = possible_languages.map(&:name) + classify(Samples.cache, blob.data, language_names).map do |name, _| + Language[name] # Return the actual Language objects + end end end diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 2490a9f6..68b4c4fc 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -105,19 +105,21 @@ module Linguist # Bail early if the blob is binary or empty. return nil if blob.likely_binary? || blob.binary? || blob.empty? - # Call each strategy until one candidate is returned. 
- STRATEGIES.reduce([]) do |languages, strategy| - candidates = strategy.call(blob, languages) - if candidates.size == 1 - return candidates.first - elsif candidates.size > 1 - # More than one candidate was found, pass them to the next strategy. - candidates - else - # No candiates were found, pass on languages from the previous strategy. - languages - end - end.first + Linguist.instrument("linguist.detection") do + # Call each strategy until one candidate is returned. + STRATEGIES.reduce([]) do |languages, strategy| + candidates = strategy.call(blob, languages) + if candidates.size == 1 + return candidates.first + elsif candidates.size > 1 + # More than one candidate was found, pass them to the next strategy. + candidates + else + # No candidates were found, pass on languages from the previous strategy. + languages + end + end.first + end end # Public: Get all Languages