Instrument language detection per strategy.

* lib/linguist.rb: Linguist.instrument only yields when a block was given
  (the no-instrumenter branch previously yielded unconditionally).
* lib/linguist/classifier.rb: self.call no longer wraps its work in a
  "linguist.bayesian_classification" event.
* lib/linguist/language.rb: the "linguist.detection" event now carries the
  blob; every strategy call is wrapped in a "linguist.strategy" event; a
  block-less "linguist.detected" event reports the blob, the last strategy
  that was run, and the first remaining language.
* test/test_instrumentation.rb: new tests for the binary-blob and modeline
  cases, using a local event-recording instrumenter.

NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed. Hunk line counts match the @@ headers, but
leading whitespace follows Ruby's two-space convention by inference; check
it against the target tree (or apply with --ignore-whitespace).

diff --git a/lib/linguist.rb b/lib/linguist.rb
index 4419ff5b..3929efb9 100644
--- a/lib/linguist.rb
+++ b/lib/linguist.rb
@@ -14,7 +14,7 @@ class << Linguist
     if instrumenter
       instrumenter.instrument(*args, &bk)
     else
-      yield
+      yield if block_given?
     end
   end
 end
diff --git a/lib/linguist/classifier.rb b/lib/linguist/classifier.rb
index 208467e4..89a0df2f 100644
--- a/lib/linguist/classifier.rb
+++ b/lib/linguist/classifier.rb
@@ -16,11 +16,9 @@ module Linguist
     #
     # Returns an Array of Language objects, most probable first.
     def self.call(blob, possible_languages)
-      Linguist.instrument("linguist.bayesian_classification") do
-        language_names = possible_languages.map(&:name)
-        classify(Samples.cache, blob.data, language_names).map do |name, _|
-          Language[name] # Return the actual Language objects
-        end
+      language_names = possible_languages.map(&:name)
+      classify(Samples.cache, blob.data, language_names).map do |name, _|
+        Language[name] # Return the actual Language objects
       end
     end
 
diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb
index 68b4c4fc..a1ff3318 100644
--- a/lib/linguist/language.rb
+++ b/lib/linguist/language.rb
@@ -105,20 +105,30 @@ module Linguist
       # Bail early if the blob is binary or empty.
       return nil if blob.likely_binary? || blob.binary? || blob.empty?
 
-      Linguist.instrument("linguist.detection") do
+      Linguist.instrument("linguist.detection", :blob => blob) do
         # Call each strategy until one candidate is returned.
-        STRATEGIES.reduce([]) do |languages, strategy|
-          candidates = strategy.call(blob, languages)
+        languages = []
+        returning_strategy = nil
+
+        STRATEGIES.each do |strategy|
+          returning_strategy = strategy
+          candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
+            strategy.call(blob, languages)
+          end
           if candidates.size == 1
-            return candidates.first
+            languages = candidates
+            break
           elsif candidates.size > 1
             # More than one candidate was found, pass them to the next strategy.
-            candidates
+            languages = candidates
           else
-            # No candiates were found, pass on languages from the previous strategy.
-            languages
+            # No candidates, try the next strategy
           end
-        end.first
+        end
+
+        Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)
+
+        languages.first
       end
     end
 
diff --git a/test/test_instrumentation.rb b/test/test_instrumentation.rb
new file mode 100644
index 00000000..ab0615e5
--- /dev/null
+++ b/test/test_instrumentation.rb
@@ -0,0 +1,50 @@
+require_relative "./helper"
+
+class TestInstrumentation < Minitest::Test
+  include Linguist
+
+  class LocalInstrumenter
+    Event = Struct.new(:name, :args)
+
+    attr_reader :events
+
+    def initialize
+      @events = []
+    end
+
+    def instrument(name, *args)
+      @events << Event.new(name, args)
+      yield if block_given?
+    end
+  end
+
+  def setup
+    Linguist.instrumenter = LocalInstrumenter.new
+  end
+
+  def teardown
+    Linguist.instrumenter = nil
+  end
+
+  def test_detection_instrumentation_with_binary_blob
+    binary_blob = fixture_blob("Binary/octocat.ai")
+    Language.detect(binary_blob)
+
+    # Shouldn't instrument this (as it's binary)
+    assert_equal 0, Linguist.instrumenter.events.size
+  end
+
+  def test_modeline_instrumentation
+    blob = fixture_blob("Data/Modelines/ruby")
+    Language.detect(blob)
+
+    detect_event = Linguist.instrumenter.events.last
+    detect_event_payload = detect_event[:args].first
+
+    assert_equal 3, Linguist.instrumenter.events.size
+    assert_equal "linguist.detected", detect_event.name
+    assert_equal Language['Ruby'], detect_event_payload[:language]
+    assert_equal blob, detect_event_payload[:blob]
+    assert_equal Linguist::Strategy::Modeline, detect_event_payload[:strategy]
+  end
+end