mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
add instrumentation to detection and classification
This commit is contained in:
@@ -6,3 +6,15 @@ require 'linguist/repository'
|
|||||||
require 'linguist/samples'
|
require 'linguist/samples'
|
||||||
require 'linguist/shebang'
|
require 'linguist/shebang'
|
||||||
require 'linguist/version'
|
require 'linguist/version'
|
||||||
|
|
||||||
|
module Linguist
  class << self
    # Public: Get/Set the object used to instrument Linguist operations.
    #
    # When set, it must respond to `instrument(*args, &block)`. It is nil
    # until assigned, in which case instrumentation is a plain pass-through.
    attr_accessor :instrumenter

    # Internal: Run a unit of work, reporting it to the instrumenter if one
    # has been configured.
    #
    # args - Arguments forwarded unchanged to `instrumenter.instrument`
    #        (callers in this library pass an event name String).
    # bk   - Optional block containing the work being instrumented.
    #
    # Returns the instrumenter's return value when one is set; otherwise the
    # block's value, or nil when no block was given.
    def instrument(*args, &bk)
      if instrumenter
        instrumenter.instrument(*args, &bk)
      elsif block_given?
        # Guarded so that a block-less call without an instrumenter is a
        # no-op instead of raising LocalJumpError from a bare `yield`.
        yield
      end
    end
  end
end
|
||||||
|
|||||||
@@ -16,11 +16,13 @@ module Linguist
|
|||||||
#
|
#
|
||||||
# Returns an Array of Language objects, most probable first.
|
# Returns an Array of Language objects, most probable first.
|
||||||
def self.call(blob, possible_languages)
  # The whole classification runs inside one instrumentation event.
  Linguist.instrument("linguist.bayesian_classification") do
    candidate_names = possible_languages.map(&:name)
    ranked = classify(Samples.cache, blob.data, candidate_names)

    # Each ranked entry is a pair; only the name is used, to look up the
    # actual Language object.
    ranked.map { |name, _| Language[name] }
  end
end
|
||||||
|
|
||||||
# Public: Train classifier that data is a certain language.
|
# Public: Train classifier that data is a certain language.
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -105,6 +105,7 @@ module Linguist
|
|||||||
# Bail early if the blob is binary or empty.
|
# Bail early if the blob is binary or empty.
|
||||||
return nil if blob.likely_binary? || blob.binary? || blob.empty?
|
return nil if blob.likely_binary? || blob.binary? || blob.empty?
|
||||||
|
|
||||||
|
Linguist.instrument("linguist.detection") do
|
||||||
# Call each strategy until one candidate is returned.
|
# Call each strategy until one candidate is returned.
|
||||||
STRATEGIES.reduce([]) do |languages, strategy|
|
STRATEGIES.reduce([]) do |languages, strategy|
|
||||||
candidates = strategy.call(blob, languages)
|
candidates = strategy.call(blob, languages)
|
||||||
@@ -119,6 +120,7 @@ module Linguist
|
|||||||
end
|
end
|
||||||
end.first
|
end.first
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# Public: Get all Languages
|
# Public: Get all Languages
|
||||||
#
|
#
|
||||||
|
|||||||
Reference in New Issue
Block a user