mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
* Separate find_by_extension and find_by_filename: find_by_extension now takes a path as argument and not only the file extension. Currently only find_by_extension is used as a strategy.
* Add find_by_filename as first strategy.
101 lines
3.0 KiB
Ruby
101 lines
3.0 KiB
Ruby
require 'linguist/blob_helper'
|
||
require 'linguist/generated'
|
||
require 'linguist/grammars'
|
||
require 'linguist/heuristics'
|
||
require 'linguist/language'
|
||
require 'linguist/repository'
|
||
require 'linguist/samples'
|
||
require 'linguist/shebang'
|
||
require 'linguist/version'
|
||
|
||
class << Linguist
|
||
# Public: Detects the Language of the blob.
#
# Each entry of STRATEGIES is called in order with the blob and the candidates
# kept so far. Detection stops as soon as a strategy returns exactly one
# candidate. A strategy that returns nothing leaves the current candidates
# untouched. If the strategies run out while several candidates remain, the
# first of them is returned.
#
# Emits the "linguist.detection", "linguist.strategy" and "linguist.detected"
# instrumentation events through Linguist.instrument.
#
# blob - an object that includes the Linguist `BlobHelper` interface;
#        see Linguist::LazyBlob and Linguist::FileBlob for examples
#
# Returns Language or nil.
def detect(blob)
  # Bail early if the blob is binary or empty.
  return nil if blob.likely_binary? || blob.binary? || blob.empty?

  Linguist.instrument("linguist.detection", :blob => blob) do
    languages = []
    returning_strategy = nil

    STRATEGIES.each do |strategy|
      # Recorded before the call so the "linguist.detected" event always
      # names the last strategy that was consulted.
      returning_strategy = strategy

      candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
        strategy.call(blob, languages)
      end

      found = candidates.size

      # No candidates: keep what we already have and try the next strategy.
      # One or more: they become the input of the next strategy.
      languages = candidates if found >= 1

      # A single candidate is a definitive answer.
      break if found == 1
    end

    Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)

    languages.first
  end
end
|
||
|
||
# Internal: The strategies used to detect the language of a file.
#
# A strategy is an object that has a `.call` method that takes two arguments:
#
#   blob      - An object that quacks like a blob.
#   languages - An Array of candidate Language objects kept from the earlier
#               strategies (empty when none has produced any yet).
#
# A strategy should return an Array of Language candidates.
#
# Strategies are called in turn, in the order listed here, until a single
# Language is returned.
#
# The Array is frozen so the detection order cannot be altered by accident at
# runtime.
STRATEGIES = [
  Linguist::Strategy::Modeline,
  Linguist::Strategy::Filename,
  Linguist::Shebang,
  Linguist::Strategy::Extension,
  Linguist::Heuristics,
  Linguist::Classifier
].freeze
|
||
|
||
# Public: Set an instrumenter.
#
#     class CustomInstrumenter
#       def instrument(name, payload = {})
#         warn "Instrumenting #{name}: #{payload[:blob]}"
#         yield if block_given?
#       end
#     end
#
#     Linguist.instrumenter = CustomInstrumenter.new
#
# The instrumenter must conform to the `ActiveSupport::Notifications`
# interface, which defines `#instrument` and accepts:
#
# name    - the String name of the event (e.g. "linguist.detected")
# payload - a Hash of the event context.
#
# `#instrument` receives the instrumented block and is responsible for
# running it and returning its value; language detection uses that value.
attr_accessor :instrumenter
|
||
|
||
# Internal: Perform instrumentation on a block
#
#     Linguist.instrument("linguist.dosomething", :blob => blob) do
#       # logic to instrument here.
#     end
#
# args - the arguments handed unchanged to the instrumenter's `#instrument`
#        (an event name and a payload Hash in the calls made by this file).
# bk   - an optional block containing the logic to instrument.
#
# Returns the instrumenter's result when an instrumenter is set. Otherwise
# returns the block's value, or nil when no block is given.
def instrument(*args, &bk)
  # A configured instrumenter owns the call, including running the block.
  return instrumenter.instrument(*args, &bk) if instrumenter

  # Without an instrumenter the block still has to run so callers get
  # their result.
  yield if block_given?
end
|
||
|
||
end
|