Files
linguist/lib/linguist.rb
Paul Chaignon 9b941a34f0 Use filenames as a definitive answer (#2006)
* Separate find_by_extension and find_by_filename
find_by_extension now takes a path as argument and not only the file extension.
Currently only find_by_extension is used as a strategy.

* Add find_by_filename as first strategy
2016-12-12 12:34:33 -08:00

101 lines
3.0 KiB
Ruby
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

require 'linguist/blob_helper'
require 'linguist/generated'
require 'linguist/grammars'
require 'linguist/heuristics'
require 'linguist/language'
require 'linguist/repository'
require 'linguist/samples'
require 'linguist/shebang'
require 'linguist/version'
class << Linguist
# Public: Detects the Language of the blob.
#
# blob - an object that includes the Linguist `BlobHelper` interface;
# see Linguist::LazyBlob and Linguist::FileBlob for examples
#
# Returns Language or nil.
def detect(blob)
# Bail early if the blob is binary or empty.
return nil if blob.likely_binary? || blob.binary? || blob.empty?
Linguist.instrument("linguist.detection", :blob => blob) do
# Call each strategy until one candidate is returned.
languages = []
returning_strategy = nil
STRATEGIES.each do |strategy|
returning_strategy = strategy
candidates = Linguist.instrument("linguist.strategy", :blob => blob, :strategy => strategy, :candidates => languages) do
strategy.call(blob, languages)
end
if candidates.size == 1
languages = candidates
break
elsif candidates.size > 1
# More than one candidate was found, pass them to the next strategy.
languages = candidates
else
# No candidates, try the next strategy
end
end
Linguist.instrument("linguist.detected", :blob => blob, :strategy => returning_strategy, :language => languages.first)
languages.first
end
end
# Internal: The strategies used to detect the language of a file.
#
# A strategy is an object that has a `.call` method that takes two arguments:
#
# blob - An object that quacks like a blob.
# languages - An Array of candidate Language objects that were returned by the
#     previous strategy.
#
# A strategy should return an Array of Language candidates.
#
# Strategies are called in turn until a single Language is returned.
STRATEGIES = [
Linguist::Strategy::Modeline,
Linguist::Strategy::Filename,
Linguist::Shebang,
Linguist::Strategy::Extension,
Linguist::Heuristics,
Linguist::Classifier
]
# Public: Set an instrumenter.
#
# class CustomInstrumenter
# def instrument(name, payload = {})
# warn "Instrumenting #{name}: #{payload[:blob]}"
# end
# end
#
# Linguist.instrumenter = CustomInstrumenter
#
# The instrumenter must conform to the `ActiveSupport::Notifications`
# interface, which defines `#instrument` and accepts:
#
# name - the String name of the event (e.g. "linguist.detected")
# payload - a Hash of the exception context.
attr_accessor :instrumenter
# Internal: Perform instrumentation on a block
#
# Linguist.instrument("linguist.dosomething", :blob => blob) do
# # logic to instrument here.
# end
#
def instrument(*args, &bk)
if instrumenter
instrumenter.instrument(*args, &bk)
elsif block_given?
yield
end
end
end