Change find_by_filename API to return all matching languages

This commit is contained in:
Joshua Peek
2012-08-03 13:53:12 -05:00
parent 4a06d2ea7e
commit 6014bd015e
3 changed files with 25 additions and 103 deletions

View File

@@ -410,14 +410,15 @@ module Linguist
def guess_language def guess_language
return if binary_mime_type? return if binary_mime_type?
# Disambiguate between multiple language extensions possible_languages = Language.find_by_filename(name.to_s)
disambiguate_extension_language ||
# See if there is a Language for the extension if possible_languages.length > 1
Language.find_by_filename(name.to_s) || if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
Language[result[0]]
# Try to detect Language from shebang line end
shebang_language else
possible_languages.first || shebang_language
end
end end
# Internal: Get the lexer of the blob. # Internal: Get the lexer of the blob.
@@ -427,20 +428,6 @@ module Linguist
language ? language.lexer : Pygments::Lexer.find_by_name('Text only') language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
end end
# Internal: Disambiguates between multiple language extensions.
#
# Returns a Language or nil.
def disambiguate_extension_language
if Language.ambiguous?(extname)
possible_languages = Language.all.select { |l| l.extensions.include?(extname) }.map(&:name)
if possible_languages.any?
if result = Classifier.classify(Samples::DATA, data, possible_languages).first
Language[result[0]]
end
end
end
end
# Internal: Extract the script name from the shebang line # Internal: Extract the script name from the shebang line
# #
# Requires Blob#data # Requires Blob#data

View File

@@ -15,8 +15,8 @@ module Linguist
@index = {} @index = {}
@name_index = {} @name_index = {}
@alias_index = {} @alias_index = {}
@extension_index = {} @extension_index = Hash.new { |h,k| h[k] = [] }
@filename_index = {} @filename_index = Hash.new { |h,k| h[k] = [] }
# Valid Languages types # Valid Languages types
TYPES = [:data, :markup, :programming] TYPES = [:data, :markup, :programming]
@@ -60,13 +60,7 @@ module Linguist
raise ArgumentError, "Extension is missing a '.': #{extension.inspect}" raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
end end
unless ambiguous?(extension) @extension_index[extension] << language
# Index the extension with a leading ".": ".rb"
@extension_index[extension] = language
# Index the extension without a leading ".": "rb"
@extension_index[extension.sub(/^\./, '')] = language
end
end end
language.overrides.each do |extension| language.overrides.each do |extension|
@@ -82,7 +76,7 @@ module Linguist
end end
language.filenames.each do |filename| language.filenames.each do |filename|
@filename_index[filename] = language @filename_index[filename] << language
end end
language language
@@ -123,33 +117,19 @@ module Linguist
@alias_index[name] @alias_index[name]
end end
# Public: Look up Language by extension. # Public: Look up Languages by filename.
#
# extension - The extension String. May include leading "."
#
# Examples
#
# Language.find_by_extension('.rb')
# # => #<Language name="Ruby">
#
# Returns the Language or nil if none was found.
def self.find_by_extension(extension)
@extension_index[extension]
end
# Public: Look up Language by filename.
# #
# filename - The path String. # filename - The path String.
# #
# Examples # Examples
# #
# Language.find_by_filename('foo.rb') # Language.find_by_filename('foo.rb')
# # => #<Language name="Ruby"> # # => [#<Language name="Ruby">]
# #
# Returns the Language or nil if none was found. # Returns all matching Languages or [] if none were found.
def self.find_by_filename(filename) def self.find_by_filename(filename)
basename, extname = File.basename(filename), File.extname(filename) basename, extname = File.basename(filename), File.extname(filename)
@filename_index[basename] || @extension_index[extname] @filename_index[basename] + @extension_index[extname]
end end
# Public: Look up Language by its name or lexer. # Public: Look up Language by its name or lexer.

View File

@@ -10,25 +10,12 @@ class TestLanguage < Test::Unit::TestCase
def test_ambiguous_extensions def test_ambiguous_extensions
assert Language.ambiguous?('.cls') assert Language.ambiguous?('.cls')
assert_equal Language['Apex'], Language.find_by_extension('cls')
assert Language.ambiguous?('.h') assert Language.ambiguous?('.h')
assert_equal Language['C'], Language.find_by_extension('h')
assert Language.ambiguous?('.m') assert Language.ambiguous?('.m')
assert_equal Language['Objective-C'], Language.find_by_extension('m')
assert Language.ambiguous?('.pl') assert Language.ambiguous?('.pl')
assert_equal Language['Perl'], Language.find_by_extension('pl')
assert Language.ambiguous?('.r') assert Language.ambiguous?('.r')
assert_equal Language['R'], Language.find_by_extension('r')
assert Language.ambiguous?('.t') assert Language.ambiguous?('.t')
assert_equal Language['Turing'], Language.find_by_extension('t')
assert Language.ambiguous?('.v') assert Language.ambiguous?('.v')
assert_equal Language['Verilog'], Language.find_by_extension('v')
end end
def test_lexer def test_lexer
@@ -242,48 +229,16 @@ class TestLanguage < Test::Unit::TestCase
end end
end end
def test_find_by_extension
assert_equal Language['Ruby'], Language.find_by_extension('.rb')
assert_equal Language['Ruby'], Language.find_by_extension('rb')
assert_equal Language['Dart'], Language.find_by_extension('dart')
assert_equal Language['Groff'], Language.find_by_extension('man')
assert_equal Language['Groff'], Language.find_by_extension('1')
assert_equal Language['Groff'], Language.find_by_extension('2')
assert_equal Language['Groff'], Language.find_by_extension('3')
assert_equal Language['PHP'], Language.find_by_extension('php')
assert_equal Language['PHP'], Language.find_by_extension('php3')
assert_equal Language['PHP'], Language.find_by_extension('php4')
assert_equal Language['PHP'], Language.find_by_extension('php5')
assert_equal Language['PowerShell'], Language.find_by_extension('psm1')
assert_equal Language['PowerShell'], Language.find_by_extension('ps1')
# Aliases for Streamline.js ( https://github.com/Sage/streamlinejs )
assert_equal Language['JavaScript'], Language.find_by_extension('_js')
assert_equal Language['CoffeeScript'], Language.find_by_extension('_coffee')
assert_nil Language.find_by_extension('.nkt')
end
def test_find_all_by_extension
Language.all.each do |language|
assert_equal language, Language.find_by_extension(language.primary_extension)
language.extensions.each do |extension|
unless Language.ambiguous?(extension)
assert_equal language, Language.find_by_extension(extension)
end
end
end
end
def test_find_by_filename def test_find_by_filename
assert_equal Language['Shell'], Language.find_by_filename('PKGBUILD') assert_equal [Language['Shell']], Language.find_by_filename('PKGBUILD')
assert_equal Language['Ruby'], Language.find_by_filename('foo.rb') assert_equal [Language['Ruby']], Language.find_by_filename('foo.rb')
assert_equal Language['Ruby'], Language.find_by_filename('foo/bar.rb') assert_equal [Language['Ruby']], Language.find_by_filename('foo/bar.rb')
assert_equal Language['Ruby'], Language.find_by_filename('Rakefile') assert_equal [Language['Ruby']], Language.find_by_filename('Rakefile')
assert_nil Language.find_by_filename('rb') assert_equal [Language['Ruby']], Language.find_by_filename('PKGBUILD.rb')
assert_nil Language.find_by_filename('.rb') assert_equal [Language['C'], Language['C++'], Language['Objective-C']], Language.find_by_filename('foo.h')
assert_nil Language.find_by_filename('.nkt') assert_equal [], Language.find_by_filename('rb')
assert_equal [], Language.find_by_filename('.rb')
assert_equal [], Language.find_by_filename('.nkt')
end end
def test_find def test_find