Change find_by_filename API to return all matching languages

This commit is contained in:
Joshua Peek
2012-08-03 13:53:12 -05:00
parent 4a06d2ea7e
commit 6014bd015e
3 changed files with 25 additions and 103 deletions

View File

@@ -410,14 +410,15 @@ module Linguist
def guess_language
return if binary_mime_type?
# Disambiguate between multiple language extensions
disambiguate_extension_language ||
possible_languages = Language.find_by_filename(name.to_s)
# See if there is a Language for the extension
Language.find_by_filename(name.to_s) ||
# Try to detect Language from shebang line
shebang_language
if possible_languages.length > 1
if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
Language[result[0]]
end
else
possible_languages.first || shebang_language
end
end
# Internal: Get the lexer of the blob.
@@ -427,20 +428,6 @@ module Linguist
language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
end
# Internal: Disambiguates between multiple language extensions.
#
# Returns a Language or nil.
def disambiguate_extension_language
if Language.ambiguous?(extname)
possible_languages = Language.all.select { |l| l.extensions.include?(extname) }.map(&:name)
if possible_languages.any?
if result = Classifier.classify(Samples::DATA, data, possible_languages).first
Language[result[0]]
end
end
end
end
# Internal: Extract the script name from the shebang line
#
# Requires Blob#data

View File

@@ -15,8 +15,8 @@ module Linguist
@index = {}
@name_index = {}
@alias_index = {}
@extension_index = {}
@filename_index = {}
@extension_index = Hash.new { |h,k| h[k] = [] }
@filename_index = Hash.new { |h,k| h[k] = [] }
# Valid Languages types
TYPES = [:data, :markup, :programming]
@@ -60,13 +60,7 @@ module Linguist
raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
end
unless ambiguous?(extension)
# Index the extension with a leading ".": ".rb"
@extension_index[extension] = language
# Index the extension without a leading ".": "rb"
@extension_index[extension.sub(/^\./, '')] = language
end
@extension_index[extension] << language
end
language.overrides.each do |extension|
@@ -82,7 +76,7 @@ module Linguist
end
language.filenames.each do |filename|
@filename_index[filename] = language
@filename_index[filename] << language
end
language
@@ -123,33 +117,19 @@ module Linguist
@alias_index[name]
end
# Public: Look up Language by extension.
#
# extension - The extension String. May include leading "."
#
# Examples
#
# Language.find_by_extension('.rb')
# # => #<Language name="Ruby">
#
# Returns the Language or nil if none was found.
def self.find_by_extension(extension)
@extension_index[extension]
end
# Public: Look up Language by filename.
# Public: Look up Languages by filename.
#
# filename - The path String.
#
# Examples
#
# Language.find_by_filename('foo.rb')
# # => #<Language name="Ruby">
# # => [#<Language name="Ruby">]
#
# Returns the Language or nil if none was found.
# Returns all matching Languages or [] if none were found.
def self.find_by_filename(filename)
basename, extname = File.basename(filename), File.extname(filename)
@filename_index[basename] || @extension_index[extname]
@filename_index[basename] + @extension_index[extname]
end
# Public: Look up Language by its name or lexer.

View File

@@ -10,25 +10,12 @@ class TestLanguage < Test::Unit::TestCase
def test_ambiguous_extensions
assert Language.ambiguous?('.cls')
assert_equal Language['Apex'], Language.find_by_extension('cls')
assert Language.ambiguous?('.h')
assert_equal Language['C'], Language.find_by_extension('h')
assert Language.ambiguous?('.m')
assert_equal Language['Objective-C'], Language.find_by_extension('m')
assert Language.ambiguous?('.pl')
assert_equal Language['Perl'], Language.find_by_extension('pl')
assert Language.ambiguous?('.r')
assert_equal Language['R'], Language.find_by_extension('r')
assert Language.ambiguous?('.t')
assert_equal Language['Turing'], Language.find_by_extension('t')
assert Language.ambiguous?('.v')
assert_equal Language['Verilog'], Language.find_by_extension('v')
end
def test_lexer
@@ -242,48 +229,16 @@ class TestLanguage < Test::Unit::TestCase
end
end
def test_find_by_extension
assert_equal Language['Ruby'], Language.find_by_extension('.rb')
assert_equal Language['Ruby'], Language.find_by_extension('rb')
assert_equal Language['Dart'], Language.find_by_extension('dart')
assert_equal Language['Groff'], Language.find_by_extension('man')
assert_equal Language['Groff'], Language.find_by_extension('1')
assert_equal Language['Groff'], Language.find_by_extension('2')
assert_equal Language['Groff'], Language.find_by_extension('3')
assert_equal Language['PHP'], Language.find_by_extension('php')
assert_equal Language['PHP'], Language.find_by_extension('php3')
assert_equal Language['PHP'], Language.find_by_extension('php4')
assert_equal Language['PHP'], Language.find_by_extension('php5')
assert_equal Language['PowerShell'], Language.find_by_extension('psm1')
assert_equal Language['PowerShell'], Language.find_by_extension('ps1')
# Aliases for Streamline.js ( https://github.com/Sage/streamlinejs )
assert_equal Language['JavaScript'], Language.find_by_extension('_js')
assert_equal Language['CoffeeScript'], Language.find_by_extension('_coffee')
assert_nil Language.find_by_extension('.nkt')
end
def test_find_all_by_extension
Language.all.each do |language|
assert_equal language, Language.find_by_extension(language.primary_extension)
language.extensions.each do |extension|
unless Language.ambiguous?(extension)
assert_equal language, Language.find_by_extension(extension)
end
end
end
end
def test_find_by_filename
assert_equal Language['Shell'], Language.find_by_filename('PKGBUILD')
assert_equal Language['Ruby'], Language.find_by_filename('foo.rb')
assert_equal Language['Ruby'], Language.find_by_filename('foo/bar.rb')
assert_equal Language['Ruby'], Language.find_by_filename('Rakefile')
assert_nil Language.find_by_filename('rb')
assert_nil Language.find_by_filename('.rb')
assert_nil Language.find_by_filename('.nkt')
assert_equal [Language['Shell']], Language.find_by_filename('PKGBUILD')
assert_equal [Language['Ruby']], Language.find_by_filename('foo.rb')
assert_equal [Language['Ruby']], Language.find_by_filename('foo/bar.rb')
assert_equal [Language['Ruby']], Language.find_by_filename('Rakefile')
assert_equal [Language['Ruby']], Language.find_by_filename('PKGBUILD.rb')
assert_equal [Language['C'], Language['C++'], Language['Objective-C']], Language.find_by_filename('foo.h')
assert_equal [], Language.find_by_filename('rb')
assert_equal [], Language.find_by_filename('.rb')
assert_equal [], Language.find_by_filename('.nkt')
end
def test_find