From 6014bd015e56dbfd212bc1a6d9c75cc36a1449c7 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Fri, 3 Aug 2012 13:53:12 -0500 Subject: [PATCH] Change find_by_filename api to return all matching languages --- lib/linguist/blob_helper.rb | 29 +++++------------ lib/linguist/language.rb | 36 +++++---------------- test/test_language.rb | 63 ++++++------------------------------- 3 files changed, 25 insertions(+), 103 deletions(-) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 38e6d263..ee9f32fa 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -410,14 +410,15 @@ module Linguist def guess_language return if binary_mime_type? - # Disambiguate between multiple language extensions - disambiguate_extension_language || + possible_languages = Language.find_by_filename(name.to_s) - # See if there is a Language for the extension - Language.find_by_filename(name.to_s) || - - # Try to detect Language from shebang line - shebang_language + if possible_languages.length > 1 + if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first + Language[result[0]] + end + else + possible_languages.first || shebang_language + end end # Internal: Get the lexer of the blob. @@ -427,20 +428,6 @@ module Linguist language ? language.lexer : Pygments::Lexer.find_by_name('Text only') end - # Internal: Disambiguates between multiple language extensions. - # - # Returns a Language or nil. - def disambiguate_extension_language - if Language.ambiguous?(extname) - possible_languages = Language.all.select { |l| l.extensions.include?(extname) }.map(&:name) - if possible_languages.any? - if result = Classifier.classify(Samples::DATA, data, possible_languages).first - Language[result[0]] - end - end - end - end - # Internal: Extract the script name from the shebang line # # Requires Blob#data diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 24b78673..8586fa7e 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -15,8 +15,8 @@ module Linguist @index = {} @name_index = {} @alias_index = {} - @extension_index = {} - @filename_index = {} + @extension_index = Hash.new { |h,k| h[k] = [] } + @filename_index = Hash.new { |h,k| h[k] = [] } # Valid Languages types TYPES = [:data, :markup, :programming] @@ -60,13 +60,7 @@ module Linguist raise ArgumentError, "Extension is missing a '.': #{extension.inspect}" end - unless ambiguous?(extension) - # Index the extension with a leading ".": ".rb" - @extension_index[extension] = language - - # Index the extension without a leading ".": "rb" - @extension_index[extension.sub(/^\./, '')] = language - end + @extension_index[extension] << language end language.overrides.each do |extension| @@ -82,7 +76,7 @@ module Linguist end language.filenames.each do |filename| - @filename_index[filename] = language + @filename_index[filename] << language end language @@ -123,33 +117,19 @@ module Linguist @alias_index[name] end - # Public: Look up Language by extension. - # - # extension - The extension String. May include leading "." - # - # Examples - # - # Language.find_by_extension('.rb') - # # => # - # - # Returns the Language or nil if none was found. - def self.find_by_extension(extension) - @extension_index[extension] - end - - # Public: Look up Language by filename. + # Public: Look up Languages by filename. # # filename - The path String. # # Examples # # Language.find_by_filename('foo.rb') - # # => # + # # => [#] # - # Returns the Language or nil if none was found. + # Returns all matching Languages or [] if none were found. def self.find_by_filename(filename) basename, extname = File.basename(filename), File.extname(filename) - @filename_index[basename] || @extension_index[extname] + @filename_index[basename] + @extension_index[extname] end # Public: Look up Language by its name or lexer. diff --git a/test/test_language.rb b/test/test_language.rb index f94acf34..3e788d57 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -10,25 +10,12 @@ class TestLanguage < Test::Unit::TestCase def test_ambiguous_extensions assert Language.ambiguous?('.cls') - assert_equal Language['Apex'], Language.find_by_extension('cls') - assert Language.ambiguous?('.h') - assert_equal Language['C'], Language.find_by_extension('h') - assert Language.ambiguous?('.m') - assert_equal Language['Objective-C'], Language.find_by_extension('m') - assert Language.ambiguous?('.pl') - assert_equal Language['Perl'], Language.find_by_extension('pl') - assert Language.ambiguous?('.r') - assert_equal Language['R'], Language.find_by_extension('r') - assert Language.ambiguous?('.t') - assert_equal Language['Turing'], Language.find_by_extension('t') - assert Language.ambiguous?('.v') - assert_equal Language['Verilog'], Language.find_by_extension('v') end def test_lexer @@ -242,48 +229,16 @@ class TestLanguage < Test::Unit::TestCase end end - def test_find_by_extension - assert_equal Language['Ruby'], Language.find_by_extension('.rb') - assert_equal Language['Ruby'], Language.find_by_extension('rb') - assert_equal Language['Dart'], Language.find_by_extension('dart') - assert_equal Language['Groff'], Language.find_by_extension('man') - assert_equal Language['Groff'], Language.find_by_extension('1') - assert_equal Language['Groff'], Language.find_by_extension('2') - assert_equal Language['Groff'], Language.find_by_extension('3') - assert_equal Language['PHP'], Language.find_by_extension('php') - assert_equal Language['PHP'], Language.find_by_extension('php3') - assert_equal Language['PHP'], Language.find_by_extension('php4') - assert_equal Language['PHP'], Language.find_by_extension('php5') - assert_equal Language['PowerShell'], Language.find_by_extension('psm1') - assert_equal Language['PowerShell'], Language.find_by_extension('ps1') - - # Aliases for Streamline.js ( https://github.com/Sage/streamlinejs ) - assert_equal Language['JavaScript'], Language.find_by_extension('_js') - assert_equal Language['CoffeeScript'], Language.find_by_extension('_coffee') - - assert_nil Language.find_by_extension('.nkt') - end - - def test_find_all_by_extension - Language.all.each do |language| - assert_equal language, Language.find_by_extension(language.primary_extension) - - language.extensions.each do |extension| - unless Language.ambiguous?(extension) - assert_equal language, Language.find_by_extension(extension) - end - end - end - end - def test_find_by_filename - assert_equal Language['Shell'], Language.find_by_filename('PKGBUILD') - assert_equal Language['Ruby'], Language.find_by_filename('foo.rb') - assert_equal Language['Ruby'], Language.find_by_filename('foo/bar.rb') - assert_equal Language['Ruby'], Language.find_by_filename('Rakefile') - assert_nil Language.find_by_filename('rb') - assert_nil Language.find_by_filename('.rb') - assert_nil Language.find_by_filename('.nkt') + assert_equal [Language['Shell']], Language.find_by_filename('PKGBUILD') + assert_equal [Language['Ruby']], Language.find_by_filename('foo.rb') + assert_equal [Language['Ruby']], Language.find_by_filename('foo/bar.rb') + assert_equal [Language['Ruby']], Language.find_by_filename('Rakefile') + assert_equal [Language['Ruby']], Language.find_by_filename('PKGBUILD.rb') + assert_equal [Language['C'], Language['C++'], Language['Objective-C']], Language.find_by_filename('foo.h') + assert_equal [], Language.find_by_filename('rb') + assert_equal [], Language.find_by_filename('.rb') + assert_equal [], Language.find_by_filename('.nkt') end def test_find