From 804e23e99536187207863eb780e1e30a20c12c8d Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Fri, 3 Aug 2012 16:03:06 -0500 Subject: [PATCH] Extract separate language detection method --- lib/linguist/blob_helper.rb | 35 ++--------------------------------- lib/linguist/language.rb | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index d805df13..e56d0956 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -1,8 +1,6 @@ -require 'linguist/classifier' require 'linguist/generated' require 'linguist/language' require 'linguist/mime' -require 'linguist/samples' require 'charlock_holmes' require 'escape_utils' @@ -261,37 +259,8 @@ module Linguist def language if defined? @language @language - else - @language = guess_language - end - end - - # Internal: Guess language - # - # Please add additional test coverage to - # `test/test_blob.rb#test_language` if you make any changes. - # - # Returns a Language or nil - def guess_language - return if binary_mime_type? - - name = self.name.to_s - - # A bit of an elegant hack. If the file is exectable but extensionless, - # append a "magic" extension so it can be classified with other - # languages that have shebang scripts. - if extname.empty? && mode && (mode.to_i(8) & 05) == 05 - name += ".script!" - end - - possible_languages = Language.find_by_filename(name) - - if possible_languages.length > 1 - if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first - Language[result[0]] - end - else - possible_languages.first + elsif !binary_mime_type? 
+ @language = Language.detect(name.to_s, lambda { data }, mode) end end diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 643a5239..fb242041 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -2,6 +2,7 @@ require 'escape_utils' require 'pygments' require 'yaml' +require 'linguist/classifier' require 'linguist/samples' module Linguist @@ -62,6 +63,35 @@ module Linguist language end + # Public: Detects the Language of the blob. + # + # name - String filename + # data - String blob data. A callable (such as a lambda) may also be passed in for lazy + # loading. This behavior is deprecated and you should always + # pass in a String. + # mode - Optional String file mode, parsed as octal (defaults to nil) + # + # Returns Language or nil. + def self.detect(name, data, mode = nil) + # A bit of an elegant hack. If the file is executable but extensionless, + # append a "magic" extension so it can be classified with other + # languages that have shebang scripts. + if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05 + name += ".script!" + end + + possible_languages = find_by_filename(name) + + if possible_languages.length > 1 + data = data.call() if data.respond_to?(:call) + if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first + Language[result[0]] + end + else + possible_languages.first + end + end + # Public: Get all Languages # # Returns an Array of Languages