Extract separate language detection method

This commit is contained in:
Joshua Peek
2012-08-03 16:03:06 -05:00
parent 41b7d13aa7
commit 804e23e995
2 changed files with 32 additions and 33 deletions

View File

@@ -1,8 +1,6 @@
require 'linguist/classifier'
require 'linguist/generated'
require 'linguist/language'
require 'linguist/mime'
require 'linguist/samples'
require 'charlock_holmes'
require 'escape_utils'
@@ -261,37 +259,8 @@ module Linguist
def language
if defined? @language
@language
else
@language = guess_language
end
end
# Internal: Guess language
#
# Please add additional test coverage to
# `test/test_blob.rb#test_language` if you make any changes.
#
# Returns a Language or nil
def guess_language
return if binary_mime_type?
name = self.name.to_s
# A bit of an elegant hack. If the file is executable but extensionless,
# append a "magic" extension so it can be classified with other
# languages that have shebang scripts.
if extname.empty? && mode && (mode.to_i(8) & 05) == 05
name += ".script!"
end
possible_languages = Language.find_by_filename(name)
if possible_languages.length > 1
if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
Language[result[0]]
end
else
possible_languages.first
elsif !binary_mime_type?
@language = Language.detect(name.to_s, lambda { data }, mode)
end
end

View File

@@ -2,6 +2,7 @@ require 'escape_utils'
require 'pygments'
require 'yaml'
require 'linguist/classifier'
require 'linguist/samples'
module Linguist
@@ -62,6 +63,35 @@ module Linguist
language
end
# Public: Detects the Language of the blob.
#
# name - String filename
# data - String blob data. A block may also be passed in for lazy
# loading. This behavior is deprecated and you should always
# pass in a String.
# mode - Optional String mode (defaults to nil)
#
# Returns Language or nil.
def self.detect(name, data, mode = nil)
  # Extensionless files whose octal mode has both the 04 and 01 bits set are
  # looked up under a synthetic ".script!" extension, so they are matched
  # against the languages registered for shebang scripts.
  shebang_candidate = File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
  lookup_name = shebang_candidate ? name + ".script!" : name

  candidates = find_by_filename(lookup_name)

  # Zero or one match: the filename alone decides (nil when nothing matched),
  # and the blob data is never touched.
  return candidates.first unless candidates.length > 1

  # Ambiguous filename: only now resolve lazily supplied data (anything that
  # responds to #call) and let the classifier choose among the candidates.
  contents = data.respond_to?(:call) ? data.call : data
  best_match = Classifier.classify(Samples::DATA, contents, candidates.map(&:name)).first

  # Yields nil when the classifier produced no result.
  Language[best_match[0]] if best_match
end
# Public: Get all Languages
#
# Returns an Array of Languages