mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Extract separate language detection method
This commit is contained in:
@@ -1,8 +1,6 @@
|
|||||||
require 'linguist/classifier'
|
|
||||||
require 'linguist/generated'
|
require 'linguist/generated'
|
||||||
require 'linguist/language'
|
require 'linguist/language'
|
||||||
require 'linguist/mime'
|
require 'linguist/mime'
|
||||||
require 'linguist/samples'
|
|
||||||
|
|
||||||
require 'charlock_holmes'
|
require 'charlock_holmes'
|
||||||
require 'escape_utils'
|
require 'escape_utils'
|
||||||
@@ -261,37 +259,8 @@ module Linguist
|
|||||||
def language
|
def language
|
||||||
if defined? @language
|
if defined? @language
|
||||||
@language
|
@language
|
||||||
else
|
elsif !binary_mime_type?
|
||||||
@language = guess_language
|
@language = Language.detect(name.to_s, lambda { data }, mode)
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Internal: Guess language
|
|
||||||
#
|
|
||||||
# Please add additional test coverage to
|
|
||||||
# `test/test_blob.rb#test_language` if you make any changes.
|
|
||||||
#
|
|
||||||
# Returns a Language or nil
|
|
||||||
def guess_language
|
|
||||||
return if binary_mime_type?
|
|
||||||
|
|
||||||
name = self.name.to_s
|
|
||||||
|
|
||||||
# A bit of an elegant hack. If the file is executable but extensionless,
|
|
||||||
# append a "magic" extension so it can be classified with other
|
|
||||||
# languages that have shebang scripts.
|
|
||||||
if extname.empty? && mode && (mode.to_i(8) & 05) == 05
|
|
||||||
name += ".script!"
|
|
||||||
end
|
|
||||||
|
|
||||||
possible_languages = Language.find_by_filename(name)
|
|
||||||
|
|
||||||
if possible_languages.length > 1
|
|
||||||
if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
|
|
||||||
Language[result[0]]
|
|
||||||
end
|
|
||||||
else
|
|
||||||
possible_languages.first
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ require 'escape_utils'
|
|||||||
require 'pygments'
|
require 'pygments'
|
||||||
require 'yaml'
|
require 'yaml'
|
||||||
|
|
||||||
|
require 'linguist/classifier'
|
||||||
require 'linguist/samples'
|
require 'linguist/samples'
|
||||||
|
|
||||||
module Linguist
|
module Linguist
|
||||||
@@ -62,6 +63,35 @@ module Linguist
|
|||||||
language
|
language
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Detects the Language of the blob.
|
||||||
|
#
|
||||||
|
# name - String filename
|
||||||
|
# data - String blob data. A block may also be passed in for lazy
|
||||||
|
# loading. This behavior is deprecated and you should always
|
||||||
|
# pass in a String.
|
||||||
|
# mode - Optional String mode (defaults to nil)
|
||||||
|
#
|
||||||
|
# Returns Language or nil.
|
||||||
|
def self.detect(name, data, mode = nil)
|
||||||
|
# A bit of an elegant hack. If the file is executable but extensionless,
|
||||||
|
# append a "magic" extension so it can be classified with other
|
||||||
|
# languages that have shebang scripts.
|
||||||
|
if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
|
||||||
|
name += ".script!"
|
||||||
|
end
|
||||||
|
|
||||||
|
possible_languages = find_by_filename(name)
|
||||||
|
|
||||||
|
if possible_languages.length > 1
|
||||||
|
data = data.call() if data.respond_to?(:call)
|
||||||
|
if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
|
||||||
|
Language[result[0]]
|
||||||
|
end
|
||||||
|
else
|
||||||
|
possible_languages.first
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# Public: Get all Languages
|
# Public: Get all Languages
|
||||||
#
|
#
|
||||||
# Returns an Array of Languages
|
# Returns an Array of Languages
|
||||||
|
|||||||
Reference in New Issue
Block a user