mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Allow classifier languages to be scoped
This commit is contained in:
@@ -74,7 +74,8 @@ module Linguist
|
|||||||
|
|
||||||
# Public: Guess language of data.
|
# Public: Guess language of data.
|
||||||
#
|
#
|
||||||
# data - Array of tokens or String data to analyze.
|
# data - Array of tokens or String data to analyze.
|
||||||
|
# languages - Array of Languages (or language names) to restrict to (default: all trained languages).
|
||||||
#
|
#
|
||||||
# Examples
|
# Examples
|
||||||
#
|
#
|
||||||
@@ -83,12 +84,14 @@ module Linguist
|
|||||||
#
|
#
|
||||||
# Returns sorted Array of result pairs. Each pair contains the
|
# Returns sorted Array of result pairs. Each pair contains the
|
||||||
# Language and a Float score.
|
# Language and a Float score.
|
||||||
def classify(tokens)
|
def classify(tokens, languages = @languages.keys)
|
||||||
tokens = Tokenizer.new(tokens).tokens if tokens.is_a?(String)
|
tokens = Tokenizer.new(tokens).tokens if tokens.is_a?(String)
|
||||||
|
|
||||||
scores = {}
|
scores = {}
|
||||||
@languages.keys.each do |language|
|
languages.each do |language|
|
||||||
scores[language] = tokens_probability(tokens, language) * language_probability(language)
|
language_name = language.is_a?(Language) ? language.name : language
|
||||||
|
scores[language_name] = tokens_probability(tokens, language_name) *
|
||||||
|
language_probability(language_name)
|
||||||
end
|
end
|
||||||
|
|
||||||
scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [Language[score[0]], score[1]] }
|
scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [Language[score[0]], score[1]] }
|
||||||
|
|||||||
@@ -30,6 +30,19 @@ class TestClassifier < Test::Unit::TestCase
|
|||||||
assert_equal Language["Objective-C"], results.first[0]
|
assert_equal Language["Objective-C"], results.first[0]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_restricted_classify
|
||||||
|
classifier = Classifier.new
|
||||||
|
classifier.train Language["Ruby"], fixture("ruby/foo.rb")
|
||||||
|
classifier.train Language["Objective-C"], fixture("objective-c/Foo.h")
|
||||||
|
classifier.train Language["Objective-C"], fixture("objective-c/Foo.m")
|
||||||
|
|
||||||
|
results = classifier.classify(fixture("objective-c/hello.m"), [Language["Objective-C"]])
|
||||||
|
assert_equal Language["Objective-C"], results.first[0]
|
||||||
|
|
||||||
|
results = classifier.classify(fixture("objective-c/hello.m"), [Language["Ruby"]])
|
||||||
|
assert_equal Language["Ruby"], results.first[0]
|
||||||
|
end
|
||||||
|
|
||||||
def test_instance_classify_empty
|
def test_instance_classify_empty
|
||||||
results = Classifier.instance.classify("")
|
results = Classifier.instance.classify("")
|
||||||
assert results.first[1] < 0.5, results.first.inspect
|
assert results.first[1] < 0.5, results.first.inspect
|
||||||
|
|||||||
Reference in New Issue
Block a user