Allow classifier languages to be scoped

This commit is contained in:
Joshua Peek
2012-06-19 14:21:42 -05:00
parent 8f85a447de
commit d566b35020
2 changed files with 20 additions and 4 deletions

View File

@@ -74,7 +74,8 @@ module Linguist
# Public: Guess language of data. # Public: Guess language of data.
# #
# data - Array of tokens or String data to analyze. # data - Array of tokens or String data to analyze.
# languages - Array of Languages to restrict to.
# #
# Examples # Examples
# #
@@ -83,12 +84,14 @@ module Linguist
# #
# Returns sorted Array of result pairs. Each pair contains the # Returns sorted Array of result pairs. Each pair contains the
# Language and a Float score. # Language and a Float score.
def classify(tokens) def classify(tokens, languages = @languages.keys)
tokens = Tokenizer.new(tokens).tokens if tokens.is_a?(String) tokens = Tokenizer.new(tokens).tokens if tokens.is_a?(String)
scores = {} scores = {}
@languages.keys.each do |language| languages.each do |language|
scores[language] = tokens_probability(tokens, language) * language_probability(language) language_name = language.is_a?(Language) ? language.name : language
scores[language_name] = tokens_probability(tokens, language_name) *
language_probability(language_name)
end end
scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [Language[score[0]], score[1]] } scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [Language[score[0]], score[1]] }

View File

@@ -30,6 +30,19 @@ class TestClassifier < Test::Unit::TestCase
assert_equal Language["Objective-C"], results.first[0] assert_equal Language["Objective-C"], results.first[0]
end end
# Trains a fresh classifier on Ruby and Objective-C fixtures, then checks
# that passing an explicit language list to #classify makes the top-ranked
# result come from that list: the same objective-c/hello.m fixture is
# classified once restricted to Objective-C and once restricted to Ruby.
def test_restricted_classify
  classifier = Classifier.new

  # Training order is kept as Ruby first, then the two Objective-C fixtures.
  [
    ["Ruby",        "ruby/foo.rb"],
    ["Objective-C", "objective-c/Foo.h"],
    ["Objective-C", "objective-c/Foo.m"]
  ].each do |language_name, fixture_path|
    classifier.train Language[language_name], fixture(fixture_path)
  end

  # With a single candidate language, that language must rank first.
  ["Objective-C", "Ruby"].each do |language_name|
    ranked = classifier.classify(fixture("objective-c/hello.m"), [Language[language_name]])
    assert_equal Language[language_name], ranked.first[0]
  end
end
def test_instance_classify_empty def test_instance_classify_empty
results = Classifier.instance.classify("") results = Classifier.instance.classify("")
assert results.first[1] < 0.5, results.first.inspect assert results.first[1] < 0.5, results.first.inspect