Dump classifier results

This commit is contained in:
Joshua Peek
2012-06-08 14:13:26 -05:00
parent 0172623061
commit 9ecab364d1
4 changed files with 6157 additions and 1 deletions

View File

@@ -1,3 +1,4 @@
require 'rake/clean'
require 'rake/testtask'
task :default => :test
@@ -5,3 +6,14 @@ task :default => :test
# Register the test task with its default settings, except that the
# `warning` option is switched on for the test run.
Rake::TestTask.new { |test_task| test_task.warning = true }
# Dump the object returned by Linguist::Sample.classifier to
# lib/linguist/classifier.yml as YAML.
#
# This is a Rake file task without prerequisites, so Rake runs the body only
# when the target file does not exist yet; run `rake clobber` first to force
# a rebuild.
file 'lib/linguist/classifier.yml' do |f|
  # Required inside the task so that other tasks do not load them.
  # 'yaml' is required explicitly instead of relying on linguist/sample
  # having loaded it as a side effect.
  require 'yaml'
  require 'linguist/sample'

  classifier = Linguist::Sample.classifier
  # The block form of File.open closes the file even if dumping raises.
  File.open(f.name, 'w') { |io| YAML.dump(classifier, io) }
end

# The dump is a generated artifact, so `rake clobber` is allowed to delete it.
CLOBBER.include 'lib/linguist/classifier.yml'

# Convenience alias: `rake classifier` builds the file task above.
task :classifier => ['lib/linguist/classifier.yml']

View File

@@ -3,6 +3,12 @@ require 'linguist/tokenizer'
module Linguist
# Bayesian language classifier.
class Classifier
# Absolute path of classifier.yml, resolved relative to this source file.
PATH = File.expand_path('../classifier.yml', __FILE__)

# Get the shared classifier deserialized from PATH.
#
# The YAML document is parsed on the first call only; the result is memoized
# in @instance and handed back on every later call.
#
# Returns whatever object the YAML at PATH deserializes to (presumably a
# Classifier written by the `classifier` rake task - confirm).
def self.instance
  # Psych 4 (Ruby 3.1+) turned YAML.load_file into a safe load that rejects
  # arbitrary Ruby objects, so a dumped object can only be restored through
  # unsafe_load_file there. Older Psych has no such method and its load_file
  # already restores objects. Only use this on the trusted file at PATH,
  # never on external input.
  @instance ||=
    if YAML.respond_to?(:unsafe_load_file)
      YAML.unsafe_load_file(PATH)
    else
      YAML.load_file(PATH)
    end
end
def initialize
@tokens_total = 0
@languages_total = 0
@@ -53,4 +59,7 @@ module Linguist
@languages[language].to_f / @languages_total.to_f
end
end
# Eagerly load the shared instance while this file is being loaded, so the
# classifier YAML is parsed up front instead of on the first lookup.
Classifier.instance
end

6122
lib/linguist/classifier.yml Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,6 @@
require 'linguist/classifier'
require 'linguist/language'
require 'linguist/sample'
require 'test/unit'
@@ -14,7 +15,7 @@ class TestClassifier < Test::Unit::TestCase
File.read(File.join(fixtures_path, name))
end
def test_classify
def test_train_and_classify
classifier = Classifier.new
classifier.train Language["Ruby"], fixture("ruby/foo.rb")
classifier.train Language["Objective-C"], fixture("objective-c/Foo.h")
@@ -23,4 +24,16 @@ class TestClassifier < Test::Unit::TestCase
results = classifier.classify(fixture("objective-c/hello.m"))
assert_equal Language["Objective-C"], results.first[0]
end
# Classifying an empty string with the shared classifier must not produce a
# confident answer: the second element of the top result (presumably its
# score - confirm) has to stay below 0.5.
def test_instance_classify_empty
  top_result = Classifier.instance.classify("").first
  assert top_result[1] < 0.5, top_result.inspect
end
# def test_instance_classify
# Sample.each do |sample|
# results = Classifier.instance.classify(sample.data)
# assert_equal sample.language, results.first[0], sample.path
# end
# end
end