mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Dump classifier results
This commit is contained in:
12
Rakefile
12
Rakefile
@@ -1,3 +1,4 @@
|
||||
require 'rake/clean'
|
||||
require 'rake/testtask'
|
||||
|
||||
task :default => :test
|
||||
@@ -5,3 +6,14 @@ task :default => :test
|
||||
Rake::TestTask.new do |t|
|
||||
t.warning = true
|
||||
end
|
||||
|
||||
|
||||
# Generates lib/linguist/classifier.yml by dumping the classifier returned by
# Linguist::Sample.classifier as YAML.
file 'lib/linguist/classifier.yml' do |task|
  # Loaded lazily so the sample code is only required when this task runs.
  require 'linguist/sample'

  # Build the classifier before the output file is opened for writing.
  trained = Linguist::Sample.classifier

  File.open(task.name, 'w') do |out|
    YAML.dump(trained, out)
  end
end
|
||||
|
||||
CLOBBER.include 'lib/linguist/classifier.yml'
|
||||
|
||||
task :classifier => ['lib/linguist/classifier.yml']
|
||||
|
||||
@@ -3,6 +3,12 @@ require 'linguist/tokenizer'
|
||||
module Linguist
|
||||
# Bayesian language classifier.
|
||||
class Classifier
|
||||
PATH = File.expand_path('../classifier.yml', __FILE__)
|
||||
|
||||
# Public: Shared classifier deserialized from the YAML file at PATH.
#
# The file is read the first time this is called; the loaded object is kept
# in a class-level instance variable and handed back on later calls.
#
# Returns the object produced by YAML.load_file(PATH).
def self.instance
  return @instance if @instance

  @instance = YAML.load_file(PATH)
end
|
||||
|
||||
def initialize
|
||||
@tokens_total = 0
|
||||
@languages_total = 0
|
||||
@@ -53,4 +59,7 @@ module Linguist
|
||||
@languages[language].to_f / @languages_total.to_f
|
||||
end
|
||||
end
|
||||
|
||||
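# Eagerly load the shared instance so classifier.yml is parsed when this file
# is required rather than on the first call to Classifier.instance.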
|
||||
Classifier.instance
|
||||
end
|
||||
|
||||
6122
lib/linguist/classifier.yml
Normal file
6122
lib/linguist/classifier.yml
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,6 @@
|
||||
require 'linguist/classifier'
|
||||
require 'linguist/language'
|
||||
require 'linguist/sample'
|
||||
|
||||
require 'test/unit'
|
||||
|
||||
@@ -14,7 +15,7 @@ class TestClassifier < Test::Unit::TestCase
|
||||
File.read(File.join(fixtures_path, name))
|
||||
end
|
||||
|
||||
def test_classify
|
||||
def test_train_and_classify
|
||||
classifier = Classifier.new
|
||||
classifier.train Language["Ruby"], fixture("ruby/foo.rb")
|
||||
classifier.train Language["Objective-C"], fixture("objective-c/Foo.h")
|
||||
@@ -23,4 +24,16 @@ class TestClassifier < Test::Unit::TestCase
|
||||
results = classifier.classify(fixture("objective-c/hello.m"))
|
||||
assert_equal Language["Objective-C"], results.first[0]
|
||||
end
|
||||
|
||||
# Classifying an empty string with the shared instance must not yield a
# top-ranked result whose score reaches 0.5.
def test_instance_classify_empty
  top_match = Classifier.instance.classify("").first

  # On failure, show the top entry to make the offending result visible.
  assert top_match[1] < 0.5, top_match.inspect
end
|
||||
|
||||
# def test_instance_classify
|
||||
# Sample.each do |sample|
|
||||
# results = Classifier.instance.classify(sample.data)
|
||||
# assert_equal sample.language, results.first[0], sample.path
|
||||
# end
|
||||
# end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user