mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Dump classifier results
This commit is contained in:
12
Rakefile
12
Rakefile
@@ -1,3 +1,4 @@
|
|||||||
|
require 'rake/clean'
|
||||||
require 'rake/testtask'
|
require 'rake/testtask'
|
||||||
|
|
||||||
task :default => :test
|
task :default => :test
|
||||||
@@ -5,3 +6,14 @@ task :default => :test
|
|||||||
Rake::TestTask.new do |t|
|
Rake::TestTask.new do |t|
|
||||||
t.warning = true
|
t.warning = true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
file 'lib/linguist/classifier.yml' do |f|
|
||||||
|
require 'linguist/sample'
|
||||||
|
classifier = Linguist::Sample.classifier
|
||||||
|
File.open(f.name, 'w') { |io| YAML.dump(classifier, io) }
|
||||||
|
end
|
||||||
|
|
||||||
|
CLOBBER.include 'lib/linguist/classifier.yml'
|
||||||
|
|
||||||
|
task :classifier => ['lib/linguist/classifier.yml']
|
||||||
|
|||||||
@@ -3,6 +3,12 @@ require 'linguist/tokenizer'
|
|||||||
module Linguist
|
module Linguist
|
||||||
# Language bayesian classifier.
|
# Language bayesian classifier.
|
||||||
class Classifier
|
class Classifier
|
||||||
|
PATH = File.expand_path('../classifier.yml', __FILE__)
|
||||||
|
|
||||||
|
def self.instance
|
||||||
|
@instance ||= YAML.load_file(PATH)
|
||||||
|
end
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
@tokens_total = 0
|
@tokens_total = 0
|
||||||
@languages_total = 0
|
@languages_total = 0
|
||||||
@@ -53,4 +59,7 @@ module Linguist
|
|||||||
@languages[language].to_f / @languages_total.to_f
|
@languages[language].to_f / @languages_total.to_f
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Eager load instance
|
||||||
|
Classifier.instance
|
||||||
end
|
end
|
||||||
|
|||||||
6122
lib/linguist/classifier.yml
Normal file
6122
lib/linguist/classifier.yml
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,6 @@
|
|||||||
require 'linguist/classifier'
|
require 'linguist/classifier'
|
||||||
require 'linguist/language'
|
require 'linguist/language'
|
||||||
|
require 'linguist/sample'
|
||||||
|
|
||||||
require 'test/unit'
|
require 'test/unit'
|
||||||
|
|
||||||
@@ -14,7 +15,7 @@ class TestClassifier < Test::Unit::TestCase
|
|||||||
File.read(File.join(fixtures_path, name))
|
File.read(File.join(fixtures_path, name))
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_classify
|
def test_train_and_classify
|
||||||
classifier = Classifier.new
|
classifier = Classifier.new
|
||||||
classifier.train Language["Ruby"], fixture("ruby/foo.rb")
|
classifier.train Language["Ruby"], fixture("ruby/foo.rb")
|
||||||
classifier.train Language["Objective-C"], fixture("objective-c/Foo.h")
|
classifier.train Language["Objective-C"], fixture("objective-c/Foo.h")
|
||||||
@@ -23,4 +24,16 @@ class TestClassifier < Test::Unit::TestCase
|
|||||||
results = classifier.classify(fixture("objective-c/hello.m"))
|
results = classifier.classify(fixture("objective-c/hello.m"))
|
||||||
assert_equal Language["Objective-C"], results.first[0]
|
assert_equal Language["Objective-C"], results.first[0]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_instance_classify_empty
|
||||||
|
results = Classifier.instance.classify("")
|
||||||
|
assert results.first[1] < 0.5, results.first.inspect
|
||||||
|
end
|
||||||
|
|
||||||
|
# def test_instance_classify
|
||||||
|
# Sample.each do |sample|
|
||||||
|
# results = Classifier.instance.classify(sample.data)
|
||||||
|
# assert_equal sample.language, results.first[0], sample.path
|
||||||
|
# end
|
||||||
|
# end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user