GC classifier db

This commit is contained in:
Joshua Peek
2012-06-08 16:04:43 -05:00
parent fd8b70ffa4
commit 8a75d4d208
3 changed files with 21 additions and 2 deletions

View File

@@ -34,6 +34,21 @@ module Linguist
@languages_total += 1 @languages_total += 1
end end
# Prune tokens that were recorded exactly once.
#
# Only languages whose @language_tokens counter is greater than 20 are
# pruned; every other language keeps all of its tokens. For each token
# removed, that language's @language_tokens entry and @tokens_total are
# both reduced by one.
#
# Returns self so the call can be chained.
def gc
  @tokens.each do |language, tokens|
    next unless @language_tokens[language] > 20

    # delete_if removes entries in a single pass, so the hash is never
    # mutated from inside a hand-written #each over that same hash.
    size_before = tokens.size
    tokens.delete_if { |_name, count| count == 1 }
    removed = size_before - tokens.size

    # Each removed token had a count of 1, so the totals shrink by
    # exactly the number of entries removed.
    @language_tokens[language] -= removed
    @tokens_total -= removed
  end

  self
end
def classify(data) def classify(data)
tokens = Tokenizer.new(data).tokens tokens = Tokenizer.new(data).tokens

View File

@@ -29,7 +29,7 @@ module Linguist
def self.classifier def self.classifier
classifier = Classifier.new classifier = Classifier.new
each { |sample| classifier.train(sample.language, sample.data) } each { |sample| classifier.train(sample.language, sample.data) }
classifier classifier.gc
end end
def initialize(path, language) def initialize(path, language)

View File

@@ -15,7 +15,7 @@ class TestClassifier < Test::Unit::TestCase
File.read(File.join(fixtures_path, name)) File.read(File.join(fixtures_path, name))
end end
def test_train_and_classify def test_classify
classifier = Classifier.new classifier = Classifier.new
classifier.train Language["Ruby"], fixture("ruby/foo.rb") classifier.train Language["Ruby"], fixture("ruby/foo.rb")
classifier.train Language["Objective-C"], fixture("objective-c/Foo.h") classifier.train Language["Objective-C"], fixture("objective-c/Foo.h")
@@ -30,6 +30,10 @@ class TestClassifier < Test::Unit::TestCase
assert results.first[1] < 0.5, results.first.inspect assert results.first[1] < 0.5, results.first.inspect
end end
# Smoke test: calls #gc on Classifier.instance. It makes no assertions,
# so it fails only if the call raises.
def test_gc
  shared_classifier = Classifier.instance
  shared_classifier.gc
end
# def test_instance_classify # def test_instance_classify
# Sample.each do |sample| # Sample.each do |sample|
# results = Classifier.instance.classify(sample.data) # results = Classifier.instance.classify(sample.data)