mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
GC classifier db
This commit is contained in:
@@ -34,6 +34,21 @@ module Linguist
|
||||
@languages_total += 1
|
||||
end
|
||||
|
||||
# Garbage-collect the classifier database by dropping tokens that were
# seen only once.
#
# Only languages whose running token total (@language_tokens) is greater
# than 20 are pruned; the threshold is checked once per language, before
# any of its tokens are removed. For every token removed, both the
# per-language total and the global @tokens_total are reduced by one
# (the removed token's count).
#
# NOTE(review): the cutoff of 20 is presumably there to keep sparsely
# trained languages from losing most of their data - confirm.
#
# Returns the receiver (self) so the call can be chained.
def gc
  @tokens.each do |language, tokens|
    next unless @language_tokens[language] > 20

    # `tokens` is the same Hash object as @tokens[language]; delete_if
    # prunes it in place without calling Hash#delete from inside an
    # `each` over that same hash.
    size_before = tokens.size
    tokens.delete_if { |_name, count| count == 1 }
    pruned = size_before - tokens.size

    # Each pruned token had a count of exactly 1, so the totals shrink
    # by the number of entries removed.
    @language_tokens[language] -= pruned
    @tokens_total -= pruned
  end

  self
end
|
||||
|
||||
def classify(data)
|
||||
tokens = Tokenizer.new(data).tokens
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ module Linguist
|
||||
# Build a classifier trained on every sample.
#
# Creates a fresh Classifier, feeds it each sample's language and data
# via `each`, then runs `gc` on it.
#
# Returns whatever Classifier#gc returns.
# NOTE(review): this relies on gc returning the classifier itself -
# confirm against Classifier#gc.
def self.classifier
  classifier = Classifier.new
  each { |sample| classifier.train(sample.language, sample.data) }
  classifier.gc
end
|
||||
|
||||
def initialize(path, language)
|
||||
|
||||
@@ -15,7 +15,7 @@ class TestClassifier < Test::Unit::TestCase
|
||||
File.read(File.join(fixtures_path, name))
|
||||
end
|
||||
|
||||
def test_train_and_classify
|
||||
def test_classify
|
||||
classifier = Classifier.new
|
||||
classifier.train Language["Ruby"], fixture("ruby/foo.rb")
|
||||
classifier.train Language["Objective-C"], fixture("objective-c/Foo.h")
|
||||
@@ -30,6 +30,10 @@ class TestClassifier < Test::Unit::TestCase
|
||||
assert results.first[1] < 0.5, results.first.inspect
|
||||
end
|
||||
|
||||
# Runs gc on the shared classifier instance and checks that it returns
# the receiver, which callers chaining on gc's result depend on.
def test_gc
  classifier = Classifier.instance
  assert_same classifier, classifier.gc
end
|
||||
|
||||
# def test_instance_classify
|
||||
# Sample.each do |sample|
|
||||
# results = Classifier.instance.classify(sample.data)
|
||||
|
||||
Reference in New Issue
Block a user