GC classifier db

This commit is contained in:
Joshua Peek
2012-06-08 16:04:43 -05:00
parent fd8b70ffa4
commit 8a75d4d208
3 changed files with 21 additions and 2 deletions

View File

@@ -34,6 +34,21 @@ module Linguist
@languages_total += 1
end
# Prune tokens that were recorded exactly once, keeping the bookkeeping
# counters in step with what is removed.
#
# Only languages whose entry in @language_tokens is greater than 20 are
# pruned; every other language is left untouched.
#
# Returns self, so the call can be chained.
def gc
  @tokens.each do |language, counts|
    next unless @language_tokens[language] > 20

    # Remove every count-of-one entry in a single pass instead of deleting
    # keys from the hash while an outer `each` is walking it.
    size_before = counts.size
    counts.delete_if { |_name, count| count == 1 }
    pruned = size_before - counts.size

    # Each removed entry had a count of 1, so both tallies shrink by the
    # number of entries removed.
    @language_tokens[language] -= pruned
    @tokens_total -= pruned
  end
  self
end
def classify(data)
tokens = Tokenizer.new(data).tokens

View File

@@ -29,7 +29,7 @@ module Linguist
# Build a new Classifier, train it on every sample yielded by `each`
# (using the sample's language and data), and return the result of
# calling `gc` on it.
def self.classifier
  trained = Classifier.new
  each do |sample|
    trained.train(sample.language, sample.data)
  end
  trained.gc
end
def initialize(path, language)