mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
GC classifier db
This commit is contained in:
@@ -34,6 +34,21 @@ module Linguist
|
|||||||
@languages_total += 1
|
@languages_total += 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Garbage-collect the token database by dropping tokens seen only once.
#
# Only languages whose @language_tokens count exceeds 20 are pruned. Each
# removed token had a count of 1, so the per-language and global totals
# are reduced by the number of tokens removed.
#
# Returns self.
def gc
  @tokens.each do |language, tokens|
    # The threshold is checked once per language, before any pruning.
    next unless @language_tokens[language] > 20

    # Prune in a single pass with delete_if rather than deleting from the
    # hash from inside its own #each loop.
    size_before = tokens.size
    tokens.delete_if { |_name, count| count == 1 }
    removed = size_before - tokens.size

    @language_tokens[language] -= removed
    @tokens_total -= removed
  end

  self
end
|
||||||
|
|
||||||
def classify(data)
|
def classify(data)
|
||||||
tokens = Tokenizer.new(data).tokens
|
tokens = Tokenizer.new(data).tokens
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ module Linguist
|
|||||||
# Build a new Classifier, train it on every sample yielded by `each`,
# and return the result of calling #gc on it.
def self.classifier
  trained = Classifier.new
  each do |sample|
    trained.train(sample.language, sample.data)
  end
  trained.gc
end
|
||||||
|
|
||||||
def initialize(path, language)
|
def initialize(path, language)
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ class TestClassifier < Test::Unit::TestCase
|
|||||||
File.read(File.join(fixtures_path, name))
|
File.read(File.join(fixtures_path, name))
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_train_and_classify
|
def test_classify
|
||||||
classifier = Classifier.new
|
classifier = Classifier.new
|
||||||
classifier.train Language["Ruby"], fixture("ruby/foo.rb")
|
classifier.train Language["Ruby"], fixture("ruby/foo.rb")
|
||||||
classifier.train Language["Objective-C"], fixture("objective-c/Foo.h")
|
classifier.train Language["Objective-C"], fixture("objective-c/Foo.h")
|
||||||
@@ -30,6 +30,10 @@ class TestClassifier < Test::Unit::TestCase
|
|||||||
assert results.first[1] < 0.5, results.first.inspect
|
assert results.first[1] < 0.5, results.first.inspect
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Runs garbage collection on the shared classifier instance and checks
# that #gc hands back the very object it was called on.
def test_gc
  classifier = Classifier.instance
  assert_same classifier, classifier.gc
end
|
||||||
|
|
||||||
# def test_instance_classify
|
# def test_instance_classify
|
||||||
# Sample.each do |sample|
|
# Sample.each do |sample|
|
||||||
# results = Classifier.instance.classify(sample.data)
|
# results = Classifier.instance.classify(sample.data)
|
||||||
|
|||||||
Reference in New Issue
Block a user