Add sample gathering class

This commit is contained in:
Joshua Peek
2012-06-08 13:51:49 -05:00
parent e5ae9c328b
commit 0172623061
2 changed files with 50 additions and 20 deletions

46
lib/linguist/sample.rb Normal file
View File

@@ -0,0 +1,46 @@
require 'linguist/classifier'
require 'linguist/language'
module Linguist
  # A Sample pairs the path of a fixture file with the language that the
  # fixture's parent directory name resolves to.
  #
  # Fixtures are laid out as PATH/<language alias>/<file>.
  class Sample
    # Samples live in test/ for now, we'll eventually move them out
    PATH = File.expand_path("../../../test/fixtures", __FILE__)

    # path     - String path to the sample file.
    # language - The value returned by Language.find_by_alias for the
    #            sample's category directory.
    attr_reader :path, :language

    # Public: Iterate over every sample found under PATH.
    #
    # Categories and the files inside them are visited in sorted order so
    # that iteration (and therefore classifier training) does not depend on
    # the order in which the filesystem happens to list directory entries.
    #
    # Yields one Sample per file.
    #
    # Returns nil when a block is given, otherwise an Enumerator over the
    # samples.
    # Raises RuntimeError if a category directory name is not a known
    # language alias.
    def self.each(&block)
      return enum_for(:each) unless block

      sorted_entries(PATH).each do |category|
        # Skip text and binary for now
        next if category == 'text' || category == 'binary'

        language = Linguist::Language.find_by_alias(category)
        raise "No language for #{category.inspect}" unless language

        dirname = File.join(PATH, category)
        sorted_entries(dirname).each do |filename|
          yield new(File.join(dirname, filename), language)
        end
      end

      nil
    end

    # Public: Build a Classifier and feed it every sample.
    #
    # Calls Classifier#train with each sample's language and file contents.
    #
    # Returns the Classifier instance.
    def self.classifier
      classifier = Classifier.new
      each { |sample| classifier.train(sample.language, sample.data) }
      classifier
    end

    # Internal: List the names inside a directory, without the "." and ".."
    # pseudo entries, in sorted order.
    #
    # dir - String directory path.
    #
    # Returns an Array of String entry names.
    def self.sorted_entries(dir)
      Dir.entries(dir).reject { |entry| entry == '.' || entry == '..' }.sort
    end
    private_class_method :sorted_entries

    # Public: Initialize a Sample.
    #
    # path     - String path to the sample file.
    # language - Language the sample belongs to.
    def initialize(path, language)
      @path = path
      @language = language
    end

    # Public: Read the sample from disk.
    #
    # The file is read again on every call; nothing is cached.
    #
    # Returns the String contents of the file at path.
    def data
      File.read(path)
    end
  end
end

View File

@@ -1,4 +1,5 @@
require 'linguist/file_blob'
require 'linguist/sample'
require 'test/unit'
require 'mime/types'
@@ -24,23 +25,6 @@ class TestBlob < Test::Unit::TestCase
blob
end
# Walk every language directory under fixtures_path and yield each fixture
# file it contains.
#
# The "text" and "binary" directories are skipped. Any other directory name
# must resolve through Language.find_by_alias, otherwise the assertion fails.
#
# Yields the language and the blob built from the fixture's full path.
def each_language_fixture
  Dir["#{fixtures_path}/*"].each do |dir|
    category = File.basename(dir)
    next if category == 'text' || category == 'binary'

    language = Language.find_by_alias(category)
    assert language, "No language alias for #{category.inspect}"

    Dir.entries(dir).each do |entry|
      next if entry == '.' || entry == '..'
      yield language, blob(File.join(dir, entry))
    end
  end
end
# A blob built from "foo.rb" reports that same string as its name.
def test_name
  foo = blob("foo.rb")
  assert_equal "foo.rb", foo.name
end
@@ -287,9 +271,9 @@ class TestBlob < Test::Unit::TestCase
end
def test_language
# Drop any files under test/fixtures/LANGUAGE
each_language_fixture do |language, blob|
assert_equal language, blob.language, blob.name
Sample.each do |sample|
blob = blob(sample.path)
assert_equal sample.language, blob.language, blob.name
end
end