From 0172623061f45d92da66438a31e0a532c2223d35 Mon Sep 17 00:00:00 2001
From: Joshua Peek
Date: Fri, 8 Jun 2012 13:51:49 -0500
Subject: [PATCH] Add sample gathering class

---
Reviewer notes (this region is not part of the commit message):
  * Linguist::Sample.each walks every directory under test/fixtures except
    'text' and 'binary', raises if the directory name is not a known
    language alias, yields one Sample (path + language) per file, and
    returns nil.
  * Linguist::Sample.classifier builds a Classifier trained on each sample's
    language and file contents.
  * test_language now iterates Sample.each; the each_language_fixture test
    helper it replaces is removed.

 lib/linguist/sample.rb | 46 ++++++++++++++++++++++++++++++++++++++++++
 test/test_blob.rb      | 24 ++++------------------
 2 files changed, 50 insertions(+), 20 deletions(-)
 create mode 100644 lib/linguist/sample.rb

diff --git a/lib/linguist/sample.rb b/lib/linguist/sample.rb
new file mode 100644
index 00000000..53800b97
--- /dev/null
+++ b/lib/linguist/sample.rb
@@ -0,0 +1,46 @@
+require 'linguist/classifier'
+require 'linguist/language'
+
+module Linguist
+  class Sample
+    # Samples live in test/ for now, we'll eventually move them out
+    PATH = File.expand_path("../../../test/fixtures", __FILE__)
+
+    def self.each(&block)
+      Dir.entries(PATH).each do |category|
+        next if category == '.' || category == '..'
+
+        # Skip text and binary for now
+        next if category == 'text' || category == 'binary'
+
+        language = Linguist::Language.find_by_alias(category)
+        raise "No language for #{category.inspect}" unless language
+
+        dirname = File.join(PATH, category)
+        Dir.entries(dirname).each do |filename|
+          next if filename == '.' || filename == '..'
+          yield new(File.join(dirname, filename), language)
+        end
+      end
+
+      nil
+    end
+
+    def self.classifier
+      classifier = Classifier.new
+      each { |sample| classifier.train(sample.language, sample.data) }
+      classifier
+    end
+
+    def initialize(path, language)
+      @path = path
+      @language = language
+    end
+
+    def data
+      File.read(path)
+    end
+
+    attr_reader :path, :language
+  end
+end
diff --git a/test/test_blob.rb b/test/test_blob.rb
index 1d17e60d..12ac0f74 100644
--- a/test/test_blob.rb
+++ b/test/test_blob.rb
@@ -1,4 +1,5 @@
 require 'linguist/file_blob'
+require 'linguist/sample'
 
 require 'test/unit'
 require 'mime/types'
@@ -24,23 +25,6 @@ class TestBlob < Test::Unit::TestCase
     blob
   end
 
-  def each_language_fixture
-    Dir["#{fixtures_path}/*"].each do |path|
-      name = File.basename(path)
-
-      if name == 'text' || name == 'binary'
-        next
-      else
-        assert language = Language.find_by_alias(name), "No language alias for #{name.inspect}"
-      end
-
-      Dir.entries(path).each do |filename|
-        next if filename == '.' || filename == '..'
-        yield language, blob(File.join(path, filename))
-      end
-    end
-  end
-
   def test_name
     assert_equal "foo.rb", blob("foo.rb").name
   end
@@ -287,9 +271,9 @@ class TestBlob < Test::Unit::TestCase
   end
 
   def test_language
-    # Drop any files under test/fixtures/LANGUAGE
-    each_language_fixture do |language, blob|
-      assert_equal language, blob.language, blob.name
+    Sample.each do |sample|
+      blob = blob(sample.path)
+      assert_equal sample.language, blob.language, blob.name
     end
   end
 