mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Add sample gathering class
This commit is contained in:
46
lib/linguist/sample.rb
Normal file
46
lib/linguist/sample.rb
Normal file
@@ -0,0 +1,46 @@
|
||||
require 'linguist/classifier'
|
||||
require 'linguist/language'
|
||||
|
||||
module Linguist
  # A labelled example file on disk: the path to the file together with the
  # language object of the fixture category directory it was found in.
  class Sample
    # Samples live in test/ for now, we'll eventually move them out
    PATH = File.expand_path("../../../test/fixtures", __FILE__)

    # path     - String path given to the constructor.
    # language - The language object given to the constructor.
    attr_reader :path, :language

    # Walk every sample file below PATH.
    #
    # Each immediate entry of PATH is treated as a category name and looked up
    # with Linguist::Language.find_by_alias; each entry inside that category
    # directory is yielded as a Sample carrying the resolved language.
    # Entries are visited in sorted order so that iteration does not depend on
    # the order the filesystem happens to return them in.
    #
    # Yields one Sample per file.
    #
    # Returns nil when a block is given, otherwise an Enumerator over the
    # same samples.
    # Raises RuntimeError if a category name has no matching language.
    def self.each
      # Without this guard a block-less call would fail with LocalJumpError
      # at the first yield.
      return enum_for(:each) unless block_given?

      Dir.entries(PATH).sort.each do |category|
        next if category == '.' || category == '..'

        # Skip text and binary for now
        next if category == 'text' || category == 'binary'

        language = Linguist::Language.find_by_alias(category)
        raise "No language for #{category.inspect}" unless language

        dirname = File.join(PATH, category)
        Dir.entries(dirname).sort.each do |filename|
          next if filename == '.' || filename == '..'
          yield new(File.join(dirname, filename), language)
        end
      end

      nil
    end

    # Build a classifier trained on every sample.
    #
    # Creates a new Classifier and calls train(language, data) on it once per
    # sample produced by Sample.each.
    #
    # Returns the trained Classifier.
    def self.classifier
      classifier = Classifier.new
      each { |sample| classifier.train(sample.language, sample.data) }
      classifier
    end

    # Create a sample.
    #
    # path     - String path of the sample file.
    # language - The language object the sample is filed under.
    def initialize(path, language)
      @path = path
      @language = language
    end

    # Read the sample's contents from disk. The file is read on every call;
    # nothing is cached.
    #
    # Returns the String contents of the file at path.
    def data
      File.read(path)
    end
  end
end
|
||||
@@ -1,4 +1,5 @@
|
||||
require 'linguist/file_blob'
|
||||
require 'linguist/sample'
|
||||
|
||||
require 'test/unit'
|
||||
require 'mime/types'
|
||||
@@ -24,23 +25,6 @@ class TestBlob < Test::Unit::TestCase
|
||||
blob
|
||||
end
|
||||
|
||||
def each_language_fixture
|
||||
Dir["#{fixtures_path}/*"].each do |path|
|
||||
name = File.basename(path)
|
||||
|
||||
if name == 'text' || name == 'binary'
|
||||
next
|
||||
else
|
||||
assert language = Language.find_by_alias(name), "No language alias for #{name.inspect}"
|
||||
end
|
||||
|
||||
Dir.entries(path).each do |filename|
|
||||
next if filename == '.' || filename == '..'
|
||||
yield language, blob(File.join(path, filename))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def test_name
|
||||
assert_equal "foo.rb", blob("foo.rb").name
|
||||
end
|
||||
@@ -287,9 +271,9 @@ class TestBlob < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
def test_language
|
||||
# Drop any files under test/fixtures/LANGUAGE
|
||||
each_language_fixture do |language, blob|
|
||||
assert_equal language, blob.language, blob.name
|
||||
Sample.each do |sample|
|
||||
blob = blob(sample.path)
|
||||
assert_equal sample.language, blob.language, blob.name
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user