mirror of
https://github.com/KevinMidboe/linguist.git
synced 2026-02-15 04:39:47 +00:00
Add sample gathering class
This commit is contained in:
46
lib/linguist/sample.rb
Normal file
46
lib/linguist/sample.rb
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
require 'linguist/classifier'
|
||||||
|
require 'linguist/language'
|
||||||
|
|
||||||
|
module Linguist
  # A single sample source file paired with the language it is filed under.
  #
  # Samples are discovered on disk: each directory directly under PATH is
  # looked up as a language alias, and each entry inside that directory
  # becomes one Sample for that language.
  class Sample
    # Samples live in test/ for now, we'll eventually move them out
    PATH = File.expand_path("../../../test/fixtures", __FILE__)

    attr_reader :path, :language

    # Iterate over every sample found under PATH.
    #
    # Categories and the files inside them are visited in sorted name order.
    # Dir.entries makes no ordering guarantee, so sorting keeps iteration
    # (and therefore classifier training) reproducible across platforms.
    #
    # Yields one Sample per file. The 'text' and 'binary' categories are
    # skipped.
    #
    # Returns nil when a block is given, or an Enumerator over the samples
    # when called without a block.
    # Raises RuntimeError if a category name has no matching language alias.
    def self.each
      return to_enum(:each) unless block_given?

      entries(PATH).each do |category|
        # Skip text and binary for now
        next if category == 'text' || category == 'binary'

        language = Linguist::Language.find_by_alias(category)
        raise "No language for #{category.inspect}" unless language

        dirname = File.join(PATH, category)
        entries(dirname).each do |filename|
          yield new(File.join(dirname, filename), language)
        end
      end

      nil
    end

    # Build a Classifier and train it with every sample's language and data.
    #
    # Returns the trained Classifier.
    def self.classifier
      classifier = Classifier.new
      each { |sample| classifier.train(sample.language, sample.data) }
      classifier
    end

    # List the names inside dirname in sorted order, leaving out the '.' and
    # '..' pseudo-entries that Dir.entries always reports.
    #
    # dirname - String path of the directory to list.
    #
    # Returns an Array of entry name Strings.
    def self.entries(dirname)
      Dir.entries(dirname).sort.reject { |name| name == '.' || name == '..' }
    end
    private_class_method :entries

    # Create a sample.
    #
    # path     - String path to the sample file on disk.
    # language - The language the sample belongs to (as returned by
    #            Language.find_by_alias when created through Sample.each).
    def initialize(path, language)
      @path = path
      @language = language
    end

    # Read the sample file from disk.
    #
    # Returns the file contents as a String.
    def data
      File.read(path)
    end
  end
end
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
require 'linguist/file_blob'
|
require 'linguist/file_blob'
|
||||||
|
require 'linguist/sample'
|
||||||
|
|
||||||
require 'test/unit'
|
require 'test/unit'
|
||||||
require 'mime/types'
|
require 'mime/types'
|
||||||
@@ -24,23 +25,6 @@ class TestBlob < Test::Unit::TestCase
|
|||||||
blob
|
blob
|
||||||
end
|
end
|
||||||
|
|
||||||
def each_language_fixture
|
|
||||||
Dir["#{fixtures_path}/*"].each do |path|
|
|
||||||
name = File.basename(path)
|
|
||||||
|
|
||||||
if name == 'text' || name == 'binary'
|
|
||||||
next
|
|
||||||
else
|
|
||||||
assert language = Language.find_by_alias(name), "No language alias for #{name.inspect}"
|
|
||||||
end
|
|
||||||
|
|
||||||
Dir.entries(path).each do |filename|
|
|
||||||
next if filename == '.' || filename == '..'
|
|
||||||
yield language, blob(File.join(path, filename))
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def test_name
|
def test_name
|
||||||
assert_equal "foo.rb", blob("foo.rb").name
|
assert_equal "foo.rb", blob("foo.rb").name
|
||||||
end
|
end
|
||||||
@@ -287,9 +271,9 @@ class TestBlob < Test::Unit::TestCase
|
|||||||
end
|
end
|
||||||
|
|
||||||
def test_language
|
def test_language
|
||||||
# Drop any files under test/fixtures/LANGUAGE
|
Sample.each do |sample|
|
||||||
each_language_fixture do |language, blob|
|
blob = blob(sample.path)
|
||||||
assert_equal language, blob.language, blob.name
|
assert_equal sample.language, blob.language, blob.name
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user