Doc sample class

This commit is contained in:
Joshua Peek
2012-06-19 13:30:28 -05:00
parent d5fa8cbcb7
commit d9ecbf0c24

View File

@@ -2,17 +2,25 @@ require 'linguist/classifier'
require 'linguist/language' require 'linguist/language'
module Linguist module Linguist
# Model for accessing classifier training data.
class Sample class Sample
# Samples live in test/ for now, we'll eventually move them out # Samples live in test/ for now, we'll eventually move them out
PATH = File.expand_path("../../../test/fixtures", __FILE__) PATH = File.expand_path("../../../test/fixtures", __FILE__)
# Public: Iterate over each Sample.
#
# &block - Yields Sample to block
#
# Returns nothing.
def self.each(&block) def self.each(&block)
Dir.entries(PATH).each do |category| Dir.entries(PATH).each do |category|
next if category == '.' || category == '..' next if category == '.' || category == '..'
# Skip text and binary for now # Skip text and binary for now
# Possibly reconsider this later
next if category == 'text' || category == 'binary' next if category == 'text' || category == 'binary'
# Map directory name to a Language alias
language = Linguist::Language.find_by_alias(category) language = Linguist::Language.find_by_alias(category)
raise "No language for #{category.inspect}" unless language raise "No language for #{category.inspect}" unless language
@@ -26,21 +34,41 @@ module Linguist
nil nil
end end
# Public: Build Classifier from all samples.
#
# Returns trained Classifier.
def self.classifier def self.classifier
classifier = Classifier.new classifier = Classifier.new
each { |sample| classifier.train(sample.language, sample.data) } each { |sample| classifier.train(sample.language, sample.data) }
classifier.gc classifier.gc
end end
# Internal: Initialize Sample.
#
# Samples should be initialized by Sample.each.
#
# path - String full path to file.
# language - Language of sample.
def initialize(path, language) def initialize(path, language)
@path = path @path = path
@language = language @language = language
end end
# Public: Get full path to file.
#
# Returns String.
attr_reader :path
# Public: Get sample language.
#
# Returns Language.
attr_reader :language
# Public: Read file contents.
#
# Returns String.
def data def data
File.read(path) File.read(path)
end end
attr_reader :path, :language
end end
end end