From d9ecbf0c2435f39c91c5c779084a37ccd2ed9031 Mon Sep 17 00:00:00 2001
From: Joshua Peek
Date: Tue, 19 Jun 2012 13:30:28 -0500
Subject: [PATCH] Doc sample class

---
 lib/linguist/sample.rb | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/lib/linguist/sample.rb b/lib/linguist/sample.rb
index a2248daa..8bd5f04b 100644
--- a/lib/linguist/sample.rb
+++ b/lib/linguist/sample.rb
@@ -2,17 +2,25 @@ require 'linguist/classifier'
 require 'linguist/language'
 
 module Linguist
+  # Model for accessing classifier training data.
   class Sample
     # Samples live in test/ for now, we'll eventually move them out
     PATH = File.expand_path("../../../test/fixtures", __FILE__)
 
+    # Public: Iterate over each Sample.
+    #
+    # &block - Yields Sample to block
+    #
+    # Returns nothing.
     def self.each(&block)
       Dir.entries(PATH).each do |category|
         next if category == '.' || category == '..'
 
         # Skip text and binary for now
+        # Possibly reconsider this later
         next if category == 'text' || category == 'binary'
 
+        # Map directory name to a Language alias
         language = Linguist::Language.find_by_alias(category)
         raise "No language for #{category.inspect}" unless language
 
@@ -26,21 +34,41 @@ module Linguist
       nil
     end
 
+    # Public: Build Classifier from all samples.
+    #
+    # Returns trained Classifier.
     def self.classifier
       classifier = Classifier.new
       each { |sample| classifier.train(sample.language, sample.data) }
       classifier.gc
     end
 
+    # Internal: Initialize Sample.
+    #
+    # Samples should be initialized by Sample.each.
+    #
+    # path - String full path to file.
+    # language - Language of sample.
     def initialize(path, language)
       @path = path
       @language = language
     end
 
+    # Public: Get full path to file.
+    #
+    # Returns String.
+    attr_reader :path
+
+    # Public: Get sample language.
+    #
+    # Returns Language.
+    attr_reader :language
+
+    # Public: Read file contents.
+    #
+    # Returns String.
     def data
       File.read(path)
     end
-
-    attr_reader :path, :language
   end
 end