mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Doc sample class
This commit is contained in:
@@ -2,17 +2,25 @@ require 'linguist/classifier'
|
|||||||
require 'linguist/language'
|
require 'linguist/language'
|
||||||
|
|
||||||
module Linguist
|
module Linguist
|
||||||
|
# Model for accessing classifier training data.
|
||||||
class Sample
|
class Sample
|
||||||
# Samples live in test/ for now, we'll eventually move them out
|
# Samples live in test/ for now, we'll eventually move them out
|
||||||
PATH = File.expand_path("../../../test/fixtures", __FILE__)
|
PATH = File.expand_path("../../../test/fixtures", __FILE__)
|
||||||
|
|
||||||
|
# Public: Iterate over each Sample.
|
||||||
|
#
|
||||||
|
# &block - Yields Sample to block
|
||||||
|
#
|
||||||
|
# Returns nothing.
|
||||||
def self.each(&block)
|
def self.each(&block)
|
||||||
Dir.entries(PATH).each do |category|
|
Dir.entries(PATH).each do |category|
|
||||||
next if category == '.' || category == '..'
|
next if category == '.' || category == '..'
|
||||||
|
|
||||||
# Skip text and binary for now
|
# Skip text and binary for now
|
||||||
|
# Possibly reconsider this later
|
||||||
next if category == 'text' || category == 'binary'
|
next if category == 'text' || category == 'binary'
|
||||||
|
|
||||||
|
# Map directory name to a Language alias
|
||||||
language = Linguist::Language.find_by_alias(category)
|
language = Linguist::Language.find_by_alias(category)
|
||||||
raise "No language for #{category.inspect}" unless language
|
raise "No language for #{category.inspect}" unless language
|
||||||
|
|
||||||
@@ -26,21 +34,41 @@ module Linguist
|
|||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Build Classifier from all samples.
|
||||||
|
#
|
||||||
|
# Returns trained Classifier.
|
||||||
def self.classifier
|
def self.classifier
|
||||||
classifier = Classifier.new
|
classifier = Classifier.new
|
||||||
each { |sample| classifier.train(sample.language, sample.data) }
|
each { |sample| classifier.train(sample.language, sample.data) }
|
||||||
classifier.gc
|
classifier.gc
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Internal: Initialize Sample.
|
||||||
|
#
|
||||||
|
# Samples should be initialized by Sample.each.
|
||||||
|
#
|
||||||
|
# path - String full path to file.
|
||||||
|
# language - Language of sample.
|
||||||
def initialize(path, language)
|
def initialize(path, language)
|
||||||
@path = path
|
@path = path
|
||||||
@language = language
|
@language = language
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Get full path to file.
|
||||||
|
#
|
||||||
|
# Returns String.
|
||||||
|
attr_reader :path
|
||||||
|
|
||||||
|
# Public: Get sample language.
|
||||||
|
#
|
||||||
|
# Returns Language.
|
||||||
|
attr_reader :language
|
||||||
|
|
||||||
|
# Public: Read file contents.
|
||||||
|
#
|
||||||
|
# Returns String.
|
||||||
def data
|
def data
|
||||||
File.read(path)
|
File.read(path)
|
||||||
end
|
end
|
||||||
|
|
||||||
attr_reader :path, :language
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user