Move outdated check to samples

This commit is contained in:
Joshua Peek
2012-07-23 13:21:30 -05:00
parent 80e8ee7ce6
commit b9779e805e
2 changed files with 19 additions and 12 deletions

View File

@@ -1,33 +1,44 @@
require 'set' require 'set'
require 'yaml' require 'yaml'
require 'linguist/md5'
module Linguist module Linguist
# Model for accessing classifier training data. # Model for accessing classifier training data.
module Samples module Samples
# Samples live in test/ for now, we'll eventually move them out # Path to samples root directory
PATH = File.expand_path("../../../samples", __FILE__) ROOT = File.expand_path("../../../samples", __FILE__)
YML = File.expand_path('../samples.yml', __FILE__) # Path for serialized samples db
if File.exist?(YML) PATH = File.expand_path('../samples.yml', __FILE__)
DATA = YAML.load_file(YML)
# Hash of serialized samples object
if File.exist?(PATH)
DATA = YAML.load_file(PATH)
else else
DATA = nil DATA = nil
end end
# Determine whether the serialized samples db (DATA) no longer matches
# the hash produced by the current classifier.
#
# Both sides are reduced to MD5 digests and the digests are compared.
#
# Returns Boolean: true when the two digests differ.
def self.outdated?
  serialized = MD5.hexdigest(DATA)
  latest     = MD5.hexdigest(classifier.to_hash)
  serialized != latest
end
# Public: Iterate over each sample. # Public: Iterate over each sample.
# #
# &block - Yields Sample to block # &block - Yields Sample to block
# #
# Returns nothing. # Returns nothing.
def self.each(&block) def self.each(&block)
Dir.entries(PATH).each do |category| Dir.entries(ROOT).each do |category|
next if category == '.' || category == '..' next if category == '.' || category == '..'
# Skip text and binary for now # Skip text and binary for now
# Possibly reconsider this later # Possibly reconsider this later
next if category == 'text' || category == 'binary' next if category == 'text' || category == 'binary'
dirname = File.join(PATH, category) dirname = File.join(ROOT, category)
Dir.entries(dirname).each do |filename| Dir.entries(dirname).each do |filename|
next if filename == '.' || filename == '..' next if filename == '.' || filename == '..'

View File

@@ -2,7 +2,6 @@ require 'linguist/classifier'
require 'linguist/language' require 'linguist/language'
require 'linguist/samples' require 'linguist/samples'
require 'linguist/tokenizer' require 'linguist/tokenizer'
require 'linguist/md5'
require 'test/unit' require 'test/unit'
@@ -18,11 +17,8 @@ class TestClassifier < Test::Unit::TestCase
end end
def test_instance_freshness def test_instance_freshness
serialized = Linguist::MD5.hexdigest(Samples::DATA)
latest = Linguist::MD5.hexdigest(Linguist::Samples.classifier.to_hash)
# Just warn, it shouldn't scare people off by breaking the build. # Just warn, it shouldn't scare people off by breaking the build.
if serialized != latest if Samples.outdated?
warn "Classifier database is out of date. Run `bundle exec rake classifier`." warn "Classifier database is out of date. Run `bundle exec rake classifier`."
end end
end end