Move outdated check to samples

This commit is contained in:
Joshua Peek
2012-07-23 13:21:30 -05:00
parent 80e8ee7ce6
commit b9779e805e
2 changed files with 19 additions and 12 deletions

View File

@@ -1,33 +1,44 @@
require 'set'
require 'yaml'
require 'linguist/md5'
module Linguist
# Model for accessing classifier training data.
module Samples
# Samples live in test/ for now, we'll eventually move them out
PATH = File.expand_path("../../../samples", __FILE__)
# Path to samples root directory
ROOT = File.expand_path("../../../samples", __FILE__)
YML = File.expand_path('../samples.yml', __FILE__)
if File.exist?(YML)
DATA = YAML.load_file(YML)
# Path for serialized samples db
PATH = File.expand_path('../samples.yml', __FILE__)
# Serialized samples object loaded from the YAML file at PATH, or nil when
# that file does not exist (presumably a Hash — confirm against whatever
# writes samples.yml).
if File.exist?(PATH)
DATA = YAML.load_file(PATH)
else
# No serialized db on disk: define DATA as nil so references to the
# constant still resolve.
DATA = nil
end
# Check if serialized db is out of sync from db directory.
#
# Returns Boolean.
def self.outdated?
  # Digest the serialized db first, then a freshly built classifier hash;
  # any difference between the two digests means the db is out of sync.
  serialized_digest = MD5.hexdigest(DATA)
  current_digest = MD5.hexdigest(classifier.to_hash)
  serialized_digest != current_digest
end
# Public: Iterate over each sample.
#
# &block - Yields Sample to block
#
# Returns nothing.
def self.each(&block)
Dir.entries(PATH).each do |category|
Dir.entries(ROOT).each do |category|
next if category == '.' || category == '..'
# Skip text and binary for now
# Possibly reconsider this later
next if category == 'text' || category == 'binary'
dirname = File.join(PATH, category)
dirname = File.join(ROOT, category)
Dir.entries(dirname).each do |filename|
next if filename == '.' || filename == '..'

View File

@@ -2,7 +2,6 @@ require 'linguist/classifier'
require 'linguist/language'
require 'linguist/samples'
require 'linguist/tokenizer'
require 'linguist/md5'
require 'test/unit'
@@ -18,11 +17,8 @@ class TestClassifier < Test::Unit::TestCase
end
def test_instance_freshness
serialized = Linguist::MD5.hexdigest(Samples::DATA)
latest = Linguist::MD5.hexdigest(Linguist::Samples.classifier.to_hash)
# Just warn, it shouldn't scare people off by breaking the build.
if serialized != latest
if Samples.outdated?
warn "Classifier database is out of date. Run `bundle exec rake classifier`."
end
end