From b9779e805e16b7097a5883334f6ed712abb10314 Mon Sep 17 00:00:00 2001
From: Joshua Peek
Date: Mon, 23 Jul 2012 13:21:30 -0500
Subject: [PATCH] Move outdated check to samples

---
 lib/linguist/samples.rb | 25 ++++++++++++++++++-------
 test/test_classifier.rb |  6 +-----
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/lib/linguist/samples.rb b/lib/linguist/samples.rb
index f85f9ae0..85d3193a 100644
--- a/lib/linguist/samples.rb
+++ b/lib/linguist/samples.rb
@@ -1,33 +1,44 @@
 require 'set'
 require 'yaml'
+require 'linguist/md5'
 
 module Linguist
   # Model for accessing classifier training data.
   module Samples
-    # Samples live in test/ for now, we'll eventually move them out
-    PATH = File.expand_path("../../../samples", __FILE__)
+    # Path to samples root directory
+    ROOT = File.expand_path("../../../samples", __FILE__)
 
-    YML = File.expand_path('../samples.yml', __FILE__)
-    if File.exist?(YML)
-      DATA = YAML.load_file(YML)
+    # Path for serialized samples db
+    PATH = File.expand_path('../samples.yml', __FILE__)
+
+    # Hash of serialized samples object
+    if File.exist?(PATH)
+      DATA = YAML.load_file(PATH)
     else
       DATA = nil
     end
 
+    # Check if serialized db is out of sync from db directory.
+    #
+    # Returns Boolean.
+    def self.outdated?
+      MD5.hexdigest(DATA) != MD5.hexdigest(classifier.to_hash)
+    end
+
     # Public: Iterate over each sample.
     #
     # &block - Yields Sample to block
     #
     # Returns nothing.
     def self.each(&block)
-      Dir.entries(PATH).each do |category|
+      Dir.entries(ROOT).each do |category|
         next if category == '.' || category == '..'
 
         # Skip text and binary for now
         # Possibly reconsider this later
         next if category == 'text' || category == 'binary'
 
-        dirname = File.join(PATH, category)
+        dirname = File.join(ROOT, category)
         Dir.entries(dirname).each do |filename|
           next if filename == '.' || filename == '..'
 
diff --git a/test/test_classifier.rb b/test/test_classifier.rb
index 90a7d239..890696fb 100644
--- a/test/test_classifier.rb
+++ b/test/test_classifier.rb
@@ -2,7 +2,6 @@ require 'linguist/classifier'
 require 'linguist/language'
 require 'linguist/samples'
 require 'linguist/tokenizer'
-require 'linguist/md5'
 
 require 'test/unit'
 
@@ -18,11 +17,8 @@ class TestClassifier < Test::Unit::TestCase
   end
 
   def test_instance_freshness
-    serialized = Linguist::MD5.hexdigest(Samples::DATA)
-    latest = Linguist::MD5.hexdigest(Linguist::Samples.classifier.to_hash)
-
     # Just warn, it shouldn't scare people off by breaking the build.
-    if serialized != latest
+    if Samples.outdated?
       warn "Classifier database is out of date. Run `bundle exec rake classifier`."
     end
   end