From b7f58d96cbc5ff4c2954976f65adbf18930ce810 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Mon, 23 Jul 2012 12:17:32 -0500 Subject: [PATCH] Compare md5s of dbs --- lib/linguist/classifier.rb | 26 +++++++++++++------------- test/test_classifier.rb | 6 +++++- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/linguist/classifier.rb b/lib/linguist/classifier.rb index 7a348aa4..843ffa3c 100644 --- a/lib/linguist/classifier.rb +++ b/lib/linguist/classifier.rb @@ -29,19 +29,6 @@ module Linguist @languages = attrs['languages'] || {} end - # Public: Compare Classifier objects. - # - # other - Classifier object to compare to. - # - # Returns Boolean. - def eql?(other) - # Lazy fast check counts only - other.is_a?(self.class) && - @tokens_total == other.instance_variable_get(:@tokens_total) && - @languages_total == other.instance_variable_get(:@languages_total) - end - alias_method :==, :eql? - # Public: Train classifier that data is a certain language. # # language - String language of data @@ -146,6 +133,19 @@ module Linguist Math.log(@languages[language].to_f / @languages_total.to_f) end + # Public: Returns serializable hash representation. + # + # Returns Hash. + def to_hash + { + 'tokens_total' => @tokens_total, + 'languages_total' => @languages_total, + 'tokens' => @tokens, + 'language_tokens' => @language_tokens, + 'languages' => @languages + } + end + # Public: Serialize classifier to YAML. # # opts - Hash of YAML options. diff --git a/test/test_classifier.rb b/test/test_classifier.rb index 80df7e77..0571f85e 100644 --- a/test/test_classifier.rb +++ b/test/test_classifier.rb @@ -2,6 +2,7 @@ require 'linguist/classifier' require 'linguist/language' require 'linguist/sample' require 'linguist/tokenizer' +require 'linguist/md5' require 'test/unit' @@ -17,8 +18,11 @@ class TestClassifier < Test::Unit::TestCase end def test_instance_freshness + serialized = Linguist::MD5.hexdigest(Classifier.instance.to_hash) + latest = Linguist::MD5.hexdigest(Linguist::Sample.classifier.to_hash) + # Just warn, it shouldn't scare people off by breaking the build. - unless Classifier.instance.eql?(Linguist::Sample.classifier) + if serialized != latest warn "Classifier database is out of date. Run `bundle exec rake classifier`." end end