Compare md5s of dbs

This commit is contained in:
Joshua Peek
2012-07-23 12:17:32 -05:00
parent d6fb95b06f
commit b7f58d96cb
2 changed files with 18 additions and 14 deletions

View File

@@ -29,19 +29,6 @@ module Linguist
@languages = attrs['languages'] || {}
end
# Public: Compare Classifier objects.
#
# other - Classifier object to compare to.
#
# Returns Boolean.
def eql?(other)
  # An object that is not an instance of this object's class never matches.
  return false unless other.is_a?(self.class)

  # Deliberately cheap comparison: only the two aggregate counters are
  # checked, not the full token and language tables.
  same_tokens = @tokens_total == other.instance_variable_get(:@tokens_total)
  same_tokens && @languages_total == other.instance_variable_get(:@languages_total)
end
alias_method :==, :eql?
# Public: Train classifier that data is a certain language.
#
# language - String language of data
@@ -146,6 +133,19 @@ module Linguist
Math.log(@languages[language].to_f / @languages_total.to_f)
end
# Public: Returns serializable hash representation.
#
# Returns Hash.
def to_hash
  # Build the snapshot key by key, in a fixed order, from the classifier's
  # instance variables. The values are the live objects, not copies.
  snapshot = {}
  snapshot['tokens_total'] = @tokens_total
  snapshot['languages_total'] = @languages_total
  snapshot['tokens'] = @tokens
  snapshot['language_tokens'] = @language_tokens
  snapshot['languages'] = @languages
  snapshot
end
# Public: Serialize classifier to YAML.
#
# opts - Hash of YAML options.

View File

@@ -2,6 +2,7 @@ require 'linguist/classifier'
require 'linguist/language'
require 'linguist/sample'
require 'linguist/tokenizer'
require 'linguist/md5'
require 'test/unit'
@@ -17,8 +18,11 @@ class TestClassifier < Test::Unit::TestCase
end
def test_instance_freshness
serialized = Linguist::MD5.hexdigest(Classifier.instance.to_hash)
latest = Linguist::MD5.hexdigest(Linguist::Sample.classifier.to_hash)
# Just warn, it shouldn't scare people off by breaking the build.
unless Classifier.instance.eql?(Linguist::Sample.classifier)
if serialized != latest
warn "Classifier database is out of date. Run `bundle exec rake classifier`."
end
end