mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Compare md5s of dbs
This commit is contained in:
@@ -29,19 +29,6 @@ module Linguist
|
|||||||
@languages = attrs['languages'] || {}
|
@languages = attrs['languages'] || {}
|
||||||
end
|
end
|
||||||
|
|
||||||
# Public: Compare Classifier objects.
|
|
||||||
#
|
|
||||||
# other - Classifier object to compare to.
|
|
||||||
#
|
|
||||||
# Returns Boolean.
|
|
||||||
def eql?(other)
|
|
||||||
# Lazy fast check counts only
|
|
||||||
other.is_a?(self.class) &&
|
|
||||||
@tokens_total == other.instance_variable_get(:@tokens_total) &&
|
|
||||||
@languages_total == other.instance_variable_get(:@languages_total)
|
|
||||||
end
|
|
||||||
alias_method :==, :eql?
|
|
||||||
|
|
||||||
# Public: Train classifier that data is a certain language.
|
# Public: Train classifier that data is a certain language.
|
||||||
#
|
#
|
||||||
# language - String language of data
|
# language - String language of data
|
||||||
@@ -146,6 +133,19 @@ module Linguist
|
|||||||
Math.log(@languages[language].to_f / @languages_total.to_f)
|
Math.log(@languages[language].to_f / @languages_total.to_f)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Public: Returns serializable hash representation.
|
||||||
|
#
|
||||||
|
# Returns Hash.
|
||||||
|
def to_hash
|
||||||
|
{
|
||||||
|
'tokens_total' => @tokens_total,
|
||||||
|
'languages_total' => @languages_total,
|
||||||
|
'tokens' => @tokens,
|
||||||
|
'language_tokens' => @language_tokens,
|
||||||
|
'languages' => @languages
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
# Public: Serialize classifier to YAML.
|
# Public: Serialize classifier to YAML.
|
||||||
#
|
#
|
||||||
# opts - Hash of YAML options.
|
# opts - Hash of YAML options.
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ require 'linguist/classifier'
|
|||||||
require 'linguist/language'
|
require 'linguist/language'
|
||||||
require 'linguist/sample'
|
require 'linguist/sample'
|
||||||
require 'linguist/tokenizer'
|
require 'linguist/tokenizer'
|
||||||
|
require 'linguist/md5'
|
||||||
|
|
||||||
require 'test/unit'
|
require 'test/unit'
|
||||||
|
|
||||||
@@ -17,8 +18,11 @@ class TestClassifier < Test::Unit::TestCase
|
|||||||
end
|
end
|
||||||
|
|
||||||
def test_instance_freshness
|
def test_instance_freshness
|
||||||
|
serialized = Linguist::MD5.hexdigest(Classifier.instance.to_hash)
|
||||||
|
latest = Linguist::MD5.hexdigest(Linguist::Sample.classifier.to_hash)
|
||||||
|
|
||||||
# Just warn, it shouldn't scare people off by breaking the build.
|
# Just warn, it shouldn't scare people off by breaking the build.
|
||||||
unless Classifier.instance.eql?(Linguist::Sample.classifier)
|
if serialized != latest
|
||||||
warn "Classifier database is out of date. Run `bundle exec rake classifier`."
|
warn "Classifier database is out of date. Run `bundle exec rake classifier`."
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user