mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Compare md5s of dbs
This commit is contained in:
		| @@ -29,19 +29,6 @@ module Linguist | ||||
|       @languages       = attrs['languages'] || {} | ||||
|     end | ||||
|  | ||||
|     # Public: Compare Classifier objects. | ||||
|     # | ||||
|     # other - Classifier object to compare to. | ||||
|     # | ||||
|     # Returns Boolean. | ||||
|     def eql?(other) | ||||
|       # Lazy fast check counts only | ||||
|       other.is_a?(self.class) && | ||||
|         @tokens_total == other.instance_variable_get(:@tokens_total) && | ||||
|         @languages_total == other.instance_variable_get(:@languages_total) | ||||
|     end | ||||
|     alias_method :==, :eql? | ||||
|  | ||||
|     # Public: Train classifier that data is a certain language. | ||||
|     # | ||||
|     # language - String language of data | ||||
| @@ -146,6 +133,19 @@ module Linguist | ||||
|       Math.log(@languages[language].to_f / @languages_total.to_f) | ||||
|     end | ||||
|  | ||||
|     # Public: Returns serializable hash representation. | ||||
|     # | ||||
|     # Returns Hash. | ||||
|     def to_hash | ||||
|       { | ||||
|         'tokens_total'    => @tokens_total, | ||||
|         'languages_total' => @languages_total, | ||||
|         'tokens'          => @tokens, | ||||
|         'language_tokens' => @language_tokens, | ||||
|         'languages'       => @languages | ||||
|       } | ||||
|     end | ||||
|  | ||||
|     # Public: Serialize classifier to YAML. | ||||
|     # | ||||
|     # opts - Hash of YAML options. | ||||
|   | ||||
| @@ -2,6 +2,7 @@ require 'linguist/classifier' | ||||
| require 'linguist/language' | ||||
| require 'linguist/sample' | ||||
| require 'linguist/tokenizer' | ||||
| require 'linguist/md5' | ||||
|  | ||||
| require 'test/unit' | ||||
|  | ||||
| @@ -17,8 +18,11 @@ class TestClassifier < Test::Unit::TestCase | ||||
|   end | ||||
|  | ||||
|   def test_instance_freshness | ||||
|     serialized = Linguist::MD5.hexdigest(Classifier.instance.to_hash) | ||||
|     latest     = Linguist::MD5.hexdigest(Linguist::Sample.classifier.to_hash) | ||||
|  | ||||
|     # Just warn, it shouldn't scare people off by breaking the build. | ||||
|     unless Classifier.instance.eql?(Linguist::Sample.classifier) | ||||
|     if serialized != latest | ||||
|       warn "Classifier database is out of date. Run `bundle exec rake classifier`." | ||||
|     end | ||||
|   end | ||||
|   | ||||
		Reference in New Issue
	
	Block a user