diff --git a/lib/linguist/samples.rb b/lib/linguist/samples.rb index b8902c5f..9d1a5c53 100644 --- a/lib/linguist/samples.rb +++ b/lib/linguist/samples.rb @@ -18,13 +18,6 @@ module Linguist DATA = nil end - # Check if serialized db is out of sync from db directory. - # - # Returns Boolean. - def self.outdated? - MD5.hexdigest(DATA) != MD5.hexdigest(data) - end - # Public: Iterate over each sample. # # &block - Yields Sample to block @@ -108,6 +101,7 @@ module Linguist data = File.read(sample[:path]) Classifier.train!(db, language.name, data) end + db['md5'] = MD5.hexdigest(db) db end @@ -120,6 +114,8 @@ module Linguist out = "" escape = lambda { |s| s.inspect.gsub(/\\#/, "\#") } + out << "md5: #{db['md5']}\n" + out << "languages_total: #{db['languages_total']}\n" out << "tokens_total: #{db['tokens_total']}\n" diff --git a/lib/linguist/samples.yml b/lib/linguist/samples.yml index 23b44474..b63f113a 100644 --- a/lib/linguist/samples.yml +++ b/lib/linguist/samples.yml @@ -1,3 +1,4 @@ +md5: b445a8a3e3414d6b628939c347e7a4f3 languages_total: 243 tokens_total: 164127 languages: diff --git a/test/test_classifier.rb b/test/test_classifier.rb index 33f3bae9..82d957f6 100644 --- a/test/test_classifier.rb +++ b/test/test_classifier.rb @@ -18,7 +18,7 @@ class TestClassifier < Test::Unit::TestCase def test_instance_freshness # Just warn, it shouldn't scare people off by breaking the build. - if Samples.outdated? + if Samples::DATA['md5'] != Samples.data['md5'] warn "Classifier database is out of date. Run `bundle exec rake classifier`." end end