diff --git a/Rakefile b/Rakefile index d99f8216..18e395a2 100644 --- a/Rakefile +++ b/Rakefile @@ -16,7 +16,7 @@ end CLOBBER.include 'lib/linguist/samples.yml' -task :classifier => [:clobber, 'lib/linguist/samples.yml'] +task :samples => [:clobber, 'lib/linguist/samples.yml'] namespace :classifier do LIMIT = 1_000 diff --git a/test/test_classifier.rb b/test/test_classifier.rb index 82d957f6..e36bdbfc 100644 --- a/test/test_classifier.rb +++ b/test/test_classifier.rb @@ -16,13 +16,6 @@ class TestClassifier < Test::Unit::TestCase File.read(File.join(samples_path, name)) end - def test_instance_freshness - # Just warn, it shouldn't scare people off by breaking the build. - if Samples::DATA['md5'] != Samples.data['md5'] - warn "Classifier database is out of date. Run `bundle exec rake classifier`." - end - end - def test_classify db = {} Classifier.train! db, "Ruby", fixture("ruby/foo.rb") @@ -59,14 +52,6 @@ class TestClassifier < Test::Unit::TestCase assert_equal [], Classifier.classify(Samples::DATA, nil) end - def test_verify - data = Samples::DATA - - assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c } - assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c } - assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } } - end - def test_classify_ambiguous_languages Samples.each do |sample| language = Linguist::Language.find_by_alias(sample[:language]) diff --git a/test/test_samples.rb b/test/test_samples.rb new file mode 100644 index 00000000..e9699092 --- /dev/null +++ b/test/test_samples.rb @@ -0,0 +1,25 @@ +require 'linguist/samples' + +require 'test/unit' + +class TestSamples < Test::Unit::TestCase + include Linguist + + def test_up_to_date + assert serialized = Samples::DATA + assert latest = Samples.data + + # Just warn, it shouldn't scare people off by breaking the build. + if serialized['md5'] != latest['md5'] + warn "Samples database is out of date. Run `bundle exec rake samples`." + end + end + + def test_verify + assert data = Samples::DATA + + assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c } + assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c } + assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } } + end +end