mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Move outdated check to samples
This commit is contained in:
@@ -1,33 +1,44 @@
|
|||||||
require 'set'
|
require 'set'
|
||||||
require 'yaml'
|
require 'yaml'
|
||||||
|
require 'linguist/md5'
|
||||||
|
|
||||||
module Linguist
|
module Linguist
|
||||||
# Model for accessing classifier training data.
|
# Model for accessing classifier training data.
|
||||||
module Samples
|
module Samples
|
||||||
# Samples live in test/ for now, we'll eventually move them out
|
# Path to samples root directory
|
||||||
PATH = File.expand_path("../../../samples", __FILE__)
|
ROOT = File.expand_path("../../../samples", __FILE__)
|
||||||
|
|
||||||
YML = File.expand_path('../samples.yml', __FILE__)
|
# Path for serialized samples db
|
||||||
if File.exist?(YML)
|
PATH = File.expand_path('../samples.yml', __FILE__)
|
||||||
DATA = YAML.load_file(YML)
|
|
||||||
|
# Hash of serialized samples object
|
||||||
|
if File.exist?(PATH)
|
||||||
|
DATA = YAML.load_file(PATH)
|
||||||
else
|
else
|
||||||
DATA = nil
|
DATA = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Check if the serialized db is out of sync with the db directory.
|
||||||
|
#
|
||||||
|
# Returns Boolean.
|
||||||
|
def self.outdated?
|
||||||
|
MD5.hexdigest(DATA) != MD5.hexdigest(classifier.to_hash)
|
||||||
|
end
|
||||||
|
|
||||||
# Public: Iterate over each sample.
|
# Public: Iterate over each sample.
|
||||||
#
|
#
|
||||||
# &block - Yields Sample to block
|
# &block - Yields Sample to block
|
||||||
#
|
#
|
||||||
# Returns nothing.
|
# Returns nothing.
|
||||||
def self.each(&block)
|
def self.each(&block)
|
||||||
Dir.entries(PATH).each do |category|
|
Dir.entries(ROOT).each do |category|
|
||||||
next if category == '.' || category == '..'
|
next if category == '.' || category == '..'
|
||||||
|
|
||||||
# Skip text and binary for now
|
# Skip text and binary for now
|
||||||
# Possibly reconsider this later
|
# Possibly reconsider this later
|
||||||
next if category == 'text' || category == 'binary'
|
next if category == 'text' || category == 'binary'
|
||||||
|
|
||||||
dirname = File.join(PATH, category)
|
dirname = File.join(ROOT, category)
|
||||||
Dir.entries(dirname).each do |filename|
|
Dir.entries(dirname).each do |filename|
|
||||||
next if filename == '.' || filename == '..'
|
next if filename == '.' || filename == '..'
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ require 'linguist/classifier'
|
|||||||
require 'linguist/language'
|
require 'linguist/language'
|
||||||
require 'linguist/samples'
|
require 'linguist/samples'
|
||||||
require 'linguist/tokenizer'
|
require 'linguist/tokenizer'
|
||||||
require 'linguist/md5'
|
|
||||||
|
|
||||||
require 'test/unit'
|
require 'test/unit'
|
||||||
|
|
||||||
@@ -18,11 +17,8 @@ class TestClassifier < Test::Unit::TestCase
|
|||||||
end
|
end
|
||||||
|
|
||||||
def test_instance_freshness
|
def test_instance_freshness
|
||||||
serialized = Linguist::MD5.hexdigest(Samples::DATA)
|
|
||||||
latest = Linguist::MD5.hexdigest(Linguist::Samples.classifier.to_hash)
|
|
||||||
|
|
||||||
# Just warn, it shouldn't scare people off by breaking the build.
|
# Just warn, it shouldn't scare people off by breaking the build.
|
||||||
if serialized != latest
|
if Samples.outdated?
|
||||||
warn "Classifier database is out of date. Run `bundle exec rake classifier`."
|
warn "Classifier database is out of date. Run `bundle exec rake classifier`."
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user