Move Samples::DATA constant to Samples.cache method

This commit is contained in:
Brandon Keepers
2014-09-16 10:18:33 -04:00
parent 156985ed52
commit 015af19eaf
5 changed files with 16 additions and 14 deletions

View File

@@ -99,7 +99,7 @@ namespace :classifier do
next if file_language.nil? || file_language == 'Text'
begin
data = open(file_url).read
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples.cache, data).first
total += 1
guessed_language == file_language ? correct += 1 : incorrect += 1

View File

@@ -136,7 +136,7 @@ module Linguist
elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
determined.first
# Lastly, fall back to the probabilistic classifier.
elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names).first
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
# Return the actual Language object based of the string language name (i.e., first element of `#classify`)
Language[classified[0]]
end
@@ -510,9 +510,9 @@ module Linguist
end
end
extensions = Samples::DATA['extnames']
interpreters = Samples::DATA['interpreters']
filenames = Samples::DATA['filenames']
extensions = Samples.cache['extnames']
interpreters = Samples.cache['interpreters']
filenames = Samples.cache['filenames']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
languages_yml = File.expand_path("../languages.yml", __FILE__)

View File

@@ -17,9 +17,11 @@ module Linguist
PATH = File.expand_path('../samples.json', __FILE__)
# Hash of serialized samples object
if File.exist?(PATH)
serializer = defined?(JSON) ? JSON : YAML
DATA = serializer.load(File.read(PATH))
def self.cache
@cache ||= begin
serializer = defined?(JSON) ? JSON : YAML
serializer.load(File.read(PATH))
end
end
# Public: Iterate over each sample.

View File

@@ -44,12 +44,12 @@ class TestClassifier < Test::Unit::TestCase
end
def test_instance_classify_empty
results = Classifier.classify(Samples::DATA, "")
results = Classifier.classify(Samples.cache, "")
assert results.first[1] < 0.5, results.first.inspect
end
def test_instance_classify_nil
assert_equal [], Classifier.classify(Samples::DATA, nil)
assert_equal [], Classifier.classify(Samples.cache, nil)
end
def test_classify_ambiguous_languages
@@ -58,7 +58,7 @@ class TestClassifier < Test::Unit::TestCase
languages = Language.find_by_filename(sample[:path]).map(&:name)
next unless languages.length > 1
results = Classifier.classify(Samples::DATA, File.read(sample[:path]), languages)
results = Classifier.classify(Samples.cache, File.read(sample[:path]), languages)
assert_equal language.name, results.first[0], "#{sample[:path]}\n#{results.inspect}"
end
end

View File

@@ -8,7 +8,7 @@ class TestSamples < Test::Unit::TestCase
include Linguist
def test_up_to_date
assert serialized = Samples::DATA
assert serialized = Samples.cache
assert latest = Samples.data
# Just warn, it shouldn't scare people off by breaking the build.
@@ -29,7 +29,7 @@ class TestSamples < Test::Unit::TestCase
end
def test_verify
assert data = Samples::DATA
assert data = Samples.cache
assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
@@ -38,7 +38,7 @@ class TestSamples < Test::Unit::TestCase
# Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
def test_parity
extensions = Samples::DATA['extnames']
extensions = Samples.cache['extnames']
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
languages = YAML.load_file(languages_yml)