Move Samples::DATA constant to Samples.cache method

This commit is contained in:
Brandon Keepers
2014-09-16 10:18:33 -04:00
parent 156985ed52
commit 015af19eaf
5 changed files with 16 additions and 14 deletions

View File

@@ -99,7 +99,7 @@ namespace :classifier do
next if file_language.nil? || file_language == 'Text' next if file_language.nil? || file_language == 'Text'
begin begin
data = open(file_url).read data = open(file_url).read
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first guessed_language, score = Linguist::Classifier.classify(Linguist::Samples.cache, data).first
total += 1 total += 1
guessed_language == file_language ? correct += 1 : incorrect += 1 guessed_language == file_language ? correct += 1 : incorrect += 1

View File

@@ -136,7 +136,7 @@ module Linguist
elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty? elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
determined.first determined.first
# Lastly, fall back to the probabilistic classifier. # Lastly, fall back to the probabilistic classifier.
elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names).first elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
# Return the actual Language object based on the string language name (i.e., first element of `#classify`) # Return the actual Language object based on the string language name (i.e., first element of `#classify`)
Language[classified[0]] Language[classified[0]]
end end
@@ -510,9 +510,9 @@ module Linguist
end end
end end
extensions = Samples::DATA['extnames'] extensions = Samples.cache['extnames']
interpreters = Samples::DATA['interpreters'] interpreters = Samples.cache['interpreters']
filenames = Samples::DATA['filenames'] filenames = Samples.cache['filenames']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__)) popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
languages_yml = File.expand_path("../languages.yml", __FILE__) languages_yml = File.expand_path("../languages.yml", __FILE__)

View File

@@ -17,9 +17,11 @@ module Linguist
PATH = File.expand_path('../samples.json', __FILE__) PATH = File.expand_path('../samples.json', __FILE__)
# Hash of serialized samples object # Hash of serialized samples object
if File.exist?(PATH) def self.cache
serializer = defined?(JSON) ? JSON : YAML @cache ||= begin
DATA = serializer.load(File.read(PATH)) serializer = defined?(JSON) ? JSON : YAML
serializer.load(File.read(PATH))
end
end end
# Public: Iterate over each sample. # Public: Iterate over each sample.

View File

@@ -44,12 +44,12 @@ class TestClassifier < Test::Unit::TestCase
end end
def test_instance_classify_empty def test_instance_classify_empty
results = Classifier.classify(Samples::DATA, "") results = Classifier.classify(Samples.cache, "")
assert results.first[1] < 0.5, results.first.inspect assert results.first[1] < 0.5, results.first.inspect
end end
def test_instance_classify_nil def test_instance_classify_nil
assert_equal [], Classifier.classify(Samples::DATA, nil) assert_equal [], Classifier.classify(Samples.cache, nil)
end end
def test_classify_ambiguous_languages def test_classify_ambiguous_languages
@@ -58,7 +58,7 @@ class TestClassifier < Test::Unit::TestCase
languages = Language.find_by_filename(sample[:path]).map(&:name) languages = Language.find_by_filename(sample[:path]).map(&:name)
next unless languages.length > 1 next unless languages.length > 1
results = Classifier.classify(Samples::DATA, File.read(sample[:path]), languages) results = Classifier.classify(Samples.cache, File.read(sample[:path]), languages)
assert_equal language.name, results.first[0], "#{sample[:path]}\n#{results.inspect}" assert_equal language.name, results.first[0], "#{sample[:path]}\n#{results.inspect}"
end end
end end

View File

@@ -8,7 +8,7 @@ class TestSamples < Test::Unit::TestCase
include Linguist include Linguist
def test_up_to_date def test_up_to_date
assert serialized = Samples::DATA assert serialized = Samples.cache
assert latest = Samples.data assert latest = Samples.data
# Just warn, it shouldn't scare people off by breaking the build. # Just warn, it shouldn't scare people off by breaking the build.
@@ -29,7 +29,7 @@ class TestSamples < Test::Unit::TestCase
end end
def test_verify def test_verify
assert data = Samples::DATA assert data = Samples.cache
assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c } assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c } assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
@@ -38,7 +38,7 @@ class TestSamples < Test::Unit::TestCase
# Check that there aren't samples with extensions that aren't explicitly defined in languages.yml # Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
def test_parity def test_parity
extensions = Samples::DATA['extnames'] extensions = Samples.cache['extnames']
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__) languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
languages = YAML.load_file(languages_yml) languages = YAML.load_file(languages_yml)