From 015af19eaf1067d578bcc1b36a36c8426be1f53f Mon Sep 17 00:00:00 2001 From: Brandon Keepers Date: Tue, 16 Sep 2014 10:18:33 -0400 Subject: [PATCH] Move `Samples::DATA` constant to `Samples.cache` method --- Rakefile | 2 +- lib/linguist/language.rb | 8 ++++---- lib/linguist/samples.rb | 8 +++++--- test/test_classifier.rb | 6 +++--- test/test_samples.rb | 6 +++--- 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/Rakefile b/Rakefile index 9c3aa2ee..70f0f787 100644 --- a/Rakefile +++ b/Rakefile @@ -99,7 +99,7 @@ namespace :classifier do next if file_language.nil? || file_language == 'Text' begin data = open(file_url).read - guessed_language, score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first + guessed_language, score = Linguist::Classifier.classify(Linguist::Samples.cache, data).first total += 1 guessed_language == file_language ? correct += 1 : incorrect += 1 diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index e9e519b8..9c53eb9d 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -136,7 +136,7 @@ module Linguist elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty? determined.first # Lastly, fall back to the probabilistic classifier. - elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names).first + elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first # Return the actual Language object based of the string language name (i.e., first element of `#classify`) Language[classified[0]] end @@ -510,9 +510,9 @@ module Linguist end end - extensions = Samples::DATA['extnames'] - interpreters = Samples::DATA['interpreters'] - filenames = Samples::DATA['filenames'] + extensions = Samples.cache['extnames'] + interpreters = Samples.cache['interpreters'] + filenames = Samples.cache['filenames'] popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__)) languages_yml = File.expand_path("../languages.yml", __FILE__) diff --git a/lib/linguist/samples.rb b/lib/linguist/samples.rb index 9a291b2d..920bb87e 100644 --- a/lib/linguist/samples.rb +++ b/lib/linguist/samples.rb @@ -17,9 +17,11 @@ module Linguist PATH = File.expand_path('../samples.json', __FILE__) # Hash of serialized samples object - if File.exist?(PATH) - serializer = defined?(JSON) ? JSON : YAML - DATA = serializer.load(File.read(PATH)) + def self.cache + @cache ||= begin + serializer = defined?(JSON) ? JSON : YAML + serializer.load(File.read(PATH)) + end end # Public: Iterate over each sample. diff --git a/test/test_classifier.rb b/test/test_classifier.rb index 0a477831..87c6feb2 100644 --- a/test/test_classifier.rb +++ b/test/test_classifier.rb @@ -44,12 +44,12 @@ class TestClassifier < Test::Unit::TestCase end def test_instance_classify_empty - results = Classifier.classify(Samples::DATA, "") + results = Classifier.classify(Samples.cache, "") assert results.first[1] < 0.5, results.first.inspect end def test_instance_classify_nil - assert_equal [], Classifier.classify(Samples::DATA, nil) + assert_equal [], Classifier.classify(Samples.cache, nil) end def test_classify_ambiguous_languages @@ -58,7 +58,7 @@ class TestClassifier < Test::Unit::TestCase languages = Language.find_by_filename(sample[:path]).map(&:name) next unless languages.length > 1 - results = Classifier.classify(Samples::DATA, File.read(sample[:path]), languages) + results = Classifier.classify(Samples.cache, File.read(sample[:path]), languages) assert_equal language.name, results.first[0], "#{sample[:path]}\n#{results.inspect}" end end diff --git a/test/test_samples.rb b/test/test_samples.rb index 3ee5b64d..899992dd 100644 --- a/test/test_samples.rb +++ b/test/test_samples.rb @@ -8,7 +8,7 @@ class TestSamples < Test::Unit::TestCase include Linguist def test_up_to_date - assert serialized = Samples::DATA + assert serialized = Samples.cache assert latest = Samples.data # Just warn, it shouldn't scare people off by breaking the build. @@ -29,7 +29,7 @@ class TestSamples < Test::Unit::TestCase end def test_verify - assert data = Samples::DATA + assert data = Samples.cache assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c } assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c } @@ -38,7 +38,7 @@ class TestSamples < Test::Unit::TestCase # Check that there aren't samples with extensions that aren't explicitly defined in languages.yml def test_parity - extensions = Samples::DATA['extnames'] + extensions = Samples.cache['extnames'] languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__) languages = YAML.load_file(languages_yml)