mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Move Samples::DATA constant to Samples.cache method
This commit is contained in:
2
Rakefile
2
Rakefile
@@ -99,7 +99,7 @@ namespace :classifier do
|
||||
next if file_language.nil? || file_language == 'Text'
|
||||
begin
|
||||
data = open(file_url).read
|
||||
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first
|
||||
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples.cache, data).first
|
||||
|
||||
total += 1
|
||||
guessed_language == file_language ? correct += 1 : incorrect += 1
|
||||
|
||||
@@ -136,7 +136,7 @@ module Linguist
|
||||
elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty?
|
||||
determined.first
|
||||
# Lastly, fall back to the probabilistic classifier.
|
||||
elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names).first
|
||||
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
|
||||
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
||||
Language[classified[0]]
|
||||
end
|
||||
@@ -510,9 +510,9 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
extensions = Samples::DATA['extnames']
|
||||
interpreters = Samples::DATA['interpreters']
|
||||
filenames = Samples::DATA['filenames']
|
||||
extensions = Samples.cache['extnames']
|
||||
interpreters = Samples.cache['interpreters']
|
||||
filenames = Samples.cache['filenames']
|
||||
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
|
||||
|
||||
languages_yml = File.expand_path("../languages.yml", __FILE__)
|
||||
|
||||
@@ -17,9 +17,11 @@ module Linguist
|
||||
PATH = File.expand_path('../samples.json', __FILE__)
|
||||
|
||||
# Hash of serialized samples object
|
||||
if File.exist?(PATH)
|
||||
def self.cache
|
||||
@cache ||= begin
|
||||
serializer = defined?(JSON) ? JSON : YAML
|
||||
DATA = serializer.load(File.read(PATH))
|
||||
serializer.load(File.read(PATH))
|
||||
end
|
||||
end
|
||||
|
||||
# Public: Iterate over each sample.
|
||||
|
||||
@@ -44,12 +44,12 @@ class TestClassifier < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
def test_instance_classify_empty
|
||||
results = Classifier.classify(Samples::DATA, "")
|
||||
results = Classifier.classify(Samples.cache, "")
|
||||
assert results.first[1] < 0.5, results.first.inspect
|
||||
end
|
||||
|
||||
def test_instance_classify_nil
|
||||
assert_equal [], Classifier.classify(Samples::DATA, nil)
|
||||
assert_equal [], Classifier.classify(Samples.cache, nil)
|
||||
end
|
||||
|
||||
def test_classify_ambiguous_languages
|
||||
@@ -58,7 +58,7 @@ class TestClassifier < Test::Unit::TestCase
|
||||
languages = Language.find_by_filename(sample[:path]).map(&:name)
|
||||
next unless languages.length > 1
|
||||
|
||||
results = Classifier.classify(Samples::DATA, File.read(sample[:path]), languages)
|
||||
results = Classifier.classify(Samples.cache, File.read(sample[:path]), languages)
|
||||
assert_equal language.name, results.first[0], "#{sample[:path]}\n#{results.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
@@ -8,7 +8,7 @@ class TestSamples < Test::Unit::TestCase
|
||||
include Linguist
|
||||
|
||||
def test_up_to_date
|
||||
assert serialized = Samples::DATA
|
||||
assert serialized = Samples.cache
|
||||
assert latest = Samples.data
|
||||
|
||||
# Just warn, it shouldn't scare people off by breaking the build.
|
||||
@@ -29,7 +29,7 @@ class TestSamples < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
def test_verify
|
||||
assert data = Samples::DATA
|
||||
assert data = Samples.cache
|
||||
|
||||
assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
|
||||
assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
|
||||
@@ -38,7 +38,7 @@ class TestSamples < Test::Unit::TestCase
|
||||
|
||||
# Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
|
||||
def test_parity
|
||||
extensions = Samples::DATA['extnames']
|
||||
extensions = Samples.cache['extnames']
|
||||
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
|
||||
languages = YAML.load_file(languages_yml)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user