mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Move Samples::DATA constant to Samples.cache method
				
					
				
			This commit is contained in:
		
							
								
								
									
										2
									
								
								Rakefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Rakefile
									
									
									
									
									
								
							| @@ -99,7 +99,7 @@ namespace :classifier do | |||||||
|       next if file_language.nil? || file_language == 'Text' |       next if file_language.nil? || file_language == 'Text' | ||||||
|       begin |       begin | ||||||
|         data = open(file_url).read |         data = open(file_url).read | ||||||
|         guessed_language, score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first |         guessed_language, score = Linguist::Classifier.classify(Linguist::Samples.cache, data).first | ||||||
|  |  | ||||||
|         total += 1 |         total += 1 | ||||||
|         guessed_language == file_language ? correct += 1 : incorrect += 1 |         guessed_language == file_language ? correct += 1 : incorrect += 1 | ||||||
|   | |||||||
| @@ -136,7 +136,7 @@ module Linguist | |||||||
|         elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty? |         elsif (determined = Heuristics.find_by_heuristics(data, possible_language_names)) && !determined.empty? | ||||||
|           determined.first |           determined.first | ||||||
|         # Lastly, fall back to the probabilistic classifier. |         # Lastly, fall back to the probabilistic classifier. | ||||||
|         elsif classified = Classifier.classify(Samples::DATA, data, possible_language_names).first |         elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first | ||||||
|           # Return the actual Language object based of the string language name (i.e., first element of `#classify`) |           # Return the actual Language object based of the string language name (i.e., first element of `#classify`) | ||||||
|           Language[classified[0]] |           Language[classified[0]] | ||||||
|         end |         end | ||||||
| @@ -510,9 +510,9 @@ module Linguist | |||||||
|     end |     end | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   extensions = Samples::DATA['extnames'] |   extensions = Samples.cache['extnames'] | ||||||
|   interpreters = Samples::DATA['interpreters'] |   interpreters = Samples.cache['interpreters'] | ||||||
|   filenames = Samples::DATA['filenames'] |   filenames = Samples.cache['filenames'] | ||||||
|   popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__)) |   popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__)) | ||||||
|  |  | ||||||
|   languages_yml = File.expand_path("../languages.yml", __FILE__) |   languages_yml = File.expand_path("../languages.yml", __FILE__) | ||||||
|   | |||||||
| @@ -17,9 +17,11 @@ module Linguist | |||||||
|     PATH = File.expand_path('../samples.json', __FILE__) |     PATH = File.expand_path('../samples.json', __FILE__) | ||||||
|  |  | ||||||
|     # Hash of serialized samples object |     # Hash of serialized samples object | ||||||
|     if File.exist?(PATH) |     def self.cache | ||||||
|  |       @cache ||= begin | ||||||
|         serializer = defined?(JSON) ? JSON : YAML |         serializer = defined?(JSON) ? JSON : YAML | ||||||
|       DATA = serializer.load(File.read(PATH)) |         serializer.load(File.read(PATH)) | ||||||
|  |       end | ||||||
|     end |     end | ||||||
|  |  | ||||||
|     # Public: Iterate over each sample. |     # Public: Iterate over each sample. | ||||||
|   | |||||||
| @@ -44,12 +44,12 @@ class TestClassifier < Test::Unit::TestCase | |||||||
|   end |   end | ||||||
|  |  | ||||||
|   def test_instance_classify_empty |   def test_instance_classify_empty | ||||||
|     results = Classifier.classify(Samples::DATA, "") |     results = Classifier.classify(Samples.cache, "") | ||||||
|     assert results.first[1] < 0.5, results.first.inspect |     assert results.first[1] < 0.5, results.first.inspect | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def test_instance_classify_nil |   def test_instance_classify_nil | ||||||
|     assert_equal [], Classifier.classify(Samples::DATA, nil) |     assert_equal [], Classifier.classify(Samples.cache, nil) | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   def test_classify_ambiguous_languages |   def test_classify_ambiguous_languages | ||||||
| @@ -58,7 +58,7 @@ class TestClassifier < Test::Unit::TestCase | |||||||
|       languages = Language.find_by_filename(sample[:path]).map(&:name) |       languages = Language.find_by_filename(sample[:path]).map(&:name) | ||||||
|       next unless languages.length > 1 |       next unless languages.length > 1 | ||||||
|  |  | ||||||
|       results = Classifier.classify(Samples::DATA, File.read(sample[:path]), languages) |       results = Classifier.classify(Samples.cache, File.read(sample[:path]), languages) | ||||||
|       assert_equal language.name, results.first[0], "#{sample[:path]}\n#{results.inspect}" |       assert_equal language.name, results.first[0], "#{sample[:path]}\n#{results.inspect}" | ||||||
|     end |     end | ||||||
|   end |   end | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ class TestSamples < Test::Unit::TestCase | |||||||
|   include Linguist |   include Linguist | ||||||
|  |  | ||||||
|   def test_up_to_date |   def test_up_to_date | ||||||
|     assert serialized = Samples::DATA |     assert serialized = Samples.cache | ||||||
|     assert latest = Samples.data |     assert latest = Samples.data | ||||||
|  |  | ||||||
|     # Just warn, it shouldn't scare people off by breaking the build. |     # Just warn, it shouldn't scare people off by breaking the build. | ||||||
| @@ -29,7 +29,7 @@ class TestSamples < Test::Unit::TestCase | |||||||
|   end |   end | ||||||
|  |  | ||||||
|   def test_verify |   def test_verify | ||||||
|     assert data = Samples::DATA |     assert data = Samples.cache | ||||||
|  |  | ||||||
|     assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c } |     assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c } | ||||||
|     assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c } |     assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c } | ||||||
| @@ -38,7 +38,7 @@ class TestSamples < Test::Unit::TestCase | |||||||
|  |  | ||||||
|   # Check that there aren't samples with extensions that aren't explicitly defined in languages.yml |   # Check that there aren't samples with extensions that aren't explicitly defined in languages.yml | ||||||
|   def test_parity |   def test_parity | ||||||
|     extensions = Samples::DATA['extnames'] |     extensions = Samples.cache['extnames'] | ||||||
|     languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__) |     languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__) | ||||||
|     languages = YAML.load_file(languages_yml) |     languages = YAML.load_file(languages_yml) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user