mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Test against real gist data
This commit is contained in:
		
							
								
								
									
										49
									
								
								Rakefile
									
									
									
									
									
								
							
							
						
						
									
										49
									
								
								Rakefile
									
									
									
									
									
								
							| @@ -17,3 +17,52 @@ end | ||||
| CLOBBER.include 'lib/linguist/classifier.yml' | ||||
|  | ||||
| task :classifier => ['lib/linguist/classifier.yml'] | ||||
|  | ||||
| namespace :classifier do | ||||
|   LIMIT = 1_000 | ||||
|  | ||||
|   desc "Run classifier against #{LIMIT} public gists" | ||||
# Runs the classifier over files from public gists and prints a running
# tally of how often its top guess matches the language listed for the file.
#
# Files whose listed language is nil or 'Text' are skipped. A file whose URL
# raises URI::InvalidURIError is skipped without being counted. The run ends
# once LIMIT files have been counted (or when each_public_gist runs out).
#
# Output is a single status line, rewritten in place:
#   <correct>:<incorrect>  <percent correct>%
task :test do
  require 'linguist/classifier'
  # each_public_gist loads open-uri as well; it is required here so that the
  # URI.open call below does not depend on that happening first.
  require 'open-uri'

  total, correct, incorrect = 0, 0, 0
  # Unbuffered stdout so the in-place status line updates immediately.
  $stdout.sync = true

  each_public_gist do |_gist_url, file_url, file_language|
    next if file_language.nil? || file_language == 'Text'
    begin
      # URI.open instead of Kernel#open: open-uri's URL support in
      # Kernel#open was removed in Ruby 3.0. The block form closes the
      # handle as soon as the body has been read.
      data = URI.open(file_url, &:read)
      # classify's first entry is taken as the best (language, score) pair;
      # only the language is used here.
      guessed_language, _score = Linguist::Classifier.instance.classify(data).first

      total += 1
      if guessed_language.name == file_language
        correct += 1
      else
        incorrect += 1
      end

      # "\r\e[0K" returns to the start of the line and erases it, so the
      # tally overwrites the previous one.
      print "\r\e[0K%d:%d  %g%%" % [correct, incorrect, (correct.to_f/total.to_f)*100]
      $stdout.flush
    rescue URI::InvalidURIError
      # Unusable file URL: skip it and move on to the next file.
    else
      # Only checked after a file was actually counted.
      break if total >= LIMIT
    end
  end
  puts ""
end
|  | ||||
# Yields every file of every public gist, following API pagination.
#
# Starts at https://api.github.com/gists/public. After each page, the next
# URL is taken from the rel="next" entry of the response's 'link' metadata;
# iteration ends when a response has no such entry (or no 'link' at all).
#
# Yields three values per file, exactly as found in the parsed JSON:
# the gist's 'url', the file's 'raw_url' and the file's 'language'
# (which may be nil).
#
# Returns an Enumerator when called without a block.
# Requires an open-uri that provides URI.open (Ruby 2.5 or newer).
def each_public_gist
  return enum_for(:each_public_gist) unless block_given?

  # Loaded lazily so that only this task pays for them.
  require 'open-uri'
  require 'json'

  url = "https://api.github.com/gists/public"

  while url
    next_url = nil
    # URI.open instead of Kernel#open: open-uri's URL support in Kernel#open
    # was removed in Ruby 3.0. The block form closes the response once the
    # page has been parsed.
    gists = URI.open(url) do |resp|
      link = resp.meta['link']
      # A missing header or a header without rel="next" both leave next_url
      # nil, which ends the loop instead of raising.
      next_url = link && link[/<([^>]+)>; rel="next"/, 1]
      JSON.parse(resp.read)
    end
    url = next_url

    # Yield outside the URI.open block so a caller's `break` cannot
    # interfere with closing the response.
    gists.each do |gist|
      (gist['files'] || {}).each do |_filename, attrs|
        yield gist['url'], attrs['raw_url'], attrs['language']
      end
    end
  end
end
| end | ||||
|   | ||||
| @@ -12,5 +12,6 @@ Gem::Specification.new do |s| | ||||
|   s.add_dependency 'escape_utils',    '~> 0.2.3' | ||||
|   s.add_dependency 'mime-types',      '~> 1.18' | ||||
|   s.add_dependency 'pygments.rb',     '~> 0.2.11' | ||||
|   s.add_development_dependency 'json' | ||||
|   s.add_development_dependency 'rake' | ||||
| end | ||||
|   | ||||
		Reference in New Issue
	
	Block a user