mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			74 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			74 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| require 'json'
 | |
| require 'rake/clean'
 | |
| require 'rake/testtask'
 | |
| require 'yaml'
 | |
| 
 | |
# Running `rake` with no arguments runs the :test task.
task default: :test

# Registers a test task using Rake::TestTask's default settings
# (presumably named :test, which the :default prerequisite above relies on).
Rake::TestTask.new
 | |
| 
 | |
# Regenerates lib/linguist/samples.json from Linguist::Samples.data,
# serialized as pretty-printed JSON via Yajl.
task :samples do
  # Loaded lazily so the other tasks do not need these libraries.
  require 'linguist/samples'
  require 'yajl'

  encoded = Yajl::Encoder.encode(Linguist::Samples.data, :pretty => true)
  File.write('lib/linguist/samples.json', encoded)
end
 | |
| 
 | |
# Builds the github-linguist gem.
#
# A JSON dump of lib/linguist/languages.yml is written next to it for the
# duration of the build (presumably so the gemspec can package it -- confirm
# against github-linguist.gemspec) and is removed afterwards.
task :build_gem do
  json_path = "lib/linguist/languages.json"
  languages = YAML.load_file("lib/linguist/languages.yml")
  File.write(json_path, JSON.dump(languages))

  begin
    # `sh` echoes the command, streams its output, and raises when the
    # command exits non-zero, so a failed build fails the task instead of
    # being silently ignored.
    sh "gem build github-linguist.gemspec"
  ensure
    # Always remove the temporary JSON file, even when the build fails.
    File.delete(json_path) if File.exist?(json_path)
  end
end
 | |
| 
 | |
namespace :classifier do
  # Number of classified gist files after which classifier:test stops.
  LIMIT = 1_000

  desc "Run classifier against #{LIMIT} public gists"
  # Downloads files from public gists, classifies each one with
  # Linguist::Classifier against Linguist::Samples::DATA, and prints a running
  # "correct:incorrect  accuracy%" line. Files whose reported language is
  # missing or 'Text' are skipped.
  task :test do
    require 'linguist/classifier'
    require 'linguist/samples'

    total = 0
    correct = 0
    incorrect = 0
    $stdout.sync = true

    each_public_gist do |_gist_url, file_url, file_language|
      next if file_language.nil? || file_language == 'Text'

      begin
        # URI.open (open-uri, Ruby 2.5+) replaces Kernel#open, which no longer
        # opens URLs on Ruby 3.0+. The block form closes the handle after reading.
        data = URI.open(file_url, &:read)
        guessed_language, _score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first

        total += 1
        if guessed_language == file_language
          correct += 1
        else
          incorrect += 1
        end

        # "\r\e[0K" returns to column 0 and clears the line so the progress
        # counter is redrawn in place.
        print "\r\e[0K%d:%d  %g%%" % [correct, incorrect, (correct.to_f / total.to_f) * 100]
        $stdout.flush
      rescue URI::InvalidURIError
        # Skip files whose raw URL cannot be parsed.
      else
        break if total >= LIMIT
      end
    end
    puts ""
  end

  # Yields every file of every public gist returned by the GitHub API.
  #
  # Pages are followed through the rel="next" entry of the response's Link
  # header; iteration ends when a response carries no such entry.
  #
  # Yields the gist's 'url', the file's 'raw_url', and the file's 'language'
  # (as present in the API response; may be nil).
  def each_public_gist
    require 'open-uri'
    require 'json'

    url = "https://api.github.com/gists/public"

    while url
      next_url, gists = URI.open(url) do |resp|
        # `to_s` guards against a missing Link header (nil) on the last page.
        [resp.meta['link'].to_s[/<([^>]+)>; rel="next"/, 1], JSON.parse(resp.read)]
      end
      url = next_url

      gists.each do |gist|
        gist['files'].each do |_filename, attrs|
          yield gist['url'], attrs['raw_url'], attrs['language']
        end
      end
    end
  end
end
 |