mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -1,3 +1,4 @@ | |||||||
| Gemfile.lock | Gemfile.lock | ||||||
| .bundle/ | .bundle/ | ||||||
| vendor/ | vendor/ | ||||||
|  | benchmark/ | ||||||
|   | |||||||
							
								
								
									
										69
									
								
								Rakefile
									
									
									
									
									
								
							
							
						
						
									
										69
									
								
								Rakefile
									
									
									
									
									
								
							| @@ -2,6 +2,7 @@ require 'json' | |||||||
| require 'rake/clean' | require 'rake/clean' | ||||||
| require 'rake/testtask' | require 'rake/testtask' | ||||||
| require 'yaml' | require 'yaml' | ||||||
|  | require 'pry' | ||||||
|  |  | ||||||
| task :default => :test | task :default => :test | ||||||
|  |  | ||||||
| @@ -22,6 +23,67 @@ task :build_gem do | |||||||
|   File.delete("lib/linguist/languages.json") |   File.delete("lib/linguist/languages.json") | ||||||
| end | end | ||||||
|  |  | ||||||
|  | namespace :benchmark do | ||||||
|  |   benchmark_path = "benchmark/results" | ||||||
|  |  | ||||||
|  |   # $ bundle exec rake benchmark:generate CORPUS=path/to/samples | ||||||
|  |   desc "Generate results for" | ||||||
|  |   task :generate do | ||||||
|  |     ref = `git rev-parse HEAD`.strip[0,8] | ||||||
|  |  | ||||||
|  |     corpus = File.expand_path(ENV["CORPUS"] || "samples") | ||||||
|  |  | ||||||
|  |     require 'linguist/language' | ||||||
|  |  | ||||||
|  |     results = Hash.new | ||||||
|  |     Dir.glob("#{corpus}/**/*").each do |file| | ||||||
|  |       next unless File.file?(file) | ||||||
|  |       filename = file.gsub("#{corpus}/", "") | ||||||
|  |       results[filename] = Linguist::FileBlob.new(file).language | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |     # Ensure results directory exists | ||||||
|  |     FileUtils.mkdir_p("benchmark/results") | ||||||
|  |  | ||||||
|  |     # Write results | ||||||
|  |     if `git status`.include?('working directory clean') | ||||||
|  |       result_filename = "benchmark/results/#{File.basename(corpus)}-#{ref}.json" | ||||||
|  |     else | ||||||
|  |       result_filename = "benchmark/results/#{File.basename(corpus)}-#{ref}-unstaged.json" | ||||||
|  |     end | ||||||
|  |  | ||||||
|  |     File.write(result_filename, results.to_json) | ||||||
|  |     puts "wrote #{result_filename}" | ||||||
|  |   end | ||||||
|  |  | ||||||
|  |   # $ bundle exec rake benchmark:compare REFERENCE=path/to/reference.json CANDIDATE=path/to/candidate.json | ||||||
|  |   desc "Compare results" | ||||||
|  |   task :compare do | ||||||
|  |     reference_file = ENV["REFERENCE"] | ||||||
|  |     candidate_file = ENV["CANDIDATE"] | ||||||
|  |  | ||||||
|  |     reference = JSON.parse(File.read(reference_file)) | ||||||
|  |     reference_counts = Hash.new(0) | ||||||
|  |     reference.each { |filename, language| reference_counts[language] += 1 } | ||||||
|  |  | ||||||
|  |     candidate = JSON.parse(File.read(candidate_file)) | ||||||
|  |     candidate_counts = Hash.new(0) | ||||||
|  |     candidate.each { |filename, language| candidate_counts[language] += 1 } | ||||||
|  |  | ||||||
|  |     changes = diff(reference_counts, candidate_counts) | ||||||
|  |  | ||||||
|  |     if changes.any? | ||||||
|  |       changes.each do |language, (before, after)| | ||||||
|  |         before_percent = 100 * before / reference.size.to_f | ||||||
|  |         after_percent = 100 * after / candidate.size.to_f | ||||||
|  |         puts "%s changed from %.1f%% to %.1f%%" % [language || 'unknown', before_percent, after_percent] | ||||||
|  |       end | ||||||
|  |     else | ||||||
|  |       puts "No changes" | ||||||
|  |     end | ||||||
|  |   end | ||||||
|  | end | ||||||
|  |  | ||||||
| namespace :classifier do | namespace :classifier do | ||||||
|   LIMIT = 1_000 |   LIMIT = 1_000 | ||||||
|  |  | ||||||
| @@ -71,3 +133,10 @@ namespace :classifier do | |||||||
|     end |     end | ||||||
|   end |   end | ||||||
| end | end | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def diff(a, b) | ||||||
|  |   (a.keys | b.keys).each_with_object({}) do |key, diff| | ||||||
|  |     diff[key] = [a[key], b[key]] unless a[key] == b[key] | ||||||
|  |   end | ||||||
|  | end | ||||||
|   | |||||||
| @@ -21,6 +21,7 @@ Gem::Specification.new do |s| | |||||||
|  |  | ||||||
|   s.add_development_dependency 'json' |   s.add_development_dependency 'json' | ||||||
|   s.add_development_dependency 'mocha' |   s.add_development_dependency 'mocha' | ||||||
|  |   s.add_development_dependency 'pry' | ||||||
|   s.add_development_dependency 'rake' |   s.add_development_dependency 'rake' | ||||||
|   s.add_development_dependency 'yajl-ruby' |   s.add_development_dependency 'yajl-ruby' | ||||||
| end | end | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user