mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
|||||||
Gemfile.lock
|
Gemfile.lock
|
||||||
.bundle/
|
.bundle/
|
||||||
vendor/
|
vendor/
|
||||||
|
benchmark/
|
||||||
|
|||||||
69
Rakefile
69
Rakefile
@@ -2,6 +2,7 @@ require 'json'
|
|||||||
require 'rake/clean'
|
require 'rake/clean'
|
||||||
require 'rake/testtask'
|
require 'rake/testtask'
|
||||||
require 'yaml'
|
require 'yaml'
|
||||||
|
require 'pry'
|
||||||
|
|
||||||
task :default => :test
|
task :default => :test
|
||||||
|
|
||||||
@@ -22,6 +23,67 @@ task :build_gem do
|
|||||||
File.delete("lib/linguist/languages.json")
|
File.delete("lib/linguist/languages.json")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Tasks for recording the language detected for every file in a corpus and
# for comparing two such recordings.
namespace :benchmark do
  # Directory (relative to the working directory) that result files are
  # written to.
  benchmark_path = "benchmark/results"

  # $ bundle exec rake benchmark:generate CORPUS=path/to/samples
  desc "Generate results for the corpus at CORPUS (default: samples)"
  task :generate do
    # Abbreviated commit id, used to label the result file.
    ref = `git rev-parse HEAD`.strip[0,8]

    corpus = File.expand_path(ENV["CORPUS"] || "samples")

    require 'linguist/language'

    # Maps each file's path relative to the corpus root to whatever
    # Linguist::FileBlob#language reports for it.
    results = {}
    Dir.glob("#{corpus}/**/*").each do |file|
      next unless File.file?(file)
      # Strip only the leading corpus prefix; gsub would also remove any
      # later occurrence of the same path fragment.
      filename = file.sub("#{corpus}/", "")
      results[filename] = Linguist::FileBlob.new(file).language
    end

    # Ensure results directory exists
    FileUtils.mkdir_p(benchmark_path)

    # Write results. The porcelain format prints nothing for a clean tree and
    # is stable across git versions, unlike the human-readable status message
    # ("working directory clean" became "working tree clean").
    clean = `git status --porcelain`.strip.empty?
    suffix = clean ? "" : "-unstaged"
    result_filename = "#{benchmark_path}/#{File.basename(corpus)}-#{ref}#{suffix}.json"

    File.write(result_filename, results.to_json)
    puts "wrote #{result_filename}"
  end

  # $ bundle exec rake benchmark:compare REFERENCE=path/to/reference.json CANDIDATE=path/to/candidate.json
  desc "Compare results"
  task :compare do
    reference_file = ENV["REFERENCE"]
    candidate_file = ENV["CANDIDATE"]

    # Fail with a usage hint instead of a TypeError from File.read(nil).
    unless reference_file && candidate_file
      abort "Usage: rake benchmark:compare REFERENCE=path/to/reference.json CANDIDATE=path/to/candidate.json"
    end

    # Number of files per language in the reference run.
    reference = JSON.parse(File.read(reference_file))
    reference_counts = Hash.new(0)
    reference.each { |_filename, language| reference_counts[language] += 1 }

    # Number of files per language in the candidate run.
    candidate = JSON.parse(File.read(candidate_file))
    candidate_counts = Hash.new(0)
    candidate.each { |_filename, language| candidate_counts[language] += 1 }

    changes = diff(reference_counts, candidate_counts)

    if changes.any?
      changes.each do |language, (before, after)|
        # Each count is reported as a share of its own run's total file count.
        before_percent = 100 * before / reference.size.to_f
        after_percent = 100 * after / candidate.size.to_f
        puts "%s changed from %.1f%% to %.1f%%" % [language || 'unknown', before_percent, after_percent]
      end
    else
      puts "No changes"
    end
  end
end
|
||||||
|
|
||||||
namespace :classifier do
|
namespace :classifier do
|
||||||
LIMIT = 1_000
|
LIMIT = 1_000
|
||||||
|
|
||||||
@@ -71,3 +133,10 @@ namespace :classifier do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Compare two hashes key by key.
#
# a - The "before" Hash.
# b - The "after" Hash.
#
# Values are looked up with Hash#[], so a key that is absent from one side
# yields that Hash's default value (nil unless the Hash was created with a
# default).
#
# Returns a Hash mapping every key whose values differ under == to a
# two-element Array of [a[key], b[key]]. Keys with equal values are omitted.
def diff(a, b)
  (a.keys | b.keys).each_with_object({}) do |key, changes|
    before, after = a[key], b[key]
    changes[key] = [before, after] unless before == after
  end
end
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|||||||
|
|
||||||
s.add_development_dependency 'json'
|
s.add_development_dependency 'json'
|
||||||
s.add_development_dependency 'mocha'
|
s.add_development_dependency 'mocha'
|
||||||
|
s.add_development_dependency 'pry'
|
||||||
s.add_development_dependency 'rake'
|
s.add_development_dependency 'rake'
|
||||||
s.add_development_dependency 'yajl-ruby'
|
s.add_development_dependency 'yajl-ruby'
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user