Files
linguist/Rakefile
Arfon Smith dab9777621 Branches
2014-07-23 10:26:20 -05:00

107 lines
2.5 KiB
Ruby

require 'json'
require 'rake/clean'
require 'rake/testtask'
require 'yaml'
# Running a bare `rake` runs the test suite.
task default: :test

# Define the :test task using Rake::TestTask's default settings.
Rake::TestTask.new(:test)
# Regenerates lib/linguist/samples.json from Linguist::Samples.data,
# serialized as pretty-printed JSON.
task :samples do
  require 'linguist/samples'
  require 'yajl'

  encoded = Yajl::Encoder.encode(Linguist::Samples.data, :pretty => true)
  File.write('lib/linguist/samples.json', encoded)
end
# Builds the github-linguist gem.
#
# languages.yml is converted to a temporary languages.json that only exists
# while `gem build` runs. The temporary file is removed afterwards even when
# the build raises or fails, and a failing build aborts the task instead of
# being silently ignored.
task :build_gem do
  json_path = "lib/linguist/languages.json"
  languages = YAML.load_file("lib/linguist/languages.yml")

  begin
    File.write(json_path, JSON.dump(languages))
    output = `gem build github-linguist.gemspec`
    # Backticks never raise on a non-zero exit status, so check it explicitly.
    abort "gem build failed:\n#{output}" unless $?.success?
  ensure
    File.delete(json_path) if File.exist?(json_path)
  end
end
namespace :benchmark do
  # Checks out two commits in turn on throwaway branches so that a benchmark
  # can be run against each of them, then restores the original branch and
  # deletes the throwaway branches. The benchmark steps themselves are still
  # placeholders.
  #
  # Usage: rake benchmark:run compare=<reference>...<compare>
  desc "Testin'"
  task :run do
    reference, compare = ENV['compare'].to_s.split('...')
    if reference.to_s.empty? || compare.to_s.empty?
      abort "Usage: rake benchmark:run compare=<reference>...<compare>"
    end

    # Loaded here rather than at Rakefile load time so that the other tasks
    # keep working without the git gem and outside of a git checkout.
    require 'git'
    git = Git.open('.')

    puts "Comparing #{reference}...#{compare}"

    # `next` leaves the task block; `return` is not valid inside it.
    if git.status.changed.any?
      puts "Unstaged changes"
      next
    end

    # Get the current branch
    current_branch = `git rev-parse --abbrev-ref HEAD`.strip

    # Create tmp branch for reference commit
    git.branch("tmp_#{reference}").checkout
    git.reset_hard(reference)

    # RUN BENCHMARK

    # Create tmp branch for the commit being compared
    git.branch("tmp_#{compare}").checkout
    git.reset_hard(compare)

    # RUN BENCHMARK AGAIN

    # Go back to where we started before removing the tmp branches
    git.branch(current_branch).checkout

    # CLEAN UP
    git.branch("tmp_#{reference}").delete
    git.branch("tmp_#{compare}").delete
  end
end
namespace :classifier do
  # Number of classified gist files after which the test run stops.
  LIMIT = 1_000

  # Downloads files from public gists, classifies each one and prints a
  # running "correct:incorrect percentage" line. Files without a language,
  # or whose language is 'Text', are skipped.
  desc "Run classifier against #{LIMIT} public gists"
  task :test do
    require 'linguist/classifier'
    require 'linguist/samples'
    require 'open-uri'

    total = 0
    correct = 0
    incorrect = 0
    $stdout.sync = true

    each_public_gist do |_gist_url, file_url, file_language|
      next if file_language.nil? || file_language == 'Text'

      begin
        # URI#open (instead of Kernel#open) never treats the string as a
        # local path or a pipe command, and also works on Ruby 3+.
        data = URI.parse(file_url).open(&:read)
      rescue URI::InvalidURIError
        # Best effort: skip files whose raw URL cannot be parsed.
        next
      end

      guessed_language, _score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first

      total += 1
      if guessed_language == file_language
        correct += 1
      else
        incorrect += 1
      end

      print "\r\e[0K%d:%d %g%%" % [correct, incorrect, (correct.to_f / total) * 100]
      $stdout.flush

      break if total >= LIMIT
    end
    puts ""
  end

  # Yields (gist API url, raw file url, file language) for every file of
  # every public gist returned by the GitHub API. Follows the rel="next"
  # link of each response and stops once a response has no next page.
  def each_public_gist
    require 'open-uri'
    require 'json'

    url = "https://api.github.com/gists/public"
    while url
      # Block form closes the response handle once the page is parsed.
      gists = URI.parse(url).open do |resp|
        # No Link header, or no rel="next" entry, leaves url nil and ends the loop.
        url = resp.meta['link'].to_s[/<([^>]+)>; rel="next"/, 1]
        JSON.parse(resp.read)
      end

      gists.each do |gist|
        gist['files'].each do |_filename, attrs|
          yield gist['url'], attrs['raw_url'], attrs['language']
        end
      end
    end
  end
end