mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Test against real gist data
This commit is contained in:
49
Rakefile
49
Rakefile
@@ -17,3 +17,52 @@ end
|
||||
# Add the classifier data file to the CLOBBER file list.
CLOBBER.include 'lib/linguist/classifier.yml'
|
||||
|
||||
# The :classifier task has the classifier data file as its only prerequisite.
task :classifier => ['lib/linguist/classifier.yml']
|
||||
|
||||
namespace :classifier do
|
||||
LIMIT = 1_000
|
||||
|
||||
desc "Run classifier against #{LIMIT} public gists"
# Downloads files from public gists, classifies each one and prints a running
# "correct:incorrect percent%" tally on a single terminal line.
#
# Files whose reported language is missing or 'Text' are skipped. The run
# stops once LIMIT files have been classified.
task :test do
  # Loaded here (not only in each_public_gist) so URI#open is available even
  # if the helper's implementation changes.
  require 'open-uri'
  require 'linguist/classifier'

  total, correct, incorrect = 0, 0, 0
  # Unbuffered output so the in-place progress line updates immediately.
  $stdout.sync = true

  each_public_gist do |_gist_url, file_url, file_language|
    next if file_language.nil? || file_language == 'Text'

    begin
      # URI#open instead of Kernel#open: Kernel#open would treat a string
      # starting with "|" as a command to run, and no longer fetches URLs on
      # Ruby 3. The block form closes the response once it has been read.
      data = URI.parse(file_url).open { |io| io.read }
    rescue URI::InvalidURIError
      # Malformed raw URL: skip this file without counting it.
      next
    end

    # classify yields (language, score) pairs; only the top language is used.
    # An empty result is counted as a wrong guess instead of raising.
    best_guess = Linguist::Classifier.instance.classify(data).first
    guessed_language = best_guess && best_guess.first

    total += 1
    if guessed_language && guessed_language.name == file_language
      correct += 1
    else
      incorrect += 1
    end

    # "\r\e[0K" returns to column 0 and clears the line before redrawing.
    print "\r\e[0K%d:%d %g%%" % [correct, incorrect, (correct.to_f/total.to_f)*100]

    break if total >= LIMIT
  end

  # Terminate the progress line.
  puts ""
end
|
||||
|
||||
# Walks the public gist listing page by page and yields once per file.
#
# Yields three values for every file of every gist:
#   gist['url'], the file's 'raw_url', and the file's 'language' (may be nil).
#
# Paging follows the rel="next" entry of the response's Link header and stops
# when a response has no such entry (or no Link header at all).
def each_public_gist
  # Lazy-loaded on purpose: only this task needs them.
  require 'open-uri'
  require 'json'

  url = "https://api.github.com/gists/public"

  while url
    # URI#open rather than Kernel#open (which may run a command for strings
    # starting with "|"); the block form closes the response when done.
    next_url, gists = URI.parse(url).open do |resp|
      # to_s guards against a missing Link header (nil).
      [resp.meta['link'].to_s[/<([^>]+)>; rel="next"/, 1], JSON.parse(resp.read)]
    end

    gists.each do |gist|
      gist['files'].each do |_filename, attrs|
        yield gist['url'], attrs['raw_url'], attrs['language']
      end
    end

    # nil when there is no further page, which ends the loop.
    url = next_url
  end
end
|
||||
end
|
||||
|
||||
@@ -12,5 +12,6 @@ Gem::Specification.new do |s|
|
||||
s.add_dependency 'escape_utils', '~> 0.2.3'
|
||||
s.add_dependency 'mime-types', '~> 1.18'
|
||||
s.add_dependency 'pygments.rb', '~> 0.2.11'
|
||||
s.add_development_dependency 'json'
|
||||
s.add_development_dependency 'rake'
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user