Merge branch 'master' into 1233-local

Conflicts:
	lib/linguist/language.rb
	lib/linguist/languages.yml
	lib/linguist/samples.json
This commit is contained in:
Arfon Smith
2014-11-01 09:59:29 -05:00
parent 12b78c5357
commit 9e50e188a8
77 changed files with 3066 additions and 74080 deletions

View File

@@ -1,24 +1,32 @@
require 'json'
require 'bundler/setup'
require 'rake/clean'
require 'rake/testtask'
require 'yaml'
require 'pry'
require 'yajl'
task :default => :test
Rake::TestTask.new
task :samples do
require 'linguist/samples'
require 'yajl'
data = Linguist::Samples.data
json = Yajl::Encoder.encode(data, :pretty => true)
File.open('lib/linguist/samples.json', 'w') { |io| io.write json }
# Extend test task to check for samples
task :test => :check_samples
desc "Check that we have samples.json generated"
task :check_samples do
unless File.exist?('lib/linguist/samples.json')
Rake::Task[:samples].invoke
end
end
task :build_gem do
task :samples do
require 'linguist/samples'
json = Yajl.dump(Linguist::Samples.data, :pretty => true)
File.write 'lib/linguist/samples.json', json
end
task :build_gem => :samples do
languages = YAML.load_file("lib/linguist/languages.yml")
File.write("lib/linguist/languages.json", JSON.dump(languages))
File.write("lib/linguist/languages.json", Yajl.dump(languages))
`gem build github-linguist.gemspec`
File.delete("lib/linguist/languages.json")
end
@@ -62,11 +70,11 @@ namespace :benchmark do
reference_file = ENV["REFERENCE"]
candidate_file = ENV["CANDIDATE"]
reference = JSON.parse(File.read(reference_file))
reference = Yajl.load(File.read(reference_file))
reference_counts = Hash.new(0)
reference.each { |filename, language| reference_counts[language] += 1 }
candidate = JSON.parse(File.read(candidate_file))
candidate = Yajl.load(File.read(candidate_file))
candidate_counts = Hash.new(0)
candidate.each { |filename, language| candidate_counts[language] += 1 }
@@ -99,7 +107,7 @@ namespace :classifier do
next if file_language.nil? || file_language == 'Text'
begin
data = open(file_url).read
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples.cache, data).first
total += 1
guessed_language == file_language ? correct += 1 : incorrect += 1
@@ -116,14 +124,12 @@ namespace :classifier do
def each_public_gist
require 'open-uri'
require 'json'
url = "https://api.github.com/gists/public"
loop do
resp = open(url)
url = resp.meta['link'][/<([^>]+)>; rel="next"/, 1]
gists = JSON.parse(resp.read)
gists = Yajl.load(resp.read)
for gist in gists
for filename, attrs in gist['files']