Files
linguist/script/download-grammars
Adam Roben 046fb18980 Add github-linguist-grammars gem
The purpose of this gem is to package up the language grammars that are
used for syntax highlighting on github.com. The grammars are TextMate,
Sublime Text, or Atom language grammars, converted to JSON and given the
filename SCOPE.json, where SCOPE is the language scope that the grammar
defines.

The github-linguist-grammars gem packages up all the grammars, and also
exports a Linguist::Grammars.path method to locate the directory
containing the grammars.

To build the gem, simply run `rake build_grammars_gem`. The grammars.yml
file lists all the repositories we download grammars from, as well as
which scopes are defined by each repository. The
script/download-grammars script takes that list and downloads and
processes the grammars into the format expected by the gem.
2014-11-13 11:03:53 -05:00

222 lines
4.9 KiB
Ruby
Executable File

#!/usr/bin/env ruby
require 'json'
require 'net/http'
require 'plist'
require 'set'
require 'tmpdir'
require 'uri'
require 'yaml'
GRAMMARS_PATH = File.expand_path("../../grammars", __FILE__)
SOURCES_FILE = File.expand_path("../../grammars.yml", __FILE__)
CSONC = File.expand_path("../../node_modules/.bin/csonc", __FILE__)
class TarballPackage
def self.fetch(tmp_dir, url)
`curl --silent --location --max-time 10 --output "#{tmp_dir}/archive" "#{url}"`
raise "Failed to fetch GH package: #{url} #{$?.to_s}" unless $?.success?
output = File.join(tmp_dir, 'extracted')
Dir.mkdir(output)
`tar -C "#{output}" -xf "#{tmp_dir}/archive"`
raise "Failed to uncompress tarball: #{tmp_dir}/archive (from #{url}) #{$?.to_s}" unless $?.success?
Dir["#{output}/**/*"].select do |path|
case File.extname(path.downcase)
when '.plist'
path.split('/')[-2] == 'Syntaxes'
when '.tmlanguage'
true
when '.cson'
path.split('/')[-2] == 'grammars'
else
false
end
end
end
attr_reader :url
def initialize(url)
@url = url
end
def fetch(tmp_dir)
self.class.fetch(tmp_dir, url)
end
end
class SingleGrammar
attr_reader :url
def initialize(url)
@url = url
end
def fetch(tmp_dir)
filename = File.join(tmp_dir, File.basename(url))
`curl --silent --location --max-time 10 --output "#{filename}" "#{url}"`
raise "Failed to fetch grammar: #{url}: #{$?.to_s}" unless $?.success?
[filename]
end
end
class SVNPackage
attr_reader :url
def initialize(url)
@url = url
end
def fetch(tmp_dir)
`svn export -q "#{url}/Syntaxes" "#{tmp_dir}/Syntaxes"`
raise "Failed to export SVN repository: #{url}: #{$?.to_s}" unless $?.success?
Dir["#{tmp_dir}/Syntaxes/*.{plist,tmLanguage,tmlanguage}"]
end
end
class GitHubPackage
def self.parse_url(url)
url, ref = url.split("@", 2)
path = URI.parse(url).path.split('/')
[path[1], path[2].chomp('.git'), ref || "master"]
end
attr_reader :user
attr_reader :repo
attr_reader :ref
def initialize(url)
@user, @repo, @ref = self.class.parse_url(url)
end
def url
suffix = "@#{ref}" unless ref == "master"
"https://github.com/#{user}/#{repo}#{suffix}"
end
def fetch(tmp_dir)
url = "https://github.com/#{user}/#{repo}/archive/#{ref}.tar.gz"
TarballPackage.fetch(tmp_dir, url)
end
end
def load_grammar(path)
case File.extname(path.downcase)
when '.plist', '.tmlanguage'
Plist::parse_xml(path)
when '.cson'
cson = `"#{CSONC}" "#{path}"`
raise "Failed to convert CSON grammar '#{path}': #{$?.to_s}" unless $?.success?
JSON.parse(cson)
else
raise "Invalid document type #{path}"
end
end
def install_grammar(tmp_dir, source, all_scopes)
p = if source.end_with?('.tmLanguage', '.plist')
SingleGrammar.new(source)
elsif source.start_with?('https://github.com')
GitHubPackage.new(source)
elsif source.start_with?('http://svn.textmate.org')
SVNPackage.new(source)
elsif source.end_with?('.tar.gz')
TarballPackage.new(source)
else
nil
end
raise "Unsupported source: #{source}" unless p
installed = []
p.fetch(tmp_dir).each do |path|
grammar = load_grammar(path)
scope = grammar['scopeName']
if all_scopes.key?(scope)
$stderr.puts "WARN: Duplicated scope #{scope}\n" +
" Current package: #{p.url}\n" +
" Previous package: #{all_scopes[scope]}"
next
end
File.write(File.join(GRAMMARS_PATH, "#{scope}.json"), JSON.pretty_generate(grammar))
all_scopes[scope] = p.url
installed << scope
end
$stderr.puts("OK #{p.url} (#{installed.join(', ')})")
end
def run_thread(queue, all_scopes)
Dir.mktmpdir do |tmpdir|
loop do
source, index = begin
queue.pop(true)
rescue ThreadError
# The queue is empty.
break
end
dir = "#{tmpdir}/#{index}"
Dir.mkdir(dir)
install_grammar(dir, source, all_scopes)
end
end
end
def generate_yaml(all_scopes, base)
yaml = all_scopes.each_with_object(base) do |(key,value),out|
out[value] ||= []
out[value] << key
end
yaml = yaml.sort.to_h
yaml.each { |k, v| v.sort! }
yaml
end
def main(sources)
begin
Dir.mkdir(GRAMMARS_PATH)
rescue Errno::EEXIST
end
`npm install`
all_scopes = {}
if ARGV[0] == '--add'
Dir.mktmpdir do |tmpdir|
install_grammar(tmpdir, ARGV[1], all_scopes)
end
generate_yaml(all_scopes, sources)
else
queue = Queue.new
sources.each do |url, scopes|
queue.push([url, queue.length])
end
threads = 8.times.map do
Thread.new { run_thread(queue, all_scopes) }
end
threads.each(&:join)
generate_yaml(all_scopes, {})
end
end
sources = File.open(SOURCES_FILE) do |file|
YAML.load(file)
end
yaml = main(sources)
File.write(SOURCES_FILE, YAML.dump(yaml))
$stderr.puts("Done")