linguist/script/download-grammars
Adam Roben (f2ab426d38): Move all grammars that use Git repos to submodules
This makes it so we don't have to redownload all the grammars every time
we build the grammars gem. It will also let us verify that grammars.yml
is accurate in the future by checking it against the submodules on disk.

script/bootstrap now updates the submodules.
2014-12-19 15:39:02 -05:00

#!/usr/bin/env ruby

require 'json'
require 'net/http'
require 'plist'
require 'set'
require 'tmpdir'
require 'uri'
require 'yaml'

ROOT = File.expand_path("../..", __FILE__)
GRAMMARS_PATH = File.join(ROOT, "grammars")
SOURCES_FILE = File.join(ROOT, "grammars.yml")
CSONC = File.join(ROOT, "node_modules", ".bin", "csonc")
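
# A grammar that already exists as a single file inside this repository;
# fetching it just returns its path.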
class SingleFile
  def initialize(path)
    @path = path
  end

  def url
    @path
  end

  def fetch(tmp_dir)
    [@path]
  end
end
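
# A directory inside this repository (e.g. a git submodule). Fetching it
# collects .plist files under Syntaxes/, .tmLanguage files anywhere, and
# .cson files under grammars/.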
class DirectoryPackage
  def self.fetch(dir)
    Dir["#{dir}/**/*"].select do |path|
      case File.extname(path.downcase)
      when '.plist'
        path.split('/')[-2] == 'Syntaxes'
      when '.tmlanguage'
        true
      when '.cson'
        path.split('/')[-2] == 'grammars'
      else
        false
      end
    end
  end

  def initialize(directory)
    @directory = directory
  end

  def url
    @directory
  end

  def fetch(tmp_dir)
    self.class.fetch(File.join(ROOT, @directory))
  end
end
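
# A remote .tar.gz archive: downloaded with curl, extracted with tar, then
# scanned for grammar files like a directory package.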
class TarballPackage
  def self.fetch(tmp_dir, url)
    `curl --silent --location --max-time 30 --output "#{tmp_dir}/archive" "#{url}"`
    raise "Failed to fetch GH package: #{url} #{$?.to_s}" unless $?.success?

    output = File.join(tmp_dir, 'extracted')
    Dir.mkdir(output)

    `tar -C "#{output}" -xf "#{tmp_dir}/archive"`
    raise "Failed to uncompress tarball: #{tmp_dir}/archive (from #{url}) #{$?.to_s}" unless $?.success?

    DirectoryPackage.fetch(output)
  end

  attr_reader :url

  def initialize(url)
    @url = url
  end

  def fetch(tmp_dir)
    self.class.fetch(tmp_dir, url)
  end
end
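
# A single grammar file fetched over HTTP(S).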
class SingleGrammar
  attr_reader :url

  def initialize(url)
    @url = url
  end

  def fetch(tmp_dir)
    filename = File.join(tmp_dir, File.basename(url))
    `curl --silent --location --max-time 10 --output "#{filename}" "#{url}"`
    raise "Failed to fetch grammar: #{url}: #{$?.to_s}" unless $?.success?
    [filename]
  end
end
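
# A Subversion repository; only its Syntaxes/ directory is exported.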
class SVNPackage
  attr_reader :url

  def initialize(url)
    @url = url
  end

  def fetch(tmp_dir)
    `svn export -q "#{url}/Syntaxes" "#{tmp_dir}/Syntaxes"`
    raise "Failed to export SVN repository: #{url}: #{$?.to_s}" unless $?.success?
    Dir["#{tmp_dir}/Syntaxes/*.{plist,tmLanguage,tmlanguage}"]
  end
end
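
# A GitHub repository, optionally pinned to a ref with "url@ref" (defaults to
# master). Fetched as a tarball of that ref.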
class GitHubPackage
  def self.parse_url(url)
    url, ref = url.split("@", 2)
    path = URI.parse(url).path.split('/')
    [path[1], path[2].chomp('.git'), ref || "master"]
  end

  attr_reader :user
  attr_reader :repo
  attr_reader :ref

  def initialize(url)
    @user, @repo, @ref = self.class.parse_url(url)
  end

  def url
    suffix = "@#{ref}" unless ref == "master"
    "https://github.com/#{user}/#{repo}#{suffix}"
  end

  def fetch(tmp_dir)
    url = "https://github.com/#{user}/#{repo}/archive/#{ref}.tar.gz"
    TarballPackage.fetch(tmp_dir, url)
  end
end
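
# Parses a grammar file into a Hash: plist/tmLanguage via the plist gem,
# CSON via the csonc executable from node_modules.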
def load_grammar(path)
  case File.extname(path.downcase)
  when '.plist', '.tmlanguage'
    Plist::parse_xml(path)
  when '.cson'
    cson = `"#{CSONC}" "#{path}"`
    raise "Failed to convert CSON grammar '#{path}': #{$?.to_s}" unless $?.success?
    JSON.parse(cson)
  else
    raise "Invalid document type #{path}"
  end
end
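
# Picks the right package type for `source`, fetches its grammar files, and
# writes each one to grammars/<scope>.json. Duplicate scopes are skipped with
# a warning; installed scopes are recorded in `all_scopes`, keyed by scope name.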
def install_grammar(tmp_dir, source, all_scopes)
  is_url = source.start_with?("http:", "https:")
  is_single_file = source.end_with?('.tmLanguage', '.plist')

  p = if !is_url
    if is_single_file
      SingleFile.new(source)
    else
      DirectoryPackage.new(source)
    end
  elsif is_single_file
    SingleGrammar.new(source)
  elsif source.start_with?('https://github.com')
    GitHubPackage.new(source)
  elsif source.start_with?('http://svn.textmate.org')
    SVNPackage.new(source)
  elsif source.end_with?('.tar.gz')
    TarballPackage.new(source)
  else
    nil
  end
  raise "Unsupported source: #{source}" unless p

  installed = []
  p.fetch(tmp_dir).each do |path|
    grammar = load_grammar(path)
    scope = grammar['scopeName']

    if all_scopes.key?(scope)
      $stderr.puts "WARN: Duplicated scope #{scope}\n" +
        "  Current package: #{p.url}\n" +
        "  Previous package: #{all_scopes[scope]}"
      next
    end

    File.write(File.join(GRAMMARS_PATH, "#{scope}.json"), JSON.pretty_generate(grammar))
    all_scopes[scope] = p.url
    installed << scope
  end

  $stderr.puts("OK #{p.url} (#{installed.join(', ')})")
end
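
# Worker loop: pops [source, index] pairs off the queue and installs each
# source into its own temporary subdirectory until the queue is empty.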
def run_thread(queue, all_scopes)
  Dir.mktmpdir do |tmpdir|
    loop do
      source, index = begin
        queue.pop(true)
      rescue ThreadError
        # The queue is empty.
        break
      end

      dir = "#{tmpdir}/#{index}"
      Dir.mkdir(dir)
      install_grammar(dir, source, all_scopes)
    end
  end
end
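
# Inverts the scope => source-URL mapping into source-URL => [scopes],
# sorted by URL and by scope for stable YAML output.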
def generate_yaml(all_scopes, base)
  yaml = all_scopes.each_with_object(base) do |(key, value), out|
    out[value] ||= []
    out[value] << key
  end

  yaml = yaml.sort.to_h
  yaml.each { |k, v| v.sort! }
  yaml
end
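
# Downloads every grammar listed in grammars.yml (or just the one passed via
# --add) and returns the regenerated source => scopes mapping.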
def main(sources)
  begin
    Dir.mkdir(GRAMMARS_PATH)
  rescue Errno::EEXIST
  end

  `npm install`

  all_scopes = {}

  if ARGV[0] == '--add'
    Dir.mktmpdir do |tmpdir|
      install_grammar(tmpdir, ARGV[1], all_scopes)
    end
    generate_yaml(all_scopes, sources)
  else
    queue = Queue.new
    sources.each do |url, scopes|
      queue.push([url, queue.length])
    end

    threads = 8.times.map do
      Thread.new { run_thread(queue, all_scopes) }
    end
    threads.each(&:join)

    generate_yaml(all_scopes, {})
  end
end
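
# Read grammars.yml, download everything, and write the updated mapping back.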
sources = File.open(SOURCES_FILE) do |file|
  YAML.load(file)
end

yaml = main(sources)
File.write(SOURCES_FILE, YAML.dump(yaml))

$stderr.puts("Done")