New Grammars Compiler (#3915)

* grammars: Update several grammars with compat issues

* [WIP] Add new grammar conversion tools

* Wrap in a Docker script

* Proper Dockerfile support

* Add Javadoc grammar

* Remove NPM package.json

* Remove superfluous test

This is now always checked by the grammars compiler

* Update JSyntax grammar to new submodule

* Approve Javadoc license

* grammars: Remove checked-in dependencies

* grammars: Add regex checks to the compiler

* grammars: Point Oz to its actual submodule

* grammars: Refactor compiler to group errors by repo

* grammars: Cleanups to error reporting
This commit is contained in:
Vicent Martí
2017-11-30 16:15:48 +01:00
committed by GitHub
parent 4f46155c05
commit e335d48625
37 changed files with 1445 additions and 416 deletions

View File

@@ -84,13 +84,13 @@ if repo_old
log "Deregistering: #{repo_old}"
`git submodule deinit #{repo_old}`
`git rm -rf #{repo_old}`
`script/convert-grammars`
`script/grammar-compiler -update`
end
log "Registering new submodule: #{repo_new}"
`git submodule add -f #{https} #{repo_new}`
exit 1 if $?.exitstatus > 0
`script/convert-grammars --add #{repo_new}`
`script/grammar-compiler -add #{repo_new}`
log "Confirming license"
if repo_old

View File

@@ -1,319 +0,0 @@
#!/usr/bin/env ruby
require 'bundler/setup'
require 'json'
require 'net/http'
require 'optparse'
require 'plist'
require 'set'
require 'thread'
require 'tmpdir'
require 'uri'
require 'yaml'
ROOT = File.expand_path("../..", __FILE__)
GRAMMARS_PATH = File.join(ROOT, "grammars")
SOURCES_FILE = File.join(ROOT, "grammars.yml")
CSONC = File.join(ROOT, "node_modules", ".bin", "csonc")
$options = {
:add => false,
:install => true,
:output => SOURCES_FILE,
:remote => true,
}
class SingleFile
def initialize(path)
@path = path
end
def url
@path
end
def fetch(tmp_dir)
[@path]
end
end
class DirectoryPackage
def self.fetch(dir)
Dir["#{dir}/**/*"].select do |path|
case File.extname(path.downcase)
when '.plist'
path.split('/')[-2] == 'Syntaxes'
when '.tmlanguage', '.yaml-tmlanguage'
true
when '.cson', '.json'
path.split('/')[-2] == 'grammars'
else
false
end
end
end
def initialize(directory)
@directory = directory
end
def url
@directory
end
def fetch(tmp_dir)
self.class.fetch(File.join(ROOT, @directory))
end
end
class TarballPackage
def self.fetch(tmp_dir, url)
`curl --silent --location --max-time 30 --output "#{tmp_dir}/archive" "#{url}"`
raise "Failed to fetch GH package: #{url} #{$?.to_s}" unless $?.success?
output = File.join(tmp_dir, 'extracted')
Dir.mkdir(output)
`tar -C "#{output}" -xf "#{tmp_dir}/archive"`
raise "Failed to uncompress tarball: #{tmp_dir}/archive (from #{url}) #{$?.to_s}" unless $?.success?
DirectoryPackage.fetch(output)
end
attr_reader :url
def initialize(url)
@url = url
end
def fetch(tmp_dir)
self.class.fetch(tmp_dir, url)
end
end
class SingleGrammar
attr_reader :url
def initialize(url)
@url = url
end
def fetch(tmp_dir)
filename = File.join(tmp_dir, File.basename(url))
`curl --silent --location --max-time 10 --output "#{filename}" "#{url}"`
raise "Failed to fetch grammar: #{url}: #{$?.to_s}" unless $?.success?
[filename]
end
end
class SVNPackage
attr_reader :url
def initialize(url)
@url = url
end
def fetch(tmp_dir)
`svn export -q "#{url}/Syntaxes" "#{tmp_dir}/Syntaxes"`
raise "Failed to export SVN repository: #{url}: #{$?.to_s}" unless $?.success?
Dir["#{tmp_dir}/Syntaxes/*.{plist,tmLanguage,tmlanguage,YAML-tmLanguage}"]
end
end
class GitHubPackage
def self.parse_url(url)
url, ref = url.split("@", 2)
path = URI.parse(url).path.split('/')
[path[1], path[2].chomp('.git'), ref || "master"]
end
attr_reader :user
attr_reader :repo
attr_reader :ref
def initialize(url)
@user, @repo, @ref = self.class.parse_url(url)
end
def url
suffix = "@#{ref}" unless ref == "master"
"https://github.com/#{user}/#{repo}#{suffix}"
end
def fetch(tmp_dir)
url = "https://github.com/#{user}/#{repo}/archive/#{ref}.tar.gz"
TarballPackage.fetch(tmp_dir, url)
end
end
def load_grammar(path)
case File.extname(path.downcase)
when '.plist', '.tmlanguage'
Plist::parse_xml(path)
when '.yaml-tmlanguage'
content = File.read(path)
# Attempt to parse YAML file even if it has a YAML 1.2 header
if content.lines[0] =~ /^%YAML[ :]1\.2/
content = content.lines[1..-1].join
end
begin
YAML.load(content)
rescue Psych::SyntaxError => e
$stderr.puts "Failed to parse YAML grammar '#{path}'"
end
when '.cson'
cson = `"#{CSONC}" "#{path}"`
raise "Failed to convert CSON grammar '#{path}': #{$?.to_s}" unless $?.success?
JSON.parse(cson)
when '.json'
JSON.parse(File.read(path))
else
raise "Invalid document type #{path}"
end
end
def load_grammars(tmp_dir, source, all_scopes)
is_url = source.start_with?("http:", "https:")
return [] if is_url && !$options[:remote]
return [] if !is_url && !File.exist?(source)
p = if !is_url
if File.directory?(source)
DirectoryPackage.new(source)
else
SingleFile.new(source)
end
elsif source.end_with?('.tmLanguage', '.plist', '.YAML-tmLanguage')
SingleGrammar.new(source)
elsif source.start_with?('https://github.com')
GitHubPackage.new(source)
elsif source.start_with?('http://svn.textmate.org')
SVNPackage.new(source)
elsif source.end_with?('.tar.gz')
TarballPackage.new(source)
else
nil
end
raise "Unsupported source: #{source}" unless p
p.fetch(tmp_dir).map do |path|
grammar = load_grammar(path)
scope = grammar['scopeName'] || grammar['scope']
if all_scopes.key?(scope)
unless all_scopes[scope] == p.url
$stderr.puts "WARN: Duplicated scope #{scope}\n" +
" Current package: #{p.url}\n" +
" Previous package: #{all_scopes[scope]}"
end
next
end
all_scopes[scope] = p.url
grammar
end.compact
end
def install_grammars(grammars, path)
installed = []
grammars.each do |grammar|
scope = grammar['scopeName'] || grammar['scope']
File.write(File.join(GRAMMARS_PATH, "#{scope}.json"), JSON.pretty_generate(grammar))
installed << scope
end
$stderr.puts("OK #{path} (#{installed.join(', ')})")
end
def run_thread(queue, all_scopes)
Dir.mktmpdir do |tmpdir|
loop do
source, index = begin
queue.pop(true)
rescue ThreadError
# The queue is empty.
break
end
dir = "#{tmpdir}/#{index}"
Dir.mkdir(dir)
grammars = load_grammars(dir, source, all_scopes)
install_grammars(grammars, source) if $options[:install]
end
end
end
def generate_yaml(all_scopes, base)
yaml = all_scopes.each_with_object(base) do |(key,value),out|
out[value] ||= []
out[value] << key
end
yaml = Hash[yaml.sort]
yaml.each { |k, v| v.sort! }
yaml
end
def main(sources)
begin
Dir.mkdir(GRAMMARS_PATH)
rescue Errno::EEXIST
end
`npm install`
all_scopes = {}
if source = $options[:add]
Dir.mktmpdir do |tmpdir|
grammars = load_grammars(tmpdir, source, all_scopes)
install_grammars(grammars, source) if $options[:install]
end
generate_yaml(all_scopes, sources)
else
queue = Queue.new
sources.each do |url, scopes|
queue.push([url, queue.length])
end
threads = 8.times.map do
Thread.new { run_thread(queue, all_scopes) }
end
threads.each(&:join)
generate_yaml(all_scopes, {})
end
end
OptionParser.new do |opts|
opts.banner = "Usage: #{$0} [options]"
opts.on("--add GRAMMAR", "Add a new grammar. GRAMMAR may be a file path or URL.") do |a|
$options[:add] = a
end
opts.on("--[no-]install", "Install grammars into grammars/ directory.") do |i|
$options[:install] = i
end
opts.on("--output FILE", "Write output to FILE. Use - for stdout.") do |o|
$options[:output] = o == "-" ? $stdout : o
end
opts.on("--[no-]remote", "Download remote grammars.") do |r|
$options[:remote] = r
end
end.parse!
sources = File.open(SOURCES_FILE) do |file|
YAML.load(file)
end
yaml = main(sources)
if $options[:output].is_a?(IO)
$options[:output].write(YAML.dump(yaml))
else
File.write($options[:output], YAML.dump(yaml))
end

12
script/grammar-compiler Executable file
View File

@@ -0,0 +1,12 @@
#!/bin/sh
set -e
cd "$(dirname "$0")/.."
image="linguist/grammar-compiler:latest"
mkdir -p grammars
exec docker run --rm \
-u $(id -u $USER):$(id -g $USER) \
-v $PWD:/src/linguist \
-w /src/linguist -ti $image "$@"

View File

@@ -1,60 +0,0 @@
#!/usr/bin/env ruby
require "bundler/setup"
require "json"
require "linguist"
require "set"
require "yaml"
ROOT = File.expand_path("../../", __FILE__)
def find_includes(json)
case json
when Hash
result = []
if inc = json["include"]
result << inc.split("#", 2).first unless inc.start_with?("#", "$")
end
result + json.values.flat_map { |v| find_includes(v) }
when Array
json.flat_map { |v| find_includes(v) }
else
[]
end
end
def transitive_includes(scope, includes)
scopes = Set.new
queue = includes[scope] || []
while s = queue.shift
next if scopes.include?(s)
scopes << s
queue += includes[s] || []
end
scopes
end
includes = {}
Dir[File.join(ROOT, "grammars/*.json")].each do |path|
scope = File.basename(path).sub(/\.json/, '')
json = JSON.load(File.read(path))
incs = find_includes(json)
next if incs.empty?
includes[scope] ||= []
includes[scope] += incs
end
yaml = YAML.load(File.read(File.join(ROOT, "grammars.yml")))
language_scopes = Linguist::Language.all.map(&:tm_scope).to_set
# The set of used scopes is the scopes for each language, plus all the scopes
# they include, transitively.
used_scopes = language_scopes + language_scopes.flat_map { |s| transitive_includes(s, includes).to_a }.to_set
unused = yaml.reject { |repo, scopes| scopes.any? { |scope| used_scopes.include?(scope) } }
puts "Unused grammar repos"
puts unused.map { |repo, scopes| sprintf("%-100s %s", repo, scopes.join(", ")) }.sort.join("\n")
yaml.delete_if { |k| unused.key?(k) }
File.write(File.join(ROOT, "grammars.yml"), YAML.dump(yaml))