mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
The purpose of this gem is to package up the language grammars that are used for syntax highlighting on github.com. The grammars are TextMate, Sublime Text, or Atom language grammars, converted to JSON and given the filename SCOPE.json, where SCOPE is the language scope that the grammar defines. The github-linguist-grammars gem packages up all the grammars, and also exports a Linguist::Grammars.path method to locate the directory containing the grammars. To build the gem, simply run `rake build_grammars_gem`. The grammars.yml file lists all the repositories we download grammars from, as well as which scopes are defined by each repository. The script/download-grammars script takes that list and downloads and processes the grammars into the format expected by the gem.
222 lines
4.9 KiB
Ruby
Executable File
222 lines
4.9 KiB
Ruby
Executable File
#!/usr/bin/env ruby
|
|
|
|
require 'fileutils'
require 'json'
require 'net/http'
require 'plist'
require 'set'
require 'tmpdir'
require 'uri'
require 'yaml'
|
|
|
|
# Directory that receives the converted `<scope>.json` grammar files.
GRAMMARS_PATH = File.expand_path("../../grammars", __FILE__).freeze

# YAML file listing every grammar source and the scopes it provides; it is
# read at start-up and rewritten once the run has finished.
SOURCES_FILE = File.expand_path("../../grammars.yml", __FILE__).freeze

# Executable used to turn CSON grammars into JSON. It lives under
# node_modules, so presumably it is provided by `npm install`.
CSONC = File.expand_path("../../node_modules/.bin/csonc", __FILE__).freeze
|
|
|
|
# A grammar package distributed as a tarball reachable over HTTP(S).
class TarballPackage
  # Downloads the tarball at +url+, unpacks it below +tmp_dir+ and returns the
  # grammar files found inside.
  #
  # @param tmp_dir [String] existing scratch directory; `archive` and
  #   `extracted/` are created in it
  # @param url [String] location of the tarball
  # @return [Array<String>] paths of the extracted grammar files
  # @raise [RuntimeError] when the download or the extraction fails
  def self.fetch(tmp_dir, url)
    archive = "#{tmp_dir}/archive"

    # Argument-list form of `system`: no shell is started, so nothing in the
    # URL or the paths can be interpreted as shell syntax. `--fail` turns HTTP
    # errors into a non-zero exit instead of saving the error page as archive.
    fetched = system('curl', '--silent', '--fail', '--location', '--max-time', '10',
                     '--output', archive, url)
    raise "Failed to fetch GH package: #{url} #{$?.to_s}" unless fetched

    output = File.join(tmp_dir, 'extracted')
    Dir.mkdir(output)
    unpacked = system('tar', '-C', output, '-xf', archive)
    raise "Failed to uncompress tarball: #{archive} (from #{url}) #{$?.to_s}" unless unpacked

    # Directories can carry grammar-like names too; only regular files count.
    Dir["#{output}/**/*"].select { |path| File.file?(path) && grammar_path?(path) }
  end

  # Tells whether +path+ is named and located like a grammar file:
  # `.plist` directly inside a `Syntaxes` directory, `.cson` directly inside a
  # `grammars` directory, or `.tmLanguage` anywhere. The extension check is
  # case-insensitive; the directory names are not.
  #
  # @param path [String]
  # @return [Boolean]
  def self.grammar_path?(path)
    parent = path.split('/')[-2]

    case File.extname(path.downcase)
    when '.plist'      then parent == 'Syntaxes'
    when '.tmlanguage' then true
    when '.cson'       then parent == 'grammars'
    else false
    end
  end

  attr_reader :url

  # @param url [String] location of the tarball
  def initialize(url)
    @url = url
  end

  # Fetches this package; see {TarballPackage.fetch}.
  #
  # @param tmp_dir [String] existing scratch directory
  # @return [Array<String>] paths of the extracted grammar files
  def fetch(tmp_dir)
    self.class.fetch(tmp_dir, url)
  end
end
|
|
|
|
# A grammar published as one stand-alone file reachable over HTTP(S).
class SingleGrammar
  attr_reader :url

  # @param url [String] location of the grammar file
  def initialize(url)
    @url = url
  end

  # Downloads the grammar into +tmp_dir+, keeping the last path segment of the
  # URL as the file name.
  #
  # @param tmp_dir [String] existing scratch directory
  # @return [Array<String>] one-element list holding the downloaded file's path
  # @raise [RuntimeError] when the download fails
  def fetch(tmp_dir)
    filename = File.join(tmp_dir, File.basename(url))

    # Argument-list form of `system`: no shell is started, so the URL and the
    # file name are passed to curl verbatim. `--fail` makes an HTTP error a
    # failed download instead of storing the error page as the grammar.
    fetched = system('curl', '--silent', '--fail', '--location', '--max-time', '10',
                     '--output', filename, url)
    raise "Failed to fetch grammar: #{url}: #{$?.to_s}" unless fetched

    [filename]
  end
end
|
|
|
|
# A grammar bundle kept in a Subversion repository.
class SVNPackage
  attr_reader :url

  # @param url [String] repository URL of the bundle (its `Syntaxes`
  #   sub-directory is what gets exported)
  def initialize(url)
    @url = url
  end

  # Exports the bundle's `Syntaxes` directory into +tmp_dir+ and returns the
  # grammar files it contains.
  #
  # @param tmp_dir [String] existing scratch directory
  # @return [Array<String>] paths of the exported .plist/.tmLanguage files
  # @raise [RuntimeError] when the export fails
  def fetch(tmp_dir)
    target = "#{tmp_dir}/Syntaxes"

    # Argument-list form of `system`: no shell is started, so the URL and the
    # target path reach svn exactly as given.
    exported = system('svn', 'export', '-q', "#{url}/Syntaxes", target)
    raise "Failed to export SVN repository: #{url}: #{$?.to_s}" unless exported

    Dir["#{target}/*.{plist,tmLanguage,tmlanguage}"]
  end
end
|
|
|
|
# A grammar package hosted in a GitHub repository, optionally pinned to a ref
# with a trailing `@ref`.
class GitHubPackage
  # Splits a source URL of the form `https://github.com/USER/REPO[.git][@REF]`
  # into its parts.
  #
  # @param url [String]
  # @return [Array(String, String, String)] user, repository name (without a
  #   trailing `.git`) and ref; the ref is "master" when none is given
  # @raise [ArgumentError] when the URL has no user or repository segment
  def self.parse_url(url)
    location, ref = url.split("@", 2)
    _root, user, repo = URI.parse(location).path.split('/')

    # Fail with a readable message instead of a NoMethodError on nil.
    if user.to_s.empty? || repo.to_s.empty?
      raise ArgumentError, "Not a GitHub repository URL: #{url}"
    end

    # A bare trailing "@" would otherwise produce the archive name ".tar.gz".
    ref = "master" if ref.nil? || ref.empty?

    [user, repo.chomp('.git'), ref]
  end

  attr_reader :user
  attr_reader :repo
  attr_reader :ref

  # @param url [String] see {GitHubPackage.parse_url} for the accepted form
  # @raise [ArgumentError] when the URL cannot be parsed
  def initialize(url)
    @user, @repo, @ref = self.class.parse_url(url)
  end

  # Canonical source URL; the `@ref` suffix is left out for "master".
  #
  # @return [String]
  def url
    suffix = "@#{ref}" unless ref == "master"
    "https://github.com/#{user}/#{repo}#{suffix}"
  end

  # Downloads the repository's archive for the configured ref and returns the
  # grammar files found in it.
  #
  # @param tmp_dir [String] existing scratch directory
  # @return [Array<String>] whatever TarballPackage.fetch returns
  def fetch(tmp_dir)
    archive_url = "https://github.com/#{user}/#{repo}/archive/#{ref}.tar.gz"
    TarballPackage.fetch(tmp_dir, archive_url)
  end
end
|
|
|
|
# Reads a grammar file and returns its parsed contents.
#
# `.plist` and `.tmLanguage` files are handed to Plist.parse_xml; `.cson`
# files are converted to JSON by the CSONC executable and then parsed. The
# extension is matched case-insensitively.
#
# @param path [String] path of the grammar file
# @return [Object] the parsed grammar
# @raise [RuntimeError] when the extension is not supported or the CSON
#   converter cannot be run or exits unsuccessfully
def load_grammar(path)
  case File.extname(path.downcase)
  when '.plist', '.tmlanguage'
    Plist::parse_xml(path)
  when '.cson'
    begin
      # Array form: the converter is executed directly, without a shell, so
      # quotes, spaces or `$(...)` in the path are passed through untouched.
      cson = IO.popen([CSONC, path], &:read)
    rescue SystemCallError => e
      # Without a shell a missing converter raises instead of exiting 127;
      # report it the same way as any other conversion failure.
      raise "Failed to convert CSON grammar '#{path}': #{e.message}"
    end
    raise "Failed to convert CSON grammar '#{path}': #{$?.to_s}" unless $?.success?
    JSON.parse(cson)
  else
    raise "Invalid document type #{path}"
  end
end
|
|
|
|
# Serializes the "is this scope taken? if not, take it" step on the shared
# scope table, which several worker threads update at the same time.
SCOPE_REGISTRY_LOCK = Mutex.new

# Picks the package class that knows how to fetch +source+.
#
# @param source [String] grammar source URL
# @return [SingleGrammar, GitHubPackage, SVNPackage, TarballPackage, nil]
#   nil when no package type matches
def package_for(source)
  if source.end_with?('.tmLanguage', '.plist')
    SingleGrammar.new(source)
  elsif source.start_with?('https://github.com')
    GitHubPackage.new(source)
  elsif source.start_with?('http://svn.textmate.org')
    SVNPackage.new(source)
  elsif source.end_with?('.tar.gz')
    TarballPackage.new(source)
  end
end

# Fetches every grammar offered by +source+ and writes each one to
# GRAMMARS_PATH as `<scopeName>.json`.
#
# A scope already present in +all_scopes+ is reported on stderr and skipped,
# as is a grammar that has no `scopeName`. A summary line is printed to
# stderr at the end.
#
# @param tmp_dir [String] existing scratch directory for downloads
# @param source [String] grammar source URL
# @param all_scopes [Hash{String => String}] scope name => URL of the package
#   that provided it; updated in place
# @return [nil]
# @raise [RuntimeError] when +source+ matches no supported package type
def install_grammar(tmp_dir, source, all_scopes)
  package = package_for(source)
  raise "Unsupported source: #{source}" unless package

  installed = []

  package.fetch(tmp_dir).each do |path|
    grammar = load_grammar(path)
    scope = grammar['scopeName'] if grammar.is_a?(Hash)

    # Without a scope name there is no file name to write to and nothing to
    # register; say so instead of producing ".json" or failing on nil.
    unless scope
      $stderr.puts "WARN: No scopeName in #{path} (#{package.url}), skipping"
      next
    end

    # Check and registration happen under one lock so two workers cannot both
    # see the scope as free and then both install it.
    duplicate, previous = SCOPE_REGISTRY_LOCK.synchronize do
      if all_scopes.key?(scope)
        [true, all_scopes[scope]]
      else
        all_scopes[scope] = package.url
        [false, nil]
      end
    end

    if duplicate
      $stderr.puts "WARN: Duplicated scope #{scope}\n" +
        " Current package: #{package.url}\n" +
        " Previous package: #{previous}"
      next
    end

    File.write(File.join(GRAMMARS_PATH, "#{scope}.json"), JSON.pretty_generate(grammar))
    installed << scope
  end

  $stderr.puts("OK #{package.url} (#{installed.join(', ')})")
end
|
|
|
|
# Worker loop: takes `[source, index]` pairs off +queue+ until it is empty and
# installs each source from its own scratch directory.
#
# @param queue [Queue] holds `[source, index]` pairs; +index+ names the
#   scratch sub-directory used for that source
# @param all_scopes [Hash] shared scope table handed on to install_grammar
# @return [void]
def run_thread(queue, all_scopes)
  Dir.mktmpdir do |tmpdir|
    loop do
      source, index = begin
        # Non-blocking pop: an empty queue raises instead of waiting forever.
        queue.pop(true)
      rescue ThreadError
        # The queue is empty.
        break
      end

      dir = "#{tmpdir}/#{index}"
      Dir.mkdir(dir)

      begin
        install_grammar(dir, source, all_scopes)
      ensure
        # The downloads are not needed once the grammars are installed;
        # dropping them now keeps disk usage at one package per worker.
        FileUtils.remove_entry(dir)
      end
    end
  end
end
|
|
|
|
# Builds the source => scopes mapping that is written back to the sources
# file.
#
# Starts from +base+, adds every scope in +all_scopes+ under the source that
# provided it, and returns the result with sources and scope lists sorted and
# without duplicate scopes. Neither argument is modified.
#
# @param all_scopes [Hash{String => String}] scope name => source URL
# @param base [Hash{String => Array<String>}, nil] existing mapping to extend;
#   a nil scope list is treated as empty
# @return [Hash{String => Array<String>}]
def generate_yaml(all_scopes, base)
  # Copy the lists so appending below cannot reach into the caller's data.
  by_source = (base || {}).each_with_object({}) do |(source, scopes), out|
    out[source] = Array(scopes).dup
  end

  all_scopes.each do |scope, source|
    (by_source[source] ||= []) << scope
  end

  # uniq: re-adding a source that is already listed must not repeat its scopes.
  by_source.sort.map { |source, scopes| [source, scopes.uniq.sort] }.to_h
end
|
|
|
|
# Installs grammars and returns the updated source => scopes mapping.
#
# With `--add URL` on the command line only that source is installed and its
# scopes are merged into +sources+. Otherwise every source in +sources+ is
# downloaded again by a pool of worker threads and the mapping is rebuilt
# from scratch.
#
# @param sources [Hash{String => Array<String>}, nil] current content of the
#   sources file; nil is treated as empty
# @return [Hash{String => Array<String>}] whatever generate_yaml returns
# @raise [ArgumentError] when `--add` is given without a URL
def main(sources)
  sources ||= {}
  adding = ARGV[0] == '--add'
  new_source = ARGV[1]

  # Check the command line before touching the disk or the network.
  if adding && new_source.to_s.empty?
    raise ArgumentError, "--add requires the URL of the grammar source to add"
  end

  FileUtils.mkdir_p(GRAMMARS_PATH)

  # stdout is discarded; a failure is reported rather than passing unnoticed
  # and surfacing later as a missing converter.
  unless system('npm', 'install', out: File::NULL)
    $stderr.puts "WARN: `npm install` failed: #{$?.to_s}"
  end

  all_scopes = {}

  if adding
    Dir.mktmpdir do |tmpdir|
      install_grammar(tmpdir, new_source, all_scopes)
    end
    generate_yaml(all_scopes, sources)
  else
    queue = Queue.new

    # The index only serves to give each source its own scratch directory.
    sources.each_key.with_index do |url, index|
      queue.push([url, index])
    end

    threads = Array.new(8) do
      Thread.new { run_thread(queue, all_scopes) }
    end
    threads.each(&:join)
    generate_yaml(all_scopes, {})
  end
end
|
|
|
|
# Script entry point: read the current source => scopes mapping, install the
# grammars, then write the updated mapping back to the same file.
sources = YAML.load_file(SOURCES_FILE)

yaml = main(sources)

File.write(SOURCES_FILE, YAML.dump(yaml))

$stderr.puts("Done")
|