	Add github-linguist-grammars gem
The purpose of this gem is to package up the language grammars that are used for syntax highlighting on github.com. The grammars are TextMate, Sublime Text, or Atom language grammars, converted to JSON and given the filename SCOPE.json, where SCOPE is the language scope that the grammar defines.

The github-linguist-grammars gem packages up all the grammars, and also exports a Linguist::Grammars.path method to locate the directory containing the grammars. To build the gem, run `rake build_grammars_gem`.

The grammars.yml file lists all the repositories we download grammars from, as well as which scopes are defined by each repository. The script/download-grammars script takes that list and downloads and processes the grammars into the format expected by the gem.
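As a quick illustration of the lookup described above, here is a minimal consumer-side sketch in Ruby; the require path is inferred from the gem name and the source.ruby scope is only an example, neither is part of this commit:

# Minimal usage sketch. Assumes the github-linguist-grammars gem is installed;
# the require path below is inferred from the gem name and may differ.
require 'json'
require 'linguist-grammars'

scope = 'source.ruby' # hypothetical scope; any SCOPE.json shipped with the gem works
grammar_file = File.join(Linguist::Grammars.path, "#{scope}.json")

if File.exist?(grammar_file)
  grammar = JSON.parse(File.read(grammar_file))
  $stderr.puts "Loaded grammar for #{grammar['scopeName']}"
else
  $stderr.puts "No grammar packaged for #{scope}"
end

The grammars.yml file that drives the download maps each source URL to the list of scopes it provides; the generate_yaml step in the script below rebuilds that mapping after every run. The script also accepts --add <url> to fetch and register a single new package.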
This commit is contained in:

script/download-grammars (221 lines, new executable file)
							@@ -0,0 +1,221 @@
#!/usr/bin/env ruby

require 'json'
require 'net/http'
require 'plist'
require 'set'
require 'tmpdir'
require 'uri'
require 'yaml'

GRAMMARS_PATH = File.expand_path("../../grammars", __FILE__)
SOURCES_FILE = File.expand_path("../../grammars.yml", __FILE__)
CSONC = File.expand_path("../../node_modules/.bin/csonc", __FILE__)

class TarballPackage
  def self.fetch(tmp_dir, url)
    `curl --silent --location --max-time 10 --output "#{tmp_dir}/archive" "#{url}"`
    raise "Failed to fetch GH package: #{url} #{$?.to_s}" unless $?.success?

    output = File.join(tmp_dir, 'extracted')
    Dir.mkdir(output)
    `tar -C "#{output}" -xf "#{tmp_dir}/archive"`
    raise "Failed to uncompress tarball: #{tmp_dir}/archive (from #{url}) #{$?.to_s}" unless $?.success?

    Dir["#{output}/**/*"].select do |path|
      case File.extname(path.downcase)
      when '.plist'
        path.split('/')[-2] == 'Syntaxes'
      when '.tmlanguage'
        true
      when '.cson'
        path.split('/')[-2] == 'grammars'
      else
        false
      end
    end
  end

  attr_reader :url

  def initialize(url)
    @url = url
  end

  def fetch(tmp_dir)
    self.class.fetch(tmp_dir, url)
  end
end

class SingleGrammar
  attr_reader :url

  def initialize(url)
    @url = url
  end

  def fetch(tmp_dir)
    filename = File.join(tmp_dir, File.basename(url))
    `curl --silent --location --max-time 10 --output "#{filename}" "#{url}"`
    raise "Failed to fetch grammar: #{url}: #{$?.to_s}" unless $?.success?
    [filename]
  end
end

class SVNPackage
  attr_reader :url

  def initialize(url)
    @url = url
  end

  def fetch(tmp_dir)
    `svn export -q "#{url}/Syntaxes" "#{tmp_dir}/Syntaxes"`
    raise "Failed to export SVN repository: #{url}: #{$?.to_s}" unless $?.success?
    Dir["#{tmp_dir}/Syntaxes/*.{plist,tmLanguage,tmlanguage}"]
  end
end

class GitHubPackage
  def self.parse_url(url)
    url, ref = url.split("@", 2)
    path = URI.parse(url).path.split('/')
    [path[1], path[2].chomp('.git'), ref || "master"]
  end

  attr_reader :user
  attr_reader :repo
  attr_reader :ref

  def initialize(url)
    @user, @repo, @ref = self.class.parse_url(url)
  end

  def url
    suffix = "@#{ref}" unless ref == "master"
    "https://github.com/#{user}/#{repo}#{suffix}"
  end

  def fetch(tmp_dir)
    url = "https://github.com/#{user}/#{repo}/archive/#{ref}.tar.gz"
    TarballPackage.fetch(tmp_dir, url)
  end
end

def load_grammar(path)
  case File.extname(path.downcase)
  when '.plist', '.tmlanguage'
    Plist::parse_xml(path)
  when '.cson'
    cson = `"#{CSONC}" "#{path}"`
    raise "Failed to convert CSON grammar '#{path}': #{$?.to_s}" unless $?.success?
    JSON.parse(cson)
  else
    raise "Invalid document type #{path}"
  end
end

def install_grammar(tmp_dir, source, all_scopes)
  p = if source.end_with?('.tmLanguage', '.plist')
        SingleGrammar.new(source)
      elsif source.start_with?('https://github.com')
        GitHubPackage.new(source)
      elsif source.start_with?('http://svn.textmate.org')
        SVNPackage.new(source)
      elsif source.end_with?('.tar.gz')
        TarballPackage.new(source)
      else
        nil
      end

  raise "Unsupported source: #{source}" unless p

  installed = []

  p.fetch(tmp_dir).each do |path|
    grammar = load_grammar(path)
    scope = grammar['scopeName']

    if all_scopes.key?(scope)
      $stderr.puts "WARN: Duplicated scope #{scope}\n" +
        "  Current package: #{p.url}\n" +
        "  Previous package: #{all_scopes[scope]}"
      next
    end

    File.write(File.join(GRAMMARS_PATH, "#{scope}.json"), JSON.pretty_generate(grammar))
    all_scopes[scope] = p.url
    installed << scope
  end

  $stderr.puts("OK #{p.url} (#{installed.join(', ')})")
end

def run_thread(queue, all_scopes)
  Dir.mktmpdir do |tmpdir|
    loop do
      source, index = begin
        queue.pop(true)
      rescue ThreadError
        # The queue is empty.
        break
      end

      dir = "#{tmpdir}/#{index}"
      Dir.mkdir(dir)

      install_grammar(dir, source, all_scopes)
    end
  end
end

def generate_yaml(all_scopes, base)
  yaml = all_scopes.each_with_object(base) do |(key,value),out|
    out[value] ||= []
    out[value] << key
  end

  yaml = yaml.sort.to_h
  yaml.each { |k, v| v.sort! }
  yaml
end

def main(sources)
  begin
    Dir.mkdir(GRAMMARS_PATH)
  rescue Errno::EEXIST
  end

  `npm install`

  all_scopes = {}

  if ARGV[0] == '--add'
    Dir.mktmpdir do |tmpdir|
      install_grammar(tmpdir, ARGV[1], all_scopes)
    end
    generate_yaml(all_scopes, sources)
  else
    queue = Queue.new

    sources.each do |url, scopes|
      queue.push([url, queue.length])
    end

    threads = 8.times.map do
      Thread.new { run_thread(queue, all_scopes) }
    end
    threads.each(&:join)
    generate_yaml(all_scopes, {})
  end
end

sources = File.open(SOURCES_FILE) do |file|
  YAML.load(file)
end

yaml = main(sources)

File.write(SOURCES_FILE, YAML.dump(yaml))

$stderr.puts("Done")