Merge pull request #1710 from github/grammars

Add github-linguist-grammars gem
This commit is contained in:
Vicent Marti
2014-11-14 16:12:22 +01:00
11 changed files with 709 additions and 2 deletions

2
.gitignore vendored
View File

@@ -2,3 +2,5 @@ Gemfile.lock
.bundle/
benchmark/
lib/linguist/samples.json
/grammars
/node_modules

View File

@@ -1,3 +1,4 @@
source 'https://rubygems.org'
gemspec
gemspec :name => "github-linguist"
gemspec :name => "github-linguist-grammars"
gem 'test-unit', require: false if RUBY_VERSION >= '2.2'

View File

@@ -31,6 +31,12 @@ task :build_gem => :samples do
File.delete("lib/linguist/languages.json")
end
task :build_grammars_gem do
rm_rf "grammars"
sh "script/download-grammars"
sh "gem", "build", "github-linguist-grammars.gemspec"
end
namespace :benchmark do
benchmark_path = "benchmark/results"

View File

@@ -0,0 +1,14 @@
require File.expand_path('../lib/linguist/version', __FILE__)
Gem::Specification.new do |s|
s.name = 'github-linguist-grammars'
s.version = Linguist::VERSION
s.summary = "Language grammars for use with github-linguist"
s.authors = "GitHub"
s.homepage = "https://github.com/github/linguist"
s.files = ['lib/linguist/grammars.rb'] + Dir['grammars/*']
s.add_development_dependency 'plist', '~>3.1'
end

View File

@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
s.homepage = "https://github.com/github/linguist"
s.license = "MIT"
s.files = Dir['lib/**/*']
s.files = Dir['lib/**/*'] - ['lib/linguist/grammars.rb']
s.executables << 'linguist'
s.add_dependency 'charlock_holmes', '~> 0.7.3'

387
grammars.yml Normal file
View File

@@ -0,0 +1,387 @@
---
http://hww3.riverweb.com/dist/Pike_TextMate.tar.gz:
- source.pike
http://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage:
- text.xml.genshi
http://svn.textmate.org/trunk/Review/Bundles/BlitzMax.tmbundle:
- source.blitzmax
http://svn.textmate.org/trunk/Review/Bundles/Cython.tmbundle:
- source.cython
http://svn.textmate.org/trunk/Review/Bundles/F%20Sharp.tmbundle:
- source.fsharp
http://svn.textmate.org/trunk/Review/Bundles/Forth.tmbundle:
- source.forth
http://svn.textmate.org/trunk/Review/Bundles/Ruby%20Sass.tmbundle:
- source.sass
http://svn.textmate.org/trunk/Review/Bundles/SecondLife%20LSL.tmbundle:
- source.lsl
http://svn.textmate.org/trunk/Review/Bundles/VHDL.tmbundle:
- source.vhdl
http://svn.textmate.org/trunk/Review/Bundles/XQuery.tmbundle:
- source.xquery
https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz:
- source.systemverilog
- source.ucfconstraints
https://bitbucket.org/bitlang/sublime_cobol/raw/b0e9c44ac5f7a2fb553421aa986b35854cbfda4a/COBOL.tmLanguage:
- source.cobol
https://fan.googlecode.com/hg-history/Build%201.0.55/adm/tools/textmate/Fan.tmbundle/Syntaxes/Fan.tmLanguage:
- source.fan
https://github.com/AlanQuatermain/go-tmbundle:
- source.go
https://github.com/Anomareh/PHP-Twig.tmbundle:
- text.html.twig
https://github.com/Cirru/sublime-cirru/raw/master/Cirru.tmLanguage:
- source.cirru
https://github.com/Cykey/Sublime-Logos:
- source.logos
https://github.com/Drako/SublimeBrainfuck/raw/master/Brainfuck.tmLanguage:
- source.bf
https://github.com/JohnNilsson/awk-sublime/raw/master/AWK.tmLanguage:
- source.awk
https://github.com/MarioRicalde/SCSS.tmbundle:
- source.scss
https://github.com/Oldes/Sublime-REBOL:
- source.rebol
https://github.com/Red-Nova-Technologies/autoitv3-tmbundle:
- source.autoit.3
https://github.com/SalGnt/Sublime-VimL:
- source.viml
https://github.com/Shammah/boo-sublime/raw/master/Boo.tmLanguage:
- source.boo
https://github.com/SublimeText/ColdFusion:
- source.cfscript
- source.cfscript.cfc
- text.cfml.basic
- text.html.cfm
https://github.com/SublimeText/NSIS:
- source.nsis
https://github.com/Varriount/NimLime:
- source.nimrod
- source.nimrod_filter
- source.nimrodcfg
https://github.com/angryant0007/VBDotNetSyntax:
- source.vbnet
https://github.com/aroben/ada.tmbundle/raw/c45eed4d5f98fe3bcbbffbb9e436601ab5bbde4b/Syntaxes/Ada.plist:
- source.ada
https://github.com/aroben/ruby.tmbundle@4636a3023153c3034eb6ffc613899ba9cf33b41f:
- source.ruby
- text.html.erb
https://github.com/atom/language-coffee-script:
- source.coffee
- source.litcoffee
https://github.com/atom/language-csharp:
- source.cs
- source.csx
- source.nant-build
https://github.com/atom/language-javascript:
- source.js
- source.js.regexp
https://github.com/atom/language-python:
- source.python
- source.regexp.python
- text.python.traceback
https://github.com/atom/language-shellscript:
- source.shell
- text.shell-session
https://github.com/austinwagner/sublime-sourcepawn:
- source.sp
https://github.com/bfad/Sublime-Lasso:
- file.lasso
https://github.com/bholt/chapel-tmbundle:
- source.chapel
https://github.com/brandonwamboldt/sublime-nginx:
- source.nginx
https://github.com/carsonoid/sublime_man_page_support/raw/master/man-groff.tmLanguage:
- text.groff
https://github.com/ccreutzig/sublime-MuPAD:
- source.mupad
https://github.com/cdwilson/nesC.tmbundle:
- source.nesc
https://github.com/clemos/haxe-sublime-bundle:
- source.erazor
- source.haxe.2
- source.hss.1
- source.hxml
- source.nmml
https://github.com/cucumber/cucumber-tmbundle:
- source.ruby.rspec.cucumber.steps
- text.gherkin.feature
https://github.com/daaain/Handlebars/raw/master/Handlebars.tmLanguage:
- text.html.handlebars
https://github.com/davidpeckham/powershell.tmbundle:
- source.powershell
https://github.com/davidrios/jade-tmbundle:
- source.jade
- source.pyjade
https://github.com/elixir-lang/elixir-tmbundle:
- source.elixir
- text.elixir
- text.html.elixir
https://github.com/ericzou/ebundles/raw/master/Bundles/MSDOS%20batch%20file.tmbundle/Syntaxes/MSDOS%20batch%20file.tmLanguage:
- source.dosbatch
https://github.com/euler0/sublime-glsl/raw/master/GLSL.tmLanguage:
- source.glsl
https://github.com/fancy-lang/fancy-tmbundle:
- source.fancy
https://github.com/gingerbeardman/monkey.tmbundle:
- source.monkey
https://github.com/guillermooo/dart-sublime-bundle/raw/master/Dart.tmLanguage:
- source.dart
https://github.com/harrism/sublimetext-cuda-cpp/raw/master/cuda-c%2B%2B.tmLanguage:
- source.cuda-c++
https://github.com/jeancharles-roger/ceylon-sublimetext/raw/master/Ceylon.tmLanguage:
- source.ceylon
https://github.com/jfairbank/Sublime-Text-2-OpenEdge-ABL:
- source.abl
https://github.com/jhasse/sublime-rust:
- source.rust
https://github.com/johanasplund/sublime-befunge/raw/master/Befunge-93.tmLanguage:
- source.befunge
https://github.com/joshaven/RDoc.tmbundle:
- text.rdoc
https://github.com/jpcamara/Textmate-Gosu-Bundle/raw/master/Gosu.tmbundle/Syntaxes/Gosu.tmLanguage:
- source.gosu.2
https://github.com/kswedberg/jquery-tmbundle:
- source.js.jquery
https://github.com/laughedelic/sublime-idris/raw/master/Idris.tmLanguage:
- source.idris
https://github.com/lavrton/sublime-better-typescript:
- source.ts
https://github.com/leafo/moonscript-tmbundle:
- source.moonscript
https://github.com/lunixbochs/x86-assembly-textmate-bundle:
- source.asm.x86
https://github.com/macekond/Alloy.tmbundle:
- source.alloy
https://github.com/mads379/opa.tmbundle:
- source.opa
https://github.com/mads379/scala.tmbundle:
- source.sbt
- source.scala
https://github.com/marconi/mako-tmbundle:
- text.html.mako
https://github.com/mattfoster/gnuplot-tmbundle:
- source.gnuplot
https://github.com/mgalloy/idl.tmbundle:
- source.idl
- source.idl-dlm
- text.idl-idldoc
https://github.com/michaeledgar/protobuf-tmbundle:
- source.protobuf
https://github.com/mkolosick/Sublime-Coq/raw/master/Coq.tmLanguage:
- source.coq
https://github.com/mokus0/Agda.tmbundle:
- source.agda
https://github.com/nanoant/Julia.tmbundle:
- source.julia
https://github.com/nanoant/assembly.tmbundle/raw/master/Syntaxes/objdump%20C%2B%2B.tmLanguage:
- objdump.x86asm
https://github.com/nilium/ooc.tmbundle:
- source.ooc
https://github.com/paulmillr/LiveScript.tmbundle:
- source.livescript
https://github.com/pferruggiaro/sublime-tea:
- source.tea
https://github.com/puppet-textmate-bundle/puppet-textmate-bundle:
- source.puppet
https://github.com/pvl/abap.tmbundle:
- source.abap
https://github.com/scalate/Scalate.tmbundle:
- source.scaml
- text.html.ssp
https://github.com/shadanan/mathematica-tmbundle:
- source.mathematica
https://github.com/shellderp/sublime-robot-plugin:
- text.robot
https://github.com/simongregory/actionscript3-tmbundle:
- source.actionscript.3
- text.html.asdoc
- text.xml.flex-config
https://github.com/skozlovf/Sublime-QML:
- source.qml
https://github.com/slash-lang/Slash.tmbundle:
- text.html.slash
https://github.com/slavapestov/factor/raw/master/misc/Factor.tmbundle/Syntaxes/Factor.tmLanguage:
- source.factor
https://github.com/slim-template/ruby-slim.tmbundle:
- text.slim
https://github.com/smiledawgg/Bro.tmbundle:
- source.bro
- source.bro.sig
https://github.com/staltz/SublimeXtend:
- source.xtend
https://github.com/statatmbundle/Stata.tmbundle:
- source.mata
- source.stata
https://github.com/swannodette/textmate-clojure:
- source.clojure
https://github.com/technosophos/Vala-TMBundle:
- source.vala
https://github.com/textmate/antlr.tmbundle:
- source.antlr
https://github.com/textmate/apache.tmbundle:
- source.apache-config
- source.apache-config.mod_perl
https://github.com/textmate/applescript.tmbundle:
- source.applescript
https://github.com/textmate/asp.tmbundle:
- source.asp
- text.html.asp
https://github.com/textmate/c.tmbundle:
- source.c
- source.c++
https://github.com/textmate/cmake.tmbundle:
- source.cache.cmake
- source.cmake
https://github.com/textmate/cpp-qt.tmbundle:
- source.c++.qt
- source.qmake
https://github.com/textmate/css.tmbundle:
- source.css
https://github.com/textmate/d.tmbundle:
- source.d
https://github.com/textmate/diff.tmbundle:
- source.diff
https://github.com/textmate/dylan.tmbundle:
- source.dylan
- source.lid
- source.makegen
https://github.com/textmate/eiffel.tmbundle:
- source.eiffel
https://github.com/textmate/erlang.tmbundle:
- source.erlang
- text.html.erlang.yaws
https://github.com/textmate/fortran.tmbundle:
- source.fortran
- source.fortran.modern
https://github.com/textmate/gettext.tmbundle:
- source.po
https://github.com/textmate/groovy.tmbundle:
- source.groovy
https://github.com/textmate/haskell.tmbundle:
- source.haskell
- text.tex.latex.haskell
https://github.com/textmate/html.tmbundle:
- text.html.basic
https://github.com/textmate/inform.tmbundle:
- source.inform
https://github.com/textmate/ini.tmbundle:
- source.ini
https://github.com/textmate/io.tmbundle:
- source.io
https://github.com/textmate/java.tmbundle:
- source.java
- source.java-properties
- text.html.jsp
- text.junit-test-report
https://github.com/textmate/javadoc.tmbundle:
- text.html.javadoc
https://github.com/textmate/javascript-objective-j.tmbundle:
- source.js.objj
https://github.com/textmate/json.tmbundle:
- source.json
https://github.com/textmate/latex.tmbundle:
- text.bibtex
- text.log.latex
- text.tex
- text.tex.latex
- text.tex.latex.beamer
- text.tex.latex.memoir
https://github.com/textmate/less.tmbundle:
- source.css.less
https://github.com/textmate/lilypond.tmbundle:
- source.lilypond
https://github.com/textmate/lisp.tmbundle:
- source.lisp
https://github.com/textmate/logtalk.tmbundle:
- source.logtalk
https://github.com/textmate/lua.tmbundle:
- source.lua
https://github.com/textmate/make.tmbundle:
- source.makefile
https://github.com/textmate/markdown.tmbundle:
- text.html.markdown
https://github.com/textmate/matlab.tmbundle:
- source.matlab
- source.octave
https://github.com/textmate/nemerle.tmbundle:
- source.nemerle
https://github.com/textmate/objective-c.tmbundle:
- source.objc
- source.objc++
- source.strings
https://github.com/textmate/ocaml.tmbundle:
- source.camlp4.ocaml
- source.ocaml
- source.ocamllex
- source.ocamlyacc
https://github.com/textmate/pascal.tmbundle:
- source.pascal
https://github.com/textmate/perl.tmbundle:
- source.perl
https://github.com/textmate/php-smarty.tmbundle:
- source.smarty
https://github.com/textmate/php.tmbundle:
- text.html.php
https://github.com/textmate/postscript.tmbundle:
- source.postscript
https://github.com/textmate/processing.tmbundle:
- source.processing
https://github.com/textmate/prolog.tmbundle:
- source.prolog
https://github.com/textmate/python-django.tmbundle:
- source.python.django
- text.html.django
https://github.com/textmate/r.tmbundle:
- source.r
- text.tex.latex.rd
https://github.com/textmate/restructuredtext.tmbundle:
- text.restructuredtext
https://github.com/textmate/ruby-haml.tmbundle:
- text.haml
https://github.com/textmate/ruby-on-rails-tmbundle:
- source.js.erb.rails
- source.ruby.rails
- source.ruby.rails.rjs
- source.sql.ruby
- text.html.erb.rails
https://github.com/textmate/scheme.tmbundle:
- source.scheme
https://github.com/textmate/scilab.tmbundle:
- source.scilab
https://github.com/textmate/sql.tmbundle:
- source.sql
https://github.com/textmate/standard-ml.tmbundle:
- source.cm
- source.ml
https://github.com/textmate/swift.tmbundle:
- source.swift
https://github.com/textmate/tcl.tmbundle:
- source.tcl
- text.html.tcl
https://github.com/textmate/text.tmbundle:
- text.plain
https://github.com/textmate/textile.tmbundle:
- text.html.textile
https://github.com/textmate/textmate.tmbundle:
- source.regexp.oniguruma
- source.tm-properties
https://github.com/textmate/toml.tmbundle:
- source.toml
https://github.com/textmate/verilog.tmbundle:
- source.verilog
https://github.com/textmate/xml.tmbundle:
- text.xml
- text.xml.xsl
https://github.com/textmate/yaml.tmbundle:
- source.yaml
https://github.com/tomas-stefano/smalltalk-tmbundle:
- source.smalltalk
https://github.com/vic/ioke-outdated/raw/master/share/TextMate/Ioke.tmbundle/Syntaxes/Ioke.tmLanguage:
- source.ioke
https://github.com/vkostyukov/kotlin-sublime-package:
- source.Kotlin
https://github.com/vmg/zephir-sublime:
- source.php.zephir
https://github.com/whitequark/llvm.tmbundle:
- source.llvm

13
lib/linguist/grammars.rb Normal file
View File

@@ -0,0 +1,13 @@
# Note: This file is included in the github-linguist-grammars gem, not the
# github-linguist gem.
module Linguist
module Grammars
# Get the path to the directory containing the language grammar JSON files.
#
# Returns a String.
def self.path
File.expand_path("../../../grammars", __FILE__)
end
end
end

6
package.json Normal file
View File

@@ -0,0 +1,6 @@
{
"repository": "https://github.com/github/linguist",
"dependencies": {
"season": "~>3.0"
}
}

221
script/download-grammars Executable file
View File

@@ -0,0 +1,221 @@
#!/usr/bin/env ruby
require 'json'
require 'net/http'
require 'plist'
require 'set'
require 'tmpdir'
require 'uri'
require 'yaml'
GRAMMARS_PATH = File.expand_path("../../grammars", __FILE__)
SOURCES_FILE = File.expand_path("../../grammars.yml", __FILE__)
CSONC = File.expand_path("../../node_modules/.bin/csonc", __FILE__)
class TarballPackage
def self.fetch(tmp_dir, url)
`curl --silent --location --max-time 10 --output "#{tmp_dir}/archive" "#{url}"`
raise "Failed to fetch GH package: #{url} #{$?.to_s}" unless $?.success?
output = File.join(tmp_dir, 'extracted')
Dir.mkdir(output)
`tar -C "#{output}" -xf "#{tmp_dir}/archive"`
raise "Failed to uncompress tarball: #{tmp_dir}/archive (from #{url}) #{$?.to_s}" unless $?.success?
Dir["#{output}/**/*"].select do |path|
case File.extname(path.downcase)
when '.plist'
path.split('/')[-2] == 'Syntaxes'
when '.tmlanguage'
true
when '.cson'
path.split('/')[-2] == 'grammars'
else
false
end
end
end
attr_reader :url
def initialize(url)
@url = url
end
def fetch(tmp_dir)
self.class.fetch(tmp_dir, url)
end
end
class SingleGrammar
attr_reader :url
def initialize(url)
@url = url
end
def fetch(tmp_dir)
filename = File.join(tmp_dir, File.basename(url))
`curl --silent --location --max-time 10 --output "#{filename}" "#{url}"`
raise "Failed to fetch grammar: #{url}: #{$?.to_s}" unless $?.success?
[filename]
end
end
class SVNPackage
attr_reader :url
def initialize(url)
@url = url
end
def fetch(tmp_dir)
`svn export -q "#{url}/Syntaxes" "#{tmp_dir}/Syntaxes"`
raise "Failed to export SVN repository: #{url}: #{$?.to_s}" unless $?.success?
Dir["#{tmp_dir}/Syntaxes/*.{plist,tmLanguage,tmlanguage}"]
end
end
class GitHubPackage
def self.parse_url(url)
url, ref = url.split("@", 2)
path = URI.parse(url).path.split('/')
[path[1], path[2].chomp('.git'), ref || "master"]
end
attr_reader :user
attr_reader :repo
attr_reader :ref
def initialize(url)
@user, @repo, @ref = self.class.parse_url(url)
end
def url
suffix = "@#{ref}" unless ref == "master"
"https://github.com/#{user}/#{repo}#{suffix}"
end
def fetch(tmp_dir)
url = "https://github.com/#{user}/#{repo}/archive/#{ref}.tar.gz"
TarballPackage.fetch(tmp_dir, url)
end
end
def load_grammar(path)
case File.extname(path.downcase)
when '.plist', '.tmlanguage'
Plist::parse_xml(path)
when '.cson'
cson = `"#{CSONC}" "#{path}"`
raise "Failed to convert CSON grammar '#{path}': #{$?.to_s}" unless $?.success?
JSON.parse(cson)
else
raise "Invalid document type #{path}"
end
end
def install_grammar(tmp_dir, source, all_scopes)
p = if source.end_with?('.tmLanguage', '.plist')
SingleGrammar.new(source)
elsif source.start_with?('https://github.com')
GitHubPackage.new(source)
elsif source.start_with?('http://svn.textmate.org')
SVNPackage.new(source)
elsif source.end_with?('.tar.gz')
TarballPackage.new(source)
else
nil
end
raise "Unsupported source: #{source}" unless p
installed = []
p.fetch(tmp_dir).each do |path|
grammar = load_grammar(path)
scope = grammar['scopeName']
if all_scopes.key?(scope)
$stderr.puts "WARN: Duplicated scope #{scope}\n" +
" Current package: #{p.url}\n" +
" Previous package: #{all_scopes[scope]}"
next
end
File.write(File.join(GRAMMARS_PATH, "#{scope}.json"), JSON.pretty_generate(grammar))
all_scopes[scope] = p.url
installed << scope
end
$stderr.puts("OK #{p.url} (#{installed.join(', ')})")
end
def run_thread(queue, all_scopes)
Dir.mktmpdir do |tmpdir|
loop do
source, index = begin
queue.pop(true)
rescue ThreadError
# The queue is empty.
break
end
dir = "#{tmpdir}/#{index}"
Dir.mkdir(dir)
install_grammar(dir, source, all_scopes)
end
end
end
def generate_yaml(all_scopes, base)
yaml = all_scopes.each_with_object(base) do |(key,value),out|
out[value] ||= []
out[value] << key
end
yaml = yaml.sort.to_h
yaml.each { |k, v| v.sort! }
yaml
end
def main(sources)
begin
Dir.mkdir(GRAMMARS_PATH)
rescue Errno::EEXIST
end
`npm install`
all_scopes = {}
if ARGV[0] == '--add'
Dir.mktmpdir do |tmpdir|
install_grammar(tmpdir, ARGV[1], all_scopes)
end
generate_yaml(all_scopes, sources)
else
queue = Queue.new
sources.each do |url, scopes|
queue.push([url, queue.length])
end
threads = 8.times.map do
Thread.new { run_thread(queue, all_scopes) }
end
threads.each(&:join)
generate_yaml(all_scopes, {})
end
end
sources = File.open(SOURCES_FILE) do |file|
YAML.load(file)
end
yaml = main(sources)
File.write(SOURCES_FILE, YAML.dump(yaml))
$stderr.puts("Done")

57
script/prune-grammars Executable file
View File

@@ -0,0 +1,57 @@
#!/usr/bin/env ruby
require "json"
require "linguist"
require "set"
require "yaml"
def find_includes(json)
case json
when Hash
result = []
if inc = json["include"]
result << inc.split("#", 2).first unless inc.start_with?("#", "$")
end
result + json.values.flat_map { |v| find_includes(v) }
when Array
json.flat_map { |v| find_includes(v) }
else
[]
end
end
def transitive_includes(scope, includes)
scopes = Set.new
queue = includes[scope] || []
while s = queue.shift
next if scopes.include?(s)
scopes << s
queue += includes[s] || []
end
scopes
end
includes = {}
Dir["grammars/*.json"].each do |path|
scope = File.basename(path).sub(/\.json/, '')
json = JSON.load(File.read(path))
incs = find_includes(json)
next if incs.empty?
includes[scope] ||= []
includes[scope] += incs
end
yaml = YAML.load(File.read("grammars.yml"))
language_scopes = Linguist::Language.all.map(&:tm_scope).to_set
# The set of used scopes is the scopes for each language, plus all the scopes
# they include, transitively.
used_scopes = language_scopes + language_scopes.flat_map { |s| transitive_includes(s, includes).to_a }.to_set
unused = yaml.reject { |repo, scopes| scopes.any? { |scope| used_scopes.include?(scope) } }
puts "Unused grammar repos"
puts unused.map { |repo, scopes| sprintf("%-100s %s", repo, scopes.join(", ")) }.sort.join("\n")
yaml.delete_if { |k| unused.key?(k) }
File.write("grammars.yml", YAML.dump(yaml))

BIN
vendor/cache/plist-3.1.0.gem vendored Normal file

Binary file not shown.