Commit in-progress work to request sanity check

This commit is contained in:
Alhadis
2018-04-13 22:54:18 +10:00
parent f34c741c29
commit 6669270e84
9 changed files with 458 additions and 245 deletions

45
script/helpers/all.rb Normal file
View File

@@ -0,0 +1,45 @@
require_relative "./grammar_list"
require_relative "./grammar_source"
require_relative "./host"
require_relative "./submodule"
require "open3"
# When true, `log` prints nothing
$quiet = false

# Print a feedback message to STDOUT, unless the $quiet global is set.
#
# msg - the text to print
#
# Returns nil.
def log(msg)
  return if $quiet
  puts msg
end
# Run an external command, echoing the command line through `log` first.
#
# args - the program name followed by its arguments, passed to Open3.capture2e
#
# If the command exits unsuccessfully, its combined stdout/stderr output is
# logged line by line, a warning is written to STDERR, and the process exits
# with status 1. Returns nil when the command succeeds.
def command(*args)
  log "$ #{args.join(' ')}"
  captured, result = Open3.capture2e(*args)
  return if result.success?

  captured.each_line { |line| log " > #{line}" }
  warn "Command failed. Aborting."
  exit 1
end
# Absolute path of Linguist's base directory, resolved relative to this file
ROOT = File.expand_path("../../../", __FILE__)
# Expand a file path relative to Linguist's base directory.
#
# path - a String path, either relative to ROOT or already prefixed with ROOT
#
# Returns a String of the form "#{ROOT}/<path without the ROOT prefix>".
def repo_path(path)
  # Only strip ROOT when it is a complete leading path component, i.e. it is
  # followed by "/" or ends the string. Otherwise a sibling directory sharing
  # the prefix (ROOT + "ics/...") would be silently rewritten into ROOT.
  relative = path.sub(%r{\A#{Regexp.escape(ROOT)}(?:/|\z)}, "")
  "#{ROOT}/#{relative}"
end
# Check whether something exists at a path inside the repository.
#
# path - a path understood by repo_path
#
# Returns true or false.
def exists?(path)
  target = repo_path(path)
  File.exist?(target)
end
# Read the entire contents of a file inside the repository.
#
# path - a path understood by repo_path
#
# Returns the file's contents as a String.
def read(path)
  target = repo_path(path)
  File.read(target)
end
# Write data to a file inside the repository, replacing any previous contents.
#
# path - a path understood by repo_path
# data - the content to store
#
# Returns whatever File.write returns (the number of bytes written).
def write(path, data)
  target = repo_path(path)
  File.write(target, data)
end

View File

@@ -0,0 +1,75 @@
require_relative "./grammar_source"
require_relative "./submodule"
require_relative "./helpers"
require "bundler/setup"
require "linguist"
require "json"
require "yaml"
# Collects Linguist's languages together with the sources of their grammars,
# and renders that information as Markdown / `.gitmodules` content.
#
# NOTE(review): the methods below do not yet agree on one data model.
# `load_sources` stores whatever `@submodules[path]` returns, while
# `to_markdown` treats `@sources` values as path/URL strings and treats
# `@submodules` as a path-keyed collection whose entries answer `[:short]`
# and `[:url]`. Confirm the intended shapes before relying on this class.
class GrammarList
  ROOT = File.expand_path "../../../", __FILE__

  def initialize
    @submodules = Submodule.list
    @language_names = load_languages
    @sources = load_sources
  end

  # Grab the name of each language, sorted case-insensitively
  def load_languages
    Linguist::Language.all.map(&:name).sort do |a, b|
      a.downcase <=> b.downcase
    end
  end

  # Load grammars.yml
  #
  # Returns a Hash mapping each scope listed in grammars.yml to the
  # `@submodules` entry found under that scope's path.
  def load_sources
    sources = {}
    YAML.load_file("#{ROOT}/grammars.yml").each do |path, scopes|
      scopes.each { |scope| sources[scope] = @submodules[path] }
    end
    sources
  end

  # Format list as Markdown
  #
  # Languages whose scope is "none", and languages whose grammar cannot be
  # resolved to a known source, are left out.
  #
  # Returns a String with one "- **Name:** [short](long)" line per language.
  def to_markdown
    markdown = ""
    @language_names.each do |item|
      lang = Linguist::Language[item]
      scope = lang.tm_scope
      next if scope == "none"
      path = @sources[scope] || scope
      case path
      when "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz"
        short_url = "bitbucket:Clams/sublimesystemverilog"
        long_url = "https://bitbucket.org/Clams/sublimesystemverilog"
      when "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage"
        short_url = "genshi.edgewall.org/query"
        long_url = "https://genshi.edgewall.org/query"
      when "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage"
        short_url = "eregon/oz-tmbundle"
        long_url = "https://github.com/eregon/oz-tmbundle"
      else
        # `path` already falls back to the scope itself, so a scope missing
        # from @sources is skipped here instead of raising on nil.chomp.
        submodule = @submodules[path.chomp("/")]
        next unless submodule
        short_url = submodule[:short]
        long_url = submodule[:url]
      end
      markdown << "- **#{item}:** [#{short_url}](#{long_url})\n"
    end
    markdown
  end

  # Rewrite `.gitmodules` and the reader-friendly list in vendor/README.md.
  def update_lists
    # Update .gitmodules
    #
    # Entries are ordered by scope name. Blank entries and repeats (one
    # source may provide several scopes) are dropped, and the remainder is
    # joined into a single String: File.write would otherwise store the
    # Array's #inspect output.
    # NOTE(review): if no source resolves, this writes an empty file.
    entries = @sources.sort_by { |scope, _| scope }.map { |_, source| source }
    File.write "#{ROOT}/.gitmodules", entries.compact.uniq.join

    # Update the file displaying the reader-friendly list of grammar repos
    readme = "#{ROOT}/vendor/README.md"
    # Keep everything up to and including the first HTML comment. Only the
    # `m` flag is wanted ("." matches newlines); Ruby's `s` flag selects the
    # Windows-31J encoding rather than a dot-all mode.
    preamble = File.read(readme).match(/\A.+?<!--.+?-->\n/m)
    list = to_markdown
    File.write(readme, preamble.to_s + list)
  end
end

View File

@@ -0,0 +1,88 @@
require_relative "./all"
require_relative "./host"
require_relative "./unique"
require_relative "./url"
# Represents the source of a language grammar
#
# NOTE: Sources are mostly - but not always - connected to a
#       Submodule. Some ad-hoc exceptions exist which aren't
#       connected with a Git repository.
#
class GrammarSource < Unique
  # RegExp for matching trusted domain hosts
  HOSTS = Regexp.union(Host.whitelist)

  # attr - Hash of properties:
  #        :name      - unique name of repository
  #        :host      - Host of the repo's provider
  #        :author    - username of repo's author
  #        :url       - resolved absolute URL
  #        :short_url - abbreviated form used as link text (optional)
  #        :long_url  - address used as link target (optional)
  def initialize(attr = {})
    @name      = attr[:name]
    @host      = attr[:host]
    @author    = attr[:author]
    @url       = attr[:url]
    # Explicit short/long URLs (supplied for the ad-hoc sources in by_url)
    # take precedence over the derived defaults below.
    @short_url = attr[:short_url]
    @long_url  = attr[:long_url]

    # Resolve missing properties
    @url       ||= "https://#{@host.long}/#{@author}/#{@name}.git"
    # Interpolation (rather than String#+) tolerates a missing author
    @short_url ||= "#{@host.prefix}#{@author}/#{@name}"
    @long_url  ||= @url
  end

  # Format source as a Markdown link
  def to_markdown
    "[#{@short_url}](#{@long_url})"
  end

  # Define a grammar source by its upstream URL.
  #
  # url - an HTTPS, HTTP, or SSH address accepted by git-remote(1)
  #       Only domains listed in HOSTS are accepted; unrecognised
  #       hostnames or invalid URLs will raise an ArgumentError.
  #
  # Assumption: Repo URLs will never include subdomains.
  # We only check for a possible `www`, nothing else.
  def self.by_url(url)
    case url
    when "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz"
      self.define({
        name:      "sublimesystemverilog",
        host:      Host.define("bitbucket.org"),
        author:    "Clams",
        url:       url,
        short_url: "bitbucket:Clams/sublimesystemverilog",
        long_url:  "https://bitbucket.org/Clams/sublimesystemverilog"
      })
    when "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage"
      self.define({
        name:      "Genshi.tmbundle",
        host:      Host.define("genshi.edgewall.org"),
        url:       url,
        short_url: "genshi.edgewall.org/query",
        long_url:  "https://genshi.edgewall.org/query"
      })
    when "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage"
      self.define({
        name:      "oz-tmbundle",
        host:      Host.define("github.com"),
        author:    "eregon",
        url:       url,
        short_url: "eregon/oz-tmbundle",
        long_url:  "https://github.com/eregon/oz-tmbundle"
      })
    else
      # RepoURL (url.rb) is the URL-parsing helper; no `URL` constant is
      # defined by these helpers. `define` expects a Hash, so the parsed
      # result is converted using RepoURL's public readers.
      # NOTE(review): name/host/author are not carried over, because RepoURL
      # exposes no readers for them - extend once it does.
      parsed = RepoURL.parse(url)
      raise ArgumentError, "Unsupported URL: #{url}" unless parsed
      self.define(url: parsed.to_s, short_url: parsed.short)
    end
  end

  # Define a new GrammarSource, or reference an existing one
  #
  # attr - Hash accepted by #initialize. When :url is absent it is derived
  #        from :host, :author and :name.
  #
  # NOTE(review): BY_URL is not defined in this file; presumably it is
  # provided by Unique - confirm.
  def self.define(attr)
    unless attr[:url]
      host = attr[:host]
      # Accept either a hostname or an already-defined Host
      host = Host.define(host) unless host.is_a?(Host)
      url = "https://#{host.long}/#{attr[:author]}/#{attr[:name]}.git"
      # Hand the resolved Host to #initialize, which calls Host methods on it
      attr = attr.merge(host: host, url: url)
    end
    BY_URL[attr[:url]] ||= self.new(attr)
  end
end

57
script/helpers/host.rb Normal file
View File

@@ -0,0 +1,57 @@
# Hostname, which can be expressed with or without a TLD
#
# Examples
#
#   Host.define("github.com").short  # => "github"
#   Host.define("github.com").long   # => "github.com"
#
class Host
  # Instances created through Host.define, keyed by lowercased input
  INSTANCES = {}

  attr_accessor :name, :tld

  # Overrides the default "name:" prefix (see #prefix)
  attr_writer :prefix

  alias_method :short, :name

  # input - hostname String such as "github.com" or "github"
  def initialize(input)
    input = input.downcase
    # Everything after the last dot is the TLD; the rest is the name
    if (parts = input.match(/\A(.+)\.([^.]+)\z/))
      @name = parts[1]
      @tld = parts[2]
    else
      @name = input
      @tld = ""
    end
  end

  # Hosts are compared by name only: against another object's `name` when it
  # has one, otherwise against its string form.
  def ==(other)
    if other.respond_to?(:name)
      @name == other.name
    else
      @name == other.to_s
    end
  end

  # Short-name with colon appended
  def prefix
    @prefix || "#{@name}:"
  end

  # Hostname including TLD (or just the name when there is no TLD)
  def to_s
    @tld.empty? ? @name : "#{@name}.#{@tld}"
  end

  # Aliased after #to_s is defined, so that `long` refers to the method
  # above rather than to Object#to_s.
  alias_method :long, :to_s

  # Pattern matching the hostname with or without its TLD. Also lets Host
  # objects be passed directly to Regexp.union.
  def to_regexp
    name = Regexp.escape @name
    return Regexp.new(name) if @tld.empty?
    tld = Regexp.escape @tld
    Regexp.new("#{name}(?:\\.#{tld})?")
  end

  # Define a new Host, or reference an existing one.
  #
  # input - hostname String (case-insensitive), or an existing Host, which
  #         is returned unchanged
  def self.define(input)
    return input if input.is_a?(Host)
    INSTANCES[input.downcase] ||= Host.new(input)
  end

  # Whitelist of trusted hosting providers
  WHITELIST = {
    github:    define("github.com"),
    bitbucket: define("bitbucket.org"),
    gitlab:    define("gitlab.com")
  }.freeze

  # GitHub is the implied provider, so its short URLs carry no prefix
  WHITELIST[:github].prefix = ""

  # Returns an Array of the trusted Host objects.
  def self.whitelist
    WHITELIST.values
  end
end

View File

@@ -0,0 +1,90 @@
require_relative "./grammar_source"
require_relative "./all"
# Public: Represents a registered Git submodule in use by Linguist.
#
# Any updates to this class should consider submodules which aren't
# grammar-related, such as CodeMirror. See also: GrammarSource
#
# Examples
#
#   Submodule.new('vendor/CodeMirror', {url: "codemirror/CodeMirror"})
#   # => #<Submodule url="https://github.com/codemirror/CodeMirror.git">
#
#   Submodule.for_grammar('vendor/grammars/language-roff')
#   # => #<Submodule url="https://github.com/Alhadis/language-roff.git">
#
class Submodule
  attr_accessor :id, :attr

  # The GrammarSource of a grammar submodule; nil for other submodules
  attr_reader :grammar

  # id   - name of the submodule as written in `.gitmodules`
  # attr - Hash of the entry's settings, keyed by Symbol (:path, :url, ...).
  #        :path defaults to the id.
  def initialize(id, attr = {})
    @id = id
    @attr = attr
    @attr[:path] ||= @id

    # If a grammar submodule, store a pointer to source
    # NOTE(review): grammar submodules are recognised by their path; the
    # earlier draft tested the URL instead - confirm this is the intent.
    if @attr[:path].start_with?("vendor/grammars") && @attr[:url]
      @grammar = GrammarSource.by_url @attr[:url]
    end
  end

  # Submodules are ordered by id
  def <=>(other)
    @id <=> other.id
  end

  # Is the submodule registered with Git and checked out locally?
  def registered?
    configured? && exists?
  end

  # Is the submodule registered with Git?
  def configured?
    system "git", "config", "submodule.#{@id}.url"
  end

  # Has the submodule been checked out locally?
  def exists?
    # The file-level `exists?(path)` helper is shadowed by this method
    # inside instances, so the check is spelled out here.
    File.exist? repo_path(@id)
  end

  # Format an entry to use in `.gitmodules`
  def to_s
    lines = @attr.map { |key, value| "\t#{key} = #{value}" }
    "[submodule \"#{@id}\"]\n#{lines.sort.join("\n")}\n"
  end

  # Look up the registered submodule of a grammar.
  #
  # path - path of submodule as used by .gitmodules; the leading
  #        "vendor/grammars/" may be omitted
  #
  # Returns the Submodule registered under that path (nil if .gitmodules
  # has no such entry). Raises RuntimeError if the path doesn't exist.
  def self.for_grammar(path)
    path =~ /^(?:.*(?:vendor\/)?grammars\/)?([^\/]+)/i
    path = "vendor/grammars/#{$1}"
    unless exists?(path)
      raise "Submodule '#{path}' does not exist"
    end
    list[:by_id][path]
  end

  # Load the contents of .gitmodules
  #
  # Returns a Hash with two keys: `:all`, an Array of every Submodule sorted
  # by id, and `:by_id`, a Hash mapping each id to its Submodule. The result
  # is computed once and reused.
  def self.list
    if @list.nil?
      all = []
      ids = {}
      # One `[submodule "id"]` header followed by its settings, which are all
      # the following lines up to the next line starting with "[".
      pattern = /^\[submodule\s*"([^"]+)"\]$\n((?:^(?!\[).+(?:\n|$))+)/i
      read(".gitmodules").scan(pattern) do |id, data|
        attr = {}
        # One `key = value` pair per line
        data.scan(/^\s*([^\s=]+)\s*=\s*(.+)$/) do |key, value|
          attr[key.to_sym] = value.strip
        end
        submodule = new(id, attr)
        ids[id] = submodule
        all << submodule
      end
      @list = {all: all.sort, by_id: ids}
    end
    @list
  end
end

80
script/helpers/url.rb Normal file
View File

@@ -0,0 +1,80 @@
require_relative "./host"
# Public: Helper methods for resolving various URL notations
#
# Examples
#
#   RepoURL.parse("git@github.com:Alhadis/language-roff.git").to_s
#   # => "https://github.com/Alhadis/language-roff.git"
#
class RepoURL
  # attr - Hash of properties:
  #        :host                - hostname of the provider (see Host.define)
  #        :user or :author     - username of the repository's owner
  #        :repo or :name       - name of the repository
  #        :short_url/:long_url - optional overrides for #short and #long
  def initialize(attr = {})
    @host   = Host.define(attr[:host])
    @author = attr[:user] || attr[:author]
    @name   = attr[:repo] || attr[:name]
    @short  = attr[:short_url]
    @long   = attr[:long_url]
  end

  # Shortened representation of URL: `[provider:]user/repo`
  def short
    @short || "#{@host.prefix}#{@author}/#{@name}"
  end

  # Full representation of URL; defaults to the HTTPS clone address
  def long
    @long || to_s
  end

  # HTTPS clone address of the repository
  def to_s
    "https://#{@host.long}/#{@author}/#{@name}.git"
  end

  # RegExp for matching trusted domain hosts. Built on first use, so that
  # loading this file doesn't depend on Host's whitelist being ready.
  def self.hosts
    @hosts ||= Regexp.union(Host.whitelist)
  end

  # Split a URL into named subcomponents
  #
  # url - String in any of the notations handled by the match_* methods
  #
  # Returns a RepoURL, or nil if the notation isn't recognised.
  def self.parse(url)
    self.match_https(url)     ||
    self.match_ssh(url)       ||
    self.match_shorthand(url) ||
    self.match_implicit(url)
  end

  # Match a well-formed HTTP or HTTPS address
  def self.match_https(url)
    from_match url.match(/
      ^ (?<protocol>  https? :\/\/ )?
        (?<userauth>  [^@.]+ @     )?
        (?<subdomain> www \.       )?
        (?<host>      #{hosts}     )
        \/ (?<user>   [^\/]+       )
        \/ (?<repo>   [^\/]+       )
    /xi)
  end

  # Match an SSH address starting with `git@`
  def self.match_ssh(url)
    from_match url.match(/
      ^ git@
      (?<host> #{hosts}) :
      (?<user> [^\/]+) \/
      (?<repo> [^\/]+) \.git $
    /xi)
  end

  # Match `provider:user/repo`
  def self.match_shorthand(url)
    from_match url.match(/
      ^ (?<host> #{hosts}) : \/?
        (?<user> [^\/]+) \/
        (?<repo> [^\/]+) \/? $
    /xi)
  end

  # Match `user/repo` shorthand, assumed to be GitHub
  def self.match_implicit(url)
    from_match url.match(/
      ^ \/? (?<user>[^\/]+)
        \/  (?<repo>[^\/]+)
        \/? $
    /xi), host: "github.com"
  end

  # Build a RepoURL from a successful match.
  #
  # match    - MatchData with named groups, or nil
  # defaults - Hash of properties for groups the pattern doesn't capture
  #
  # A MatchData can't be handed to #initialize directly: looking up a group
  # it doesn't define raises IndexError, and its captures can't be edited.
  # The captures are therefore copied into a Hash first.
  #
  # Returns a RepoURL, or nil when match is nil.
  def self.from_match(match, defaults = {})
    return nil unless match
    attr = match.names.each_with_object(defaults.dup) do |group, hash|
      hash[group.to_sym] = match[group]
    end
    attr[:repo] = attr[:repo].sub(/\.git$/, "")
    new(attr)
  end
  private_class_method :from_match
end