mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Commit in-progress work to request sanity check
This commit is contained in:
45
script/helpers/all.rb
Normal file
45
script/helpers/all.rb
Normal file
@@ -0,0 +1,45 @@
|
||||
require_relative "./grammar_list"
|
||||
require_relative "./grammar_source"
|
||||
require_relative "./host"
|
||||
require_relative "./submodule"
|
||||
require "open3"
|
||||
|
||||
# When true, `log` prints nothing.
$quiet = false

# Print progress/debugging feedback to STDOUT unless the $quiet global is set.
#
# msg - the message to print (handed to Kernel#puts)
#
# Returns nil.
def log(msg)
  puts msg unless $quiet
end
|
||||
|
||||
# Run an external command, aborting the whole script if it fails.
#
# args - the program name followed by its arguments, handed unchanged to
#        Open3.capture2e
#
# The command line is echoed through `log` before it runs. When the command
# exits unsuccessfully, its captured output is written to STDERR (so the
# diagnostics stay visible even when $quiet is set) and the process exits
# with status 1.
#
# Returns the command's combined stdout/stderr output as a String.
def command(*args)
  log "$ #{args.join(' ')}"
  output, status = Open3.capture2e(*args)
  return output if status.success?

  output.each_line { |line| warn " > #{line}" }
  warn "Command failed. Aborting."
  exit 1
end
|
||||
|
||||
|
||||
# Absolute path of Linguist's base directory, found by resolving three `..`
# segments against this file's own path.
ROOT = File.expand_path "../../../", __FILE__

# Expand a file path relative to Linguist's base directory
#
# path - a String path, either relative to ROOT or already prefixed with it
#
# A leading ROOT is only stripped when it is followed by a path separator or
# the end of the string, so a sibling directory whose name merely starts with
# ROOT's text is not mistaken for a location inside the repository.
#
# Returns a String of the form "ROOT/relative-path".
def repo_path(path)
  relative = path.sub(/\A#{Regexp.escape(ROOT)}(?:\/|\z)/, "")
  "#{ROOT}/#{relative}"
end
|
||||
|
||||
# Check whether something exists at a path inside the repository
#
# path - a String path, resolved with `repo_path`
#
# Returns true or false.
def exists?(path)
  absolute = repo_path(path)
  File.exist?(absolute)
end
|
||||
|
||||
# Read the contents of a file inside the repository
#
# path - a String path, resolved with `repo_path`
#
# Returns the file's contents as a String.
def read(path)
  absolute = repo_path(path)
  File.read(absolute)
end
|
||||
|
||||
# Write data to a file inside the repository
#
# path - a String path, resolved with `repo_path`
# data - the content to write
#
# Returns whatever File.write returns.
def write(path, data)
  absolute = repo_path(path)
  File.write(absolute, data)
end
|
||||
75
script/helpers/grammar_list.rb
Normal file
75
script/helpers/grammar_list.rb
Normal file
@@ -0,0 +1,75 @@
|
||||
require_relative "./grammar_source"
|
||||
require_relative "./submodule"
|
||||
require_relative "./helpers"
|
||||
require "bundler/setup"
|
||||
require "linguist"
|
||||
require "json"
|
||||
require "yaml"
|
||||
|
||||
# Builds the reader-friendly list of grammar repositories used by Linguist.
class GrammarList

  # Absolute path of Linguist's base directory
  ROOT = File.expand_path "../../../", __FILE__

  # Grammar sources that are not ordinary Git submodules, keyed by their
  # grammars.yml entry. Each value is a [short_url, long_url] pair.
  SPECIAL_SOURCES = {
    "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz" => [
      "bitbucket:Clams/sublimesystemverilog",
      "https://bitbucket.org/Clams/sublimesystemverilog"
    ],
    "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage" => [
      "genshi.edgewall.org/query",
      "https://genshi.edgewall.org/query"
    ],
    "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage" => [
      "eregon/oz-tmbundle",
      "https://github.com/eregon/oz-tmbundle"
    ]
  }.freeze

  def initialize
    @submodules     = Submodule.list
    @language_names = load_languages
    @sources        = load_sources
  end

  # Grab the name of each language, sorted case-insensitively
  def load_languages
    Linguist::Language.all.map(&:name).sort_by(&:downcase)
  end

  # Load grammars.yml
  #
  # Returns a Hash mapping each TextMate scope to the grammars.yml path (or
  # URL) that provides it.
  def load_sources
    grammars = YAML.load_file("#{ROOT}/grammars.yml")
    grammars.each_with_object({}) do |(path, scopes), sources|
      scopes.each { |scope| sources[scope] = path }
    end
  end

  # Format list as Markdown
  #
  # Languages whose scope is "none", or whose grammar cannot be traced to a
  # known source, are left out.
  #
  # Returns a String with one "- **Language:** [short](long)" line per entry.
  def to_markdown
    lines = @language_names.map do |item|
      scope = Linguist::Language[item].tm_scope
      next if scope == "none"
      short_url, long_url = urls_for(@sources[scope] || scope)
      next unless short_url
      "- **#{item}:** [#{short_url}](#{long_url})\n"
    end
    lines.compact.join
  end

  # Rewrite .gitmodules and the grammar list in vendor/README.md
  def update_lists
    # Update .gitmodules, one entry per registered submodule (grammar or not)
    entries = @submodules[:all].map(&:to_s)
    File.write "#{ROOT}/.gitmodules", entries.join

    # Update the file displaying the reader-friendly list of grammar repos.
    # Everything up to and including the first HTML comment is kept verbatim.
    readme   = "#{ROOT}/vendor/README.md"
    preamble = File.read(readme)[/\A.+?<!--.+?-->\n/m]
    File.write(readme, preamble.to_s + to_markdown)
  end

  private

  # Resolve a grammars.yml path to a [short_url, long_url] pair.
  #
  # Returns nil when the path is neither a special case nor a submodule
  # with a recorded URL.
  def urls_for(path)
    return SPECIAL_SOURCES[path] if SPECIAL_SOURCES.key?(path)

    submodule = @submodules[:by_id][path.chomp("/")]
    return unless submodule

    # Accept both symbol and string keys for the parsed .gitmodules attributes
    url = submodule.attr[:url] || submodule.attr["url"]
    return unless url

    long_url = url.sub(/\.git\z/, "")
    [shorten(long_url), long_url]
  end

  # Abbreviate a repository URL: `user/repo` for GitHub,
  # `provider:user/repo` for Bitbucket and GitLab, otherwise the URL with
  # its protocol removed.
  def shorten(url)
    case url
    when %r{\Ahttps?://(?:www\.)?github\.com/([^/]+/[^/]+)}i
      Regexp.last_match(1)
    when %r{\Ahttps?://(?:www\.)?(bitbucket|gitlab)\.(?:org|com)/([^/]+/[^/]+)}i
      "#{Regexp.last_match(1).downcase}:#{Regexp.last_match(2)}"
    else
      url.sub(%r{\Ahttps?://(?:www\.)?}i, "")
    end
  end
end
|
||||
88
script/helpers/grammar_source.rb
Normal file
88
script/helpers/grammar_source.rb
Normal file
@@ -0,0 +1,88 @@
|
||||
require_relative "./all"
require_relative "./host"
require_relative "./unique"
require_relative "./url"
|
||||
|
||||
# Represents the source of a language grammar
|
||||
#
|
||||
# NOTE: Sources are mostly - but not always - connected to a
|
||||
# Submodule. Some ad-hoc exceptions exist which aren't
|
||||
# connected with a Git repository.
|
||||
#
|
||||
class GrammarSource < Unique

  # RegExp for matching trusted domain hosts
  HOSTS = Regexp.union(Host.whitelist)

  # Ad-hoc sources which aren't ordinary repositories, keyed by the exact
  # URL/path they are registered under. Each value holds the attributes
  # passed to `define` (the `:url` is filled in from the key).
  SPECIAL_CASES = {
    "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz" => {
      name:      "sublimesystemverilog",
      host:      "bitbucket.org",
      author:    "Clams",
      short_url: "bitbucket:Clams/sublimesystemverilog",
      long_url:  "https://bitbucket.org/Clams/sublimesystemverilog"
    },
    "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage" => {
      name:      "Genshi.tmbundle",
      host:      "genshi.edgewall.org",
      short_url: "genshi.edgewall.org/query",
      long_url:  "https://genshi.edgewall.org/query"
    },
    "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage" => {
      name:      "oz-tmbundle",
      host:      "github.com",
      author:    "eregon",
      short_url: "eregon/oz-tmbundle",
      long_url:  "https://github.com/eregon/oz-tmbundle"
    }
  }.freeze

  attr_reader :name, :host, :author, :url, :short_url, :long_url

  # attr - Hash of properties:
  #        :name      - Unique name of repository
  #        :host      - Hostname of repo's provider (Host or String)
  #        :author    - Username of repo's author (may be absent)
  #        :url       - Resolved absolute URL
  #        :short_url - Abbreviated form used as link text
  #        :long_url  - Address used as link target
  #
  # Any URL that isn't supplied is derived from host, author and name.
  def initialize(attr = {})
    @name   = attr[:name]
    @host   = self.class.resolve_host(attr[:host])
    @author = attr[:author]

    # Resolve missing properties. An explicit :short_url / :long_url always
    # wins; a missing author is skipped instead of being concatenated.
    @url       = attr[:url] || self.class.default_url(@host, @author, @name)
    @short_url = attr[:short_url] ||
                 "#{@host&.prefix}#{[@author, @name].compact.join('/')}"
    @long_url  = attr[:long_url] || @url
  end

  # Format source as a Markdown link
  def to_markdown
    "[#{@short_url}](#{@long_url})"
  end

  # Define a grammar source by its upstream URL.
  #
  # url - an HTTPS, HTTP, or SSH address accepted by git-remote(1)
  #       Only domains listed in HOSTS are accepted; unrecognised
  #       hostnames or invalid URLs will raise an ArgumentError.
  #
  # Assumption: Repo URLs will never include subdomains.
  # We only check for a possible `www`, nothing else.
  #
  # Returns a GrammarSource.
  def self.by_url(url)
    special = SPECIAL_CASES[url]
    return define(special.merge(url: url)) if special

    parsed = RepoURL.parse(url)
    raise ArgumentError, "Unsupported URL: #{url}" unless parsed

    # NOTE(review): `define` needs hash-style access to the parsed
    # components; this relies on the parser's result offering #to_h - confirm.
    define(parsed.respond_to?(:to_h) ? parsed.to_h : parsed)
  end

  # Define a new GrammarSource, or reference an existing one
  #
  # attr - same properties accepted by `initialize`. The caller's Hash is
  #        not modified.
  #
  # NOTE(review): BY_URL is not defined in this file; presumably it is
  # supplied by Unique - confirm.
  def self.define(attr)
    attr = attr.dup
    attr[:host] = resolve_host(attr[:host])
    attr[:url] ||= default_url(attr[:host], attr[:author], attr[:name])
    BY_URL[attr[:url]] ||= new(attr)
  end

  # Coerce a hostname into a Host, leaving Host instances (and nil) untouched.
  def self.resolve_host(host)
    return host if host.nil? || host.is_a?(Host)
    Host.define(host)
  end

  # Build the canonical clone URL for a host/author/name triple.
  #
  # Raises ArgumentError when no host is known.
  def self.default_url(host, author, name)
    raise ArgumentError, "Cannot resolve a URL without a host" unless host
    "https://#{host}/#{author}/#{name}.git"
  end
end
|
||||
57
script/helpers/host.rb
Normal file
57
script/helpers/host.rb
Normal file
@@ -0,0 +1,57 @@
|
||||
# Hostname, which can be expressed with or without a TLD
|
||||
class Host
  attr_accessor :name, :tld

  # Overrides the value returned by `prefix` (e.g. "" for the default host)
  attr_writer :prefix

  # Registry of known hosts, keyed by the lower-cased text they were
  # defined with
  INSTANCES = {}

  # input - hostname as a String, with or without a TLD ("github.com" or
  #         "github"). Everything before the final dot becomes the name.
  def initialize(input)
    text = input.to_s
    if (parts = text.match(/\A(.+)\.([^.]+)\z/))
      @name = parts[1].downcase
      @tld  = parts[2].downcase
    else
      @name = text.downcase
      @tld  = ""
    end
  end

  # Two hosts are equal when their short names match. Anything that doesn't
  # respond to `name` is compared by its string form, which may be written
  # with or without the TLD.
  def ==(other)
    if other.respond_to?(:name)
      @name == other.name
    else
      text = other.to_s.downcase
      text == @name || text == to_s
    end
  end

  # Short-name with colon appended
  def prefix
    @prefix || "#{@name}:"
  end

  # Hostname including TLD (or just the name when no TLD is known)
  def to_s
    @tld.empty? ? @name : "#{@name}.#{@tld}"
  end

  # Pattern matching the hostname with or without its TLD, ignoring case.
  # The name `to_regexp` also lets Regexp.union accept Host objects directly.
  def to_regexp
    pattern = Regexp.escape(@name)
    pattern += "(?:\\.#{Regexp.escape(@tld)})?" unless @tld.empty?
    Regexp.new(pattern, Regexp::IGNORECASE)
  end

  # These aliases must follow the definitions above: alias_method binds
  # whatever method currently carries the name, so aliasing `to_s` any
  # earlier would capture Object#to_s instead.
  alias_method :short, :name
  alias_method :long, :to_s

  # Fetch the Host registered for a hostname, creating it if needed.
  #
  # input - a hostname String (case-insensitive, TLD optional for hosts that
  #         are already registered) or an existing Host, returned unchanged
  def self.define(input)
    return input if input.is_a?(Host)
    key = input.to_s.downcase
    INSTANCES[key] ||= INSTANCES.values.find { |host| host.name == key } || new(key)
  end

  # Whitelist of trusted hosting providers
  WHITELIST = {
    github:    define("github.com"),
    bitbucket: define("bitbucket.org"),
    gitlab:    define("gitlab.com")
  }.freeze

  # GitHub is the implied provider, so its short URLs carry no prefix
  WHITELIST[:github].prefix = ""

  # Returns an Array of the trusted Host instances.
  def self.whitelist
    WHITELIST.values
  end
end
|
||||
90
script/helpers/submodule.rb
Normal file
90
script/helpers/submodule.rb
Normal file
@@ -0,0 +1,90 @@
|
||||
require_relative "./grammar_source"
|
||||
require_relative "./all"
|
||||
|
||||
# Public: Represents a registered Git submodule in use by Linguist.
|
||||
#
|
||||
# Any updates to this class should consider submodules which aren't
|
||||
# grammar-related, such as CodeMirror. See also: GrammarSource
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# Submodule.new('vendor/CodeMirror', {url: "codemirror/CodeMirror"})
|
||||
# # => #<Submodule url="https://github.com/codemirror/CodeMirror.git">
|
||||
#
|
||||
# Submodule.for_grammar('vendor/grammars/language-roff')
|
||||
# # => #<Submodule url="https://github.com/Alhadis/language-roff.git">
|
||||
#
|
||||
class Submodule
  attr_accessor :id, :attr

  # The GrammarSource behind a grammar submodule; nil for other submodules
  attr_reader :grammar

  # id   - name of the submodule as written in .gitmodules
  # attr - Hash of the entry's settings, keyed by Symbol (:path, :url, ...).
  #        :path defaults to the id. The caller's Hash is not modified.
  def initialize(id, attr = {})
    @id = id
    @attr = attr.dup
    @attr[:path] ||= @id

    # If a grammar submodule, store a pointer to source
    if @attr[:path].to_s.start_with?("vendor/grammars") && @attr[:url]
      @grammar = GrammarSource.by_url(@attr[:url])
    end
  end

  # Submodules are ordered by id.
  def <=>(other)
    @id <=> other.id
  end

  # Is the submodule registered with Git and checked out locally?
  def registered?
    configured? && exists?
  end

  # Is the submodule registered with Git?
  #
  # Git's own output is discarded; only the command's success matters.
  def configured?
    system "git", "config", "submodule.#{@id}.url", out: File::NULL
  end

  # Has the submodule been checked out locally?
  def exists?
    File.exist?(repo_path(@id))
  end

  # Format an entry to use in `.gitmodules`
  #
  # Returns the section header followed by one tab-indented "key = value"
  # line per attribute (sorted), ending with a newline.
  def to_s
    settings = @attr.map { |key, value| "\t#{key} = #{value}" }.sort
    "[submodule \"#{@id}\"]\n#{settings.join("\n")}\n"
  end

  # Look up the registered submodule holding a grammar.
  #
  # path - path of submodule as used by .gitmodules; a bare directory name
  #        or a path already containing `grammars/` is accepted too
  #
  # Returns the Submodule, or nil if the directory exists but has no entry
  # in .gitmodules. Raises a RuntimeError if the directory does not exist.
  def self.for_grammar(path)
    name = path[%r{\A(?:.*(?:vendor/)?grammars/)?([^/]+)}i, 1]
    path = "vendor/grammars/#{name}"
    unless File.exist?(repo_path(path))
      raise "Submodule '#{path}' does not exist"
    end
    list[:by_id][path]
  end

  # Load the contents of .gitmodules
  #
  # The file is parsed once and the result cached.
  #
  # Returns a Hash with two keys:
  #   :all   - Array of every Submodule, sorted by id
  #   :by_id - Hash mapping each id to its Submodule
  def self.list
    @list ||= begin
      by_id = {}
      entry = /^\[submodule\s*"([^"]+)"\]$\n((?:^(?!\[).+(?:\n|$))+)/i
      read(".gitmodules").scan(entry) do |id, data|
        attr = {}
        # One "key = value" pair per line of the section body
        data.scan(/^\s*([^\s=]+)\s*=\s*(.+)$/) do |key, value|
          attr[key.to_sym] = value.strip
        end
        by_id[id] = new(id, attr)
      end
      { all: by_id.values.sort, by_id: by_id }
    end
  end
end
|
||||
80
script/helpers/url.rb
Normal file
80
script/helpers/url.rb
Normal file
@@ -0,0 +1,80 @@
|
||||
require_relative "./host"
|
||||
|
||||
# Public: Helper methods for resolving various URL notations
|
||||
class RepoURL
  attr_reader :host, :author, :name

  # attr - Hash of components:
  #        :host              - hostname of the provider
  #        :user / :author    - account owning the repository
  #        :repo / :name      - repository name
  #        :short_url         - optional override for `short`
  #        :long_url          - optional override for `long`
  def initialize(attr = {})
    @host   = Host.define(attr[:host])
    @author = attr[:user] || attr[:author]
    @name   = attr[:repo] || attr[:name]
    @short  = attr[:short_url]
    @long   = attr[:long_url]
  end

  # Shortened representation of URL: `[provider:]user/repo`
  def short
    @short || "#{@host.prefix}#{@author}/#{@name}"
  end

  # Full address used when linking to the repository
  def long
    @long || to_s
  end

  # Absolute HTTPS clone URL
  def to_s
    "https://#{@host}/#{@author}/#{@name}.git"
  end

  # Components as a Hash (:host, :author, :name, :url, plus :short_url and
  # :long_url when they were given explicitly)
  def to_h
    {
      host:      @host,
      author:    @author,
      name:      @name,
      url:       to_s,
      short_url: @short,
      long_url:  @long
    }.reject { |_, value| value.nil? }
  end

  # Split a URL into named subcomponents
  #
  # url - String in any of the notations handled by the match_* methods
  #
  # Returns a RepoURL, or nil if the notation or host isn't recognised.
  def self.parse(url)
    match_https(url) ||
      match_ssh(url) ||
      match_shorthand(url) ||
      match_implicit(url)
  end

  # Match a well-formed HTTP or HTTPS address
  def self.match_https(url)
    match = url.match(%r{
      \A (?<protocol>  https? ://  )?
         (?<userauth>  [^@.]+ @    )?
         (?<subdomain> www \.      )?
         (?<host>      #{hosts}    )
      /  (?<user>      [^/]+       )
      /  (?<repo>      [^/]+       )
    }xi)
    return unless match

    attr = captures(match)
    attr[:repo] = attr[:repo].sub(/\.git\z/, "")
    new(attr)
  end

  # Match an SSH address starting with `git@`
  def self.match_ssh(url)
    match = url.match(%r{
      \A git@
      (?<host> #{hosts} ) :
      (?<user> [^/]+    ) /
      (?<repo> [^/]+    ) \.git \z
    }xi)
    new(captures(match)) if match
  end

  # Match `provider:user/repo`
  def self.match_shorthand(url)
    match = url.match(%r{
      \A (?<host> #{hosts} ) : /?
         (?<user> [^/]+    ) /
         (?<repo> [^/]+    ) /? \z
    }xi)
    new(captures(match)) if match
  end

  # Match `user/repo` shorthand, assumed to be GitHub
  def self.match_implicit(url)
    match = url.match(%r{
      \A /? (?<user> [^/]+ )
         /  (?<repo> [^/]+ )
         /? \z
    }xi)
    return unless match

    attr = captures(match)
    attr[:host] = "github.com"
    new(attr)
  end

  # Pattern of trusted hostnames, built on first use from Host.whitelist
  def self.hosts
    @hosts ||= Regexp.union(Host.whitelist)
  end

  # Copy a MatchData's named groups into a Symbol-keyed Hash. MatchData is
  # read-only and raises on unknown group names, so it can't be passed to
  # `new` directly.
  def self.captures(match)
    match.names.each_with_object({}) do |group, attr|
      attr[group.to_sym] = match[group]
    end
  end

  private_class_method :hosts, :captures
end
|
||||
Reference in New Issue
Block a user