From 6669270e84148d756bed2542f47f867c3e1d2041 Mon Sep 17 00:00:00 2001 From: Alhadis Date: Fri, 13 Apr 2018 22:54:18 +1000 Subject: [PATCH] Commit in-progress work to request sanity check --- script/add-grammar | 146 ++++--------------------------- script/helpers/all.rb | 45 ++++++++++ script/helpers/grammar_list.rb | 75 ++++++++++++++++ script/helpers/grammar_source.rb | 88 +++++++++++++++++++ script/helpers/host.rb | 57 ++++++++++++ script/helpers/submodule.rb | 90 +++++++++++++++++++ script/helpers/url.rb | 80 +++++++++++++++++ script/list-grammars | 98 +-------------------- script/sort-submodules | 24 ++--- 9 files changed, 458 insertions(+), 245 deletions(-) create mode 100644 script/helpers/all.rb create mode 100644 script/helpers/grammar_list.rb create mode 100644 script/helpers/grammar_source.rb create mode 100644 script/helpers/host.rb create mode 100644 script/helpers/submodule.rb create mode 100644 script/helpers/url.rb diff --git a/script/add-grammar b/script/add-grammar index 280625a8..7b56403a 100755 --- a/script/add-grammar +++ b/script/add-grammar @@ -1,134 +1,24 @@ #!/usr/bin/env ruby require "optparse" -require "open3" +require_relative "./helpers/all" -class GrammarRepo - - # Whitelist of trusted hosting providers - HOSTS = Regexp.union %w[github.com bitbucket.org gitlab.com] - - # Public: Define a repository source by upstream URL. - # - # url - an HTTPS, HTTP, or SSH address accepted by git-remote(1) - # Only domains listed in HOSTS are accepted; unrecognised - # hostnames or invalid URLs will raise an ArgumentError. - # - # Assumption: Repo URLs will never include subdomains. - # We only check for a possible `www`, nothing else. - # - # module_path - path of submodule as registered in `.gitmodules` - # Omit this unless grammar is being replaced. - def initialize(url, module_path = nil) - if https? url - @host = $1.downcase - @user = $2 - @repo = $3.sub /\.git$/, "" - elsif ssh?(url) || shorthand?(url) - @host = $1.downcase - @user = $2 - @repo = $3 - elsif implicit_shorthand? url - @host = "github.com" - @user = $1 - @repo = $2 - else - raise ArgumentError, "Unsupported URL: #{url}" - end - end - - # Match a well-formed HTTP or HTTPS address - def https?(url) - nil unless url =~ / - ^ (? https? ://)? - (? [^@.]+ @ )? - (? www \. )? - (? #{HOSTS} ) - \/ (? [^\/]+ ) - \/ (? [^\/]+ ) /xi - end - - # Match an SSH address starting with `git@` - def ssh?(url) - nil unless url =~ / - ^ git@ - (? #{HOSTS}) : - (? [^\/]+) \/ - (? [^\/]+) \.git $/xi - end - - # Match `provider:user/repo` - def shorthand?(url) - nil unless url =~ / - ^ (? #{HOSTS}) : \/? - (? [^\/]+) \/ - (? [^\/]+) \/? $ /xi - end - - # Match `user/repo` shorthand, assumed to be GitHub - def implicit_shorthand?(url) - nil unless url =~ / - ^ \/? (?[^\/]+) - \/ (?[^\/]+) - \/? $/xi - end -end - - -class GrammarGuardian - - ROOT = File.expand_path("../../", __FILE__) - - def initialize - # Track each change so we can roll back after a failed command - @changes = Hash.new - end - - # Print debugging feedback to STDOUT if running with --verbose - def log(msg) - puts msg if $verbose - end - - def command(*args) - log "$ #{args.join(' ')}" - output, status = Open3.capture2e(*args) - if !status.success? - output.each_line do |line| - log " > #{line}" - end - warn "Command failed. Aborting." - exit 1 - end - end -end - -# Isolate the vendor-name component of a submodule path -def parse_submodule(name) - name =~ /^(?:.*(?:vendor\/)?grammars\/)?([^\/]+)/i - path = "vendor/grammars/#{$1}" - unless File.exist?("#{ROOT}/" + path) - warn "Submodule '#{path}' does not exist. Aborting." - exit 1 - end - path -end - - -usage = """Usage: +usage = <<-EOH +Usage: #{$0} [-v|--verbose] [--replace grammar] url + Examples: #{$0} https://github.com/Alhadis/language-roff #{$0} --replace sublime-apl https://github.com/Alhadis/language-apl -""" +EOH -$replace = nil -$verbose = true $compile = false +$replace = nil OptionParser.new do |opts| opts.banner = usage opts.on("-q", "--quiet", "Do not print output unless there's a failure") do - $verbose = false + $quiet = true end opts.on("-rSUBMODULE", "--replace=SUBMODDULE", "Replace an existing grammar submodule.") do |name| $replace = name @@ -144,29 +34,27 @@ $url = ARGV[0] # No URL? Print a usage message and bail. unless $url warn usage - exit 1; + exit 1 end # Exit early if docker isn't installed or running. -log "Checking docker is installed and running" +log "Checking Docker is installed and running" command('docker', 'ps') -# Ensure the given URL is an HTTPS link -parts = parse_url $url -https = "https://#{parts[:host]}/#{parts[:user]}/#{parts[:repo]}" -repo_new = "vendor/grammars/#{parts[:repo]}" -repo_old = parse_submodule($replace) if $replace +repo_new = GrammarSource.by_url $url Dir.chdir(ROOT) -if repo_old - log "Deregistering: #{repo_old}" - command('git', 'submodule', 'deinit', repo_old) - command('git', 'rm', '-rf', repo_old) +if $replace + repo_old = GrammarSource.by_path $replace + log "Deregistering: #{repo_old.path}" + $removed = repo_old + command('git', 'submodule', 'deinit', repo_old.path) + command('git', 'rm', '-rf', repo_old.path) command('script/grammar-compiler', 'update', '-f') if $compile end -log "Registering new submodule: #{repo_new}" +log "Registering new submodule: #{repo_new.path}" command('git', 'submodule', 'add', '-f', https, repo_new) command('script/grammar-compiler', 'add', repo_new) if $compile diff --git a/script/helpers/all.rb b/script/helpers/all.rb new file mode 100644 index 00000000..8274f35d --- /dev/null +++ b/script/helpers/all.rb @@ -0,0 +1,45 @@ +require_relative "./grammar_list" +require_relative "./grammar_source" +require_relative "./host" +require_relative "./submodule" +require "open3" + +$quiet = false + +# Print debugging feedback to STDOUT if $verbose global is set +def log(msg) + puts msg unless $quiet +end + +def command(*args) + log "$ #{args.join(' ')}" + output, status = Open3.capture2e(*args) + if !status.success? + output.each_line do |line| + log " > #{line}" + end + warn "Command failed. Aborting." + exit 1 + end +end + + +ROOT = File.expand_path "../../../", __FILE__ + +# Expand a file path relative to Linguist's base directory +def repo_path(path) + path = path.sub /^#{Regexp.escape ROOT}\/?/, "" + "#{ROOT}/#{path}" +end + +def exists?(path) + File.exist? repo_path(path) +end + +def read(path) + File.read repo_path(path) +end + +def write(path, data) + File.write repo_path(path), data +end diff --git a/script/helpers/grammar_list.rb b/script/helpers/grammar_list.rb new file mode 100644 index 00000000..5df7a743 --- /dev/null +++ b/script/helpers/grammar_list.rb @@ -0,0 +1,75 @@ +require_relative "./grammar_source" +require_relative "./submodule" +require_relative "./helpers" +require "bundler/setup" +require "linguist" +require "json" +require "yaml" + +class GrammarList + + ROOT = File.expand_path "../../../", __FILE__ + + def initialize + @submodules = Submodule.list + @language_names = load_languages() + @sources = load_sources() + end + + # Grab the name of each language, sorted case-insensitively + def load_languages + Linguist::Language.all.map(&:name).sort do |a, b| + a.downcase() <=> b.downcase() + end + end + + # Load grammars.yml + def load_sources + sources = {} + YAML.load_file("#{ROOT}/grammars.yml").each do |path, scopes| + scopes.each { |scope| sources[scope] = @submodules[path] } + end + sources + end + + # Format list as Markdown + def to_markdown + markdown = "" + @language_names.each do |item| + lang = Linguist::Language["#{item}"] + scope = lang.tm_scope + next if scope == "none" + path = @sources[scope] || scope + case path + when "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz" + short_url = "bitbucket:Clams/sublimesystemverilog" + long_url = "https://bitbucket.org/Clams/sublimesystemverilog" + when "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage" + short_url = "genshi.edgewall.org/query" + long_url = "https://genshi.edgewall.org/query" + when "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage" + short_url = "eregon/oz-tmbundle" + long_url = "https://github.com/eregon/oz-tmbundle" + else + submodule = @submodules[@sources[scope].chomp("/")] + next unless submodule + short_url = submodule[:short] + long_url = submodule[:url] + end + markdown += "- **#{item}:** [#{short_url}](#{long_url})\n" + end + markdown + end + + def update_lists + # Update .gitsubmodules + sorted = @sources.sort { |a,b| a[0] <=> b[0] }.collect{ |i| i[1] } + File.write "#{ROOT}/.gitmodules", sorted + + # Update the file displaying the reader-friendly list of grammar repos + readme = "#{ROOT}/vendor/README.md" + preamble = File.read(readme).match(/\A.+?\n/ms) + list = self.to_markdown + File.write(readme, preamble.to_s + list) + end +end diff --git a/script/helpers/grammar_source.rb b/script/helpers/grammar_source.rb new file mode 100644 index 00000000..b0b2e132 --- /dev/null +++ b/script/helpers/grammar_source.rb @@ -0,0 +1,88 @@ +require_relative "./all" +require_relative "./host" +require_relative "./unique" + +# Represents the source of a language grammar +# +# NOTE: Sources are mostly - but not always - connected to a +# Submodule. Some ad-hoc exceptions exist which aren't +# connected with a Git repository. +# +class GrammarSource < Unique + + # RegExp for matching trusted domain hosts + HOSTS = Regexp.union(Host.whitelist) + + def initialize(attr = {}) + @name = attr[:name] || nil # Unique name of repository + @host = attr[:host] || nil # Hostname of repo's provider + @author = attr[:author] || nil # Username of repo's author + @url = attr[:url] || nil # Resolved absolute URL + + # Resolve missing properties + @url ||= "https://#{@host.long}/#{@author}/#{@name}.git" + @short_url ||= @host.prefix + @author + "/#{@name}" + @long_url ||= @url + end + + # Format source as a Markdown link + def to_markdown + "[#{self.url.short}](#{self.url.long})" + end + + # Define a grammar source by its upstream URL. + # + # url - an HTTPS, HTTP, or SSH address accepted by git-remote(1) + # Only domains listed in HOSTS are accepted; unrecognised + # hostnames or invalid URLs will raise an ArgumentError. + # + # Assumption: Repo URLs will never include subdomains. + # We only check for a possible `www`, nothing else. + def self.by_url(url) + case url + when "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz" + self.define({ + name: "sublimesystemverilog", + host: Host.define("bitbucket.org"), + author: "Clams", + url: url, + short_url: "bitbucket:Clams/sublimesystemverilog", + long_url: "https://bitbucket.org/Clams/sublimesystemverilog" + }) + when "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage" + self.define({ + name: "Genshi.tmbundle", + host: Host.define("genshi.edgewall.org"), + url: url, + short_url: "genshi.edgewall.org/query", + long_url: "https://genshi.edgewall.org/query" + }) + when "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage" + self.define({ + name: "oz-tmbundle", + host: Host.define("github.com"), + author: "eregon", + url: url, + short_url: "eregon/oz-tmbundle", + long_url: "https://github.com/eregon/oz-tmbundle" + }) + else + if parsed = URL.parse(url) + self.define(parsed) + else + raise ArgumentError, "Unsupported URL: #{url}" + end + end + end + + # Define a new GrammarSource, or reference an existing one + def self.define(attr) + unless attr[:url] + host = Host.define(attr[:host]) + author = attr[:author] + name = attr[:name] + attr[:url] = "https://#{host.long}/#{author}/#{name}.git" + end + BY_URL[attr[:url]] ||= self.new(attr) + end +end diff --git a/script/helpers/host.rb b/script/helpers/host.rb new file mode 100644 index 00000000..71f684ec --- /dev/null +++ b/script/helpers/host.rb @@ -0,0 +1,57 @@ +# Hostname, which can be expressed with or without a TLD +class Host + attr_accessor :name, :tld + alias_method :short, :name + alias_method :long, :to_s + + INSTANCES = {} + + def initialize(input) + if input =~ /^(.+)\.([^.]+)$/ + @name = $1.downcase + @tld = $2.downcase + else + @name = input.downcase + @tld = "" + end + end + + def ==(other) + if other.responds_to?(name) + @name == other.name + else + @name == other_to_s + end + end + + # Short-name with colon appended + def prefix + @prefix || "#{@name}:" + end + + # Hostname including TLD + def to_s + "#{@name}.#{@tld}" + end + + def to_regexp + name = Regexp.escape @name + tld = Regexp.escape @tld + Regexp.new("#{name}(?:\\.#{tld})?") + end + + def self.define(input) + INSTANCES[input] ||= Host.new(input) + end + + # Whitelist of trusted hosting providers + :github = + :bitbucket = self.define("bitbucket.org") + :gitlab = self.define("gitlab.com") + :github.prefix = "" + WHITELIST = { + :github => self.define("github.com") + } + :github, :bitbucket, :gitlab].freeze + WHITELIST[:"github.com"].prefix = "" +end diff --git a/script/helpers/submodule.rb b/script/helpers/submodule.rb new file mode 100644 index 00000000..93978241 --- /dev/null +++ b/script/helpers/submodule.rb @@ -0,0 +1,90 @@ +require_relative "./grammar_source" +require_relative "./all" + +# Public: Represents a registered Git submodule in use by Linguist. +# +# Any updates to this class should consider submodules which aren't +# grammar-related, such as CodeMirror. See also: GrammarSource +# +# Examples +# +# Submodule.new('vendor/CodeMirror', {url: "codemirror/CodeMirror"}) +# # => # +# +# Submodule.for_grammar('vendor/grammars/language-roff') +# # => # +# +class Submodule + attr_accessor :id, :attr + + def initialize(id, attr = {}) + @id = id + @attr = attr + @attr[:path] ||= @id + + # If a grammar submodule, store a pointer to source + if /^vendor\/grammars/.test attr[:url] + @grammar = GrammarSource.by_url attr[:url] + end + end + + def <=>(other) + @id <=> other.id + end + + # Is the submodule registered with Git and checked out locally? + def registered? + @configured? and @exists? + end + + # Is the submodule registered with Git? + def configured? + system "git", "config", "submodule.#{@id}.url" + end + + # Has the submodule been checked out locally? + def exists? + exists?(@id) + end + + # Format an entry to use in `.gitmodules` + def to_s + attr = @attr.to_a.map do |key, value| + "\t#{key} = #{value}" + end + <<~EOS + [submodule "#{@id}"] + #{ attr.sort.join "\n" } + EOS + end + + # Define a GrammarSource for an existing registered submodule. + # + # path - path of submodule as used by .gitmodules + def self.for_grammar(path) + path =~ /^(?:.*(?:vendor\/)?grammars\/)?([^\/]+)/i + path = "vendor/grammars/#{$1}" + unless exists?(path) + raise "Submodule '#{path}' does not exist" + end + self.list.by_id[:path] + end + + # Load the contents of .gitmodules + def self.list + if @list.nil? + all = [] + ids = {} + pattern = /^\[submodule\s*"([^"]+)"\]$\n((?:^(?!\[).+(?:\n|$))+)/is + read_file(".gitmodules").scan(pattern) do |id, data| + attr = {} + data.match(/^\s*(?[^\s=]+)\s*=\s*(?.+)$/m) do |match| + attr[match[:key]] = match[:value].strip + end + all << ids[id] = self.new(id, attr) + end + @list = {all: all.sort, by_id: ids} + end + @list + end +end diff --git a/script/helpers/url.rb b/script/helpers/url.rb new file mode 100644 index 00000000..b8a95e9f --- /dev/null +++ b/script/helpers/url.rb @@ -0,0 +1,80 @@ +require_relative "./host" + +# Public: Helper methods for resolving various URL notations +class RepoURL + + def initialize(attr = {}) + @host = Host.define(attr[:host]) + @author = attr[:user] || attr[:author] + @name = attr[:repo] || attr[:name] + @short = attr[:short_url] + @long = attr[:long_url] + end + + # Shortened representation of URL: `[provider:]user/repo` + def short + @short || "#{@host.prefix}#{@author}/#{@name}" + end + + def to_s + "https://#{@host.}" + end + + # Split a URL into named subcomponents + def self.parse(url) + self.match_https(url) || + self.match_ssh(url) || + self.match_shorthand(url) || + self.match_implicit(url) + end + + # Match a well-formed HTTP or HTTPS address + def self.match_https(url) + if match = url.match(/ + ^ (? https? :\/\/ )? + (? [^@.]+ @ )? + (? www \. )? + (? #{HOSTS} ) + \/ (? [^\/]+ ) + \/ (? [^\/]+ ) + /xi) + match[:repo].sub! /\.git$/, "" + self.new(match) + end + end + + # Match an SSH address starting with `git@` + def self.match_ssh(url) + if match = url.match(/ + ^ git@ + (? #{HOSTS}) : + (? [^\/]+) \/ + (? [^\/]+) \.git $ + /xi) + self.new(match) + end + end + + # Match `provider:user/repo` + def self.match_shorthand(url) + if match = url.match(/ + ^ (? #{HOSTS}) : \/? + (? [^\/]+) \/ + (? [^\/]+) \/? $ + /xi) + self.new(match) + end + end + + # Match `user/repo` shorthand, assumed to be GitHub + def self.match_implicit(url) + if match = url.match(/ + ^ \/? (?[^\/]+) + \/ (?[^\/]+) + \/? $ + /xi) + match[:host] = "github.com" + self.new(match) + end + end +end diff --git a/script/list-grammars b/script/list-grammars index 1e82d78d..208c4d53 100755 --- a/script/list-grammars +++ b/script/list-grammars @@ -1,102 +1,6 @@ #!/usr/bin/env ruby -require "bundler/setup" -require "linguist" -require "json" -require "yaml" - -class GrammarList - - ROOT = File.expand_path "../../", __FILE__ - - def initialize - @submodules = load_submodules() - @sources = load_sources() - @language_names = load_languages() - end - - # Load .gitmodules - def load_submodules - submodules = {} - submodule_file = File.read("#{ROOT}/.gitmodules") - pattern = /^\[submodule\s*"([^"]+)"\]$\n((?:^(?!\[).+(?:\n|$))+)/is - submodule_file.scan(pattern) do |id, attr| - submod = {} - submod[:path] = $1 if attr =~ /^\s*path\s*=\s*(.+)$/ - submod[:url] = $1 if attr =~ /^\s*url\s*=\s*(.+)$/ - submod[:url].gsub!(/\.git$/, "") - submod[:short] = shorten(submod[:url]) - submodules["#{id}"] = submod - end - submodules - end - - # Grab the name of each language, sorted case-insensitively - def load_languages - Linguist::Language.all.map(&:name).sort do |a, b| - a.downcase() <=> b.downcase() - end - end - - # Load grammars.yml - def load_sources - sources = {} - grammars = YAML.load_file("#{ROOT}/grammars.yml") - grammars.each do |path, scopes| - scopes.each { |scope| sources[scope] = path } - end - sources - end - - # Shorten a repository URL - def shorten(url) - if url =~ /^https?:\/\/(?:www\.)?github\.com\/([^\/]+\/[^\/]+)/i - $1 - elsif url =~ /^https?:\/\/(?:www\.)?(bitbucket|gitlab)\.(?:com|org)\/([^\/]+\/[^\/]+)/i - "#{$1.downcase()}:#{$2}" - else - url.replace(/^https?:\/\/(?:www\.)?/i, "") - end - end - - # Markdown: Generate grammar list - def to_markdown - markdown = "" - @language_names.each do |item| - lang = Linguist::Language["#{item}"] - scope = lang.tm_scope - next if scope == "none" - path = @sources[scope] || scope - case path - when "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz" - short_url = "bitbucket:Clams/sublimesystemverilog" - long_url = "https://bitbucket.org/Clams/sublimesystemverilog" - when "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage" - short_url = "genshi.edgewall.org/query" - long_url = "https://genshi.edgewall.org/query" - when "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage" - short_url = "eregon/oz-tmbundle" - long_url = "https://github.com/eregon/oz-tmbundle" - else - submodule = @submodules[@sources[scope].chomp("/")] - next unless submodule - short_url = submodule[:short] - long_url = submodule[:url] - end - markdown += "- **#{item}:** [#{short_url}](#{long_url})\n" - end - - markdown - end - - # Update the file displaying the reader-friendly list of grammar repos - def update_readme - readme = "#{ROOT}/vendor/README.md" - preamble = File.read(readme).match(/\A.+?\n/ms) - list = self.to_markdown - File.write(readme, preamble.to_s + list) - end -end +require_relative "./helpers/grammar_list" list = GrammarList.new if ARGV.include? "--print" diff --git a/script/sort-submodules b/script/sort-submodules index a6dccc49..b972fad0 100755 --- a/script/sort-submodules +++ b/script/sort-submodules @@ -1,21 +1,7 @@ #!/usr/bin/env ruby require "optparse" - -ROOT = File.expand_path "../../", __FILE__ - - -# Extract and sort a list of submodules -def sort_entries(file_data) - submodules = [] - file_data.scan(/(^\[submodule[^\n]+\n)((?:\t[^\n]+\n)+)/).each do |head, body| - path = body.match(/^\tpath\s*=\s*\K(.+)$/)[0] - submodules << [path, head + body] - end - submodules.sort! { |a,b| a[0] <=> b[0] } - submodules.collect { |i| i[1] } -end - +require_relative "./helpers/submodule" usage = <<-EOH Usage: @@ -28,6 +14,7 @@ Examples: EOH $testing = false + OptionParser.new do |opts| opts.banner = usage opts.on("-h", "--help") do @@ -39,12 +26,11 @@ OptionParser.new do |opts| end end.parse! - -unsorted = File.read("#{ROOT}/.gitmodules") -sorted = sort_entries(unsorted).join +unsorted = read ".gitmodules" +sorted = Submodule.list.join if $testing exit unsorted == sorted else - File.write "#{ROOT}/.gitmodules", sorted + write ".gitmodules", sorted end