diff --git a/script/add-grammar b/script/add-grammar index 7b56403a..8352b7d9 100755 --- a/script/add-grammar +++ b/script/add-grammar @@ -1,24 +1,75 @@ #!/usr/bin/env ruby require "optparse" -require_relative "./helpers/all" +require "open3" -usage = <<-EOH -Usage: +ROOT = File.expand_path("../../", __FILE__) + + +# Break a repository URL into its separate components +def parse_url(input) + hosts = "github\.com|bitbucket\.org|gitlab\.com" + + # HTTPS/HTTP link pointing to recognised hosts + if input =~ /^(?:https?:\/\/)?(?:[^.@]+@)?(?:www\.)?(#{hosts})\/([^\/]+)\/([^\/]+)/i + { host: $1.downcase(), user: $2, repo: $3.sub(/\.git$/, "") } + # SSH + elsif input =~ /^git@(#{hosts}):([^\/]+)\/([^\/]+)\.git$/i + { host: $1.downcase(), user: $2, repo: $3 } + # provider:user/repo + elsif input =~ /^(github|bitbucket|gitlab):\/?([^\/]+)\/([^\/]+)\/?$/i + { host: $1.downcase(), user: $2, repo: $3 } + # user/repo - Common GitHub shorthand + elsif input =~ /^\/?([^\/]+)\/([^\/]+)\/?$/ + { host: "github.com", user: $1, repo: $2 } + else + raise "Unsupported URL: #{input}" + end +end + +# Isolate the vendor-name component of a submodule path +def parse_submodule(name) + name =~ /^(?:.*(?:vendor\/)?grammars\/)?([^\/]+)/i + path = "vendor/grammars/#{$1}" + unless File.exist?("#{ROOT}/" + path) + warn "Submodule '#{path}' does not exist. Aborting." + exit 1 + end + path +end + +# Print debugging feedback to STDOUT if running with --verbose +def log(msg) + puts msg if $verbose +end + +def command(*args) + log "$ #{args.join(' ')}" + output, status = Open3.capture2e(*args) + if !status.success? + output.each_line do |line| + log " > #{line}" + end + warn "Command failed. Aborting." + exit 1 + end +end + +usage = """Usage: #{$0} [-v|--verbose] [--replace grammar] url - Examples: #{$0} https://github.com/Alhadis/language-roff #{$0} --replace sublime-apl https://github.com/Alhadis/language-apl -EOH +""" -$compile = false $replace = nil +$verbose = true +$compile = false OptionParser.new do |opts| opts.banner = usage opts.on("-q", "--quiet", "Do not print output unless there's a failure") do - $quiet = true + $verbose = false end opts.on("-rSUBMODULE", "--replace=SUBMODDULE", "Replace an existing grammar submodule.") do |name| $replace = name @@ -34,27 +85,29 @@ $url = ARGV[0] # No URL? Print a usage message and bail. unless $url warn usage - exit 1 + exit 1; end # Exit early if docker isn't installed or running. -log "Checking Docker is installed and running" +log "Checking docker is installed and running" command('docker', 'ps') -repo_new = GrammarSource.by_url $url +# Ensure the given URL is an HTTPS link +parts = parse_url $url +https = "https://#{parts[:host]}/#{parts[:user]}/#{parts[:repo]}" +repo_new = "vendor/grammars/#{parts[:repo]}" +repo_old = parse_submodule($replace) if $replace Dir.chdir(ROOT) -if $replace - repo_old = GrammarSource.by_path $replace - log "Deregistering: #{repo_old.path}" - $removed = repo_old - command('git', 'submodule', 'deinit', repo_old.path) - command('git', 'rm', '-rf', repo_old.path) +if repo_old + log "Deregistering: #{repo_old}" + command('git', 'submodule', 'deinit', repo_old) + command('git', 'rm', '-rf', repo_old) command('script/grammar-compiler', 'update', '-f') if $compile end -log "Registering new submodule: #{repo_new.path}" +log "Registering new submodule: #{repo_new}" command('git', 'submodule', 'add', '-f', https, repo_new) command('script/grammar-compiler', 'add', repo_new) if $compile @@ -62,7 +115,6 @@ log "Confirming license" if repo_old command('script/licensed') else - repo_new = File.absolute_path(repo_new) command('script/licensed', '--module', repo_new) end diff --git a/script/helpers/all.rb b/script/helpers/all.rb deleted file mode 100644 index 8274f35d..00000000 --- a/script/helpers/all.rb +++ /dev/null @@ -1,45 +0,0 @@ -require_relative "./grammar_list" -require_relative "./grammar_source" -require_relative "./host" -require_relative "./submodule" -require "open3" - -$quiet = false - -# Print debugging feedback to STDOUT if $verbose global is set -def log(msg) - puts msg unless $quiet -end - -def command(*args) - log "$ #{args.join(' ')}" - output, status = Open3.capture2e(*args) - if !status.success? - output.each_line do |line| - log " > #{line}" - end - warn "Command failed. Aborting." - exit 1 - end -end - - -ROOT = File.expand_path "../../../", __FILE__ - -# Expand a file path relative to Linguist's base directory -def repo_path(path) - path = path.sub /^#{Regexp.escape ROOT}\/?/, "" - "#{ROOT}/#{path}" -end - -def exists?(path) - File.exist? repo_path(path) -end - -def read(path) - File.read repo_path(path) -end - -def write(path, data) - File.write repo_path(path), data -end diff --git a/script/helpers/grammar_list.rb b/script/helpers/grammar_list.rb deleted file mode 100644 index 5df7a743..00000000 --- a/script/helpers/grammar_list.rb +++ /dev/null @@ -1,75 +0,0 @@ -require_relative "./grammar_source" -require_relative "./submodule" -require_relative "./helpers" -require "bundler/setup" -require "linguist" -require "json" -require "yaml" - -class GrammarList - - ROOT = File.expand_path "../../../", __FILE__ - - def initialize - @submodules = Submodule.list - @language_names = load_languages() - @sources = load_sources() - end - - # Grab the name of each language, sorted case-insensitively - def load_languages - Linguist::Language.all.map(&:name).sort do |a, b| - a.downcase() <=> b.downcase() - end - end - - # Load grammars.yml - def load_sources - sources = {} - YAML.load_file("#{ROOT}/grammars.yml").each do |path, scopes| - scopes.each { |scope| sources[scope] = @submodules[path] } - end - sources - end - - # Format list as Markdown - def to_markdown - markdown = "" - @language_names.each do |item| - lang = Linguist::Language["#{item}"] - scope = lang.tm_scope - next if scope == "none" - path = @sources[scope] || scope - case path - when "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz" - short_url = "bitbucket:Clams/sublimesystemverilog" - long_url = "https://bitbucket.org/Clams/sublimesystemverilog" - when "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage" - short_url = "genshi.edgewall.org/query" - long_url = "https://genshi.edgewall.org/query" - when "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage" - short_url = "eregon/oz-tmbundle" - long_url = "https://github.com/eregon/oz-tmbundle" - else - submodule = @submodules[@sources[scope].chomp("/")] - next unless submodule - short_url = submodule[:short] - long_url = submodule[:url] - end - markdown += "- **#{item}:** [#{short_url}](#{long_url})\n" - end - markdown - end - - def update_lists - # Update .gitsubmodules - sorted = @sources.sort { |a,b| a[0] <=> b[0] }.collect{ |i| i[1] } - File.write "#{ROOT}/.gitmodules", sorted - - # Update the file displaying the reader-friendly list of grammar repos - readme = "#{ROOT}/vendor/README.md" - preamble = File.read(readme).match(/\A.+?\n/ms) - list = self.to_markdown - File.write(readme, preamble.to_s + list) - end -end diff --git a/script/helpers/grammar_source.rb b/script/helpers/grammar_source.rb deleted file mode 100644 index b0b2e132..00000000 --- a/script/helpers/grammar_source.rb +++ /dev/null @@ -1,88 +0,0 @@ -require_relative "./all" -require_relative "./host" -require_relative "./unique" - -# Represents the source of a language grammar -# -# NOTE: Sources are mostly - but not always - connected to a -# Submodule. Some ad-hoc exceptions exist which aren't -# connected with a Git repository. -# -class GrammarSource < Unique - - # RegExp for matching trusted domain hosts - HOSTS = Regexp.union(Host.whitelist) - - def initialize(attr = {}) - @name = attr[:name] || nil # Unique name of repository - @host = attr[:host] || nil # Hostname of repo's provider - @author = attr[:author] || nil # Username of repo's author - @url = attr[:url] || nil # Resolved absolute URL - - # Resolve missing properties - @url ||= "https://#{@host.long}/#{@author}/#{@name}.git" - @short_url ||= @host.prefix + @author + "/#{@name}" - @long_url ||= @url - end - - # Format source as a Markdown link - def to_markdown - "[#{self.url.short}](#{self.url.long})" - end - - # Define a grammar source by its upstream URL. - # - # url - an HTTPS, HTTP, or SSH address accepted by git-remote(1) - # Only domains listed in HOSTS are accepted; unrecognised - # hostnames or invalid URLs will raise an ArgumentError. - # - # Assumption: Repo URLs will never include subdomains. - # We only check for a possible `www`, nothing else. - def self.by_url(url) - case url - when "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz" - self.define({ - name: "sublimesystemverilog", - host: Host.define("bitbucket.org"), - author: "Clams", - url: url, - short_url: "bitbucket:Clams/sublimesystemverilog", - long_url: "https://bitbucket.org/Clams/sublimesystemverilog" - }) - when "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage" - self.define({ - name: "Genshi.tmbundle", - host: Host.define("genshi.edgewall.org"), - url: url, - short_url: "genshi.edgewall.org/query", - long_url: "https://genshi.edgewall.org/query" - }) - when "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage" - self.define({ - name: "oz-tmbundle", - host: Host.define("github.com"), - author: "eregon", - url: url, - short_url: "eregon/oz-tmbundle", - long_url: "https://github.com/eregon/oz-tmbundle" - }) - else - if parsed = URL.parse(url) - self.define(parsed) - else - raise ArgumentError, "Unsupported URL: #{url}" - end - end - end - - # Define a new GrammarSource, or reference an existing one - def self.define(attr) - unless attr[:url] - host = Host.define(attr[:host]) - author = attr[:author] - name = attr[:name] - attr[:url] = "https://#{host.long}/#{author}/#{name}.git" - end - BY_URL[attr[:url]] ||= self.new(attr) - end -end diff --git a/script/helpers/host.rb b/script/helpers/host.rb deleted file mode 100644 index 71f684ec..00000000 --- a/script/helpers/host.rb +++ /dev/null @@ -1,57 +0,0 @@ -# Hostname, which can be expressed with or without a TLD -class Host - attr_accessor :name, :tld - alias_method :short, :name - alias_method :long, :to_s - - INSTANCES = {} - - def initialize(input) - if input =~ /^(.+)\.([^.]+)$/ - @name = $1.downcase - @tld = $2.downcase - else - @name = input.downcase - @tld = "" - end - end - - def ==(other) - if other.responds_to?(name) - @name == other.name - else - @name == other_to_s - end - end - - # Short-name with colon appended - def prefix - @prefix || "#{@name}:" - end - - # Hostname including TLD - def to_s - "#{@name}.#{@tld}" - end - - def to_regexp - name = Regexp.escape @name - tld = Regexp.escape @tld - Regexp.new("#{name}(?:\\.#{tld})?") - end - - def self.define(input) - INSTANCES[input] ||= Host.new(input) - end - - # Whitelist of trusted hosting providers - :github = - :bitbucket = self.define("bitbucket.org") - :gitlab = self.define("gitlab.com") - :github.prefix = "" - WHITELIST = { - :github => self.define("github.com") - } - :github, :bitbucket, :gitlab].freeze - WHITELIST[:"github.com"].prefix = "" -end diff --git a/script/helpers/submodule.rb b/script/helpers/submodule.rb deleted file mode 100644 index 93978241..00000000 --- a/script/helpers/submodule.rb +++ /dev/null @@ -1,90 +0,0 @@ -require_relative "./grammar_source" -require_relative "./all" - -# Public: Represents a registered Git submodule in use by Linguist. -# -# Any updates to this class should consider submodules which aren't -# grammar-related, such as CodeMirror. See also: GrammarSource -# -# Examples -# -# Submodule.new('vendor/CodeMirror', {url: "codemirror/CodeMirror"}) -# # => # -# -# Submodule.for_grammar('vendor/grammars/language-roff') -# # => # -# -class Submodule - attr_accessor :id, :attr - - def initialize(id, attr = {}) - @id = id - @attr = attr - @attr[:path] ||= @id - - # If a grammar submodule, store a pointer to source - if /^vendor\/grammars/.test attr[:url] - @grammar = GrammarSource.by_url attr[:url] - end - end - - def <=>(other) - @id <=> other.id - end - - # Is the submodule registered with Git and checked out locally? - def registered? - @configured? and @exists? - end - - # Is the submodule registered with Git? - def configured? - system "git", "config", "submodule.#{@id}.url" - end - - # Has the submodule been checked out locally? - def exists? - exists?(@id) - end - - # Format an entry to use in `.gitmodules` - def to_s - attr = @attr.to_a.map do |key, value| - "\t#{key} = #{value}" - end - <<~EOS - [submodule "#{@id}"] - #{ attr.sort.join "\n" } - EOS - end - - # Define a GrammarSource for an existing registered submodule. - # - # path - path of submodule as used by .gitmodules - def self.for_grammar(path) - path =~ /^(?:.*(?:vendor\/)?grammars\/)?([^\/]+)/i - path = "vendor/grammars/#{$1}" - unless exists?(path) - raise "Submodule '#{path}' does not exist" - end - self.list.by_id[:path] - end - - # Load the contents of .gitmodules - def self.list - if @list.nil? - all = [] - ids = {} - pattern = /^\[submodule\s*"([^"]+)"\]$\n((?:^(?!\[).+(?:\n|$))+)/is - read_file(".gitmodules").scan(pattern) do |id, data| - attr = {} - data.match(/^\s*(?[^\s=]+)\s*=\s*(?.+)$/m) do |match| - attr[match[:key]] = match[:value].strip - end - all << ids[id] = self.new(id, attr) - end - @list = {all: all.sort, by_id: ids} - end - @list - end -end diff --git a/script/helpers/url.rb b/script/helpers/url.rb deleted file mode 100644 index b8a95e9f..00000000 --- a/script/helpers/url.rb +++ /dev/null @@ -1,80 +0,0 @@ -require_relative "./host" - -# Public: Helper methods for resolving various URL notations -class RepoURL - - def initialize(attr = {}) - @host = Host.define(attr[:host]) - @author = attr[:user] || attr[:author] - @name = attr[:repo] || attr[:name] - @short = attr[:short_url] - @long = attr[:long_url] - end - - # Shortened representation of URL: `[provider:]user/repo` - def short - @short || "#{@host.prefix}#{@author}/#{@name}" - end - - def to_s - "https://#{@host.}" - end - - # Split a URL into named subcomponents - def self.parse(url) - self.match_https(url) || - self.match_ssh(url) || - self.match_shorthand(url) || - self.match_implicit(url) - end - - # Match a well-formed HTTP or HTTPS address - def self.match_https(url) - if match = url.match(/ - ^ (? https? :\/\/ )? - (? [^@.]+ @ )? - (? www \. )? - (? #{HOSTS} ) - \/ (? [^\/]+ ) - \/ (? [^\/]+ ) - /xi) - match[:repo].sub! /\.git$/, "" - self.new(match) - end - end - - # Match an SSH address starting with `git@` - def self.match_ssh(url) - if match = url.match(/ - ^ git@ - (? #{HOSTS}) : - (? [^\/]+) \/ - (? [^\/]+) \.git $ - /xi) - self.new(match) - end - end - - # Match `provider:user/repo` - def self.match_shorthand(url) - if match = url.match(/ - ^ (? #{HOSTS}) : \/? - (? [^\/]+) \/ - (? [^\/]+) \/? $ - /xi) - self.new(match) - end - end - - # Match `user/repo` shorthand, assumed to be GitHub - def self.match_implicit(url) - if match = url.match(/ - ^ \/? (?[^\/]+) - \/ (?[^\/]+) - \/? $ - /xi) - match[:host] = "github.com" - self.new(match) - end - end -end diff --git a/script/list-grammars b/script/list-grammars index 208c4d53..1e82d78d 100755 --- a/script/list-grammars +++ b/script/list-grammars @@ -1,6 +1,102 @@ #!/usr/bin/env ruby -require_relative "./helpers/grammar_list" +require "bundler/setup" +require "linguist" +require "json" +require "yaml" + +class GrammarList + + ROOT = File.expand_path "../../", __FILE__ + + def initialize + @submodules = load_submodules() + @sources = load_sources() + @language_names = load_languages() + end + + # Load .gitmodules + def load_submodules + submodules = {} + submodule_file = File.read("#{ROOT}/.gitmodules") + pattern = /^\[submodule\s*"([^"]+)"\]$\n((?:^(?!\[).+(?:\n|$))+)/is + submodule_file.scan(pattern) do |id, attr| + submod = {} + submod[:path] = $1 if attr =~ /^\s*path\s*=\s*(.+)$/ + submod[:url] = $1 if attr =~ /^\s*url\s*=\s*(.+)$/ + submod[:url].gsub!(/\.git$/, "") + submod[:short] = shorten(submod[:url]) + submodules["#{id}"] = submod + end + submodules + end + + # Grab the name of each language, sorted case-insensitively + def load_languages + Linguist::Language.all.map(&:name).sort do |a, b| + a.downcase() <=> b.downcase() + end + end + + # Load grammars.yml + def load_sources + sources = {} + grammars = YAML.load_file("#{ROOT}/grammars.yml") + grammars.each do |path, scopes| + scopes.each { |scope| sources[scope] = path } + end + sources + end + + # Shorten a repository URL + def shorten(url) + if url =~ /^https?:\/\/(?:www\.)?github\.com\/([^\/]+\/[^\/]+)/i + $1 + elsif url =~ /^https?:\/\/(?:www\.)?(bitbucket|gitlab)\.(?:com|org)\/([^\/]+\/[^\/]+)/i + "#{$1.downcase()}:#{$2}" + else + url.replace(/^https?:\/\/(?:www\.)?/i, "") + end + end + + # Markdown: Generate grammar list + def to_markdown + markdown = "" + @language_names.each do |item| + lang = Linguist::Language["#{item}"] + scope = lang.tm_scope + next if scope == "none" + path = @sources[scope] || scope + case path + when "https://bitbucket.org/Clams/sublimesystemverilog/get/default.tar.gz" + short_url = "bitbucket:Clams/sublimesystemverilog" + long_url = "https://bitbucket.org/Clams/sublimesystemverilog" + when "https://svn.edgewall.org/repos/genshi/contrib/textmate/Genshi.tmbundle/Syntaxes/Markup%20Template%20%28XML%29.tmLanguage" + short_url = "genshi.edgewall.org/query" + long_url = "https://genshi.edgewall.org/query" + when "vendor/grammars/oz-tmbundle/Syntaxes/Oz.tmLanguage" + short_url = "eregon/oz-tmbundle" + long_url = "https://github.com/eregon/oz-tmbundle" + else + submodule = @submodules[@sources[scope].chomp("/")] + next unless submodule + short_url = submodule[:short] + long_url = submodule[:url] + end + markdown += "- **#{item}:** [#{short_url}](#{long_url})\n" + end + + markdown + end + + # Update the file displaying the reader-friendly list of grammar repos + def update_readme + readme = "#{ROOT}/vendor/README.md" + preamble = File.read(readme).match(/\A.+?\n/ms) + list = self.to_markdown + File.write(readme, preamble.to_s + list) + end +end list = GrammarList.new if ARGV.include? "--print" diff --git a/script/sort-submodules b/script/sort-submodules index b972fad0..a6dccc49 100755 --- a/script/sort-submodules +++ b/script/sort-submodules @@ -1,7 +1,21 @@ #!/usr/bin/env ruby require "optparse" -require_relative "./helpers/submodule" + +ROOT = File.expand_path "../../", __FILE__ + + +# Extract and sort a list of submodules +def sort_entries(file_data) + submodules = [] + file_data.scan(/(^\[submodule[^\n]+\n)((?:\t[^\n]+\n)+)/).each do |head, body| + path = body.match(/^\tpath\s*=\s*\K(.+)$/)[0] + submodules << [path, head + body] + end + submodules.sort! { |a,b| a[0] <=> b[0] } + submodules.collect { |i| i[1] } +end + usage = <<-EOH Usage: @@ -14,7 +28,6 @@ Examples: EOH $testing = false - OptionParser.new do |opts| opts.banner = usage opts.on("-h", "--help") do @@ -26,11 +39,12 @@ OptionParser.new do |opts| end end.parse! -unsorted = read ".gitmodules" -sorted = Submodule.list.join + +unsorted = File.read("#{ROOT}/.gitmodules") +sorted = sort_entries(unsorted).join if $testing exit unsorted == sorted else - write ".gitmodules", sorted + File.write "#{ROOT}/.gitmodules", sorted end