From f382abc2f30a9cd53a6ba59b2fe47b27363c46dd Mon Sep 17 00:00:00 2001 From: Alhadis Date: Wed, 7 Sep 2016 03:14:49 +1000 Subject: [PATCH 1/6] Add logic to consume and parse options --- script/add-grammar | 85 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100755 script/add-grammar diff --git a/script/add-grammar b/script/add-grammar new file mode 100755 index 00000000..11bb1889 --- /dev/null +++ b/script/add-grammar @@ -0,0 +1,85 @@ +#!/usr/bin/env ruby + +require "optparse" + +ROOT = File.expand_path("../../", __FILE__) + + +# Break a repository URL into its separate components +def parse_url(input) + hosts = "github\.com|bitbucket\.org|gitlab\.com" + + # HTTPS/HTTP link pointing to recognised hosts + if input =~ /^(?:https?:\/\/)?(?:[^.@]+@)?(?:www\.)?(#{hosts})\/([^\/]+)\/([^\/]+)/i + { host: $1.downcase(), user: $2, repo: $3.sub(/\.git$/, "") } + # SSH + elsif input =~ /^git@(#{hosts}):([^\/]+)\/([^\/]+)\.git$/i + { host: $1.downcase(), user: $2, repo: $3 } + # provider:user/repo + elsif input =~ /^(github|bitbucket|gitlab):\/?([^\/]+)\/([^\/]+)\/?$/i + { host: $1.downcase(), user: $2, repo: $3 } + # user/repo - Common GitHub shorthand + elsif input =~ /^\/?([^\/]+)\/([^\/]+)\/?$/ + { host: "github.com", user: $1, repo: $2 } + else + raise "Unsupported URL: #{input}" + end +end + +# Isolate the vendor-name component of a submodule path +def parse_submodule(name) + name =~ /^(?:.*(?:vendor\/)?grammars\/)?([^\/]+)/i + path = "vendor/grammars/#{$1}" + unless File.exist?("#{ROOT}/" + path) + warn "Submodule '#{path}' does not exist. Aborting." 
+ exit 1 + end + path +end + +usage = """Usage: + #{$0} [--replace grammar] url +Examples: + #{$0} https://github.com/Alhadis/language-roff + #{$0} --replace sublime-apl https://github.com/Alhadis/language-apl +""" + + +$replace = nil +$verbose = false + +OptionParser.new do |opts| + opts.banner = usage + opts.on("-v", "--verbose", "Print verbose feedback to STDOUT") do + $verbose = true + end + opts.on("-rSUBMODULE", "--replace=SUBMODDULE", "Replace an existing grammar submodule.") do |name| + $replace = name + end +end.parse! + + +$url = ARGV[0] + +# No URL? Print a usage message and bail. +unless $url + warn usage + exit 1; +end + + +# Ensure the given URL is an HTTPS link +parts = parse_url $url +https = "https://#{parts[:host]}/#{parts[:user]}/#{parts[:repo]}" +path = "vendor/grammars/#{parts[:repo]}" +repl = parse_submodule($replace) if $replace + +if $verbose + puts "Adding grammar" + puts "\tFrom: #{https}" + puts "\tInto: #{path}" + puts "\tReplacing: #{repl}" if repl + puts "\nRegistering submodule..." 
+end + +#`git submodule add #{https} #{path}` From 4584963dd209e510b74f5d0f86924b0805c3d3f9 Mon Sep 17 00:00:00 2001 From: Alhadis Date: Wed, 7 Sep 2016 04:25:00 +1000 Subject: [PATCH 2/6] Add logic to update submodules and licenses --- script/add-grammar | 34 +++++++++++++++++++++------------- script/licensed | 11 ++++++++++- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/script/add-grammar b/script/add-grammar index 11bb1889..584b9cdf 100755 --- a/script/add-grammar +++ b/script/add-grammar @@ -37,6 +37,13 @@ def parse_submodule(name) path end +# Print debugging feedback to STDOUT if running with --verbose +def log(msg) + puts msg if $verbose +end + + + usage = """Usage: #{$0} [--replace grammar] url Examples: @@ -44,7 +51,6 @@ Examples: #{$0} --replace sublime-apl https://github.com/Alhadis/language-apl """ - $replace = nil $verbose = false @@ -67,19 +73,21 @@ unless $url exit 1; end - # Ensure the given URL is an HTTPS link -parts = parse_url $url -https = "https://#{parts[:host]}/#{parts[:user]}/#{parts[:repo]}" -path = "vendor/grammars/#{parts[:repo]}" -repl = parse_submodule($replace) if $replace +parts = parse_url $url +https = "https://#{parts[:host]}/#{parts[:user]}/#{parts[:repo]}" +repo_new = "vendor/grammars/#{parts[:repo]}" +repo_old = parse_submodule($replace) if $replace -if $verbose - puts "Adding grammar" - puts "\tFrom: #{https}" - puts "\tInto: #{path}" - puts "\tReplacing: #{repl}" if repl - puts "\nRegistering submodule..." 
+if repo_old + log "Deregistering: #{repo_old}" + `git submodule deinit #{repo_old}` + `git rm -rf #{repo_old}` end -#`git submodule add #{https} #{path}` +log "Registering new submodule: #{repo_new}" +`git submodule add -f #{https} #{repo_new}` +`script/convert-grammars --add #{repo_new}` + +log "Confirming license" +`script/licensed --module "#{repo_new}"` diff --git a/script/licensed b/script/licensed index ea3f538f..68214d34 100755 --- a/script/licensed +++ b/script/licensed @@ -4,6 +4,7 @@ require "bundler/setup" require "licensed/cli" +require "optparse" module Licensed module Source @@ -32,7 +33,14 @@ module Licensed end end -source = Licensed::Source::Filesystem.new("vendor/grammars/*/", type: "grammar") +module_path = nil +OptionParser.new do |opts| + opts.on("-mPATH", "--module=PATH", "Cache license file for specific grammar") do |p| + module_path = p + end +end.parse! + +source = Licensed::Source::Filesystem.new(module_path || "vendor/grammars/*/", type: "grammar") config = Licensed::Configuration.new config.sources << source @@ -43,4 +51,5 @@ else end command.run +`git checkout -- vendor/licenses/grammar/` if module_path exit command.success? 
From 68c45be47da1205df5a52840265cb58c92e35bef Mon Sep 17 00:00:00 2001 From: Alhadis Date: Wed, 7 Sep 2016 04:37:04 +1000 Subject: [PATCH 3/6] Flatten whitespace --- script/add-grammar | 75 +++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/script/add-grammar b/script/add-grammar index 584b9cdf..779dd62b 100755 --- a/script/add-grammar +++ b/script/add-grammar @@ -7,43 +7,42 @@ ROOT = File.expand_path("../../", __FILE__) # Break a repository URL into its separate components def parse_url(input) - hosts = "github\.com|bitbucket\.org|gitlab\.com" - - # HTTPS/HTTP link pointing to recognised hosts - if input =~ /^(?:https?:\/\/)?(?:[^.@]+@)?(?:www\.)?(#{hosts})\/([^\/]+)\/([^\/]+)/i - { host: $1.downcase(), user: $2, repo: $3.sub(/\.git$/, "") } - # SSH - elsif input =~ /^git@(#{hosts}):([^\/]+)\/([^\/]+)\.git$/i - { host: $1.downcase(), user: $2, repo: $3 } - # provider:user/repo - elsif input =~ /^(github|bitbucket|gitlab):\/?([^\/]+)\/([^\/]+)\/?$/i - { host: $1.downcase(), user: $2, repo: $3 } - # user/repo - Common GitHub shorthand - elsif input =~ /^\/?([^\/]+)\/([^\/]+)\/?$/ - { host: "github.com", user: $1, repo: $2 } - else - raise "Unsupported URL: #{input}" - end + hosts = "github\.com|bitbucket\.org|gitlab\.com" + + # HTTPS/HTTP link pointing to recognised hosts + if input =~ /^(?:https?:\/\/)?(?:[^.@]+@)?(?:www\.)?(#{hosts})\/([^\/]+)\/([^\/]+)/i + { host: $1.downcase(), user: $2, repo: $3.sub(/\.git$/, "") } + # SSH + elsif input =~ /^git@(#{hosts}):([^\/]+)\/([^\/]+)\.git$/i + { host: $1.downcase(), user: $2, repo: $3 } + # provider:user/repo + elsif input =~ /^(github|bitbucket|gitlab):\/?([^\/]+)\/([^\/]+)\/?$/i + { host: $1.downcase(), user: $2, repo: $3 } + # user/repo - Common GitHub shorthand + elsif input =~ /^\/?([^\/]+)\/([^\/]+)\/?$/ + { host: "github.com", user: $1, repo: $2 } + else + raise "Unsupported URL: #{input}" + end end # Isolate the vendor-name component of a submodule path 
def parse_submodule(name) - name =~ /^(?:.*(?:vendor\/)?grammars\/)?([^\/]+)/i - path = "vendor/grammars/#{$1}" - unless File.exist?("#{ROOT}/" + path) - warn "Submodule '#{path}' does not exist. Aborting." - exit 1 - end - path + name =~ /^(?:.*(?:vendor\/)?grammars\/)?([^\/]+)/i + path = "vendor/grammars/#{$1}" + unless File.exist?("#{ROOT}/" + path) + warn "Submodule '#{path}' does not exist. Aborting." + exit 1 + end + path end # Print debugging feedback to STDOUT if running with --verbose def log(msg) - puts msg if $verbose + puts msg if $verbose end - usage = """Usage: #{$0} [--replace grammar] url Examples: @@ -55,13 +54,13 @@ $replace = nil $verbose = false OptionParser.new do |opts| - opts.banner = usage - opts.on("-v", "--verbose", "Print verbose feedback to STDOUT") do - $verbose = true - end - opts.on("-rSUBMODULE", "--replace=SUBMODDULE", "Replace an existing grammar submodule.") do |name| - $replace = name - end + opts.banner = usage + opts.on("-v", "--verbose", "Print verbose feedback to STDOUT") do + $verbose = true + end + opts.on("-rSUBMODULE", "--replace=SUBMODDULE", "Replace an existing grammar submodule.") do |name| + $replace = name + end end.parse! @@ -69,8 +68,8 @@ $url = ARGV[0] # No URL? Print a usage message and bail. 
unless $url - warn usage - exit 1; + warn usage + exit 1; end # Ensure the given URL is an HTTPS link @@ -80,9 +79,9 @@ repo_new = "vendor/grammars/#{parts[:repo]}" repo_old = parse_submodule($replace) if $replace if repo_old - log "Deregistering: #{repo_old}" - `git submodule deinit #{repo_old}` - `git rm -rf #{repo_old}` + log "Deregistering: #{repo_old}" + `git submodule deinit #{repo_old}` + `git rm -rf #{repo_old}` end log "Registering new submodule: #{repo_new}" From be316c29434b62c79aa989174986948ad02394fb Mon Sep 17 00:00:00 2001 From: Alhadis Date: Wed, 7 Sep 2016 05:36:00 +1000 Subject: [PATCH 4/6] Update contributor notes to mention new script --- CONTRIBUTING.md | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ec00ae14..c3059d98 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,7 +17,7 @@ To add support for a new extension: In addition, if this extension is already listed in [`languages.yml`][languages] then sometimes a few more steps will need to be taken: 0. Make sure that example `.yourextension` files are present in the [samples directory][samples] for each language that uses `.yourextension`. -0. Test the performance of the Bayesian classifier with a relatively large number (1000s) of sample `.yourextension` files. (ping @arfon or @bkeepers to help with this) to ensure we're not misclassifying files. +0. Test the performance of the Bayesian classifier with a relatively large number (1000s) of sample `.yourextension` files. (ping **@arfon** or **@bkeepers** to help with this) to ensure we're not misclassifying files. 0. If the Bayesian classifier does a bad job with the sample `.yourextension` files then a [heuristic](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.rb) may need to be written to help. @@ -28,17 +28,14 @@ We try only to add languages once they have some usage on GitHub. In most cases To add support for a new language: 0. 
Add an entry for your language to [`languages.yml`][languages]. -0. Add a grammar for your language. Please only add grammars that have [one of these licenses](https://github.com/github/linguist/blob/257425141d4e2a5232786bf0b13c901ada075f93/vendor/licenses/config.yml#L2-L11). - 0. Add your grammar as a submodule: `git submodule add https://github.com/JaneSmith/MyGrammar vendor/grammars/MyGrammar`. - 0. Add your grammar to [`grammars.yml`][grammars] by running `script/convert-grammars --add vendor/grammars/MyGrammar`. - 0. Download the license for the grammar: `script/licensed`. Be careful to only commit the file for the new grammar, as this script may update licenses for other grammars as well. +0. Add a grammar for your language: `script/add-grammar https://github.com/JaneSmith/MyGrammar`. Please only add grammars that have [one of these licenses][licenses]. 0. Add samples for your language to the [samples directory][samples] in the correct subdirectory. 0. Open a pull request, linking to a [GitHub search result](https://github.com/search?utf8=%E2%9C%93&q=extension%3Aboot+NOT+nothack&type=Code&ref=searchresults) showing in-the-wild usage. In addition, if your new language defines an extension that's already listed in [`languages.yml`][languages] (such as `.foo`) then sometimes a few more steps will need to be taken: 0. Make sure that example `.foo` files are present in the [samples directory][samples] for each language that uses `.foo`. -0. Test the performance of the Bayesian classifier with a relatively large number (1000s) of sample `.foo` files. (ping @arfon or @bkeepers to help with this) to ensure we're not misclassifying files. +0. Test the performance of the Bayesian classifier with a relatively large number (1000s) of sample `.foo` files. (ping **@arfon** or **@bkeepers** to help with this) to ensure we're not misclassifying files. 0. 
If the Bayesian classifier does a bad job with the sample `.foo` files then a [heuristic](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.rb) may need to be written to help. Remember, the goal here is to try and avoid false positives! @@ -81,9 +78,9 @@ Here's our current build status: [![Build Status](https://api.travis-ci.org/gith Linguist is maintained with :heart: by: -- @arfon (GitHub Staff) -- @larsbrinkhoff -- @pchaigno +- **@arfon** (GitHub Staff) +- **@larsbrinkhoff** +- **@pchaigno** As Linguist is a production dependency for GitHub we have a couple of workflow restrictions: @@ -112,5 +109,6 @@ If you are the current maintainer of this gem: [grammars]: /grammars.yml [languages]: /lib/linguist/languages.yml +[licenses]: https://github.com/github/linguist/blob/257425141d4e2a5232786bf0b13c901ada075f93/vendor/licenses/config.yml#L2-L11 [samples]: /samples [new-issue]: https://github.com/github/linguist/issues/new From e6c849d92cb538183b2db6541004c16ad2617e28 Mon Sep 17 00:00:00 2001 From: Alhadis Date: Mon, 12 Sep 2016 02:08:52 +1000 Subject: [PATCH 5/6] Document --verbose option in usage message --- script/add-grammar | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/add-grammar b/script/add-grammar index 779dd62b..e72fe89b 100755 --- a/script/add-grammar +++ b/script/add-grammar @@ -44,7 +44,7 @@ end usage = """Usage: - #{$0} [--replace grammar] url + #{$0} [-v|--verbose] [--replace grammar] url Examples: #{$0} https://github.com/Alhadis/language-roff #{$0} --replace sublime-apl https://github.com/Alhadis/language-apl From b61fe90d120d24b2ec138729fc860703373f16ff Mon Sep 17 00:00:00 2001 From: Alhadis Date: Mon, 12 Sep 2016 02:17:10 +1000 Subject: [PATCH 6/6] Terminate script if submodule registration failed --- script/add-grammar | 1 + 1 file changed, 1 insertion(+) diff --git a/script/add-grammar b/script/add-grammar index e72fe89b..25c12176 100755 --- a/script/add-grammar +++ b/script/add-grammar @@ -86,6 
+86,7 @@ end log "Registering new submodule: #{repo_new}" `git submodule add -f #{https} #{repo_new}` +exit 1 if $?.exitstatus > 0 `script/convert-grammars --add #{repo_new}` log "Confirming license"