From fd9ce2d1cf1e738058be7f39505431a55a4ed9e6 Mon Sep 17 00:00:00 2001 From: Ben Balter Date: Wed, 19 Aug 2015 12:54:21 -0400 Subject: [PATCH 1/5] use licensee to classify submodule licenses --- github-linguist.gemspec | 2 + test/helper.rb | 1 + test/test_grammars.rb | 126 ++++++++++++++++++++++++++-------------- 3 files changed, 84 insertions(+), 45 deletions(-) diff --git a/github-linguist.gemspec b/github-linguist.gemspec index ce2303bf..548812a3 100644 --- a/github-linguist.gemspec +++ b/github-linguist.gemspec @@ -24,4 +24,6 @@ Gem::Specification.new do |s| s.add_development_dependency 'rake' s.add_development_dependency 'yajl-ruby' s.add_development_dependency 'color-proximity', '~> 0.2.1' + s.add_development_dependency 'licensee', '~> 4.7.3' + end diff --git a/test/helper.rb b/test/helper.rb index ab3cc8fa..80060c29 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -3,6 +3,7 @@ require "minitest/autorun" require "mocha/setup" require "linguist" require 'color-proximity' +require 'licensee' def fixtures_path File.expand_path("../fixtures", __FILE__) diff --git a/test/test_grammars.rb b/test/test_grammars.rb index 88624ae8..a510585c 100644 --- a/test/test_grammars.rb +++ b/test/test_grammars.rb @@ -3,12 +3,13 @@ require_relative "./helper" class TestGrammars < Minitest::Test ROOT = File.expand_path("../..", __FILE__) - LICENSE_WHITELIST = [ + PROJECT_WHITELIST = [ # This grammar's MIT license is inside a subdirectory. "vendor/grammars/SublimePapyrus", # This grammar has a nonstandard but acceptable license. "vendor/grammars/gap-tmbundle", + "vendor/grammars/factor", # These grammars have no license but have been grandfathered in. New grammars # must have a license that allows redistribution. 
@@ -16,6 +17,22 @@ class TestGrammars < Minitest::Test "vendor/grammars/x86-assembly-textmate-bundle" ].freeze + # List of allowed SPDX license names + LICENSE_WHITELIST = %w[ + apache-2.0 + bsd-2-clause + bsd-3-clause + cc-by-sa-3.0 + gpl-2.0 + gpl-3.0 + lgpl-3.0 + mit + textmate + unlicense + wtfpl + zlib + ].freeze + def setup @grammars = YAML.load(File.read(File.join(ROOT, "grammars.yml"))) end @@ -63,47 +80,35 @@ class TestGrammars < Minitest::Test end def test_submodules_have_licenses - categories = submodule_paths.group_by do |submodule| - files = Dir[File.join(ROOT, submodule, "*")] - license = files.find { |path| File.basename(path) =~ /\b(un)?licen[cs]e\b/i } || files.find { |path| File.basename(path) =~ /\bcopying\b/i } - if license.nil? - if readme = files.find { |path| File.basename(path) =~ /\Areadme\b/i } - license = readme if File.read(readme) =~ /\blicen[cs]e\b/i - end - end - if license.nil? - :unlicensed - elsif classify_license(license) - :licensed + submodule_licenses.each do |submodule, license| + next if PROJECT_WHITELIST.include?(submodule) + + license_file = Licensee::Project.new(submodule).license_file + submodule_name = submodule.split("/").last + + if license_file + assert license, "Submodule #{submodule_name} contains an unrecognized license. Please update #{__FILE__} to recognize the license" else - :unrecognized + assert license, "Submodule #{submodule_name} is unlicensed. All grammars must have a license that permits redistribution" end + + assert LICENSE_WHITELIST.include?(license), "Submodule #{submodule_name} is licensed under the #{license} license" end - unlicensed = categories[:unlicensed] || [] - unrecognized = categories[:unrecognized] || [] - disallowed_unlicensed = unlicensed - LICENSE_WHITELIST - disallowed_unrecognized = unrecognized - LICENSE_WHITELIST - extra_whitelist_entries = LICENSE_WHITELIST - (unlicensed | unrecognized) + unlicensed = submodule_licenses.select { |k,v| v.nil? 
} + unrecognized = submodule_licenses.select { |k,v| v.nil? && Licensee::Project.new(k).license_file} + licensed = submodule_licenses.reject { |k,v| v.nil? } + unapproved = licensed.reject { |k,v| LICENSE_WHITELIST.include?(v) } + extra_whitelist_entries = PROJECT_WHITELIST - unlicensed.keys - unrecognized.keys + assert unapproved.empty? - message = "" - if disallowed_unlicensed.any? - message << "The following grammar submodules don't seem to have a license. All grammars must have a license that permits redistribution.\n" - message << disallowed_unlicensed.sort.join("\n") - end - if disallowed_unrecognized.any? - message << "\n\n" unless message.empty? - message << "The following grammar submodules have an unrecognized license. Please update #{__FILE__} to recognize the license.\n" - message << disallowed_unrecognized.sort.join("\n") - end - if extra_whitelist_entries.any? - message << "\n\n" unless message.empty? - message << "The following grammar submodules are listed in LICENSE_WHITELIST but either have a license (yay!)\n" - message << "or have been removed from the repository. Please remove them from the whitelist.\n" - message << extra_whitelist_entries.sort.join("\n") - end + extra_whitelist_entries.each do |submodule| + license = submodule_licenses[submodule] + submodule_name = submodule.split("/").last - assert disallowed_unlicensed.empty? && disallowed_unrecognized.empty? 
&& extra_whitelist_entries.empty?, message + assert_equal nil, license, "Submodule #{submodule_name} is listed in PROJECT_WHITELIST but has a license" + assert Dir.exists?(submodule), "Submodule #{submodule_name} is listed in PROJECT_WHITELIST but doesn't appear to be part of the project" + end end private @@ -112,30 +117,61 @@ class TestGrammars < Minitest::Test @submodule_paths ||= `git config --list --file "#{File.join(ROOT, ".gitmodules")}"`.lines.grep(/\.path=/).map { |line| line.chomp.split("=", 2).last } end + # Returns a hash of submodules in the form of submodule_path => license + def submodule_licenses + @submodule_licenses = begin + submodules = {} + submodule_paths.each { |submodule| submodules[submodule] = submodule_license(submodule) } + submodules + end + end + + # Given the path to a submodule, return its SPDX-compliant license key + def submodule_license(submodule) + # Prefer Licensee to detect a submodule's license + project = Licensee::Project.new(submodule) + return project.license.key if project.license + + # We know a license file exists, but Licensee wasn't able to detect the license. + # Let's try our own more permissive regex method + if project.license_file + path = File.expand_path project.license_file.path, submodule + license = classify_license(path) + return license if license + end + + # Neither Licensee nor our own regex was able to detect the license, lets check the readme + files = Dir[File.join(ROOT, submodule, "*")] + if readme = files.find { |path| File.basename(path) =~ /\Areadme\b/i } + classify_license(readme) + end + end + def classify_license(path) content = File.read(path) + return unless content =~ /\blicen[cs]e\b/i if content.include?("Apache License") && content.include?("2.0") - "Apache 2.0" + "apache-2.0" elsif content.include?("GNU") && content =~ /general/i && content =~ /public/i if content =~ /version 2/i - "GPLv2" + "gpl-2.0" elsif content =~ /version 3/i - "GPLv3" + "gpl-3.0" end elsif content.include?("GPL") && 
content.include?("http://www.gnu.org/licenses/gpl.html") - "GPLv3" - elsif content.include?("Creative Commons") - "CC" + "gpl-3.0" + elsif content.include?("Creative Commons Attribution-Share Alike 3.0") + "cc-by-sa-3.0" elsif content.include?("tidy-license.txt") || content.include?("If not otherwise specified (see below)") "textmate" elsif content =~ /^\s*[*-]\s+Redistribution/ || content.include?("Redistributions of source code") - "BSD" + "bsd" elsif content.include?("Permission is hereby granted") || content =~ /\bMIT\b/ - "MIT" + "mit" elsif content.include?("unlicense.org") "unlicense" elsif content.include?("http://www.wtfpl.net/txt/copying/") - "WTFPL" + "wtfpl" elsif content.include?("zlib") && content.include?("license") && content.include?("2. Altered source versions must be plainly marked as such") "zlib" end From 8a7ceaa84530cc9522a4b6d597522a95574fced0 Mon Sep 17 00:00:00 2001 From: Ben Balter Date: Wed, 19 Aug 2015 13:22:31 -0400 Subject: [PATCH 2/5] bump licensee to support ruby 1.9.3 --- github-linguist.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github-linguist.gemspec b/github-linguist.gemspec index 548812a3..87bbc8bf 100644 --- a/github-linguist.gemspec +++ b/github-linguist.gemspec @@ -24,6 +24,6 @@ Gem::Specification.new do |s| s.add_development_dependency 'rake' s.add_development_dependency 'yajl-ruby' s.add_development_dependency 'color-proximity', '~> 0.2.1' - s.add_development_dependency 'licensee', '~> 4.7.3' + s.add_development_dependency 'licensee', '~> 4.7.4' end From fbb3ab22920e2ffb305aa3e1806e8348b950e76b Mon Sep 17 00:00:00 2001 From: Ben Balter Date: Thu, 20 Aug 2015 11:38:31 -0400 Subject: [PATCH 3/5] batch license test output --- test/test_grammars.rb | 52 ++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/test/test_grammars.rb b/test/test_grammars.rb index a510585c..12b22bd6 100644 --- a/test/test_grammars.rb +++ b/test/test_grammars.rb @@ 
-79,36 +79,32 @@ class TestGrammars < Minitest::Test end end + def test_submodules_have_recognized_licenses + unrecognized = submodule_licenses.select { |k,v| v.nil? && Licensee::Project.new(k).license_file } + unrecognized.reject! { |k,v| PROJECT_WHITELIST.include?(k) } + assert_equal Hash.new, unrecognized, "The following submodules have unrecognized licenses:\n* #{unrecognized.keys.join("\n* ")}" + end + def test_submodules_have_licenses - submodule_licenses.each do |submodule, license| - next if PROJECT_WHITELIST.include?(submodule) + unlicensed = submodule_licenses.select { |k,v| v.nil? }.reject { |k,v| PROJECT_WHITELIST.include?(k) } + assert_equal Hash.new, unlicensed, "The following submodules don't have licenses:\n* #{unlicensed.keys.join("\n* ")}" + end - license_file = Licensee::Project.new(submodule).license_file - submodule_name = submodule.split("/").last + def test_submodules_have_approved_licenses + unapproved = submodule_licenses.reject { |k,v| LICENSE_WHITELIST.include?(v) || PROJECT_WHITELIST.include?(k) }.map { |k,v| "#{k}: #{v}"} + assert_equal [], unapproved, "The following submodules have unapproved licenses:\n* #{unapproved.join("\n* ")}" + end - if license_file - assert license, "Submodule #{submodule_name} contains an unrecognized license. Please update #{__FILE__} to recognize the license" - else - assert license, "Submodule #{submodule_name} is unlicensed. All grammars must have a license that permits redistribution" - end + def test_submodules_whitelist_has_no_extra_entries + extra_whitelist_entries = PROJECT_WHITELIST - submodule_licenses.select { |k,v| v.nil? 
}.keys + not_present = extra_whitelist_entries.reject { |k,v| Dir.exists?(k) } + licensed = extra_whitelist_entries.select { |k,v| submodule_licenses[k] } - assert LICENSE_WHITELIST.include?(license), "Submodule #{submodule_name} is licensed under the #{license} license" - end + msg = "The following whitelisted submodules don't appear to be part of the project:\n* #{not_present.join("\n* ")}" + assert_equal [], not_present, msg - unlicensed = submodule_licenses.select { |k,v| v.nil? } - unrecognized = submodule_licenses.select { |k,v| v.nil? && Licensee::Project.new(k).license_file} - licensed = submodule_licenses.reject { |k,v| v.nil? } - unapproved = licensed.reject { |k,v| LICENSE_WHITELIST.include?(v) } - extra_whitelist_entries = PROJECT_WHITELIST - unlicensed.keys - unrecognized.keys - assert unapproved.empty? - - extra_whitelist_entries.each do |submodule| - license = submodule_licenses[submodule] - submodule_name = submodule.split("/").last - - assert_equal nil, license, "Submodule #{submodule_name} is listed in PROJECT_WHITELIST but has a license" - assert Dir.exists?(submodule), "Submodule #{submodule_name} is listed in PROJECT_WHITELIST but doesn't appear to be part of the project" - end + msg = "The following whitelisted submodules actually have licenses and don't need to be whitelisted:\n* #{licensed.join("\n* ")}" + assert_equal [], licensed, msg end private @@ -119,7 +115,7 @@ class TestGrammars < Minitest::Test # Returns a hash of submodules in the form of submodule_path => license def submodule_licenses - @submodule_licenses = begin + @@submodule_licenses ||= begin submodules = {} submodule_paths.each { |submodule| submodules[submodule] = submodule_license(submodule) } submodules @@ -164,12 +160,8 @@ class TestGrammars < Minitest::Test "cc-by-sa-3.0" elsif content.include?("tidy-license.txt") || content.include?("If not otherwise specified (see below)") "textmate" - elsif content =~ /^\s*[*-]\s+Redistribution/ || content.include?("Redistributions 
of source code") - "bsd" elsif content.include?("Permission is hereby granted") || content =~ /\bMIT\b/ "mit" - elsif content.include?("unlicense.org") - "unlicense" elsif content.include?("http://www.wtfpl.net/txt/copying/") "wtfpl" elsif content.include?("zlib") && content.include?("license") && content.include?("2. Altered source versions must be plainly marked as such") From 62a0faa729593936cfd80b3996284f12da71dbf5 Mon Sep 17 00:00:00 2001 From: Ben Balter Date: Thu, 20 Aug 2015 12:23:52 -0400 Subject: [PATCH 4/5] let us --- test/test_grammars.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_grammars.rb b/test/test_grammars.rb index 12b22bd6..6aab2935 100644 --- a/test/test_grammars.rb +++ b/test/test_grammars.rb @@ -136,7 +136,7 @@ class TestGrammars < Minitest::Test return license if license end - # Neither Licensee nor our own regex was able to detect the license, lets check the readme + # Neither Licensee nor our own regex was able to detect the license, let's check the readme files = Dir[File.join(ROOT, submodule, "*")] if readme = files.find { |path| File.basename(path) =~ /\Areadme\b/i } classify_license(readme) From cb550a36623b0516262701b9cb7c4e2c1e5756aa Mon Sep 17 00:00:00 2001 From: Ben Balter Date: Thu, 20 Aug 2015 15:30:51 -0400 Subject: [PATCH 5/5] remove some random submodules --- vendor/grammars/prolog.tmbundle | 1 - vendor/grammars/sublime-better-typescript | 1 - 2 files changed, 2 deletions(-) delete mode 160000 vendor/grammars/prolog.tmbundle delete mode 160000 vendor/grammars/sublime-better-typescript diff --git a/vendor/grammars/prolog.tmbundle b/vendor/grammars/prolog.tmbundle deleted file mode 160000 index d955aca3..00000000 --- a/vendor/grammars/prolog.tmbundle +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d955aca38b7aadce76e6815eb09487a370206742 diff --git a/vendor/grammars/sublime-better-typescript b/vendor/grammars/sublime-better-typescript deleted file mode 160000 index 8266c06a..00000000 --- 
a/vendor/grammars/sublime-better-typescript +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8266c06aabe5e473766bd9bb62dfeb106fe296a2