Files
linguist/test/test_grammars.rb
Paul Chaignon 524337d07b Use Licensee hashes to uniquely identify licenses
Since v6.1.0, Licensee exposes the hash of the license.
We can use it to uniquely identify unrecognized licenses.
Thus, tests will fail if the content of an unrecognized license changes.

Projects for which no license was found are kept in the whitelist
2016-09-27 10:44:25 +02:00

205 lines
8.3 KiB
Ruby

require_relative "./helper"
class TestGrammars < Minitest::Test
# Parent of the directory that contains this file
ROOT = File.expand_path("../..", __FILE__)

# Submodules that are allowed to ship without a license
PROJECT_WHITELIST = %w[
  vendor/grammars/language-csharp
  vendor/grammars/sublimeassembly
].freeze

# License hashes that are accepted as-is; the trailing comment on each
# entry names the project the hash belongs to
HASH_WHITELIST = [
  "ebae2d87e06d3acef075d049fcfc8958c0364863", # go-tmbundle
  "ff21db2554d69d78b2220db5615b16bbba0788d3", # factor
  "b9a7428fd036eed8503995e06e989180c276b17d"  # jflex.tmbundle
].freeze

# License names that submodules are allowed to use
LICENSE_WHITELIST = [
  "apache-2.0",
  "bsd-2-clause",
  "bsd-3-clause",
  "isc",
  "mit",
  "mpl-2.0",
  "public",
  "textmate",
  "unlicense",
  "wtfpl",
  "zlib"
].freeze
# Parses grammars.yml from the repository root into @grammars.
def setup
  grammars_file = File.join(ROOT, "grammars.yml")
  @grammars = YAML.load(File.read(grammars_file))
end
# Fails when the same scope is listed more than once anywhere in grammars.yml.
def test_no_duplicate_scopes
  occurrences = Hash.new(0)
  @grammars.values.flatten.each { |scope| occurrences[scope] += 1 }
  repeated = occurrences.select { |_scope, count| count > 1 }.keys
  assert repeated.empty?, "The following scopes appear in grammars.yml more than once:\n#{repeated.sort.join("\n")}"
end
# Fails when the vendor/grammars entries of grammars.yml and the submodule
# paths reported by submodule_paths differ in either direction.
def test_submodules_are_in_sync
  # Reduce every listed source to its submodule path, dropping any path inside it.
  listed = @grammars.keys.grep(/vendor\/grammars/).map { |source| source[%r{vendor/grammars/[^/]+}] }
  existing = submodule_paths
  missing = listed - existing
  unlisted = existing - listed

  problems = []
  unless missing.empty?
    problems << [
      "The following submodules are listed in grammars.yml but don't seem to exist in the repository.\n",
      "Either add them using `git submodule add` or remove them from grammars.yml.\n",
      missing.sort.join("\n")
    ].join
  end
  unless unlisted.empty?
    problems << [
      "The following submodules exist in the repository but aren't listed in grammars.yml.\n",
      "Either add them to grammars.yml or remove them from the repository using `git rm`.\n",
      unlisted.sort.join("\n")
    ].join
  end

  assert missing.empty? && unlisted.empty?, problems.join("\n")
end
# Runs script/convert-grammars and checks that the scopes it reports for each
# local grammar source match the scopes listed in grammars.yml.
def test_local_scopes_are_in_sync
  script = File.join(ROOT, "script", "convert-grammars")
  output = `"#{script}" --output - --no-install --no-remote`
  # Check the exit status before parsing: a failed run may leave partial
  # output that is not valid YAML, which would hide the real failure.
  assert $?.success?, "script/convert-grammars failed"
  # Empty output parses to a non-hash; treat it as "no scopes found" so the
  # comparison below reports a mismatch instead of raising NoMethodError.
  actual = YAML.load(output) || {}

  # We're not checking remote grammars. That can take a long time and make CI
  # flaky if network conditions are poor.
  local_grammars = @grammars.reject { |source, _scopes| source.start_with?("http:", "https:") }

  local_grammars.each do |source, scopes|
    assert_equal scopes, actual[source], "The scopes listed for #{source} in grammars.yml don't match the scopes found in that repository"
  end
end
# Fails for submodules that have no resolved license even though Licensee
# finds a license file for them, unless the submodule is whitelisted.
def test_submodules_have_recognized_licenses
  unrecognized = submodule_licenses.select do |path, license|
    license.nil? && Licensee::FSProject.new(path).license_file
  end
  unrecognized = unrecognized.reject { |path, _license| PROJECT_WHITELIST.include?(path) }

  message = [
    "The following submodules have unrecognized licenses:\n* #{unrecognized.keys.join("\n* ")}\n",
    "Please ensure that the project's LICENSE file contains the full text of the license."
  ].join
  assert_equal({}, unrecognized, message)
end
# Fails for every submodule whose license is nil, unless it is whitelisted.
def test_submodules_have_licenses
  unlicensed = submodule_licenses.select do |path, license|
    license.nil? && !PROJECT_WHITELIST.include?(path)
  end

  message = [
    "The following submodules don't have licenses:\n* #{unlicensed.keys.join("\n* ")}\n",
    "Please ensure that the project has a LICENSE file, and that the LICENSE file contains the full text of the license."
  ].join
  assert_equal({}, unlicensed, message)
end
# Fails for every submodule whose license is neither an approved name nor an
# approved hash, unless the submodule itself is whitelisted.
def test_submodules_have_approved_licenses
  approved = LICENSE_WHITELIST + HASH_WHITELIST
  unapproved = submodule_licenses.each_with_object([]) do |(path, license), entries|
    next if approved.include?(license) || PROJECT_WHITELIST.include?(path)
    entries << "#{path}: #{license}"
  end

  message = [
    "The following submodules have unapproved licenses:\n* #{unapproved.join("\n* ")}\n",
    "The license must be added to the LICENSE_WHITELIST in /test/test_grammars.rb once approved."
  ].join
  assert_equal [], unapproved, message
end
# Currently skipped. The checks below flag whitelist entries that are not
# among the unlicensed submodules: entries whose directory does not exist,
# and entries that do have a license.
def test_submodules_whitelist_has_no_extra_entries
  skip("Need to work out how to handle dual-licensed entities")

  unlicensed_paths = submodule_licenses.select { |_path, license| license.nil? }.keys
  extras = PROJECT_WHITELIST - unlicensed_paths
  missing = extras.reject { |path| Dir.exist?(path) }
  with_license = extras.select { |path| submodule_licenses[path] }

  missing_msg = "The following whitelisted submodules don't appear to be part of the project:\n* #{missing.join("\n* ")}"
  assert_equal [], missing, missing_msg

  licensed_msg = "The following whitelisted submodules actually have licenses and don't need to be whitelisted:\n* #{with_license.join("\n* ")}"
  assert_equal [], with_license, licensed_msg
end
# Fails when .gitmodules contains a submodule URL of the form git@...
# (an SSH link) instead of an HTTPS link.
def test_submodules_use_https_links
  # Resolve .gitmodules against ROOT, like submodule_paths does, so the test
  # does not depend on the directory it is run from.
  gitmodules = File.join(ROOT, ".gitmodules")
  ssh_submodules = File.readlines(gitmodules).map { |line| line[/url = (git@.*)/, 1] }.compact

  msg = "The following submodules don't have an HTTPS link:\n* #{ssh_submodules.join("\n* ")}"
  assert_equal [], ssh_submodules, msg
end
private
# Returns the submodule paths that `git config` lists for .gitmodules,
# leaving out any path that mentions CodeMirror. The result is memoized.
def submodule_paths
  @submodule_paths ||= begin
    gitmodules = File.join(ROOT, ".gitmodules")
    entries = `git config --list --file "#{gitmodules}"`.lines
    paths = entries.grep(/\.path=/).map { |entry| entry.chomp.split("=", 2).last }
    paths.reject { |path| path =~ /CodeMirror/ }
  end
end
# Returns a hash of submodule_path => license, covering every path from
# submodule_paths. The hash is built once and cached in a class variable.
def submodule_licenses
  @@submodule_licenses ||= submodule_paths.each_with_object({}) do |submodule, licenses|
    licenses[submodule] = submodule_license(submodule)
  end
end
# Given the path to a submodule, return the key of its license.
# Detection is tried in this order: Licensee, classify_license on the license
# file, classify_license on a readme in the submodule directory. When none of
# them recognizes the license, the hash of the license file (or else of the
# readme) is returned to identify it; nil when neither file is available.
def submodule_license(submodule)
  project = Licensee::FSProject.new(submodule, detect_readme: true)

  # Prefer Licensee's own detection
  detected = project.license
  return detected.key if detected

  # A license file exists but Licensee could not match it:
  # fall back to our more permissive regex method
  license_file = project.license_file
  if license_file
    guess = classify_license(File.expand_path(license_file.path, submodule))
    return guess if guess
  end

  # Still nothing: look for a readme and classify that instead
  readme = Dir[File.join(ROOT, submodule, "*")].find { |file| File.basename(file) =~ /\Areadme\b/i }
  guess = readme && classify_license(readme)
  return guess if guess

  # No method recognized the license, so identify it by its hash
  fallback = license_file || project.readme
  fallback.hash if fallback
end
# Guesses the license described by the file at +path+ from marker phrases in
# its content.
#
# Returns one of "apache-2.0", "gpl-2.0", "gpl-3.0", "cc-by-sa-3.0",
# "textmate", "mit", "public", "wtfpl" or "zlib". Returns nil when the content
# never mentions the word "license"/"licence" or when no marker matches.
def classify_license(path)
  # Read raw bytes: every marker below is plain ASCII, and a binary string can
  # be matched even when the file is not valid UTF-8 (for example a Latin-1
  # copyright sign), where a text-mode read would make the regexes raise.
  content = File.binread(path)
  return unless content =~ /\blicen[cs]e\b/i

  if content.include?("Apache License") && content.include?("2.0")
    "apache-2.0"
  elsif content.include?("GNU") && content =~ /general/i && content =~ /public/i
    # A GPL text that names neither version yields nil
    if content =~ /version 2/i
      "gpl-2.0"
    elsif content =~ /version 3/i
      "gpl-3.0"
    end
  elsif content.include?("GPL") && content.include?("http://www.gnu.org/licenses/gpl.html")
    "gpl-3.0"
  elsif content.include?("Creative Commons Attribution-Share Alike 3.0")
    "cc-by-sa-3.0"
  elsif content.include?("tidy-license.txt") || content.include?("If not otherwise specified (see below)") || content.include?("Permission to copy, use, modify, sell and distribute this")
    "textmate"
  elsif content.include?("Permission is hereby granted") || content =~ /\bMIT\b/
    "mit"
  elsif content.include?("This package is provided as-is and is placed in the Public Domain")
    "public"
  elsif content.include?("http://www.wtfpl.net/txt/copying/")
    "wtfpl"
  elsif content.include?("zlib") && content.include?("license") && content.include?("2. Altered source versions must be plainly marked as such")
    "zlib"
  end
end
end